xref: /linux/net/ipv4/tcp_ipv4.c (revision 0f85feae6b710ced3abad5b2b47d31dfcb956b62)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *		IPv4 specific functions
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *		code split from:
121da177e4SLinus Torvalds  *		linux/ipv4/tcp.c
131da177e4SLinus Torvalds  *		linux/ipv4/tcp_input.c
141da177e4SLinus Torvalds  *		linux/ipv4/tcp_output.c
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *		See tcp.c for author information
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
191da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
201da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
211da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
221da177e4SLinus Torvalds  */
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds /*
251da177e4SLinus Torvalds  * Changes:
261da177e4SLinus Torvalds  *		David S. Miller	:	New socket lookup architecture.
271da177e4SLinus Torvalds  *					This code is dedicated to John Dyson.
281da177e4SLinus Torvalds  *		David S. Miller :	Change semantics of established hash,
291da177e4SLinus Torvalds  *					half is devoted to TIME_WAIT sockets
301da177e4SLinus Torvalds  *					and the rest go in the other half.
311da177e4SLinus Torvalds  *		Andi Kleen :		Add support for syncookies and fixed
321da177e4SLinus Torvalds  *					some bugs: ip options weren't passed to
331da177e4SLinus Torvalds  *					the TCP layer, missed a check for an
341da177e4SLinus Torvalds  *					ACK bit.
351da177e4SLinus Torvalds  *		Andi Kleen :		Implemented fast path mtu discovery.
361da177e4SLinus Torvalds  *	     				Fixed many serious bugs in the
3760236fddSArnaldo Carvalho de Melo  *					request_sock handling and moved
381da177e4SLinus Torvalds  *					most of it into the af independent code.
391da177e4SLinus Torvalds  *					Added tail drop and some other bugfixes.
40caa20d9aSStephen Hemminger  *					Added new listen semantics.
411da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
421da177e4SLinus Torvalds  *	Juan Jose Ciarlante:		ip_dynaddr bits
431da177e4SLinus Torvalds  *		Andi Kleen:		various fixes.
441da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
451da177e4SLinus Torvalds  *					coma.
461da177e4SLinus Torvalds  *	Andi Kleen		:	Fix new listen.
471da177e4SLinus Torvalds  *	Andi Kleen		:	Fix accept error reporting.
481da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
491da177e4SLinus Torvalds  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
501da177e4SLinus Torvalds  *					a single port at the same time.
511da177e4SLinus Torvalds  */
521da177e4SLinus Torvalds 
53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt
541da177e4SLinus Torvalds 
55eb4dea58SHerbert Xu #include <linux/bottom_half.h>
561da177e4SLinus Torvalds #include <linux/types.h>
571da177e4SLinus Torvalds #include <linux/fcntl.h>
581da177e4SLinus Torvalds #include <linux/module.h>
591da177e4SLinus Torvalds #include <linux/random.h>
601da177e4SLinus Torvalds #include <linux/cache.h>
611da177e4SLinus Torvalds #include <linux/jhash.h>
621da177e4SLinus Torvalds #include <linux/init.h>
631da177e4SLinus Torvalds #include <linux/times.h>
645a0e3ad6STejun Heo #include <linux/slab.h>
651da177e4SLinus Torvalds 
66457c4cbcSEric W. Biederman #include <net/net_namespace.h>
671da177e4SLinus Torvalds #include <net/icmp.h>
68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
691da177e4SLinus Torvalds #include <net/tcp.h>
7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h>
711da177e4SLinus Torvalds #include <net/ipv6.h>
721da177e4SLinus Torvalds #include <net/inet_common.h>
736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h>
741da177e4SLinus Torvalds #include <net/xfrm.h>
756e5714eaSDavid S. Miller #include <net/secure_seq.h>
76d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h>
77076bb0c8SEliezer Tamir #include <net/busy_poll.h>
781da177e4SLinus Torvalds 
791da177e4SLinus Torvalds #include <linux/inet.h>
801da177e4SLinus Torvalds #include <linux/ipv6.h>
811da177e4SLinus Torvalds #include <linux/stddef.h>
821da177e4SLinus Torvalds #include <linux/proc_fs.h>
831da177e4SLinus Torvalds #include <linux/seq_file.h>
841da177e4SLinus Torvalds 
85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h>
86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h>
87cfb6eeb4SYOSHIFUJI Hideaki 
88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly;
89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly;
904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency);
911da177e4SLinus Torvalds 
92cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
93a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
94318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
95cfb6eeb4SYOSHIFUJI Hideaki #endif
96cfb6eeb4SYOSHIFUJI Hideaki 
975caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo;
984bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo);
991da177e4SLinus Torvalds 
100936b8bdbSOctavian Purdila static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1011da177e4SLinus Torvalds {
102eddc9ec5SArnaldo Carvalho de Melo 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
103eddc9ec5SArnaldo Carvalho de Melo 					  ip_hdr(skb)->saddr,
104aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->dest,
105aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->source);
1061da177e4SLinus Torvalds }
1071da177e4SLinus Torvalds 
1086d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
1096d6ee43eSArnaldo Carvalho de Melo {
1106d6ee43eSArnaldo Carvalho de Melo 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
1116d6ee43eSArnaldo Carvalho de Melo 	struct tcp_sock *tp = tcp_sk(sk);
1126d6ee43eSArnaldo Carvalho de Melo 
1136d6ee43eSArnaldo Carvalho de Melo 	/* With PAWS, it is safe from the viewpoint
1146d6ee43eSArnaldo Carvalho de Melo 	   of data integrity. Even without PAWS it is safe provided sequence
1156d6ee43eSArnaldo Carvalho de Melo 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
1166d6ee43eSArnaldo Carvalho de Melo 
1176d6ee43eSArnaldo Carvalho de Melo 	   Actually, the idea is close to VJ's one, only timestamp cache is
1186d6ee43eSArnaldo Carvalho de Melo 	   held not per host, but per port pair and TW bucket is used as state
1196d6ee43eSArnaldo Carvalho de Melo 	   holder.
1206d6ee43eSArnaldo Carvalho de Melo 
1216d6ee43eSArnaldo Carvalho de Melo 	   If TW bucket has been already destroyed we fall back to VJ's scheme
1226d6ee43eSArnaldo Carvalho de Melo 	   and use initial timestamp retrieved from peer table.
1236d6ee43eSArnaldo Carvalho de Melo 	 */
1246d6ee43eSArnaldo Carvalho de Melo 	if (tcptw->tw_ts_recent_stamp &&
1256d6ee43eSArnaldo Carvalho de Melo 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
1269d729f72SJames Morris 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
1276d6ee43eSArnaldo Carvalho de Melo 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1286d6ee43eSArnaldo Carvalho de Melo 		if (tp->write_seq == 0)
1296d6ee43eSArnaldo Carvalho de Melo 			tp->write_seq = 1;
1306d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
1316d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1326d6ee43eSArnaldo Carvalho de Melo 		sock_hold(sktw);
1336d6ee43eSArnaldo Carvalho de Melo 		return 1;
1346d6ee43eSArnaldo Carvalho de Melo 	}
1356d6ee43eSArnaldo Carvalho de Melo 
1366d6ee43eSArnaldo Carvalho de Melo 	return 0;
1376d6ee43eSArnaldo Carvalho de Melo }
1386d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique);
1396d6ee43eSArnaldo Carvalho de Melo 
1401da177e4SLinus Torvalds /* This will initiate an outgoing connection. */
1411da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1421da177e4SLinus Torvalds {
1432d7192d6SDavid S. Miller 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1441da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
1451da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
146dca8b089SDavid S. Miller 	__be16 orig_sport, orig_dport;
147bada8adcSAl Viro 	__be32 daddr, nexthop;
148da905bd1SDavid S. Miller 	struct flowi4 *fl4;
1492d7192d6SDavid S. Miller 	struct rtable *rt;
1501da177e4SLinus Torvalds 	int err;
151f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
1521da177e4SLinus Torvalds 
1531da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_in))
1541da177e4SLinus Torvalds 		return -EINVAL;
1551da177e4SLinus Torvalds 
1561da177e4SLinus Torvalds 	if (usin->sin_family != AF_INET)
1571da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
1581da177e4SLinus Torvalds 
1591da177e4SLinus Torvalds 	nexthop = daddr = usin->sin_addr.s_addr;
160f6d8bd05SEric Dumazet 	inet_opt = rcu_dereference_protected(inet->inet_opt,
161f6d8bd05SEric Dumazet 					     sock_owned_by_user(sk));
162f6d8bd05SEric Dumazet 	if (inet_opt && inet_opt->opt.srr) {
1631da177e4SLinus Torvalds 		if (!daddr)
1641da177e4SLinus Torvalds 			return -EINVAL;
165f6d8bd05SEric Dumazet 		nexthop = inet_opt->opt.faddr;
1661da177e4SLinus Torvalds 	}
1671da177e4SLinus Torvalds 
168dca8b089SDavid S. Miller 	orig_sport = inet->inet_sport;
169dca8b089SDavid S. Miller 	orig_dport = usin->sin_port;
170da905bd1SDavid S. Miller 	fl4 = &inet->cork.fl.u.ip4;
171da905bd1SDavid S. Miller 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
1721da177e4SLinus Torvalds 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
1731da177e4SLinus Torvalds 			      IPPROTO_TCP,
1740e0d44abSSteffen Klassert 			      orig_sport, orig_dport, sk);
175b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
176b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
177b23dd4feSDavid S. Miller 		if (err == -ENETUNREACH)
178f1d8cba6SEric Dumazet 			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
179b23dd4feSDavid S. Miller 		return err;
180584bdf8cSWei Dong 	}
1811da177e4SLinus Torvalds 
1821da177e4SLinus Torvalds 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
1831da177e4SLinus Torvalds 		ip_rt_put(rt);
1841da177e4SLinus Torvalds 		return -ENETUNREACH;
1851da177e4SLinus Torvalds 	}
1861da177e4SLinus Torvalds 
187f6d8bd05SEric Dumazet 	if (!inet_opt || !inet_opt->opt.srr)
188da905bd1SDavid S. Miller 		daddr = fl4->daddr;
1891da177e4SLinus Torvalds 
190c720c7e8SEric Dumazet 	if (!inet->inet_saddr)
191da905bd1SDavid S. Miller 		inet->inet_saddr = fl4->saddr;
192c720c7e8SEric Dumazet 	inet->inet_rcv_saddr = inet->inet_saddr;
1931da177e4SLinus Torvalds 
194c720c7e8SEric Dumazet 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
1951da177e4SLinus Torvalds 		/* Reset inherited state */
1961da177e4SLinus Torvalds 		tp->rx_opt.ts_recent	   = 0;
1971da177e4SLinus Torvalds 		tp->rx_opt.ts_recent_stamp = 0;
198ee995283SPavel Emelyanov 		if (likely(!tp->repair))
1991da177e4SLinus Torvalds 			tp->write_seq	   = 0;
2001da177e4SLinus Torvalds 	}
2011da177e4SLinus Torvalds 
202295ff7edSArnaldo Carvalho de Melo 	if (tcp_death_row.sysctl_tw_recycle &&
20381166dd6SDavid S. Miller 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
20481166dd6SDavid S. Miller 		tcp_fetch_timewait_stamp(sk, &rt->dst);
2051da177e4SLinus Torvalds 
206c720c7e8SEric Dumazet 	inet->inet_dport = usin->sin_port;
207c720c7e8SEric Dumazet 	inet->inet_daddr = daddr;
2081da177e4SLinus Torvalds 
209d83d8461SArnaldo Carvalho de Melo 	inet_csk(sk)->icsk_ext_hdr_len = 0;
210f6d8bd05SEric Dumazet 	if (inet_opt)
211f6d8bd05SEric Dumazet 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
2121da177e4SLinus Torvalds 
213bee7ca9eSWilliam Allen Simpson 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
2141da177e4SLinus Torvalds 
2151da177e4SLinus Torvalds 	/* Socket identity is still unknown (sport may be zero).
2161da177e4SLinus Torvalds 	 * However we set state to SYN-SENT and not releasing socket
2171da177e4SLinus Torvalds 	 * lock select source port, enter ourselves into the hash tables and
2181da177e4SLinus Torvalds 	 * complete initialization after this.
2191da177e4SLinus Torvalds 	 */
2201da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_SYN_SENT);
221a7f5e7f1SArnaldo Carvalho de Melo 	err = inet_hash_connect(&tcp_death_row, sk);
2221da177e4SLinus Torvalds 	if (err)
2231da177e4SLinus Torvalds 		goto failure;
2241da177e4SLinus Torvalds 
2259e7ceb06SSathya Perla 	inet_set_txhash(sk);
2269e7ceb06SSathya Perla 
227da905bd1SDavid S. Miller 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
228c720c7e8SEric Dumazet 			       inet->inet_sport, inet->inet_dport, sk);
229b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
230b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
231b23dd4feSDavid S. Miller 		rt = NULL;
2321da177e4SLinus Torvalds 		goto failure;
233b23dd4feSDavid S. Miller 	}
2341da177e4SLinus Torvalds 	/* OK, now commit destination to socket.  */
235bcd76111SHerbert Xu 	sk->sk_gso_type = SKB_GSO_TCPV4;
236d8d1f30bSChangli Gao 	sk_setup_caps(sk, &rt->dst);
2371da177e4SLinus Torvalds 
238ee995283SPavel Emelyanov 	if (!tp->write_seq && likely(!tp->repair))
239c720c7e8SEric Dumazet 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
240c720c7e8SEric Dumazet 							   inet->inet_daddr,
241c720c7e8SEric Dumazet 							   inet->inet_sport,
2421da177e4SLinus Torvalds 							   usin->sin_port);
2431da177e4SLinus Torvalds 
244c720c7e8SEric Dumazet 	inet->inet_id = tp->write_seq ^ jiffies;
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds 	err = tcp_connect(sk);
247ee995283SPavel Emelyanov 
2481da177e4SLinus Torvalds 	rt = NULL;
2491da177e4SLinus Torvalds 	if (err)
2501da177e4SLinus Torvalds 		goto failure;
2511da177e4SLinus Torvalds 
2521da177e4SLinus Torvalds 	return 0;
2531da177e4SLinus Torvalds 
2541da177e4SLinus Torvalds failure:
2557174259eSArnaldo Carvalho de Melo 	/*
2567174259eSArnaldo Carvalho de Melo 	 * This unhashes the socket and releases the local port,
2577174259eSArnaldo Carvalho de Melo 	 * if necessary.
2587174259eSArnaldo Carvalho de Melo 	 */
2591da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_CLOSE);
2601da177e4SLinus Torvalds 	ip_rt_put(rt);
2611da177e4SLinus Torvalds 	sk->sk_route_caps = 0;
262c720c7e8SEric Dumazet 	inet->inet_dport = 0;
2631da177e4SLinus Torvalds 	return err;
2641da177e4SLinus Torvalds }
2654bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect);
2661da177e4SLinus Torvalds 
2671da177e4SLinus Torvalds /*
268563d34d0SEric Dumazet  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
269563d34d0SEric Dumazet  * It can be called through tcp_release_cb() if socket was owned by user
270563d34d0SEric Dumazet  * at the time tcp_v4_err() was called to handle ICMP message.
2711da177e4SLinus Torvalds  */
2724fab9071SNeal Cardwell void tcp_v4_mtu_reduced(struct sock *sk)
2731da177e4SLinus Torvalds {
2741da177e4SLinus Torvalds 	struct dst_entry *dst;
2751da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
276563d34d0SEric Dumazet 	u32 mtu = tcp_sk(sk)->mtu_info;
2771da177e4SLinus Torvalds 
27880d0a69fSDavid S. Miller 	dst = inet_csk_update_pmtu(sk, mtu);
27980d0a69fSDavid S. Miller 	if (!dst)
2801da177e4SLinus Torvalds 		return;
2811da177e4SLinus Torvalds 
2821da177e4SLinus Torvalds 	/* Something is about to be wrong... Remember soft error
2831da177e4SLinus Torvalds 	 * for the case, if this connection will not able to recover.
2841da177e4SLinus Torvalds 	 */
2851da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
2861da177e4SLinus Torvalds 		sk->sk_err_soft = EMSGSIZE;
2871da177e4SLinus Torvalds 
2881da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
2891da177e4SLinus Torvalds 
2901da177e4SLinus Torvalds 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
291482fc609SHannes Frederic Sowa 	    ip_sk_accept_pmtu(sk) &&
292d83d8461SArnaldo Carvalho de Melo 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
2931da177e4SLinus Torvalds 		tcp_sync_mss(sk, mtu);
2941da177e4SLinus Torvalds 
2951da177e4SLinus Torvalds 		/* Resend the TCP packet because it's
2961da177e4SLinus Torvalds 		 * clear that the old packet has been
2971da177e4SLinus Torvalds 		 * dropped. This is the new "fast" path mtu
2981da177e4SLinus Torvalds 		 * discovery.
2991da177e4SLinus Torvalds 		 */
3001da177e4SLinus Torvalds 		tcp_simple_retransmit(sk);
3011da177e4SLinus Torvalds 	} /* else let the usual retransmit timer handle it */
3021da177e4SLinus Torvalds }
3034fab9071SNeal Cardwell EXPORT_SYMBOL(tcp_v4_mtu_reduced);
3041da177e4SLinus Torvalds 
30555be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk)
30655be7a9cSDavid S. Miller {
30755be7a9cSDavid S. Miller 	struct dst_entry *dst = __sk_dst_check(sk, 0);
30855be7a9cSDavid S. Miller 
3091ed5c48fSDavid S. Miller 	if (dst)
3106700c270SDavid S. Miller 		dst->ops->redirect(dst, sk, skb);
31155be7a9cSDavid S. Miller }
31255be7a9cSDavid S. Miller 
3131da177e4SLinus Torvalds /*
3141da177e4SLinus Torvalds  * This routine is called by the ICMP module when it gets some
3151da177e4SLinus Torvalds  * sort of error condition.  If err < 0 then the socket should
3161da177e4SLinus Torvalds  * be closed and the error returned to the user.  If err > 0
3171da177e4SLinus Torvalds  * it's just the icmp type << 8 | icmp code.  After adjustment
3181da177e4SLinus Torvalds  * header points to the first 8 bytes of the tcp header.  We need
3191da177e4SLinus Torvalds  * to find the appropriate port.
3201da177e4SLinus Torvalds  *
3211da177e4SLinus Torvalds  * The locking strategy used here is very "optimistic". When
3221da177e4SLinus Torvalds  * someone else accesses the socket the ICMP is just dropped
3231da177e4SLinus Torvalds  * and for some paths there is no check at all.
3241da177e4SLinus Torvalds  * A more general error queue to queue errors for later handling
3251da177e4SLinus Torvalds  * is probably better.
3261da177e4SLinus Torvalds  *
3271da177e4SLinus Torvalds  */
3281da177e4SLinus Torvalds 
3294d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
3301da177e4SLinus Torvalds {
331b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
3324d1a2d9eSDamian Lukowski 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
333f1ecd5d9SDamian Lukowski 	struct inet_connection_sock *icsk;
3341da177e4SLinus Torvalds 	struct tcp_sock *tp;
3351da177e4SLinus Torvalds 	struct inet_sock *inet;
3364d1a2d9eSDamian Lukowski 	const int type = icmp_hdr(icmp_skb)->type;
3374d1a2d9eSDamian Lukowski 	const int code = icmp_hdr(icmp_skb)->code;
3381da177e4SLinus Torvalds 	struct sock *sk;
339f1ecd5d9SDamian Lukowski 	struct sk_buff *skb;
3400a672f74SYuchung Cheng 	struct request_sock *fastopen;
3410a672f74SYuchung Cheng 	__u32 seq, snd_una;
342f1ecd5d9SDamian Lukowski 	__u32 remaining;
3431da177e4SLinus Torvalds 	int err;
3444d1a2d9eSDamian Lukowski 	struct net *net = dev_net(icmp_skb->dev);
3451da177e4SLinus Torvalds 
346fd54d716SPavel Emelyanov 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
3474d1a2d9eSDamian Lukowski 			iph->saddr, th->source, inet_iif(icmp_skb));
3481da177e4SLinus Torvalds 	if (!sk) {
349dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3501da177e4SLinus Torvalds 		return;
3511da177e4SLinus Torvalds 	}
3521da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT) {
3539469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
3541da177e4SLinus Torvalds 		return;
3551da177e4SLinus Torvalds 	}
3561da177e4SLinus Torvalds 
3571da177e4SLinus Torvalds 	bh_lock_sock(sk);
3581da177e4SLinus Torvalds 	/* If too many ICMPs get dropped on busy
3591da177e4SLinus Torvalds 	 * servers this needs to be solved differently.
360563d34d0SEric Dumazet 	 * We do take care of PMTU discovery (RFC1191) special case :
361563d34d0SEric Dumazet 	 * we can receive locally generated ICMP messages while socket is held.
3621da177e4SLinus Torvalds 	 */
363b74aa930SEric Dumazet 	if (sock_owned_by_user(sk)) {
364b74aa930SEric Dumazet 		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
365de0744afSPavel Emelyanov 			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
366b74aa930SEric Dumazet 	}
3671da177e4SLinus Torvalds 	if (sk->sk_state == TCP_CLOSE)
3681da177e4SLinus Torvalds 		goto out;
3691da177e4SLinus Torvalds 
37097e3ecd1Sstephen hemminger 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
37197e3ecd1Sstephen hemminger 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
37297e3ecd1Sstephen hemminger 		goto out;
37397e3ecd1Sstephen hemminger 	}
37497e3ecd1Sstephen hemminger 
375f1ecd5d9SDamian Lukowski 	icsk = inet_csk(sk);
3761da177e4SLinus Torvalds 	tp = tcp_sk(sk);
3771da177e4SLinus Torvalds 	seq = ntohl(th->seq);
3780a672f74SYuchung Cheng 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
3790a672f74SYuchung Cheng 	fastopen = tp->fastopen_rsk;
3800a672f74SYuchung Cheng 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
3811da177e4SLinus Torvalds 	if (sk->sk_state != TCP_LISTEN &&
3820a672f74SYuchung Cheng 	    !between(seq, snd_una, tp->snd_nxt)) {
383de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
3841da177e4SLinus Torvalds 		goto out;
3851da177e4SLinus Torvalds 	}
3861da177e4SLinus Torvalds 
3871da177e4SLinus Torvalds 	switch (type) {
38855be7a9cSDavid S. Miller 	case ICMP_REDIRECT:
38955be7a9cSDavid S. Miller 		do_redirect(icmp_skb, sk);
39055be7a9cSDavid S. Miller 		goto out;
3911da177e4SLinus Torvalds 	case ICMP_SOURCE_QUENCH:
3921da177e4SLinus Torvalds 		/* Just silently ignore these. */
3931da177e4SLinus Torvalds 		goto out;
3941da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
3951da177e4SLinus Torvalds 		err = EPROTO;
3961da177e4SLinus Torvalds 		break;
3971da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
3981da177e4SLinus Torvalds 		if (code > NR_ICMP_UNREACH)
3991da177e4SLinus Torvalds 			goto out;
4001da177e4SLinus Torvalds 
4011da177e4SLinus Torvalds 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
4020d4f0608SEric Dumazet 			/* We are not interested in TCP_LISTEN and open_requests
4030d4f0608SEric Dumazet 			 * (SYN-ACKs send out by Linux are always <576bytes so
4040d4f0608SEric Dumazet 			 * they should go through unfragmented).
4050d4f0608SEric Dumazet 			 */
4060d4f0608SEric Dumazet 			if (sk->sk_state == TCP_LISTEN)
4070d4f0608SEric Dumazet 				goto out;
4080d4f0608SEric Dumazet 
409563d34d0SEric Dumazet 			tp->mtu_info = info;
410144d56e9SEric Dumazet 			if (!sock_owned_by_user(sk)) {
411563d34d0SEric Dumazet 				tcp_v4_mtu_reduced(sk);
412144d56e9SEric Dumazet 			} else {
413144d56e9SEric Dumazet 				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
414144d56e9SEric Dumazet 					sock_hold(sk);
415144d56e9SEric Dumazet 			}
4161da177e4SLinus Torvalds 			goto out;
4171da177e4SLinus Torvalds 		}
4181da177e4SLinus Torvalds 
4191da177e4SLinus Torvalds 		err = icmp_err_convert[code].errno;
420f1ecd5d9SDamian Lukowski 		/* check if icmp_skb allows revert of backoff
421f1ecd5d9SDamian Lukowski 		 * (see draft-zimmermann-tcp-lcd) */
422f1ecd5d9SDamian Lukowski 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
423f1ecd5d9SDamian Lukowski 			break;
424f1ecd5d9SDamian Lukowski 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
4250a672f74SYuchung Cheng 		    !icsk->icsk_backoff || fastopen)
426f1ecd5d9SDamian Lukowski 			break;
427f1ecd5d9SDamian Lukowski 
4288f49c270SDavid S. Miller 		if (sock_owned_by_user(sk))
4298f49c270SDavid S. Miller 			break;
4308f49c270SDavid S. Miller 
431f1ecd5d9SDamian Lukowski 		icsk->icsk_backoff--;
432fcdd1cf4SEric Dumazet 		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
433fcdd1cf4SEric Dumazet 					       TCP_TIMEOUT_INIT;
434fcdd1cf4SEric Dumazet 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
435f1ecd5d9SDamian Lukowski 
436f1ecd5d9SDamian Lukowski 		skb = tcp_write_queue_head(sk);
437f1ecd5d9SDamian Lukowski 		BUG_ON(!skb);
438f1ecd5d9SDamian Lukowski 
4397faee5c0SEric Dumazet 		remaining = icsk->icsk_rto -
4407faee5c0SEric Dumazet 			    min(icsk->icsk_rto,
4417faee5c0SEric Dumazet 				tcp_time_stamp - tcp_skb_timestamp(skb));
442f1ecd5d9SDamian Lukowski 
443f1ecd5d9SDamian Lukowski 		if (remaining) {
444f1ecd5d9SDamian Lukowski 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
445f1ecd5d9SDamian Lukowski 						  remaining, TCP_RTO_MAX);
446f1ecd5d9SDamian Lukowski 		} else {
447f1ecd5d9SDamian Lukowski 			/* RTO revert clocked out retransmission.
448f1ecd5d9SDamian Lukowski 			 * Will retransmit now */
449f1ecd5d9SDamian Lukowski 			tcp_retransmit_timer(sk);
450f1ecd5d9SDamian Lukowski 		}
451f1ecd5d9SDamian Lukowski 
4521da177e4SLinus Torvalds 		break;
4531da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4541da177e4SLinus Torvalds 		err = EHOSTUNREACH;
4551da177e4SLinus Torvalds 		break;
4561da177e4SLinus Torvalds 	default:
4571da177e4SLinus Torvalds 		goto out;
4581da177e4SLinus Torvalds 	}
4591da177e4SLinus Torvalds 
4601da177e4SLinus Torvalds 	switch (sk->sk_state) {
46160236fddSArnaldo Carvalho de Melo 		struct request_sock *req, **prev;
4621da177e4SLinus Torvalds 	case TCP_LISTEN:
4631da177e4SLinus Torvalds 		if (sock_owned_by_user(sk))
4641da177e4SLinus Torvalds 			goto out;
4651da177e4SLinus Torvalds 
466463c84b9SArnaldo Carvalho de Melo 		req = inet_csk_search_req(sk, &prev, th->dest,
4671da177e4SLinus Torvalds 					  iph->daddr, iph->saddr);
4681da177e4SLinus Torvalds 		if (!req)
4691da177e4SLinus Torvalds 			goto out;
4701da177e4SLinus Torvalds 
4711da177e4SLinus Torvalds 		/* ICMPs are not backlogged, hence we cannot get
4721da177e4SLinus Torvalds 		   an established socket here.
4731da177e4SLinus Torvalds 		 */
474547b792cSIlpo Järvinen 		WARN_ON(req->sk);
4751da177e4SLinus Torvalds 
4762e6599cbSArnaldo Carvalho de Melo 		if (seq != tcp_rsk(req)->snt_isn) {
477de0744afSPavel Emelyanov 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
4781da177e4SLinus Torvalds 			goto out;
4791da177e4SLinus Torvalds 		}
4801da177e4SLinus Torvalds 
4811da177e4SLinus Torvalds 		/*
4821da177e4SLinus Torvalds 		 * Still in SYN_RECV, just remove it silently.
4831da177e4SLinus Torvalds 		 * There is no good way to pass the error to the newly
4841da177e4SLinus Torvalds 		 * created socket, and POSIX does not want network
4851da177e4SLinus Torvalds 		 * errors returned from accept().
4861da177e4SLinus Torvalds 		 */
487463c84b9SArnaldo Carvalho de Melo 		inet_csk_reqsk_queue_drop(sk, req, prev);
488848bf15fSVijay Subramanian 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
4891da177e4SLinus Torvalds 		goto out;
4901da177e4SLinus Torvalds 
4911da177e4SLinus Torvalds 	case TCP_SYN_SENT:
4920a672f74SYuchung Cheng 	case TCP_SYN_RECV:
4930a672f74SYuchung Cheng 		/* Only in fast or simultaneous open. If a fast open socket is
4940a672f74SYuchung Cheng 		 * is already accepted it is treated as a connected one below.
4951da177e4SLinus Torvalds 		 */
4960a672f74SYuchung Cheng 		if (fastopen && fastopen->sk == NULL)
4970a672f74SYuchung Cheng 			break;
4980a672f74SYuchung Cheng 
4991da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk)) {
5001da177e4SLinus Torvalds 			sk->sk_err = err;
5011da177e4SLinus Torvalds 
5021da177e4SLinus Torvalds 			sk->sk_error_report(sk);
5031da177e4SLinus Torvalds 
5041da177e4SLinus Torvalds 			tcp_done(sk);
5051da177e4SLinus Torvalds 		} else {
5061da177e4SLinus Torvalds 			sk->sk_err_soft = err;
5071da177e4SLinus Torvalds 		}
5081da177e4SLinus Torvalds 		goto out;
5091da177e4SLinus Torvalds 	}
5101da177e4SLinus Torvalds 
5111da177e4SLinus Torvalds 	/* If we've already connected we will keep trying
5121da177e4SLinus Torvalds 	 * until we time out, or the user gives up.
5131da177e4SLinus Torvalds 	 *
5141da177e4SLinus Torvalds 	 * rfc1122 4.2.3.9 allows to consider as hard errors
5151da177e4SLinus Torvalds 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
5161da177e4SLinus Torvalds 	 * but it is obsoleted by pmtu discovery).
5171da177e4SLinus Torvalds 	 *
5181da177e4SLinus Torvalds 	 * Note, that in modern internet, where routing is unreliable
5191da177e4SLinus Torvalds 	 * and in each dark corner broken firewalls sit, sending random
5201da177e4SLinus Torvalds 	 * errors ordered by their masters even this two messages finally lose
5211da177e4SLinus Torvalds 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
5221da177e4SLinus Torvalds 	 *
5231da177e4SLinus Torvalds 	 * Now we are in compliance with RFCs.
5241da177e4SLinus Torvalds 	 *							--ANK (980905)
5251da177e4SLinus Torvalds 	 */
5261da177e4SLinus Torvalds 
5271da177e4SLinus Torvalds 	inet = inet_sk(sk);
5281da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk) && inet->recverr) {
5291da177e4SLinus Torvalds 		sk->sk_err = err;
5301da177e4SLinus Torvalds 		sk->sk_error_report(sk);
5311da177e4SLinus Torvalds 	} else	{ /* Only an error on timeout */
5321da177e4SLinus Torvalds 		sk->sk_err_soft = err;
5331da177e4SLinus Torvalds 	}
5341da177e4SLinus Torvalds 
5351da177e4SLinus Torvalds out:
5361da177e4SLinus Torvalds 	bh_unlock_sock(sk);
5371da177e4SLinus Torvalds 	sock_put(sk);
5381da177e4SLinus Torvalds }
5391da177e4SLinus Torvalds 
54028850dc7SDaniel Borkmann void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
5411da177e4SLinus Torvalds {
542aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
5431da177e4SLinus Torvalds 
54484fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
545419f9f89SHerbert Xu 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
546663ead3bSHerbert Xu 		skb->csum_start = skb_transport_header(skb) - skb->head;
547ff1dcadbSAl Viro 		skb->csum_offset = offsetof(struct tcphdr, check);
5481da177e4SLinus Torvalds 	} else {
549419f9f89SHerbert Xu 		th->check = tcp_v4_check(skb->len, saddr, daddr,
55007f0757aSJoe Perches 					 csum_partial(th,
5511da177e4SLinus Torvalds 						      th->doff << 2,
5521da177e4SLinus Torvalds 						      skb->csum));
5531da177e4SLinus Torvalds 	}
5541da177e4SLinus Torvalds }
5551da177e4SLinus Torvalds 
556419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */
557bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
558419f9f89SHerbert Xu {
559cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
560419f9f89SHerbert Xu 
561419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
562419f9f89SHerbert Xu }
5634bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check);
564419f9f89SHerbert Xu 
5651da177e4SLinus Torvalds /*
5661da177e4SLinus Torvalds  *	This routine will send an RST to the other tcp.
5671da177e4SLinus Torvalds  *
5681da177e4SLinus Torvalds  *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
5691da177e4SLinus Torvalds  *		      for reset.
5701da177e4SLinus Torvalds  *	Answer: if a packet caused RST, it is not for a socket
5711da177e4SLinus Torvalds  *		existing in our system, if it is matched to a socket,
5721da177e4SLinus Torvalds  *		it is just duplicate segment or bug in other side's TCP.
5731da177e4SLinus Torvalds  *		So that we build reply only basing on parameters
5741da177e4SLinus Torvalds  *		arrived with segment.
5751da177e4SLinus Torvalds  *	Exception: precedence violation. We do not implement it in any case.
5761da177e4SLinus Torvalds  */
5771da177e4SLinus Torvalds 
578cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
5791da177e4SLinus Torvalds {
580cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
581cfb6eeb4SYOSHIFUJI Hideaki 	struct {
582cfb6eeb4SYOSHIFUJI Hideaki 		struct tcphdr th;
583cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
584714e85beSAl Viro 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
585cfb6eeb4SYOSHIFUJI Hideaki #endif
586cfb6eeb4SYOSHIFUJI Hideaki 	} rep;
5871da177e4SLinus Torvalds 	struct ip_reply_arg arg;
588cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
589cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
590658ddaafSShawn Lu 	const __u8 *hash_location = NULL;
591658ddaafSShawn Lu 	unsigned char newhash[16];
592658ddaafSShawn Lu 	int genhash;
593658ddaafSShawn Lu 	struct sock *sk1 = NULL;
594cfb6eeb4SYOSHIFUJI Hideaki #endif
595a86b1e30SPavel Emelyanov 	struct net *net;
5961da177e4SLinus Torvalds 
5971da177e4SLinus Torvalds 	/* Never send a reset in response to a reset. */
5981da177e4SLinus Torvalds 	if (th->rst)
5991da177e4SLinus Torvalds 		return;
6001da177e4SLinus Torvalds 
601c3658e8dSEric Dumazet 	/* If sk not NULL, it means we did a successful lookup and incoming
602c3658e8dSEric Dumazet 	 * route had to be correct. prequeue might have dropped our dst.
603c3658e8dSEric Dumazet 	 */
604c3658e8dSEric Dumazet 	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
6051da177e4SLinus Torvalds 		return;
6061da177e4SLinus Torvalds 
6071da177e4SLinus Torvalds 	/* Swap the send and the receive. */
608cfb6eeb4SYOSHIFUJI Hideaki 	memset(&rep, 0, sizeof(rep));
609cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.dest   = th->source;
610cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.source = th->dest;
611cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.doff   = sizeof(struct tcphdr) / 4;
612cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.rst    = 1;
6131da177e4SLinus Torvalds 
6141da177e4SLinus Torvalds 	if (th->ack) {
615cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.seq = th->ack_seq;
6161da177e4SLinus Torvalds 	} else {
617cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack = 1;
618cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
6191da177e4SLinus Torvalds 				       skb->len - (th->doff << 2));
6201da177e4SLinus Torvalds 	}
6211da177e4SLinus Torvalds 
6227174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
623cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_base = (unsigned char *)&rep;
624cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_len  = sizeof(rep.th);
625cfb6eeb4SYOSHIFUJI Hideaki 
626*0f85feaeSEric Dumazet 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
627cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
628658ddaafSShawn Lu 	hash_location = tcp_parse_md5sig_option(th);
629658ddaafSShawn Lu 	if (!sk && hash_location) {
630658ddaafSShawn Lu 		/*
631658ddaafSShawn Lu 		 * active side is lost. Try to find listening socket through
632658ddaafSShawn Lu 		 * source port, and then find md5 key through listening socket.
633658ddaafSShawn Lu 		 * we are not loose security here:
634658ddaafSShawn Lu 		 * Incoming packet is checked with md5 hash with finding key,
635658ddaafSShawn Lu 		 * no RST generated if md5 hash doesn't match.
636658ddaafSShawn Lu 		 */
637*0f85feaeSEric Dumazet 		sk1 = __inet_lookup_listener(net,
638da5e3630STom Herbert 					     &tcp_hashinfo, ip_hdr(skb)->saddr,
639da5e3630STom Herbert 					     th->source, ip_hdr(skb)->daddr,
640658ddaafSShawn Lu 					     ntohs(th->source), inet_iif(skb));
641658ddaafSShawn Lu 		/* don't send rst if it can't find key */
642658ddaafSShawn Lu 		if (!sk1)
643658ddaafSShawn Lu 			return;
644658ddaafSShawn Lu 		rcu_read_lock();
645658ddaafSShawn Lu 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
646658ddaafSShawn Lu 					&ip_hdr(skb)->saddr, AF_INET);
647658ddaafSShawn Lu 		if (!key)
648658ddaafSShawn Lu 			goto release_sk1;
649658ddaafSShawn Lu 
650658ddaafSShawn Lu 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
651658ddaafSShawn Lu 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
652658ddaafSShawn Lu 			goto release_sk1;
653658ddaafSShawn Lu 	} else {
654658ddaafSShawn Lu 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
655658ddaafSShawn Lu 					     &ip_hdr(skb)->saddr,
656a915da9bSEric Dumazet 					     AF_INET) : NULL;
657658ddaafSShawn Lu 	}
658658ddaafSShawn Lu 
659cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
660cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
661cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_NOP << 16) |
662cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_MD5SIG << 8) |
663cfb6eeb4SYOSHIFUJI Hideaki 				   TCPOLEN_MD5SIG);
664cfb6eeb4SYOSHIFUJI Hideaki 		/* Update length and the length the header thinks exists */
665cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
666cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len / 4;
667cfb6eeb4SYOSHIFUJI Hideaki 
66849a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
66978e645cbSIlpo Järvinen 				     key, ip_hdr(skb)->saddr,
67078e645cbSIlpo Järvinen 				     ip_hdr(skb)->daddr, &rep.th);
671cfb6eeb4SYOSHIFUJI Hideaki 	}
672cfb6eeb4SYOSHIFUJI Hideaki #endif
673eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
674eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
67552cd5750SIlpo Järvinen 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
6761da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
67788ef4a5aSKOVACS Krisztian 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
678e2446eaaSShawn Lu 	/* When socket is gone, all binding information is lost.
6794c675258SAlexey Kuznetsov 	 * routing might fail in this case. No choice here, if we choose to force
6804c675258SAlexey Kuznetsov 	 * input interface, we will misroute in case of asymmetric route.
681e2446eaaSShawn Lu 	 */
6824c675258SAlexey Kuznetsov 	if (sk)
6834c675258SAlexey Kuznetsov 		arg.bound_dev_if = sk->sk_bound_dev_if;
6841da177e4SLinus Torvalds 
68566b13d99SEric Dumazet 	arg.tos = ip_hdr(skb)->tos;
68624a2d43dSEric Dumazet 	ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
68724a2d43dSEric Dumazet 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
68824a2d43dSEric Dumazet 			      &arg, arg.iov[0].iov_len);
6891da177e4SLinus Torvalds 
69063231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
69163231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
692658ddaafSShawn Lu 
693658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG
694658ddaafSShawn Lu release_sk1:
695658ddaafSShawn Lu 	if (sk1) {
696658ddaafSShawn Lu 		rcu_read_unlock();
697658ddaafSShawn Lu 		sock_put(sk1);
698658ddaafSShawn Lu 	}
699658ddaafSShawn Lu #endif
7001da177e4SLinus Torvalds }
7011da177e4SLinus Torvalds 
7021da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
7031da177e4SLinus Torvalds    outside socket context is ugly, certainly. What can I do?
7041da177e4SLinus Torvalds  */
7051da177e4SLinus Torvalds 
7069501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
707ee684b6fSAndrey Vagin 			    u32 win, u32 tsval, u32 tsecr, int oif,
70888ef4a5aSKOVACS Krisztian 			    struct tcp_md5sig_key *key,
70966b13d99SEric Dumazet 			    int reply_flags, u8 tos)
7101da177e4SLinus Torvalds {
711cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
7121da177e4SLinus Torvalds 	struct {
7131da177e4SLinus Torvalds 		struct tcphdr th;
714714e85beSAl Viro 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
715cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
716cfb6eeb4SYOSHIFUJI Hideaki 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
717cfb6eeb4SYOSHIFUJI Hideaki #endif
718cfb6eeb4SYOSHIFUJI Hideaki 			];
7191da177e4SLinus Torvalds 	} rep;
7201da177e4SLinus Torvalds 	struct ip_reply_arg arg;
721adf30907SEric Dumazet 	struct net *net = dev_net(skb_dst(skb)->dev);
7221da177e4SLinus Torvalds 
7231da177e4SLinus Torvalds 	memset(&rep.th, 0, sizeof(struct tcphdr));
7247174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
7251da177e4SLinus Torvalds 
7261da177e4SLinus Torvalds 	arg.iov[0].iov_base = (unsigned char *)&rep;
7271da177e4SLinus Torvalds 	arg.iov[0].iov_len  = sizeof(rep.th);
728ee684b6fSAndrey Vagin 	if (tsecr) {
729cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
7301da177e4SLinus Torvalds 				   (TCPOPT_TIMESTAMP << 8) |
7311da177e4SLinus Torvalds 				   TCPOLEN_TIMESTAMP);
732ee684b6fSAndrey Vagin 		rep.opt[1] = htonl(tsval);
733ee684b6fSAndrey Vagin 		rep.opt[2] = htonl(tsecr);
734cb48cfe8SCraig Schlenter 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
7351da177e4SLinus Torvalds 	}
7361da177e4SLinus Torvalds 
7371da177e4SLinus Torvalds 	/* Swap the send and the receive. */
7381da177e4SLinus Torvalds 	rep.th.dest    = th->source;
7391da177e4SLinus Torvalds 	rep.th.source  = th->dest;
7401da177e4SLinus Torvalds 	rep.th.doff    = arg.iov[0].iov_len / 4;
7411da177e4SLinus Torvalds 	rep.th.seq     = htonl(seq);
7421da177e4SLinus Torvalds 	rep.th.ack_seq = htonl(ack);
7431da177e4SLinus Torvalds 	rep.th.ack     = 1;
7441da177e4SLinus Torvalds 	rep.th.window  = htons(win);
7451da177e4SLinus Torvalds 
746cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
747cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
748ee684b6fSAndrey Vagin 		int offset = (tsecr) ? 3 : 0;
749cfb6eeb4SYOSHIFUJI Hideaki 
750cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
751cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_NOP << 16) |
752cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_MD5SIG << 8) |
753cfb6eeb4SYOSHIFUJI Hideaki 					  TCPOLEN_MD5SIG);
754cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
755cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len/4;
756cfb6eeb4SYOSHIFUJI Hideaki 
75749a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
75890b7e112SAdam Langley 				    key, ip_hdr(skb)->saddr,
75990b7e112SAdam Langley 				    ip_hdr(skb)->daddr, &rep.th);
760cfb6eeb4SYOSHIFUJI Hideaki 	}
761cfb6eeb4SYOSHIFUJI Hideaki #endif
76288ef4a5aSKOVACS Krisztian 	arg.flags = reply_flags;
763eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
764eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
7651da177e4SLinus Torvalds 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
7661da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
7679501f972SYOSHIFUJI Hideaki 	if (oif)
7689501f972SYOSHIFUJI Hideaki 		arg.bound_dev_if = oif;
76966b13d99SEric Dumazet 	arg.tos = tos;
77024a2d43dSEric Dumazet 	ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
77124a2d43dSEric Dumazet 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
77224a2d43dSEric Dumazet 			      &arg, arg.iov[0].iov_len);
7731da177e4SLinus Torvalds 
77463231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
7751da177e4SLinus Torvalds }
7761da177e4SLinus Torvalds 
7771da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
7781da177e4SLinus Torvalds {
7798feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw = inet_twsk(sk);
780cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
7811da177e4SLinus Torvalds 
7829501f972SYOSHIFUJI Hideaki 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7837174259eSArnaldo Carvalho de Melo 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
784ee684b6fSAndrey Vagin 			tcp_time_stamp + tcptw->tw_ts_offset,
7859501f972SYOSHIFUJI Hideaki 			tcptw->tw_ts_recent,
7869501f972SYOSHIFUJI Hideaki 			tw->tw_bound_dev_if,
78788ef4a5aSKOVACS Krisztian 			tcp_twsk_md5_key(tcptw),
78866b13d99SEric Dumazet 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
78966b13d99SEric Dumazet 			tw->tw_tos
7909501f972SYOSHIFUJI Hideaki 			);
7911da177e4SLinus Torvalds 
7928feaf0c0SArnaldo Carvalho de Melo 	inet_twsk_put(tw);
7931da177e4SLinus Torvalds }
7941da177e4SLinus Torvalds 
7956edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
7967174259eSArnaldo Carvalho de Melo 				  struct request_sock *req)
7971da177e4SLinus Torvalds {
798168a8f58SJerry Chu 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
799168a8f58SJerry Chu 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
800168a8f58SJerry Chu 	 */
801168a8f58SJerry Chu 	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
802168a8f58SJerry Chu 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
803168a8f58SJerry Chu 			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
804ee684b6fSAndrey Vagin 			tcp_time_stamp,
8059501f972SYOSHIFUJI Hideaki 			req->ts_recent,
8069501f972SYOSHIFUJI Hideaki 			0,
807a915da9bSEric Dumazet 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
808a915da9bSEric Dumazet 					  AF_INET),
80966b13d99SEric Dumazet 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
81066b13d99SEric Dumazet 			ip_hdr(skb)->tos);
8111da177e4SLinus Torvalds }
8121da177e4SLinus Torvalds 
8131da177e4SLinus Torvalds /*
8149bf1d83eSKris Katterjohn  *	Send a SYN-ACK after having received a SYN.
81560236fddSArnaldo Carvalho de Melo  *	This still operates on a request_sock only, not on a big
8161da177e4SLinus Torvalds  *	socket.
8171da177e4SLinus Torvalds  */
81872659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
819d6274bd8SOctavian Purdila 			      struct flowi *fl,
820e6b4d113SWilliam Allen Simpson 			      struct request_sock *req,
821843f4a55SYuchung Cheng 			      u16 queue_mapping,
822843f4a55SYuchung Cheng 			      struct tcp_fastopen_cookie *foc)
8231da177e4SLinus Torvalds {
8242e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
8256bd023f3SDavid S. Miller 	struct flowi4 fl4;
8261da177e4SLinus Torvalds 	int err = -1;
8271da177e4SLinus Torvalds 	struct sk_buff *skb;
8281da177e4SLinus Torvalds 
8291da177e4SLinus Torvalds 	/* First, grab a route. */
830ba3f7f04SDavid S. Miller 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
831fd80eb94SDenis V. Lunev 		return -1;
8321da177e4SLinus Torvalds 
833843f4a55SYuchung Cheng 	skb = tcp_make_synack(sk, dst, req, foc);
8341da177e4SLinus Torvalds 
8351da177e4SLinus Torvalds 	if (skb) {
836634fb979SEric Dumazet 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
8371da177e4SLinus Torvalds 
838fff32699SEric Dumazet 		skb_set_queue_mapping(skb, queue_mapping);
839634fb979SEric Dumazet 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
840634fb979SEric Dumazet 					    ireq->ir_rmt_addr,
8412e6599cbSArnaldo Carvalho de Melo 					    ireq->opt);
842b9df3cb8SGerrit Renker 		err = net_xmit_eval(err);
8431da177e4SLinus Torvalds 	}
8441da177e4SLinus Torvalds 
8451da177e4SLinus Torvalds 	return err;
8461da177e4SLinus Torvalds }
8471da177e4SLinus Torvalds 
8481da177e4SLinus Torvalds /*
84960236fddSArnaldo Carvalho de Melo  *	IPv4 request_sock destructor.
8501da177e4SLinus Torvalds  */
85160236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req)
8521da177e4SLinus Torvalds {
8532e6599cbSArnaldo Carvalho de Melo 	kfree(inet_rsk(req)->opt);
8541da177e4SLinus Torvalds }
8551da177e4SLinus Torvalds 
856946cedccSEric Dumazet /*
857a2a385d6SEric Dumazet  * Return true if a syncookie should be sent
858946cedccSEric Dumazet  */
859a2a385d6SEric Dumazet bool tcp_syn_flood_action(struct sock *sk,
860946cedccSEric Dumazet 			 const struct sk_buff *skb,
861946cedccSEric Dumazet 			 const char *proto)
8621da177e4SLinus Torvalds {
863946cedccSEric Dumazet 	const char *msg = "Dropping request";
864a2a385d6SEric Dumazet 	bool want_cookie = false;
865946cedccSEric Dumazet 	struct listen_sock *lopt;
866946cedccSEric Dumazet 
8672a1d4bd4SFlorian Westphal #ifdef CONFIG_SYN_COOKIES
868946cedccSEric Dumazet 	if (sysctl_tcp_syncookies) {
8692a1d4bd4SFlorian Westphal 		msg = "Sending cookies";
870a2a385d6SEric Dumazet 		want_cookie = true;
871946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
872946cedccSEric Dumazet 	} else
87380e40daaSArnaldo Carvalho de Melo #endif
874946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
8752a1d4bd4SFlorian Westphal 
876946cedccSEric Dumazet 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
8775ad37d5dSHannes Frederic Sowa 	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
878946cedccSEric Dumazet 		lopt->synflood_warned = 1;
879afd46503SJoe Perches 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
880946cedccSEric Dumazet 			proto, ntohs(tcp_hdr(skb)->dest), msg);
8812a1d4bd4SFlorian Westphal 	}
882946cedccSEric Dumazet 	return want_cookie;
883946cedccSEric Dumazet }
884946cedccSEric Dumazet EXPORT_SYMBOL(tcp_syn_flood_action);
8851da177e4SLinus Torvalds 
886cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
887cfb6eeb4SYOSHIFUJI Hideaki /*
888cfb6eeb4SYOSHIFUJI Hideaki  * RFC2385 MD5 checksumming requires a mapping of
889cfb6eeb4SYOSHIFUJI Hideaki  * IP address->MD5 Key.
890cfb6eeb4SYOSHIFUJI Hideaki  * We need to maintain these in the sk structure.
891cfb6eeb4SYOSHIFUJI Hideaki  */
892cfb6eeb4SYOSHIFUJI Hideaki 
893cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address.  */
894a915da9bSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
895a915da9bSEric Dumazet 					 const union tcp_md5_addr *addr,
896a915da9bSEric Dumazet 					 int family)
897cfb6eeb4SYOSHIFUJI Hideaki {
898cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
899a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
900a915da9bSEric Dumazet 	unsigned int size = sizeof(struct in_addr);
901a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
902cfb6eeb4SYOSHIFUJI Hideaki 
903a8afca03SEric Dumazet 	/* caller either holds rcu_read_lock() or socket lock */
904a8afca03SEric Dumazet 	md5sig = rcu_dereference_check(tp->md5sig_info,
905b4fb05eaSEric Dumazet 				       sock_owned_by_user(sk) ||
906b4fb05eaSEric Dumazet 				       lockdep_is_held(&sk->sk_lock.slock));
907a8afca03SEric Dumazet 	if (!md5sig)
908cfb6eeb4SYOSHIFUJI Hideaki 		return NULL;
909a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
910a915da9bSEric Dumazet 	if (family == AF_INET6)
911a915da9bSEric Dumazet 		size = sizeof(struct in6_addr);
912a915da9bSEric Dumazet #endif
913b67bfe0dSSasha Levin 	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
914a915da9bSEric Dumazet 		if (key->family != family)
915a915da9bSEric Dumazet 			continue;
916a915da9bSEric Dumazet 		if (!memcmp(&key->addr, addr, size))
917a915da9bSEric Dumazet 			return key;
918cfb6eeb4SYOSHIFUJI Hideaki 	}
919cfb6eeb4SYOSHIFUJI Hideaki 	return NULL;
920cfb6eeb4SYOSHIFUJI Hideaki }
921a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup);
922cfb6eeb4SYOSHIFUJI Hideaki 
923cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
924cfb6eeb4SYOSHIFUJI Hideaki 					 struct sock *addr_sk)
925cfb6eeb4SYOSHIFUJI Hideaki {
926a915da9bSEric Dumazet 	union tcp_md5_addr *addr;
927a915da9bSEric Dumazet 
928a915da9bSEric Dumazet 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
929a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
930cfb6eeb4SYOSHIFUJI Hideaki }
931cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup);
932cfb6eeb4SYOSHIFUJI Hideaki 
933f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
934cfb6eeb4SYOSHIFUJI Hideaki 						      struct request_sock *req)
935cfb6eeb4SYOSHIFUJI Hideaki {
936a915da9bSEric Dumazet 	union tcp_md5_addr *addr;
937a915da9bSEric Dumazet 
938634fb979SEric Dumazet 	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
939a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
940cfb6eeb4SYOSHIFUJI Hideaki }
941cfb6eeb4SYOSHIFUJI Hideaki 
942cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */
943a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
944a915da9bSEric Dumazet 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
945cfb6eeb4SYOSHIFUJI Hideaki {
946cfb6eeb4SYOSHIFUJI Hideaki 	/* Add Key to the list */
947b0a713e9SMatthias M. Dellweg 	struct tcp_md5sig_key *key;
948cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
949f6685938SArnaldo Carvalho de Melo 	struct tcp_md5sig_info *md5sig;
950f6685938SArnaldo Carvalho de Melo 
951c0353c7bSAydin Arik 	key = tcp_md5_do_lookup(sk, addr, family);
952a915da9bSEric Dumazet 	if (key) {
953a915da9bSEric Dumazet 		/* Pre-existing entry - just update that one. */
954a915da9bSEric Dumazet 		memcpy(key->key, newkey, newkeylen);
955a915da9bSEric Dumazet 		key->keylen = newkeylen;
956a915da9bSEric Dumazet 		return 0;
957cfb6eeb4SYOSHIFUJI Hideaki 	}
958260fcbebSYan, Zheng 
959a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
960a8afca03SEric Dumazet 					   sock_owned_by_user(sk));
961a915da9bSEric Dumazet 	if (!md5sig) {
962a915da9bSEric Dumazet 		md5sig = kmalloc(sizeof(*md5sig), gfp);
963a915da9bSEric Dumazet 		if (!md5sig)
964a915da9bSEric Dumazet 			return -ENOMEM;
965a915da9bSEric Dumazet 
966a915da9bSEric Dumazet 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
967a915da9bSEric Dumazet 		INIT_HLIST_HEAD(&md5sig->head);
968a8afca03SEric Dumazet 		rcu_assign_pointer(tp->md5sig_info, md5sig);
969a915da9bSEric Dumazet 	}
970a915da9bSEric Dumazet 
9715f3d9cb2SEric Dumazet 	key = sock_kmalloc(sk, sizeof(*key), gfp);
972a915da9bSEric Dumazet 	if (!key)
973a915da9bSEric Dumazet 		return -ENOMEM;
97471cea17eSEric Dumazet 	if (!tcp_alloc_md5sig_pool()) {
9755f3d9cb2SEric Dumazet 		sock_kfree_s(sk, key, sizeof(*key));
976cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOMEM;
977cfb6eeb4SYOSHIFUJI Hideaki 	}
978f6685938SArnaldo Carvalho de Melo 
979a915da9bSEric Dumazet 	memcpy(key->key, newkey, newkeylen);
980a915da9bSEric Dumazet 	key->keylen = newkeylen;
981a915da9bSEric Dumazet 	key->family = family;
982a915da9bSEric Dumazet 	memcpy(&key->addr, addr,
983a915da9bSEric Dumazet 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
984a915da9bSEric Dumazet 				      sizeof(struct in_addr));
985a915da9bSEric Dumazet 	hlist_add_head_rcu(&key->node, &md5sig->head);
986cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
987cfb6eeb4SYOSHIFUJI Hideaki }
988a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add);
989cfb6eeb4SYOSHIFUJI Hideaki 
990a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
991cfb6eeb4SYOSHIFUJI Hideaki {
992a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
993cfb6eeb4SYOSHIFUJI Hideaki 
994c0353c7bSAydin Arik 	key = tcp_md5_do_lookup(sk, addr, family);
995a915da9bSEric Dumazet 	if (!key)
996cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOENT;
997a915da9bSEric Dumazet 	hlist_del_rcu(&key->node);
9985f3d9cb2SEric Dumazet 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
999a915da9bSEric Dumazet 	kfree_rcu(key, rcu);
1000a915da9bSEric Dumazet 	return 0;
1001cfb6eeb4SYOSHIFUJI Hideaki }
1002a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del);
1003cfb6eeb4SYOSHIFUJI Hideaki 
1004e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk)
1005cfb6eeb4SYOSHIFUJI Hideaki {
1006cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1007a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
1008b67bfe0dSSasha Levin 	struct hlist_node *n;
1009a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
1010cfb6eeb4SYOSHIFUJI Hideaki 
1011a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1012a8afca03SEric Dumazet 
1013b67bfe0dSSasha Levin 	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
1014a915da9bSEric Dumazet 		hlist_del_rcu(&key->node);
10155f3d9cb2SEric Dumazet 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1016a915da9bSEric Dumazet 		kfree_rcu(key, rcu);
1017cfb6eeb4SYOSHIFUJI Hideaki 	}
1018cfb6eeb4SYOSHIFUJI Hideaki }
1019cfb6eeb4SYOSHIFUJI Hideaki 
1020cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1021cfb6eeb4SYOSHIFUJI Hideaki 				 int optlen)
1022cfb6eeb4SYOSHIFUJI Hideaki {
1023cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig cmd;
1024cfb6eeb4SYOSHIFUJI Hideaki 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1025cfb6eeb4SYOSHIFUJI Hideaki 
1026cfb6eeb4SYOSHIFUJI Hideaki 	if (optlen < sizeof(cmd))
1027cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1028cfb6eeb4SYOSHIFUJI Hideaki 
1029cfb6eeb4SYOSHIFUJI Hideaki 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1030cfb6eeb4SYOSHIFUJI Hideaki 		return -EFAULT;
1031cfb6eeb4SYOSHIFUJI Hideaki 
1032cfb6eeb4SYOSHIFUJI Hideaki 	if (sin->sin_family != AF_INET)
1033cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1034cfb6eeb4SYOSHIFUJI Hideaki 
103564a124edSDmitry Popov 	if (!cmd.tcpm_keylen)
1036a915da9bSEric Dumazet 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1037a915da9bSEric Dumazet 				      AF_INET);
1038cfb6eeb4SYOSHIFUJI Hideaki 
1039cfb6eeb4SYOSHIFUJI Hideaki 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1040cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1041cfb6eeb4SYOSHIFUJI Hideaki 
1042a915da9bSEric Dumazet 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1043a915da9bSEric Dumazet 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1044a915da9bSEric Dumazet 			      GFP_KERNEL);
1045cfb6eeb4SYOSHIFUJI Hideaki }
1046cfb6eeb4SYOSHIFUJI Hideaki 
104749a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
104849a72dfbSAdam Langley 					__be32 daddr, __be32 saddr, int nbytes)
1049cfb6eeb4SYOSHIFUJI Hideaki {
1050cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_pseudohdr *bp;
105149a72dfbSAdam Langley 	struct scatterlist sg;
1052cfb6eeb4SYOSHIFUJI Hideaki 
1053cfb6eeb4SYOSHIFUJI Hideaki 	bp = &hp->md5_blk.ip4;
1054cfb6eeb4SYOSHIFUJI Hideaki 
1055cfb6eeb4SYOSHIFUJI Hideaki 	/*
105649a72dfbSAdam Langley 	 * 1. the TCP pseudo-header (in the order: source IP address,
1057cfb6eeb4SYOSHIFUJI Hideaki 	 * destination IP address, zero-padded protocol number, and
1058cfb6eeb4SYOSHIFUJI Hideaki 	 * segment length)
1059cfb6eeb4SYOSHIFUJI Hideaki 	 */
1060cfb6eeb4SYOSHIFUJI Hideaki 	bp->saddr = saddr;
1061cfb6eeb4SYOSHIFUJI Hideaki 	bp->daddr = daddr;
1062cfb6eeb4SYOSHIFUJI Hideaki 	bp->pad = 0;
1063076fb722SYOSHIFUJI Hideaki 	bp->protocol = IPPROTO_TCP;
106449a72dfbSAdam Langley 	bp->len = cpu_to_be16(nbytes);
1065c7da57a1SDavid S. Miller 
106649a72dfbSAdam Langley 	sg_init_one(&sg, bp, sizeof(*bp));
106749a72dfbSAdam Langley 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
106849a72dfbSAdam Langley }
106949a72dfbSAdam Langley 
1070a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1071318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
107249a72dfbSAdam Langley {
107349a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
107449a72dfbSAdam Langley 	struct hash_desc *desc;
107549a72dfbSAdam Langley 
107649a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
107749a72dfbSAdam Langley 	if (!hp)
107849a72dfbSAdam Langley 		goto clear_hash_noput;
107949a72dfbSAdam Langley 	desc = &hp->md5_desc;
108049a72dfbSAdam Langley 
108149a72dfbSAdam Langley 	if (crypto_hash_init(desc))
108249a72dfbSAdam Langley 		goto clear_hash;
108349a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
108449a72dfbSAdam Langley 		goto clear_hash;
108549a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
108649a72dfbSAdam Langley 		goto clear_hash;
108749a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
108849a72dfbSAdam Langley 		goto clear_hash;
108949a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
1090cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1091cfb6eeb4SYOSHIFUJI Hideaki 
1092cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1093cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
109449a72dfbSAdam Langley 
1095cfb6eeb4SYOSHIFUJI Hideaki clear_hash:
1096cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1097cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput:
1098cfb6eeb4SYOSHIFUJI Hideaki 	memset(md5_hash, 0, 16);
109949a72dfbSAdam Langley 	return 1;
1100cfb6eeb4SYOSHIFUJI Hideaki }
1101cfb6eeb4SYOSHIFUJI Hideaki 
110249a72dfbSAdam Langley int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1103318cf7aaSEric Dumazet 			const struct sock *sk, const struct request_sock *req,
1104318cf7aaSEric Dumazet 			const struct sk_buff *skb)
1105cfb6eeb4SYOSHIFUJI Hideaki {
110649a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
110749a72dfbSAdam Langley 	struct hash_desc *desc;
1108318cf7aaSEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1109cfb6eeb4SYOSHIFUJI Hideaki 	__be32 saddr, daddr;
1110cfb6eeb4SYOSHIFUJI Hideaki 
1111cfb6eeb4SYOSHIFUJI Hideaki 	if (sk) {
1112c720c7e8SEric Dumazet 		saddr = inet_sk(sk)->inet_saddr;
1113c720c7e8SEric Dumazet 		daddr = inet_sk(sk)->inet_daddr;
111449a72dfbSAdam Langley 	} else if (req) {
1115634fb979SEric Dumazet 		saddr = inet_rsk(req)->ir_loc_addr;
1116634fb979SEric Dumazet 		daddr = inet_rsk(req)->ir_rmt_addr;
1117cfb6eeb4SYOSHIFUJI Hideaki 	} else {
111849a72dfbSAdam Langley 		const struct iphdr *iph = ip_hdr(skb);
111949a72dfbSAdam Langley 		saddr = iph->saddr;
112049a72dfbSAdam Langley 		daddr = iph->daddr;
1121cfb6eeb4SYOSHIFUJI Hideaki 	}
1122cfb6eeb4SYOSHIFUJI Hideaki 
112349a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
112449a72dfbSAdam Langley 	if (!hp)
112549a72dfbSAdam Langley 		goto clear_hash_noput;
112649a72dfbSAdam Langley 	desc = &hp->md5_desc;
112749a72dfbSAdam Langley 
112849a72dfbSAdam Langley 	if (crypto_hash_init(desc))
112949a72dfbSAdam Langley 		goto clear_hash;
113049a72dfbSAdam Langley 
113149a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
113249a72dfbSAdam Langley 		goto clear_hash;
113349a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
113449a72dfbSAdam Langley 		goto clear_hash;
113549a72dfbSAdam Langley 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
113649a72dfbSAdam Langley 		goto clear_hash;
113749a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
113849a72dfbSAdam Langley 		goto clear_hash;
113949a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
114049a72dfbSAdam Langley 		goto clear_hash;
114149a72dfbSAdam Langley 
114249a72dfbSAdam Langley 	tcp_put_md5sig_pool();
114349a72dfbSAdam Langley 	return 0;
114449a72dfbSAdam Langley 
114549a72dfbSAdam Langley clear_hash:
114649a72dfbSAdam Langley 	tcp_put_md5sig_pool();
114749a72dfbSAdam Langley clear_hash_noput:
114849a72dfbSAdam Langley 	memset(md5_hash, 0, 16);
114949a72dfbSAdam Langley 	return 1;
115049a72dfbSAdam Langley }
115149a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1152cfb6eeb4SYOSHIFUJI Hideaki 
11539ea88a15SDmitry Popov static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
11549ea88a15SDmitry Popov 				      const struct sk_buff *skb)
1155cfb6eeb4SYOSHIFUJI Hideaki {
1156cfb6eeb4SYOSHIFUJI Hideaki 	/*
1157cfb6eeb4SYOSHIFUJI Hideaki 	 * This gets called for each TCP segment that arrives
1158cfb6eeb4SYOSHIFUJI Hideaki 	 * so we want to be efficient.
1159cfb6eeb4SYOSHIFUJI Hideaki 	 * We have 3 drop cases:
1160cfb6eeb4SYOSHIFUJI Hideaki 	 * o No MD5 hash and one expected.
1161cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and we're not expecting one.
1162cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and its wrong.
1163cfb6eeb4SYOSHIFUJI Hideaki 	 */
1164cf533ea5SEric Dumazet 	const __u8 *hash_location = NULL;
1165cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *hash_expected;
1166eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1167cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1168cfb6eeb4SYOSHIFUJI Hideaki 	int genhash;
1169cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char newhash[16];
1170cfb6eeb4SYOSHIFUJI Hideaki 
1171a915da9bSEric Dumazet 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1172a915da9bSEric Dumazet 					  AF_INET);
11737d5d5525SYOSHIFUJI Hideaki 	hash_location = tcp_parse_md5sig_option(th);
1174cfb6eeb4SYOSHIFUJI Hideaki 
1175cfb6eeb4SYOSHIFUJI Hideaki 	/* We've parsed the options - do we have a hash? */
1176cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && !hash_location)
1177a2a385d6SEric Dumazet 		return false;
1178cfb6eeb4SYOSHIFUJI Hideaki 
1179cfb6eeb4SYOSHIFUJI Hideaki 	if (hash_expected && !hash_location) {
1180785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1181a2a385d6SEric Dumazet 		return true;
1182cfb6eeb4SYOSHIFUJI Hideaki 	}
1183cfb6eeb4SYOSHIFUJI Hideaki 
1184cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && hash_location) {
1185785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1186a2a385d6SEric Dumazet 		return true;
1187cfb6eeb4SYOSHIFUJI Hideaki 	}
1188cfb6eeb4SYOSHIFUJI Hideaki 
1189cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, so this is hash_expected and hash_location -
1190cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to calculate the checksum.
1191cfb6eeb4SYOSHIFUJI Hideaki 	 */
119249a72dfbSAdam Langley 	genhash = tcp_v4_md5_hash_skb(newhash,
1193cfb6eeb4SYOSHIFUJI Hideaki 				      hash_expected,
119449a72dfbSAdam Langley 				      NULL, NULL, skb);
1195cfb6eeb4SYOSHIFUJI Hideaki 
1196cfb6eeb4SYOSHIFUJI Hideaki 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1197e87cc472SJoe Perches 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1198673d57e7SHarvey Harrison 				     &iph->saddr, ntohs(th->source),
1199673d57e7SHarvey Harrison 				     &iph->daddr, ntohs(th->dest),
1200e87cc472SJoe Perches 				     genhash ? " tcp_v4_calc_md5_hash failed"
1201e87cc472SJoe Perches 				     : "");
1202a2a385d6SEric Dumazet 		return true;
1203cfb6eeb4SYOSHIFUJI Hideaki 	}
1204a2a385d6SEric Dumazet 	return false;
1205cfb6eeb4SYOSHIFUJI Hideaki }
1206cfb6eeb4SYOSHIFUJI Hideaki 
/*
 * Run __tcp_v4_inbound_md5_hash() inside an RCU read-side critical
 * section and hand back its verdict: true means "drop the segment".
 */
static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	bool drop;

	rcu_read_lock();
	drop = __tcp_v4_inbound_md5_hash(sk, skb);
	rcu_read_unlock();

	return drop;
}
12179ea88a15SDmitry Popov 
1218cfb6eeb4SYOSHIFUJI Hideaki #endif
1219cfb6eeb4SYOSHIFUJI Hideaki 
122016bea70aSOctavian Purdila static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
122116bea70aSOctavian Purdila 			    struct sk_buff *skb)
122216bea70aSOctavian Purdila {
122316bea70aSOctavian Purdila 	struct inet_request_sock *ireq = inet_rsk(req);
122416bea70aSOctavian Purdila 
122516bea70aSOctavian Purdila 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
122616bea70aSOctavian Purdila 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
122716bea70aSOctavian Purdila 	ireq->no_srccheck = inet_sk(sk)->transparent;
122816bea70aSOctavian Purdila 	ireq->opt = tcp_v4_save_options(skb);
122916bea70aSOctavian Purdila }
123016bea70aSOctavian Purdila 
1231d94e0417SOctavian Purdila static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
1232d94e0417SOctavian Purdila 					  const struct request_sock *req,
1233d94e0417SOctavian Purdila 					  bool *strict)
1234d94e0417SOctavian Purdila {
1235d94e0417SOctavian Purdila 	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1236d94e0417SOctavian Purdila 
1237d94e0417SOctavian Purdila 	if (strict) {
1238d94e0417SOctavian Purdila 		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1239d94e0417SOctavian Purdila 			*strict = true;
1240d94e0417SOctavian Purdila 		else
1241d94e0417SOctavian Purdila 			*strict = false;
1242d94e0417SOctavian Purdila 	}
1243d94e0417SOctavian Purdila 
1244d94e0417SOctavian Purdila 	return dst;
1245d94e0417SOctavian Purdila }
1246d94e0417SOctavian Purdila 
124772a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = {
12481da177e4SLinus Torvalds 	.family		=	PF_INET,
12492e6599cbSArnaldo Carvalho de Melo 	.obj_size	=	sizeof(struct tcp_request_sock),
12505db92c99SOctavian Purdila 	.rtx_syn_ack	=	tcp_rtx_synack,
125160236fddSArnaldo Carvalho de Melo 	.send_ack	=	tcp_v4_reqsk_send_ack,
125260236fddSArnaldo Carvalho de Melo 	.destructor	=	tcp_v4_reqsk_destructor,
12531da177e4SLinus Torvalds 	.send_reset	=	tcp_v4_send_reset,
125472659eccSOctavian Purdila 	.syn_ack_timeout =	tcp_syn_ack_timeout,
12551da177e4SLinus Torvalds };
12561da177e4SLinus Torvalds 
1257b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
12582aec4a29SOctavian Purdila 	.mss_clamp	=	TCP_MSS_DEFAULT,
125916bea70aSOctavian Purdila #ifdef CONFIG_TCP_MD5SIG
1260cfb6eeb4SYOSHIFUJI Hideaki 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1261e3afe7b7SJohn Dykstra 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1262b6332e6cSAndrew Morton #endif
126316bea70aSOctavian Purdila 	.init_req	=	tcp_v4_init_req,
1264fb7b37a7SOctavian Purdila #ifdef CONFIG_SYN_COOKIES
1265fb7b37a7SOctavian Purdila 	.cookie_init_seq =	cookie_v4_init_sequence,
1266fb7b37a7SOctavian Purdila #endif
1267d94e0417SOctavian Purdila 	.route_req	=	tcp_v4_route_req,
1268936b8bdbSOctavian Purdila 	.init_seq	=	tcp_v4_init_sequence,
1269d6274bd8SOctavian Purdila 	.send_synack	=	tcp_v4_send_synack,
1270695da14eSOctavian Purdila 	.queue_hash_add =	inet_csk_reqsk_queue_hash_add,
127116bea70aSOctavian Purdila };
1272cfb6eeb4SYOSHIFUJI Hideaki 
12731da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
12741da177e4SLinus Torvalds {
12751da177e4SLinus Torvalds 	/* Never answer to SYNs send to broadcast or multicast */
1276511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
12771da177e4SLinus Torvalds 		goto drop;
12781da177e4SLinus Torvalds 
12791fb6f159SOctavian Purdila 	return tcp_conn_request(&tcp_request_sock_ops,
12801fb6f159SOctavian Purdila 				&tcp_request_sock_ipv4_ops, sk, skb);
12811da177e4SLinus Torvalds 
12821da177e4SLinus Torvalds drop:
1283848bf15fSVijay Subramanian 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
12841da177e4SLinus Torvalds 	return 0;
12851da177e4SLinus Torvalds }
12864bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request);
12871da177e4SLinus Torvalds 
12881da177e4SLinus Torvalds 
12891da177e4SLinus Torvalds /*
12901da177e4SLinus Torvalds  * The three way handshake has completed - we got a valid synack -
12911da177e4SLinus Torvalds  * now create the new socket.
12921da177e4SLinus Torvalds  */
12931da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
129460236fddSArnaldo Carvalho de Melo 				  struct request_sock *req,
12951da177e4SLinus Torvalds 				  struct dst_entry *dst)
12961da177e4SLinus Torvalds {
12972e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
12981da177e4SLinus Torvalds 	struct inet_sock *newinet;
12991da177e4SLinus Torvalds 	struct tcp_sock *newtp;
13001da177e4SLinus Torvalds 	struct sock *newsk;
1301cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1302cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
1303cfb6eeb4SYOSHIFUJI Hideaki #endif
1304f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
13051da177e4SLinus Torvalds 
13061da177e4SLinus Torvalds 	if (sk_acceptq_is_full(sk))
13071da177e4SLinus Torvalds 		goto exit_overflow;
13081da177e4SLinus Torvalds 
13091da177e4SLinus Torvalds 	newsk = tcp_create_openreq_child(sk, req, skb);
13101da177e4SLinus Torvalds 	if (!newsk)
1311093d2823SBalazs Scheidler 		goto exit_nonewsk;
13121da177e4SLinus Torvalds 
1313bcd76111SHerbert Xu 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1314fae6ef87SNeal Cardwell 	inet_sk_rx_dst_set(newsk, skb);
13151da177e4SLinus Torvalds 
13161da177e4SLinus Torvalds 	newtp		      = tcp_sk(newsk);
13171da177e4SLinus Torvalds 	newinet		      = inet_sk(newsk);
13182e6599cbSArnaldo Carvalho de Melo 	ireq		      = inet_rsk(req);
1319634fb979SEric Dumazet 	newinet->inet_daddr   = ireq->ir_rmt_addr;
1320634fb979SEric Dumazet 	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
1321634fb979SEric Dumazet 	newinet->inet_saddr	      = ireq->ir_loc_addr;
1322f6d8bd05SEric Dumazet 	inet_opt	      = ireq->opt;
1323f6d8bd05SEric Dumazet 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
13242e6599cbSArnaldo Carvalho de Melo 	ireq->opt	      = NULL;
1325463c84b9SArnaldo Carvalho de Melo 	newinet->mc_index     = inet_iif(skb);
1326eddc9ec5SArnaldo Carvalho de Melo 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
13274c507d28SJiri Benc 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1328d83d8461SArnaldo Carvalho de Melo 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1329b73c3d0eSTom Herbert 	inet_set_txhash(newsk);
1330f6d8bd05SEric Dumazet 	if (inet_opt)
1331f6d8bd05SEric Dumazet 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1332c720c7e8SEric Dumazet 	newinet->inet_id = newtp->write_seq ^ jiffies;
13331da177e4SLinus Torvalds 
1334dfd25fffSEric Dumazet 	if (!dst) {
1335dfd25fffSEric Dumazet 		dst = inet_csk_route_child_sock(sk, newsk, req);
1336dfd25fffSEric Dumazet 		if (!dst)
13370e734419SDavid S. Miller 			goto put_and_exit;
1338dfd25fffSEric Dumazet 	} else {
1339dfd25fffSEric Dumazet 		/* syncookie case : see end of cookie_v4_check() */
1340dfd25fffSEric Dumazet 	}
13410e734419SDavid S. Miller 	sk_setup_caps(newsk, dst);
13420e734419SDavid S. Miller 
13431da177e4SLinus Torvalds 	tcp_sync_mss(newsk, dst_mtu(dst));
13440dbaee3bSDavid S. Miller 	newtp->advmss = dst_metric_advmss(dst);
1345f5fff5dcSTom Quetchenbach 	if (tcp_sk(sk)->rx_opt.user_mss &&
1346f5fff5dcSTom Quetchenbach 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1347f5fff5dcSTom Quetchenbach 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1348f5fff5dcSTom Quetchenbach 
13491da177e4SLinus Torvalds 	tcp_initialize_rcv_mss(newsk);
13501da177e4SLinus Torvalds 
1351cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1352cfb6eeb4SYOSHIFUJI Hideaki 	/* Copy over the MD5 key from the original socket */
1353a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1354a915da9bSEric Dumazet 				AF_INET);
1355c720c7e8SEric Dumazet 	if (key != NULL) {
1356cfb6eeb4SYOSHIFUJI Hideaki 		/*
1357cfb6eeb4SYOSHIFUJI Hideaki 		 * We're using one, so create a matching key
1358cfb6eeb4SYOSHIFUJI Hideaki 		 * on the newsk structure. If we fail to get
1359cfb6eeb4SYOSHIFUJI Hideaki 		 * memory, then we end up not copying the key
1360cfb6eeb4SYOSHIFUJI Hideaki 		 * across. Shucks.
1361cfb6eeb4SYOSHIFUJI Hideaki 		 */
1362a915da9bSEric Dumazet 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1363a915da9bSEric Dumazet 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1364a465419bSEric Dumazet 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1365cfb6eeb4SYOSHIFUJI Hideaki 	}
1366cfb6eeb4SYOSHIFUJI Hideaki #endif
1367cfb6eeb4SYOSHIFUJI Hideaki 
13680e734419SDavid S. Miller 	if (__inet_inherit_port(sk, newsk) < 0)
13690e734419SDavid S. Miller 		goto put_and_exit;
13709327f705SEric Dumazet 	__inet_hash_nolisten(newsk, NULL);
13711da177e4SLinus Torvalds 
13721da177e4SLinus Torvalds 	return newsk;
13731da177e4SLinus Torvalds 
13741da177e4SLinus Torvalds exit_overflow:
1375de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1376093d2823SBalazs Scheidler exit_nonewsk:
1377093d2823SBalazs Scheidler 	dst_release(dst);
13781da177e4SLinus Torvalds exit:
1379de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
13801da177e4SLinus Torvalds 	return NULL;
13810e734419SDavid S. Miller put_and_exit:
1382e337e24dSChristoph Paasch 	inet_csk_prepare_forced_close(newsk);
1383e337e24dSChristoph Paasch 	tcp_done(newsk);
13840e734419SDavid S. Miller 	goto exit;
13851da177e4SLinus Torvalds }
13864bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
13871da177e4SLinus Torvalds 
13881da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
13891da177e4SLinus Torvalds {
1390aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
1391eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
13921da177e4SLinus Torvalds 	struct sock *nsk;
139360236fddSArnaldo Carvalho de Melo 	struct request_sock **prev;
13941da177e4SLinus Torvalds 	/* Find possible connection requests. */
1395463c84b9SArnaldo Carvalho de Melo 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
13961da177e4SLinus Torvalds 						       iph->saddr, iph->daddr);
13971da177e4SLinus Torvalds 	if (req)
13988336886fSJerry Chu 		return tcp_check_req(sk, skb, req, prev, false);
13991da177e4SLinus Torvalds 
14003b1e0a65SYOSHIFUJI Hideaki 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1401c67499c0SPavel Emelyanov 			th->source, iph->daddr, th->dest, inet_iif(skb));
14021da177e4SLinus Torvalds 
14031da177e4SLinus Torvalds 	if (nsk) {
14041da177e4SLinus Torvalds 		if (nsk->sk_state != TCP_TIME_WAIT) {
14051da177e4SLinus Torvalds 			bh_lock_sock(nsk);
14061da177e4SLinus Torvalds 			return nsk;
14071da177e4SLinus Torvalds 		}
14089469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(nsk));
14091da177e4SLinus Torvalds 		return NULL;
14101da177e4SLinus Torvalds 	}
14111da177e4SLinus Torvalds 
14121da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
1413af9b4738SFlorian Westphal 	if (!th->syn)
1414461b74c3SCong Wang 		sk = cookie_v4_check(sk, skb);
14151da177e4SLinus Torvalds #endif
14161da177e4SLinus Torvalds 	return sk;
14171da177e4SLinus Torvalds }
14181da177e4SLinus Torvalds 
14191da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get
14201da177e4SLinus Torvalds  * here.
14211da177e4SLinus Torvalds  *
14221da177e4SLinus Torvalds  * We have a potential double-lock case here, so even when
14231da177e4SLinus Torvalds  * doing backlog processing we use the BH locking scheme.
14241da177e4SLinus Torvalds  * This is because we cannot sleep with the original spinlock
14251da177e4SLinus Torvalds  * held.
14261da177e4SLinus Torvalds  */
14271da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
14281da177e4SLinus Torvalds {
1429cfb6eeb4SYOSHIFUJI Hideaki 	struct sock *rsk;
1430cfb6eeb4SYOSHIFUJI Hideaki 
14311da177e4SLinus Torvalds 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
143292101b3bSDavid S. Miller 		struct dst_entry *dst = sk->sk_rx_dst;
1433404e0a8bSEric Dumazet 
1434404e0a8bSEric Dumazet 		sock_rps_save_rxhash(sk, skb);
1435404e0a8bSEric Dumazet 		if (dst) {
1436505fbcf0SEric Dumazet 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1437505fbcf0SEric Dumazet 			    dst->ops->check(dst, 0) == NULL) {
143892101b3bSDavid S. Miller 				dst_release(dst);
143992101b3bSDavid S. Miller 				sk->sk_rx_dst = NULL;
144092101b3bSDavid S. Miller 			}
144192101b3bSDavid S. Miller 		}
1442c995ae22SVijay Subramanian 		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
14431da177e4SLinus Torvalds 		return 0;
14441da177e4SLinus Torvalds 	}
14451da177e4SLinus Torvalds 
1446ab6a5bb6SArnaldo Carvalho de Melo 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
14471da177e4SLinus Torvalds 		goto csum_err;
14481da177e4SLinus Torvalds 
14491da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
14501da177e4SLinus Torvalds 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
14511da177e4SLinus Torvalds 		if (!nsk)
14521da177e4SLinus Torvalds 			goto discard;
14531da177e4SLinus Torvalds 
14541da177e4SLinus Torvalds 		if (nsk != sk) {
1455bdeab991STom Herbert 			sock_rps_save_rxhash(nsk, skb);
1456cfb6eeb4SYOSHIFUJI Hideaki 			if (tcp_child_process(sk, nsk, skb)) {
1457cfb6eeb4SYOSHIFUJI Hideaki 				rsk = nsk;
14581da177e4SLinus Torvalds 				goto reset;
1459cfb6eeb4SYOSHIFUJI Hideaki 			}
14601da177e4SLinus Torvalds 			return 0;
14611da177e4SLinus Torvalds 		}
1462ca55158cSEric Dumazet 	} else
1463bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1464ca55158cSEric Dumazet 
1465aa8223c7SArnaldo Carvalho de Melo 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1466cfb6eeb4SYOSHIFUJI Hideaki 		rsk = sk;
14671da177e4SLinus Torvalds 		goto reset;
1468cfb6eeb4SYOSHIFUJI Hideaki 	}
14691da177e4SLinus Torvalds 	return 0;
14701da177e4SLinus Torvalds 
14711da177e4SLinus Torvalds reset:
1472cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_reset(rsk, skb);
14731da177e4SLinus Torvalds discard:
14741da177e4SLinus Torvalds 	kfree_skb(skb);
14751da177e4SLinus Torvalds 	/* Be careful here. If this function gets more complicated and
14761da177e4SLinus Torvalds 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
14771da177e4SLinus Torvalds 	 * might be destroyed here. This current version compiles correctly,
14781da177e4SLinus Torvalds 	 * but you have been warned.
14791da177e4SLinus Torvalds 	 */
14801da177e4SLinus Torvalds 	return 0;
14811da177e4SLinus Torvalds 
14821da177e4SLinus Torvalds csum_err:
14836a5dc9e5SEric Dumazet 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
148463231bddSPavel Emelyanov 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
14851da177e4SLinus Torvalds 	goto discard;
14861da177e4SLinus Torvalds }
14874bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv);
14881da177e4SLinus Torvalds 
1489160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb)
149041063e9dSDavid S. Miller {
149141063e9dSDavid S. Miller 	const struct iphdr *iph;
149241063e9dSDavid S. Miller 	const struct tcphdr *th;
149341063e9dSDavid S. Miller 	struct sock *sk;
149441063e9dSDavid S. Miller 
149541063e9dSDavid S. Miller 	if (skb->pkt_type != PACKET_HOST)
1496160eb5a6SDavid S. Miller 		return;
149741063e9dSDavid S. Miller 
149845f00f99SEric Dumazet 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1499160eb5a6SDavid S. Miller 		return;
150041063e9dSDavid S. Miller 
150141063e9dSDavid S. Miller 	iph = ip_hdr(skb);
150245f00f99SEric Dumazet 	th = tcp_hdr(skb);
150341063e9dSDavid S. Miller 
150441063e9dSDavid S. Miller 	if (th->doff < sizeof(struct tcphdr) / 4)
1505160eb5a6SDavid S. Miller 		return;
150641063e9dSDavid S. Miller 
150745f00f99SEric Dumazet 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
150841063e9dSDavid S. Miller 				       iph->saddr, th->source,
15097011d085SVijay Subramanian 				       iph->daddr, ntohs(th->dest),
15109cb429d6SEric Dumazet 				       skb->skb_iif);
151141063e9dSDavid S. Miller 	if (sk) {
151241063e9dSDavid S. Miller 		skb->sk = sk;
151341063e9dSDavid S. Miller 		skb->destructor = sock_edemux;
151441063e9dSDavid S. Miller 		if (sk->sk_state != TCP_TIME_WAIT) {
151541063e9dSDavid S. Miller 			struct dst_entry *dst = sk->sk_rx_dst;
1516505fbcf0SEric Dumazet 
151741063e9dSDavid S. Miller 			if (dst)
151841063e9dSDavid S. Miller 				dst = dst_check(dst, 0);
151992101b3bSDavid S. Miller 			if (dst &&
1520505fbcf0SEric Dumazet 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
152141063e9dSDavid S. Miller 				skb_dst_set_noref(skb, dst);
152241063e9dSDavid S. Miller 		}
152341063e9dSDavid S. Miller 	}
152441063e9dSDavid S. Miller }
152541063e9dSDavid S. Miller 
1526b2fb4f54SEric Dumazet /* Packet is added to VJ-style prequeue for processing in process
1527b2fb4f54SEric Dumazet  * context, if a reader task is waiting. Apparently, this exciting
1528b2fb4f54SEric Dumazet  * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1529b2fb4f54SEric Dumazet  * failed somewhere. Latency? Burstiness? Well, at least now we will
1530b2fb4f54SEric Dumazet  * see, why it failed. 8)8)				  --ANK
1531b2fb4f54SEric Dumazet  *
1532b2fb4f54SEric Dumazet  */
1533b2fb4f54SEric Dumazet bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1534b2fb4f54SEric Dumazet {
1535b2fb4f54SEric Dumazet 	struct tcp_sock *tp = tcp_sk(sk);
1536b2fb4f54SEric Dumazet 
1537b2fb4f54SEric Dumazet 	if (sysctl_tcp_low_latency || !tp->ucopy.task)
1538b2fb4f54SEric Dumazet 		return false;
1539b2fb4f54SEric Dumazet 
1540b2fb4f54SEric Dumazet 	if (skb->len <= tcp_hdrlen(skb) &&
1541b2fb4f54SEric Dumazet 	    skb_queue_len(&tp->ucopy.prequeue) == 0)
1542b2fb4f54SEric Dumazet 		return false;
1543b2fb4f54SEric Dumazet 
1544ca777effSEric Dumazet 	/* Before escaping RCU protected region, we need to take care of skb
1545ca777effSEric Dumazet 	 * dst. Prequeue is only enabled for established sockets.
1546ca777effSEric Dumazet 	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
1547ca777effSEric Dumazet 	 * Instead of doing full sk_rx_dst validity here, let's perform
1548ca777effSEric Dumazet 	 * an optimistic check.
1549ca777effSEric Dumazet 	 */
1550ca777effSEric Dumazet 	if (likely(sk->sk_rx_dst))
1551ca777effSEric Dumazet 		skb_dst_drop(skb);
1552ca777effSEric Dumazet 	else
155358717686SDavid S. Miller 		skb_dst_force(skb);
1554ca777effSEric Dumazet 
1555b2fb4f54SEric Dumazet 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
1556b2fb4f54SEric Dumazet 	tp->ucopy.memory += skb->truesize;
1557b2fb4f54SEric Dumazet 	if (tp->ucopy.memory > sk->sk_rcvbuf) {
1558b2fb4f54SEric Dumazet 		struct sk_buff *skb1;
1559b2fb4f54SEric Dumazet 
1560b2fb4f54SEric Dumazet 		BUG_ON(sock_owned_by_user(sk));
1561b2fb4f54SEric Dumazet 
1562b2fb4f54SEric Dumazet 		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1563b2fb4f54SEric Dumazet 			sk_backlog_rcv(sk, skb1);
1564b2fb4f54SEric Dumazet 			NET_INC_STATS_BH(sock_net(sk),
1565b2fb4f54SEric Dumazet 					 LINUX_MIB_TCPPREQUEUEDROPPED);
1566b2fb4f54SEric Dumazet 		}
1567b2fb4f54SEric Dumazet 
1568b2fb4f54SEric Dumazet 		tp->ucopy.memory = 0;
1569b2fb4f54SEric Dumazet 	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1570b2fb4f54SEric Dumazet 		wake_up_interruptible_sync_poll(sk_sleep(sk),
1571b2fb4f54SEric Dumazet 					   POLLIN | POLLRDNORM | POLLRDBAND);
1572b2fb4f54SEric Dumazet 		if (!inet_csk_ack_scheduled(sk))
1573b2fb4f54SEric Dumazet 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1574b2fb4f54SEric Dumazet 						  (3 * tcp_rto_min(sk)) / 4,
1575b2fb4f54SEric Dumazet 						  TCP_RTO_MAX);
1576b2fb4f54SEric Dumazet 	}
1577b2fb4f54SEric Dumazet 	return true;
1578b2fb4f54SEric Dumazet }
1579b2fb4f54SEric Dumazet EXPORT_SYMBOL(tcp_prequeue);
1580b2fb4f54SEric Dumazet 
15811da177e4SLinus Torvalds /*
15821da177e4SLinus Torvalds  *	From tcp_input.c
15831da177e4SLinus Torvalds  */
15841da177e4SLinus Torvalds 
15851da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb)
15861da177e4SLinus Torvalds {
1587eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
1588cf533ea5SEric Dumazet 	const struct tcphdr *th;
15891da177e4SLinus Torvalds 	struct sock *sk;
15901da177e4SLinus Torvalds 	int ret;
1591a86b1e30SPavel Emelyanov 	struct net *net = dev_net(skb->dev);
15921da177e4SLinus Torvalds 
15931da177e4SLinus Torvalds 	if (skb->pkt_type != PACKET_HOST)
15941da177e4SLinus Torvalds 		goto discard_it;
15951da177e4SLinus Torvalds 
15961da177e4SLinus Torvalds 	/* Count it even if it's bad */
159763231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
15981da177e4SLinus Torvalds 
15991da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
16001da177e4SLinus Torvalds 		goto discard_it;
16011da177e4SLinus Torvalds 
1602aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
16031da177e4SLinus Torvalds 
16041da177e4SLinus Torvalds 	if (th->doff < sizeof(struct tcphdr) / 4)
16051da177e4SLinus Torvalds 		goto bad_packet;
16061da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, th->doff * 4))
16071da177e4SLinus Torvalds 		goto discard_it;
16081da177e4SLinus Torvalds 
16091da177e4SLinus Torvalds 	/* An explanation is required here, I think.
16101da177e4SLinus Torvalds 	 * Packet length and doff are validated by header prediction,
1611caa20d9aSStephen Hemminger 	 * provided case of th->doff==0 is eliminated.
16121da177e4SLinus Torvalds 	 * So, we defer the checks. */
1613ed70fcfcSTom Herbert 
1614ed70fcfcSTom Herbert 	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
16156a5dc9e5SEric Dumazet 		goto csum_error;
16161da177e4SLinus Torvalds 
1617aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
1618eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
1619971f10ecSEric Dumazet 	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1620971f10ecSEric Dumazet 	 * barrier() makes sure compiler wont play fool^Waliasing games.
1621971f10ecSEric Dumazet 	 */
1622971f10ecSEric Dumazet 	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1623971f10ecSEric Dumazet 		sizeof(struct inet_skb_parm));
1624971f10ecSEric Dumazet 	barrier();
1625971f10ecSEric Dumazet 
16261da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
16271da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
16281da177e4SLinus Torvalds 				    skb->len - th->doff * 4);
16291da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1630e11ecddfSEric Dumazet 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
163104317dafSEric Dumazet 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1632b82d1bb4SEric Dumazet 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
16331da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->sacked	 = 0;
16341da177e4SLinus Torvalds 
16359a1f27c4SArnaldo Carvalho de Melo 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
16361da177e4SLinus Torvalds 	if (!sk)
16371da177e4SLinus Torvalds 		goto no_tcp_socket;
16381da177e4SLinus Torvalds 
1639bb134d5dSEric Dumazet process:
1640bb134d5dSEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
1641bb134d5dSEric Dumazet 		goto do_time_wait;
1642bb134d5dSEric Dumazet 
16436cce09f8SEric Dumazet 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
16446cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1645d218d111SStephen Hemminger 		goto discard_and_relse;
16466cce09f8SEric Dumazet 	}
1647d218d111SStephen Hemminger 
16481da177e4SLinus Torvalds 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
16491da177e4SLinus Torvalds 		goto discard_and_relse;
16509ea88a15SDmitry Popov 
16519ea88a15SDmitry Popov #ifdef CONFIG_TCP_MD5SIG
16529ea88a15SDmitry Popov 	/*
16539ea88a15SDmitry Popov 	 * We really want to reject the packet as early as possible
16549ea88a15SDmitry Popov 	 * if:
16559ea88a15SDmitry Popov 	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
16569ea88a15SDmitry Popov 	 *  o There is an MD5 option and we're not expecting one
16579ea88a15SDmitry Popov 	 */
16589ea88a15SDmitry Popov 	if (tcp_v4_inbound_md5_hash(sk, skb))
16599ea88a15SDmitry Popov 		goto discard_and_relse;
16609ea88a15SDmitry Popov #endif
16619ea88a15SDmitry Popov 
1662b59c2701SPatrick McHardy 	nf_reset(skb);
16631da177e4SLinus Torvalds 
1664fda9ef5dSDmitry Mishin 	if (sk_filter(sk, skb))
16651da177e4SLinus Torvalds 		goto discard_and_relse;
16661da177e4SLinus Torvalds 
16678b80cda5SEliezer Tamir 	sk_mark_napi_id(sk, skb);
16681da177e4SLinus Torvalds 	skb->dev = NULL;
16691da177e4SLinus Torvalds 
1670c6366184SIngo Molnar 	bh_lock_sock_nested(sk);
16711da177e4SLinus Torvalds 	ret = 0;
16721da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk)) {
16731da177e4SLinus Torvalds 		if (!tcp_prequeue(sk, skb))
16741da177e4SLinus Torvalds 			ret = tcp_v4_do_rcv(sk, skb);
1675da882c1fSEric Dumazet 	} else if (unlikely(sk_add_backlog(sk, skb,
1676da882c1fSEric Dumazet 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
16776b03a53aSZhu Yi 		bh_unlock_sock(sk);
16786cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
16796b03a53aSZhu Yi 		goto discard_and_relse;
16806b03a53aSZhu Yi 	}
16811da177e4SLinus Torvalds 	bh_unlock_sock(sk);
16821da177e4SLinus Torvalds 
16831da177e4SLinus Torvalds 	sock_put(sk);
16841da177e4SLinus Torvalds 
16851da177e4SLinus Torvalds 	return ret;
16861da177e4SLinus Torvalds 
16871da177e4SLinus Torvalds no_tcp_socket:
16881da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
16891da177e4SLinus Torvalds 		goto discard_it;
16901da177e4SLinus Torvalds 
16911da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
16926a5dc9e5SEric Dumazet csum_error:
16936a5dc9e5SEric Dumazet 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
16941da177e4SLinus Torvalds bad_packet:
169563231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
16961da177e4SLinus Torvalds 	} else {
1697cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_send_reset(NULL, skb);
16981da177e4SLinus Torvalds 	}
16991da177e4SLinus Torvalds 
17001da177e4SLinus Torvalds discard_it:
17011da177e4SLinus Torvalds 	/* Discard frame. */
17021da177e4SLinus Torvalds 	kfree_skb(skb);
17031da177e4SLinus Torvalds 	return 0;
17041da177e4SLinus Torvalds 
17051da177e4SLinus Torvalds discard_and_relse:
17061da177e4SLinus Torvalds 	sock_put(sk);
17071da177e4SLinus Torvalds 	goto discard_it;
17081da177e4SLinus Torvalds 
17091da177e4SLinus Torvalds do_time_wait:
17101da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
17119469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
17121da177e4SLinus Torvalds 		goto discard_it;
17131da177e4SLinus Torvalds 	}
17141da177e4SLinus Torvalds 
17156a5dc9e5SEric Dumazet 	if (skb->len < (th->doff << 2)) {
17169469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
17176a5dc9e5SEric Dumazet 		goto bad_packet;
17186a5dc9e5SEric Dumazet 	}
17196a5dc9e5SEric Dumazet 	if (tcp_checksum_complete(skb)) {
17206a5dc9e5SEric Dumazet 		inet_twsk_put(inet_twsk(sk));
17216a5dc9e5SEric Dumazet 		goto csum_error;
17221da177e4SLinus Torvalds 	}
17239469c7b4SYOSHIFUJI Hideaki 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
17241da177e4SLinus Torvalds 	case TCP_TW_SYN: {
1725c346dca1SYOSHIFUJI Hideaki 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1726c67499c0SPavel Emelyanov 							&tcp_hashinfo,
1727da5e3630STom Herbert 							iph->saddr, th->source,
1728eddc9ec5SArnaldo Carvalho de Melo 							iph->daddr, th->dest,
1729463c84b9SArnaldo Carvalho de Melo 							inet_iif(skb));
17301da177e4SLinus Torvalds 		if (sk2) {
17319469c7b4SYOSHIFUJI Hideaki 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
17329469c7b4SYOSHIFUJI Hideaki 			inet_twsk_put(inet_twsk(sk));
17331da177e4SLinus Torvalds 			sk = sk2;
17341da177e4SLinus Torvalds 			goto process;
17351da177e4SLinus Torvalds 		}
17361da177e4SLinus Torvalds 		/* Fall through to ACK */
17371da177e4SLinus Torvalds 	}
17381da177e4SLinus Torvalds 	case TCP_TW_ACK:
17391da177e4SLinus Torvalds 		tcp_v4_timewait_ack(sk, skb);
17401da177e4SLinus Torvalds 		break;
17411da177e4SLinus Torvalds 	case TCP_TW_RST:
17421da177e4SLinus Torvalds 		goto no_tcp_socket;
17431da177e4SLinus Torvalds 	case TCP_TW_SUCCESS:;
17441da177e4SLinus Torvalds 	}
17451da177e4SLinus Torvalds 	goto discard_it;
17461da177e4SLinus Torvalds }
17471da177e4SLinus Torvalds 
1748ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = {
1749ccb7c410SDavid S. Miller 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1750ccb7c410SDavid S. Miller 	.twsk_unique	= tcp_twsk_unique,
1751ccb7c410SDavid S. Miller 	.twsk_destructor= tcp_twsk_destructor,
1752ccb7c410SDavid S. Miller };
17531da177e4SLinus Torvalds 
175463d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
17555d299f3dSEric Dumazet {
17565d299f3dSEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
17575d299f3dSEric Dumazet 
1758ca777effSEric Dumazet 	if (dst) {
17595d299f3dSEric Dumazet 		dst_hold(dst);
17605d299f3dSEric Dumazet 		sk->sk_rx_dst = dst;
17615d299f3dSEric Dumazet 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
17625d299f3dSEric Dumazet 	}
1763ca777effSEric Dumazet }
176463d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set);
17655d299f3dSEric Dumazet 
17663b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = {
17671da177e4SLinus Torvalds 	.queue_xmit	   = ip_queue_xmit,
17681da177e4SLinus Torvalds 	.send_check	   = tcp_v4_send_check,
176932519f11SArnaldo Carvalho de Melo 	.rebuild_header	   = inet_sk_rebuild_header,
17705d299f3dSEric Dumazet 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
17711da177e4SLinus Torvalds 	.conn_request	   = tcp_v4_conn_request,
17721da177e4SLinus Torvalds 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
17731da177e4SLinus Torvalds 	.net_header_len	   = sizeof(struct iphdr),
17741da177e4SLinus Torvalds 	.setsockopt	   = ip_setsockopt,
17751da177e4SLinus Torvalds 	.getsockopt	   = ip_getsockopt,
1776543d9cfeSArnaldo Carvalho de Melo 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1777543d9cfeSArnaldo Carvalho de Melo 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1778ab1e0a13SArnaldo Carvalho de Melo 	.bind_conflict	   = inet_csk_bind_conflict,
17793fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT
17803fdadf7dSDmitry Mishin 	.compat_setsockopt = compat_ip_setsockopt,
17813fdadf7dSDmitry Mishin 	.compat_getsockopt = compat_ip_getsockopt,
17823fdadf7dSDmitry Mishin #endif
17834fab9071SNeal Cardwell 	.mtu_reduced	   = tcp_v4_mtu_reduced,
17841da177e4SLinus Torvalds };
17854bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific);
17861da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/* IPv4 flavour of the TCP MD5 signature callbacks; only built when
 * CONFIG_TCP_MD5SIG is enabled.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_parse	= tcp_v4_parse_md5_keys,
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif
1794cfb6eeb4SYOSHIFUJI Hideaki 
17951da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to
17961da177e4SLinus Torvalds  *       sk_alloc() so need not be done here.
17971da177e4SLinus Torvalds  */
17981da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk)
17991da177e4SLinus Torvalds {
18006687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
18011da177e4SLinus Torvalds 
1802900f65d3SNeal Cardwell 	tcp_init_sock(sk);
18031da177e4SLinus Torvalds 
18048292a17aSArnaldo Carvalho de Melo 	icsk->icsk_af_ops = &ipv4_specific;
1805900f65d3SNeal Cardwell 
1806cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1807ac807fa8SDavid S. Miller 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1808cfb6eeb4SYOSHIFUJI Hideaki #endif
18091da177e4SLinus Torvalds 
18101da177e4SLinus Torvalds 	return 0;
18111da177e4SLinus Torvalds }
18121da177e4SLinus Torvalds 
18137d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk)
18141da177e4SLinus Torvalds {
18151da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
18161da177e4SLinus Torvalds 
18171da177e4SLinus Torvalds 	tcp_clear_xmit_timers(sk);
18181da177e4SLinus Torvalds 
18196687e988SArnaldo Carvalho de Melo 	tcp_cleanup_congestion_control(sk);
1820317a76f9SStephen Hemminger 
18211da177e4SLinus Torvalds 	/* Cleanup up the write buffer. */
1822fe067e8aSDavid S. Miller 	tcp_write_queue_purge(sk);
18231da177e4SLinus Torvalds 
18241da177e4SLinus Torvalds 	/* Cleans up our, hopefully empty, out_of_order_queue. */
18251da177e4SLinus Torvalds 	__skb_queue_purge(&tp->out_of_order_queue);
18261da177e4SLinus Torvalds 
1827cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1828cfb6eeb4SYOSHIFUJI Hideaki 	/* Clean up the MD5 key list, if any */
1829cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info) {
1830a915da9bSEric Dumazet 		tcp_clear_md5_list(sk);
1831a8afca03SEric Dumazet 		kfree_rcu(tp->md5sig_info, rcu);
1832cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = NULL;
1833cfb6eeb4SYOSHIFUJI Hideaki 	}
1834cfb6eeb4SYOSHIFUJI Hideaki #endif
1835cfb6eeb4SYOSHIFUJI Hideaki 
18361da177e4SLinus Torvalds 	/* Clean prequeue, it must be empty really */
18371da177e4SLinus Torvalds 	__skb_queue_purge(&tp->ucopy.prequeue);
18381da177e4SLinus Torvalds 
18391da177e4SLinus Torvalds 	/* Clean up a referenced TCP bind bucket. */
1840463c84b9SArnaldo Carvalho de Melo 	if (inet_csk(sk)->icsk_bind_hash)
1841ab1e0a13SArnaldo Carvalho de Melo 		inet_put_port(sk);
18421da177e4SLinus Torvalds 
1843168a8f58SJerry Chu 	BUG_ON(tp->fastopen_rsk != NULL);
1844435cf559SWilliam Allen Simpson 
1845cf60af03SYuchung Cheng 	/* If socket is aborted during connect operation */
1846cf60af03SYuchung Cheng 	tcp_free_fastopen_req(tp);
1847cf60af03SYuchung Cheng 
1848180d8cd9SGlauber Costa 	sk_sockets_allocated_dec(sk);
1849d1a4c0b3SGlauber Costa 	sock_release_memcg(sk);
18501da177e4SLinus Torvalds }
18511da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock);
18521da177e4SLinus Torvalds 
18531da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
18541da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */
18551da177e4SLinus Torvalds 
1856a8b690f9STom Herbert /*
1857a8b690f9STom Herbert  * Get next listener socket follow cur.  If cur is NULL, get first socket
1858a8b690f9STom Herbert  * starting from bucket given in st->bucket; when st->bucket is zero the
1859a8b690f9STom Herbert  * very first socket in the hash table is returned.
1860a8b690f9STom Herbert  */
18611da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur)
18621da177e4SLinus Torvalds {
1863463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk;
1864c25eb3bfSEric Dumazet 	struct hlist_nulls_node *node;
18651da177e4SLinus Torvalds 	struct sock *sk = cur;
18665caea4eaSEric Dumazet 	struct inet_listen_hashbucket *ilb;
18671da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1868a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
18691da177e4SLinus Torvalds 
18701da177e4SLinus Torvalds 	if (!sk) {
1871a8b690f9STom Herbert 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
18725caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
1873c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
1874a8b690f9STom Herbert 		st->offset = 0;
18751da177e4SLinus Torvalds 		goto get_sk;
18761da177e4SLinus Torvalds 	}
18775caea4eaSEric Dumazet 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
18781da177e4SLinus Torvalds 	++st->num;
1879a8b690f9STom Herbert 	++st->offset;
18801da177e4SLinus Torvalds 
18811da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
188260236fddSArnaldo Carvalho de Melo 		struct request_sock *req = cur;
18831da177e4SLinus Torvalds 
1884463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(st->syn_wait_sk);
18851da177e4SLinus Torvalds 		req = req->dl_next;
18861da177e4SLinus Torvalds 		while (1) {
18871da177e4SLinus Torvalds 			while (req) {
1888bdccc4caSDaniel Lezcano 				if (req->rsk_ops->family == st->family) {
18891da177e4SLinus Torvalds 					cur = req;
18901da177e4SLinus Torvalds 					goto out;
18911da177e4SLinus Torvalds 				}
18921da177e4SLinus Torvalds 				req = req->dl_next;
18931da177e4SLinus Torvalds 			}
189472a3effaSEric Dumazet 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
18951da177e4SLinus Torvalds 				break;
18961da177e4SLinus Torvalds get_req:
1897463c84b9SArnaldo Carvalho de Melo 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
18981da177e4SLinus Torvalds 		}
18991bde5ac4SEric Dumazet 		sk	  = sk_nulls_next(st->syn_wait_sk);
19001da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_LISTENING;
1901463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
19021da177e4SLinus Torvalds 	} else {
1903463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
1904463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1905463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
19061da177e4SLinus Torvalds 			goto start_req;
1907463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
19081bde5ac4SEric Dumazet 		sk = sk_nulls_next(sk);
19091da177e4SLinus Torvalds 	}
19101da177e4SLinus Torvalds get_sk:
1911c25eb3bfSEric Dumazet 	sk_nulls_for_each_from(sk, node) {
19128475ef9fSPavel Emelyanov 		if (!net_eq(sock_net(sk), net))
19138475ef9fSPavel Emelyanov 			continue;
19148475ef9fSPavel Emelyanov 		if (sk->sk_family == st->family) {
19151da177e4SLinus Torvalds 			cur = sk;
19161da177e4SLinus Torvalds 			goto out;
19171da177e4SLinus Torvalds 		}
1918463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
1919463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1920463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
19211da177e4SLinus Torvalds start_req:
19221da177e4SLinus Torvalds 			st->uid		= sock_i_uid(sk);
19231da177e4SLinus Torvalds 			st->syn_wait_sk = sk;
19241da177e4SLinus Torvalds 			st->state	= TCP_SEQ_STATE_OPENREQ;
19251da177e4SLinus Torvalds 			st->sbucket	= 0;
19261da177e4SLinus Torvalds 			goto get_req;
19271da177e4SLinus Torvalds 		}
1928463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
19291da177e4SLinus Torvalds 	}
19305caea4eaSEric Dumazet 	spin_unlock_bh(&ilb->lock);
1931a8b690f9STom Herbert 	st->offset = 0;
19320f7ff927SArnaldo Carvalho de Melo 	if (++st->bucket < INET_LHTABLE_SIZE) {
19335caea4eaSEric Dumazet 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
19345caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
1935c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
19361da177e4SLinus Torvalds 		goto get_sk;
19371da177e4SLinus Torvalds 	}
19381da177e4SLinus Torvalds 	cur = NULL;
19391da177e4SLinus Torvalds out:
19401da177e4SLinus Torvalds 	return cur;
19411da177e4SLinus Torvalds }
19421da177e4SLinus Torvalds 
19431da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
19441da177e4SLinus Torvalds {
1945a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
1946a8b690f9STom Herbert 	void *rc;
1947a8b690f9STom Herbert 
1948a8b690f9STom Herbert 	st->bucket = 0;
1949a8b690f9STom Herbert 	st->offset = 0;
1950a8b690f9STom Herbert 	rc = listening_get_next(seq, NULL);
19511da177e4SLinus Torvalds 
19521da177e4SLinus Torvalds 	while (rc && *pos) {
19531da177e4SLinus Torvalds 		rc = listening_get_next(seq, rc);
19541da177e4SLinus Torvalds 		--*pos;
19551da177e4SLinus Torvalds 	}
19561da177e4SLinus Torvalds 	return rc;
19571da177e4SLinus Torvalds }
19581da177e4SLinus Torvalds 
195905dbc7b5SEric Dumazet static inline bool empty_bucket(const struct tcp_iter_state *st)
19606eac5604SAndi Kleen {
196105dbc7b5SEric Dumazet 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
19626eac5604SAndi Kleen }
19636eac5604SAndi Kleen 
1964a8b690f9STom Herbert /*
1965a8b690f9STom Herbert  * Get first established socket starting from bucket given in st->bucket.
1966a8b690f9STom Herbert  * If st->bucket is zero, the very first socket in the hash is returned.
1967a8b690f9STom Herbert  */
19681da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq)
19691da177e4SLinus Torvalds {
19701da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1971a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
19721da177e4SLinus Torvalds 	void *rc = NULL;
19731da177e4SLinus Torvalds 
1974a8b690f9STom Herbert 	st->offset = 0;
1975a8b690f9STom Herbert 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
19761da177e4SLinus Torvalds 		struct sock *sk;
19773ab5aee7SEric Dumazet 		struct hlist_nulls_node *node;
19789db66bdcSEric Dumazet 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
19791da177e4SLinus Torvalds 
19806eac5604SAndi Kleen 		/* Lockless fast path for the common case of empty buckets */
19816eac5604SAndi Kleen 		if (empty_bucket(st))
19826eac5604SAndi Kleen 			continue;
19836eac5604SAndi Kleen 
19849db66bdcSEric Dumazet 		spin_lock_bh(lock);
19853ab5aee7SEric Dumazet 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1986f40c8174SDaniel Lezcano 			if (sk->sk_family != st->family ||
1987878628fbSYOSHIFUJI Hideaki 			    !net_eq(sock_net(sk), net)) {
19881da177e4SLinus Torvalds 				continue;
19891da177e4SLinus Torvalds 			}
19901da177e4SLinus Torvalds 			rc = sk;
19911da177e4SLinus Torvalds 			goto out;
19921da177e4SLinus Torvalds 		}
19939db66bdcSEric Dumazet 		spin_unlock_bh(lock);
19941da177e4SLinus Torvalds 	}
19951da177e4SLinus Torvalds out:
19961da177e4SLinus Torvalds 	return rc;
19971da177e4SLinus Torvalds }
19981da177e4SLinus Torvalds 
19991da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur)
20001da177e4SLinus Torvalds {
20011da177e4SLinus Torvalds 	struct sock *sk = cur;
20023ab5aee7SEric Dumazet 	struct hlist_nulls_node *node;
20031da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2004a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
20051da177e4SLinus Torvalds 
20061da177e4SLinus Torvalds 	++st->num;
2007a8b690f9STom Herbert 	++st->offset;
20081da177e4SLinus Torvalds 
20093ab5aee7SEric Dumazet 	sk = sk_nulls_next(sk);
20101da177e4SLinus Torvalds 
20113ab5aee7SEric Dumazet 	sk_nulls_for_each_from(sk, node) {
2012878628fbSYOSHIFUJI Hideaki 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
201305dbc7b5SEric Dumazet 			return sk;
20141da177e4SLinus Torvalds 	}
20151da177e4SLinus Torvalds 
201605dbc7b5SEric Dumazet 	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
201705dbc7b5SEric Dumazet 	++st->bucket;
201805dbc7b5SEric Dumazet 	return established_get_first(seq);
20191da177e4SLinus Torvalds }
20201da177e4SLinus Torvalds 
20211da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos)
20221da177e4SLinus Torvalds {
2023a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2024a8b690f9STom Herbert 	void *rc;
2025a8b690f9STom Herbert 
2026a8b690f9STom Herbert 	st->bucket = 0;
2027a8b690f9STom Herbert 	rc = established_get_first(seq);
20281da177e4SLinus Torvalds 
20291da177e4SLinus Torvalds 	while (rc && pos) {
20301da177e4SLinus Torvalds 		rc = established_get_next(seq, rc);
20311da177e4SLinus Torvalds 		--pos;
20321da177e4SLinus Torvalds 	}
20331da177e4SLinus Torvalds 	return rc;
20341da177e4SLinus Torvalds }
20351da177e4SLinus Torvalds 
20361da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
20371da177e4SLinus Torvalds {
20381da177e4SLinus Torvalds 	void *rc;
20391da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
20401da177e4SLinus Torvalds 
20411da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
20421da177e4SLinus Torvalds 	rc	  = listening_get_idx(seq, &pos);
20431da177e4SLinus Torvalds 
20441da177e4SLinus Torvalds 	if (!rc) {
20451da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
20461da177e4SLinus Torvalds 		rc	  = established_get_idx(seq, pos);
20471da177e4SLinus Torvalds 	}
20481da177e4SLinus Torvalds 
20491da177e4SLinus Torvalds 	return rc;
20501da177e4SLinus Torvalds }
20511da177e4SLinus Torvalds 
2052a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq)
2053a8b690f9STom Herbert {
2054a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2055a8b690f9STom Herbert 	int offset = st->offset;
2056a8b690f9STom Herbert 	int orig_num = st->num;
2057a8b690f9STom Herbert 	void *rc = NULL;
2058a8b690f9STom Herbert 
2059a8b690f9STom Herbert 	switch (st->state) {
2060a8b690f9STom Herbert 	case TCP_SEQ_STATE_OPENREQ:
2061a8b690f9STom Herbert 	case TCP_SEQ_STATE_LISTENING:
2062a8b690f9STom Herbert 		if (st->bucket >= INET_LHTABLE_SIZE)
2063a8b690f9STom Herbert 			break;
2064a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_LISTENING;
2065a8b690f9STom Herbert 		rc = listening_get_next(seq, NULL);
2066a8b690f9STom Herbert 		while (offset-- && rc)
2067a8b690f9STom Herbert 			rc = listening_get_next(seq, rc);
2068a8b690f9STom Herbert 		if (rc)
2069a8b690f9STom Herbert 			break;
2070a8b690f9STom Herbert 		st->bucket = 0;
207105dbc7b5SEric Dumazet 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2072a8b690f9STom Herbert 		/* Fallthrough */
2073a8b690f9STom Herbert 	case TCP_SEQ_STATE_ESTABLISHED:
2074a8b690f9STom Herbert 		if (st->bucket > tcp_hashinfo.ehash_mask)
2075a8b690f9STom Herbert 			break;
2076a8b690f9STom Herbert 		rc = established_get_first(seq);
2077a8b690f9STom Herbert 		while (offset-- && rc)
2078a8b690f9STom Herbert 			rc = established_get_next(seq, rc);
2079a8b690f9STom Herbert 	}
2080a8b690f9STom Herbert 
2081a8b690f9STom Herbert 	st->num = orig_num;
2082a8b690f9STom Herbert 
2083a8b690f9STom Herbert 	return rc;
2084a8b690f9STom Herbert }
2085a8b690f9STom Herbert 
20861da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
20871da177e4SLinus Torvalds {
20881da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2089a8b690f9STom Herbert 	void *rc;
2090a8b690f9STom Herbert 
2091a8b690f9STom Herbert 	if (*pos && *pos == st->last_pos) {
2092a8b690f9STom Herbert 		rc = tcp_seek_last_pos(seq);
2093a8b690f9STom Herbert 		if (rc)
2094a8b690f9STom Herbert 			goto out;
2095a8b690f9STom Herbert 	}
2096a8b690f9STom Herbert 
20971da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
20981da177e4SLinus Torvalds 	st->num = 0;
2099a8b690f9STom Herbert 	st->bucket = 0;
2100a8b690f9STom Herbert 	st->offset = 0;
2101a8b690f9STom Herbert 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2102a8b690f9STom Herbert 
2103a8b690f9STom Herbert out:
2104a8b690f9STom Herbert 	st->last_pos = *pos;
2105a8b690f9STom Herbert 	return rc;
21061da177e4SLinus Torvalds }
21071da177e4SLinus Torvalds 
21081da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
21091da177e4SLinus Torvalds {
2110a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
21111da177e4SLinus Torvalds 	void *rc = NULL;
21121da177e4SLinus Torvalds 
21131da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
21141da177e4SLinus Torvalds 		rc = tcp_get_idx(seq, 0);
21151da177e4SLinus Torvalds 		goto out;
21161da177e4SLinus Torvalds 	}
21171da177e4SLinus Torvalds 
21181da177e4SLinus Torvalds 	switch (st->state) {
21191da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
21201da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
21211da177e4SLinus Torvalds 		rc = listening_get_next(seq, v);
21221da177e4SLinus Torvalds 		if (!rc) {
21231da177e4SLinus Torvalds 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2124a8b690f9STom Herbert 			st->bucket = 0;
2125a8b690f9STom Herbert 			st->offset = 0;
21261da177e4SLinus Torvalds 			rc	  = established_get_first(seq);
21271da177e4SLinus Torvalds 		}
21281da177e4SLinus Torvalds 		break;
21291da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
21301da177e4SLinus Torvalds 		rc = established_get_next(seq, v);
21311da177e4SLinus Torvalds 		break;
21321da177e4SLinus Torvalds 	}
21331da177e4SLinus Torvalds out:
21341da177e4SLinus Torvalds 	++*pos;
2135a8b690f9STom Herbert 	st->last_pos = *pos;
21361da177e4SLinus Torvalds 	return rc;
21371da177e4SLinus Torvalds }
21381da177e4SLinus Torvalds 
21391da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v)
21401da177e4SLinus Torvalds {
21411da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
21421da177e4SLinus Torvalds 
21431da177e4SLinus Torvalds 	switch (st->state) {
21441da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
21451da177e4SLinus Torvalds 		if (v) {
2146463c84b9SArnaldo Carvalho de Melo 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2147463c84b9SArnaldo Carvalho de Melo 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
21481da177e4SLinus Torvalds 		}
21491da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
21501da177e4SLinus Torvalds 		if (v != SEQ_START_TOKEN)
21515caea4eaSEric Dumazet 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
21521da177e4SLinus Torvalds 		break;
21531da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
21541da177e4SLinus Torvalds 		if (v)
21559db66bdcSEric Dumazet 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
21561da177e4SLinus Torvalds 		break;
21571da177e4SLinus Torvalds 	}
21581da177e4SLinus Torvalds }
21591da177e4SLinus Torvalds 
216073cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file)
21611da177e4SLinus Torvalds {
2162d9dda78bSAl Viro 	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
21631da177e4SLinus Torvalds 	struct tcp_iter_state *s;
216452d6f3f1SDenis V. Lunev 	int err;
21651da177e4SLinus Torvalds 
216652d6f3f1SDenis V. Lunev 	err = seq_open_net(inode, file, &afinfo->seq_ops,
216752d6f3f1SDenis V. Lunev 			  sizeof(struct tcp_iter_state));
216852d6f3f1SDenis V. Lunev 	if (err < 0)
216952d6f3f1SDenis V. Lunev 		return err;
2170f40c8174SDaniel Lezcano 
217152d6f3f1SDenis V. Lunev 	s = ((struct seq_file *)file->private_data)->private;
21721da177e4SLinus Torvalds 	s->family		= afinfo->family;
2173a8b690f9STom Herbert 	s->last_pos		= 0;
2174f40c8174SDaniel Lezcano 	return 0;
2175f40c8174SDaniel Lezcano }
217673cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open);
2177f40c8174SDaniel Lezcano 
21786f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
21791da177e4SLinus Torvalds {
21801da177e4SLinus Torvalds 	int rc = 0;
21811da177e4SLinus Torvalds 	struct proc_dir_entry *p;
21821da177e4SLinus Torvalds 
21839427c4b3SDenis V. Lunev 	afinfo->seq_ops.start		= tcp_seq_start;
21849427c4b3SDenis V. Lunev 	afinfo->seq_ops.next		= tcp_seq_next;
21859427c4b3SDenis V. Lunev 	afinfo->seq_ops.stop		= tcp_seq_stop;
21869427c4b3SDenis V. Lunev 
218784841c3cSDenis V. Lunev 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
218873cb88ecSArjan van de Ven 			     afinfo->seq_fops, afinfo);
218984841c3cSDenis V. Lunev 	if (!p)
21901da177e4SLinus Torvalds 		rc = -ENOMEM;
21911da177e4SLinus Torvalds 	return rc;
21921da177e4SLinus Torvalds }
21934bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register);
21941da177e4SLinus Torvalds 
21956f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
21961da177e4SLinus Torvalds {
2197ece31ffdSGao feng 	remove_proc_entry(afinfo->name, net->proc_net);
21981da177e4SLinus Torvalds }
21994bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister);
22001da177e4SLinus Torvalds 
2201cf533ea5SEric Dumazet static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2202652586dfSTetsuo Handa 			 struct seq_file *f, int i, kuid_t uid)
22031da177e4SLinus Torvalds {
22042e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
2205a399a805SEric Dumazet 	long delta = req->expires - jiffies;
22061da177e4SLinus Torvalds 
22075e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2208652586dfSTetsuo Handa 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
22091da177e4SLinus Torvalds 		i,
2210634fb979SEric Dumazet 		ireq->ir_loc_addr,
2211c720c7e8SEric Dumazet 		ntohs(inet_sk(sk)->inet_sport),
2212634fb979SEric Dumazet 		ireq->ir_rmt_addr,
2213634fb979SEric Dumazet 		ntohs(ireq->ir_rmt_port),
22141da177e4SLinus Torvalds 		TCP_SYN_RECV,
22151da177e4SLinus Torvalds 		0, 0, /* could print option size, but that is af dependent. */
22161da177e4SLinus Torvalds 		1,    /* timers active (only the expire timer) */
2217a399a805SEric Dumazet 		jiffies_delta_to_clock_t(delta),
2218e6c022a4SEric Dumazet 		req->num_timeout,
2219a7cb5a49SEric W. Biederman 		from_kuid_munged(seq_user_ns(f), uid),
22201da177e4SLinus Torvalds 		0,  /* non standard timer */
22211da177e4SLinus Torvalds 		0, /* open_requests have no inode */
22221da177e4SLinus Torvalds 		atomic_read(&sk->sk_refcnt),
2223652586dfSTetsuo Handa 		req);
22241da177e4SLinus Torvalds }
22251da177e4SLinus Torvalds 
2226652586dfSTetsuo Handa static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
22271da177e4SLinus Torvalds {
22281da177e4SLinus Torvalds 	int timer_active;
22291da177e4SLinus Torvalds 	unsigned long timer_expires;
2230cf533ea5SEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
2231cf4c6bf8SIlpo Järvinen 	const struct inet_connection_sock *icsk = inet_csk(sk);
2232cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
2233168a8f58SJerry Chu 	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
2234c720c7e8SEric Dumazet 	__be32 dest = inet->inet_daddr;
2235c720c7e8SEric Dumazet 	__be32 src = inet->inet_rcv_saddr;
2236c720c7e8SEric Dumazet 	__u16 destp = ntohs(inet->inet_dport);
2237c720c7e8SEric Dumazet 	__u16 srcp = ntohs(inet->inet_sport);
223849d09007SEric Dumazet 	int rx_queue;
22391da177e4SLinus Torvalds 
22406ba8a3b1SNandita Dukkipati 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
22416ba8a3b1SNandita Dukkipati 	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
22426ba8a3b1SNandita Dukkipati 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
22431da177e4SLinus Torvalds 		timer_active	= 1;
2244463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2245463c84b9SArnaldo Carvalho de Melo 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
22461da177e4SLinus Torvalds 		timer_active	= 4;
2247463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2248cf4c6bf8SIlpo Järvinen 	} else if (timer_pending(&sk->sk_timer)) {
22491da177e4SLinus Torvalds 		timer_active	= 2;
2250cf4c6bf8SIlpo Järvinen 		timer_expires	= sk->sk_timer.expires;
22511da177e4SLinus Torvalds 	} else {
22521da177e4SLinus Torvalds 		timer_active	= 0;
22531da177e4SLinus Torvalds 		timer_expires = jiffies;
22541da177e4SLinus Torvalds 	}
22551da177e4SLinus Torvalds 
225649d09007SEric Dumazet 	if (sk->sk_state == TCP_LISTEN)
225749d09007SEric Dumazet 		rx_queue = sk->sk_ack_backlog;
225849d09007SEric Dumazet 	else
225949d09007SEric Dumazet 		/*
226049d09007SEric Dumazet 		 * because we dont lock socket, we might find a transient negative value
226149d09007SEric Dumazet 		 */
226249d09007SEric Dumazet 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
226349d09007SEric Dumazet 
22645e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2265652586dfSTetsuo Handa 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2266cf4c6bf8SIlpo Järvinen 		i, src, srcp, dest, destp, sk->sk_state,
226747da8ee6SSridhar Samudrala 		tp->write_seq - tp->snd_una,
226849d09007SEric Dumazet 		rx_queue,
22691da177e4SLinus Torvalds 		timer_active,
2270a399a805SEric Dumazet 		jiffies_delta_to_clock_t(timer_expires - jiffies),
2271463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_retransmits,
2272a7cb5a49SEric W. Biederman 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
22736687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out,
2274cf4c6bf8SIlpo Järvinen 		sock_i_ino(sk),
2275cf4c6bf8SIlpo Järvinen 		atomic_read(&sk->sk_refcnt), sk,
22767be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_rto),
22777be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2278463c84b9SArnaldo Carvalho de Melo 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
22791da177e4SLinus Torvalds 		tp->snd_cwnd,
2280168a8f58SJerry Chu 		sk->sk_state == TCP_LISTEN ?
2281168a8f58SJerry Chu 		    (fastopenq ? fastopenq->max_qlen : 0) :
2282652586dfSTetsuo Handa 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
22831da177e4SLinus Torvalds }
22841da177e4SLinus Torvalds 
2285cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2286652586dfSTetsuo Handa 			       struct seq_file *f, int i)
22871da177e4SLinus Torvalds {
228823f33c2dSAl Viro 	__be32 dest, src;
22891da177e4SLinus Torvalds 	__u16 destp, srcp;
2290e2a1d3e4SEric Dumazet 	s32 delta = tw->tw_ttd - inet_tw_time_stamp();
22911da177e4SLinus Torvalds 
22921da177e4SLinus Torvalds 	dest  = tw->tw_daddr;
22931da177e4SLinus Torvalds 	src   = tw->tw_rcv_saddr;
22941da177e4SLinus Torvalds 	destp = ntohs(tw->tw_dport);
22951da177e4SLinus Torvalds 	srcp  = ntohs(tw->tw_sport);
22961da177e4SLinus Torvalds 
22975e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2298652586dfSTetsuo Handa 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
22991da177e4SLinus Torvalds 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2300a399a805SEric Dumazet 		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2301652586dfSTetsuo Handa 		atomic_read(&tw->tw_refcnt), tw);
23021da177e4SLinus Torvalds }
23031da177e4SLinus Torvalds 
23041da177e4SLinus Torvalds #define TMPSZ 150
23051da177e4SLinus Torvalds 
23061da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v)
23071da177e4SLinus Torvalds {
23081da177e4SLinus Torvalds 	struct tcp_iter_state *st;
230905dbc7b5SEric Dumazet 	struct sock *sk = v;
23101da177e4SLinus Torvalds 
2311652586dfSTetsuo Handa 	seq_setwidth(seq, TMPSZ - 1);
23121da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
2313652586dfSTetsuo Handa 		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
23141da177e4SLinus Torvalds 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
23151da177e4SLinus Torvalds 			   "inode");
23161da177e4SLinus Torvalds 		goto out;
23171da177e4SLinus Torvalds 	}
23181da177e4SLinus Torvalds 	st = seq->private;
23191da177e4SLinus Torvalds 
23201da177e4SLinus Torvalds 	switch (st->state) {
23211da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
23221da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
232305dbc7b5SEric Dumazet 		if (sk->sk_state == TCP_TIME_WAIT)
2324652586dfSTetsuo Handa 			get_timewait4_sock(v, seq, st->num);
232505dbc7b5SEric Dumazet 		else
2326652586dfSTetsuo Handa 			get_tcp4_sock(v, seq, st->num);
23271da177e4SLinus Torvalds 		break;
23281da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
2329652586dfSTetsuo Handa 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
23301da177e4SLinus Torvalds 		break;
23311da177e4SLinus Torvalds 	}
23321da177e4SLinus Torvalds out:
2333652586dfSTetsuo Handa 	seq_pad(seq, '\n');
23341da177e4SLinus Torvalds 	return 0;
23351da177e4SLinus Torvalds }
23361da177e4SLinus Torvalds 
233773cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = {
233873cb88ecSArjan van de Ven 	.owner   = THIS_MODULE,
233973cb88ecSArjan van de Ven 	.open    = tcp_seq_open,
234073cb88ecSArjan van de Ven 	.read    = seq_read,
234173cb88ecSArjan van de Ven 	.llseek  = seq_lseek,
234273cb88ecSArjan van de Ven 	.release = seq_release_net
234373cb88ecSArjan van de Ven };
234473cb88ecSArjan van de Ven 
23451da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = {
23461da177e4SLinus Torvalds 	.name		= "tcp",
23471da177e4SLinus Torvalds 	.family		= AF_INET,
234873cb88ecSArjan van de Ven 	.seq_fops	= &tcp_afinfo_seq_fops,
23499427c4b3SDenis V. Lunev 	.seq_ops	= {
23509427c4b3SDenis V. Lunev 		.show		= tcp4_seq_show,
23519427c4b3SDenis V. Lunev 	},
23521da177e4SLinus Torvalds };
23531da177e4SLinus Torvalds 
23542c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net)
2355757764f6SPavel Emelyanov {
2356757764f6SPavel Emelyanov 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2357757764f6SPavel Emelyanov }
2358757764f6SPavel Emelyanov 
23592c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net)
2360757764f6SPavel Emelyanov {
2361757764f6SPavel Emelyanov 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2362757764f6SPavel Emelyanov }
2363757764f6SPavel Emelyanov 
2364757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = {
2365757764f6SPavel Emelyanov 	.init = tcp4_proc_init_net,
2366757764f6SPavel Emelyanov 	.exit = tcp4_proc_exit_net,
2367757764f6SPavel Emelyanov };
2368757764f6SPavel Emelyanov 
23691da177e4SLinus Torvalds int __init tcp4_proc_init(void)
23701da177e4SLinus Torvalds {
2371757764f6SPavel Emelyanov 	return register_pernet_subsys(&tcp4_net_ops);
23721da177e4SLinus Torvalds }
23731da177e4SLinus Torvalds 
23741da177e4SLinus Torvalds void tcp4_proc_exit(void)
23751da177e4SLinus Torvalds {
2376757764f6SPavel Emelyanov 	unregister_pernet_subsys(&tcp4_net_ops);
23771da177e4SLinus Torvalds }
23781da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
23791da177e4SLinus Torvalds 
23801da177e4SLinus Torvalds struct proto tcp_prot = {
23811da177e4SLinus Torvalds 	.name			= "TCP",
23821da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
23831da177e4SLinus Torvalds 	.close			= tcp_close,
23841da177e4SLinus Torvalds 	.connect		= tcp_v4_connect,
23851da177e4SLinus Torvalds 	.disconnect		= tcp_disconnect,
2386463c84b9SArnaldo Carvalho de Melo 	.accept			= inet_csk_accept,
23871da177e4SLinus Torvalds 	.ioctl			= tcp_ioctl,
23881da177e4SLinus Torvalds 	.init			= tcp_v4_init_sock,
23891da177e4SLinus Torvalds 	.destroy		= tcp_v4_destroy_sock,
23901da177e4SLinus Torvalds 	.shutdown		= tcp_shutdown,
23911da177e4SLinus Torvalds 	.setsockopt		= tcp_setsockopt,
23921da177e4SLinus Torvalds 	.getsockopt		= tcp_getsockopt,
23931da177e4SLinus Torvalds 	.recvmsg		= tcp_recvmsg,
23947ba42910SChangli Gao 	.sendmsg		= tcp_sendmsg,
23957ba42910SChangli Gao 	.sendpage		= tcp_sendpage,
23961da177e4SLinus Torvalds 	.backlog_rcv		= tcp_v4_do_rcv,
239746d3ceabSEric Dumazet 	.release_cb		= tcp_release_cb,
2398ab1e0a13SArnaldo Carvalho de Melo 	.hash			= inet_hash,
2399ab1e0a13SArnaldo Carvalho de Melo 	.unhash			= inet_unhash,
2400ab1e0a13SArnaldo Carvalho de Melo 	.get_port		= inet_csk_get_port,
24011da177e4SLinus Torvalds 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2402c9bee3b7SEric Dumazet 	.stream_memory_free	= tcp_stream_memory_free,
24031da177e4SLinus Torvalds 	.sockets_allocated	= &tcp_sockets_allocated,
24040a5578cfSArnaldo Carvalho de Melo 	.orphan_count		= &tcp_orphan_count,
24051da177e4SLinus Torvalds 	.memory_allocated	= &tcp_memory_allocated,
24061da177e4SLinus Torvalds 	.memory_pressure	= &tcp_memory_pressure,
2407a4fe34bfSEric W. Biederman 	.sysctl_mem		= sysctl_tcp_mem,
24081da177e4SLinus Torvalds 	.sysctl_wmem		= sysctl_tcp_wmem,
24091da177e4SLinus Torvalds 	.sysctl_rmem		= sysctl_tcp_rmem,
24101da177e4SLinus Torvalds 	.max_header		= MAX_TCP_HEADER,
24111da177e4SLinus Torvalds 	.obj_size		= sizeof(struct tcp_sock),
24123ab5aee7SEric Dumazet 	.slab_flags		= SLAB_DESTROY_BY_RCU,
24136d6ee43eSArnaldo Carvalho de Melo 	.twsk_prot		= &tcp_timewait_sock_ops,
241460236fddSArnaldo Carvalho de Melo 	.rsk_prot		= &tcp_request_sock_ops,
241539d8cda7SPavel Emelyanov 	.h.hashinfo		= &tcp_hashinfo,
24167ba42910SChangli Gao 	.no_autobind		= true,
2417543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
2418543d9cfeSArnaldo Carvalho de Melo 	.compat_setsockopt	= compat_tcp_setsockopt,
2419543d9cfeSArnaldo Carvalho de Melo 	.compat_getsockopt	= compat_tcp_getsockopt,
2420543d9cfeSArnaldo Carvalho de Melo #endif
2421c255a458SAndrew Morton #ifdef CONFIG_MEMCG_KMEM
2422d1a4c0b3SGlauber Costa 	.init_cgroup		= tcp_init_cgroup,
2423d1a4c0b3SGlauber Costa 	.destroy_cgroup		= tcp_destroy_cgroup,
2424d1a4c0b3SGlauber Costa 	.proto_cgroup		= tcp_proto_cgroup,
2425d1a4c0b3SGlauber Costa #endif
24261da177e4SLinus Torvalds };
24274bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot);
24281da177e4SLinus Torvalds 
2429046ee902SDenis V. Lunev static int __net_init tcp_sk_init(struct net *net)
2430046ee902SDenis V. Lunev {
24315d134f1cSHannes Frederic Sowa 	net->ipv4.sysctl_tcp_ecn = 2;
2432be9f4a44SEric Dumazet 	return 0;
2433046ee902SDenis V. Lunev }
2434046ee902SDenis V. Lunev 
2435046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net)
2436046ee902SDenis V. Lunev {
2437b099ce26SEric W. Biederman }
2438b099ce26SEric W. Biederman 
2439b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2440b099ce26SEric W. Biederman {
2441b099ce26SEric W. Biederman 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2442046ee902SDenis V. Lunev }
2443046ee902SDenis V. Lunev 
2444046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = {
2445046ee902SDenis V. Lunev        .init	   = tcp_sk_init,
2446046ee902SDenis V. Lunev        .exit	   = tcp_sk_exit,
2447b099ce26SEric W. Biederman        .exit_batch = tcp_sk_exit_batch,
2448046ee902SDenis V. Lunev };
2449046ee902SDenis V. Lunev 
24509b0f976fSDenis V. Lunev void __init tcp_v4_init(void)
24511da177e4SLinus Torvalds {
24525caea4eaSEric Dumazet 	inet_hashinfo_init(&tcp_hashinfo);
24536a1b3054SEric W. Biederman 	if (register_pernet_subsys(&tcp_sk_ops))
24541da177e4SLinus Torvalds 		panic("Failed to create the TCP control socket.\n");
24551da177e4SLinus Torvalds }
2456