xref: /linux/net/ipv4/tcp_ipv4.c (revision 7586eceb0abc0ea1c2b023e3e5d4dfd4ff40930a)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *		IPv4 specific functions
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *		code split from:
121da177e4SLinus Torvalds  *		linux/ipv4/tcp.c
131da177e4SLinus Torvalds  *		linux/ipv4/tcp_input.c
141da177e4SLinus Torvalds  *		linux/ipv4/tcp_output.c
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *		See tcp.c for author information
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
191da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
201da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
211da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
221da177e4SLinus Torvalds  */
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds /*
251da177e4SLinus Torvalds  * Changes:
261da177e4SLinus Torvalds  *		David S. Miller	:	New socket lookup architecture.
271da177e4SLinus Torvalds  *					This code is dedicated to John Dyson.
281da177e4SLinus Torvalds  *		David S. Miller :	Change semantics of established hash,
291da177e4SLinus Torvalds  *					half is devoted to TIME_WAIT sockets
301da177e4SLinus Torvalds  *					and the rest go in the other half.
311da177e4SLinus Torvalds  *		Andi Kleen :		Add support for syncookies and fixed
321da177e4SLinus Torvalds  *					some bugs: ip options weren't passed to
331da177e4SLinus Torvalds  *					the TCP layer, missed a check for an
341da177e4SLinus Torvalds  *					ACK bit.
351da177e4SLinus Torvalds  *		Andi Kleen :		Implemented fast path mtu discovery.
361da177e4SLinus Torvalds  *	     				Fixed many serious bugs in the
3760236fddSArnaldo Carvalho de Melo  *					request_sock handling and moved
381da177e4SLinus Torvalds  *					most of it into the af independent code.
391da177e4SLinus Torvalds  *					Added tail drop and some other bugfixes.
40caa20d9aSStephen Hemminger  *					Added new listen semantics.
411da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
421da177e4SLinus Torvalds  *	Juan Jose Ciarlante:		ip_dynaddr bits
431da177e4SLinus Torvalds  *		Andi Kleen:		various fixes.
441da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
451da177e4SLinus Torvalds  *					coma.
461da177e4SLinus Torvalds  *	Andi Kleen		:	Fix new listen.
471da177e4SLinus Torvalds  *	Andi Kleen		:	Fix accept error reporting.
481da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
491da177e4SLinus Torvalds  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
501da177e4SLinus Torvalds  *					a single port at the same time.
511da177e4SLinus Torvalds  */
521da177e4SLinus Torvalds 
53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt
541da177e4SLinus Torvalds 
55eb4dea58SHerbert Xu #include <linux/bottom_half.h>
561da177e4SLinus Torvalds #include <linux/types.h>
571da177e4SLinus Torvalds #include <linux/fcntl.h>
581da177e4SLinus Torvalds #include <linux/module.h>
591da177e4SLinus Torvalds #include <linux/random.h>
601da177e4SLinus Torvalds #include <linux/cache.h>
611da177e4SLinus Torvalds #include <linux/jhash.h>
621da177e4SLinus Torvalds #include <linux/init.h>
631da177e4SLinus Torvalds #include <linux/times.h>
645a0e3ad6STejun Heo #include <linux/slab.h>
651da177e4SLinus Torvalds 
66457c4cbcSEric W. Biederman #include <net/net_namespace.h>
671da177e4SLinus Torvalds #include <net/icmp.h>
68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
691da177e4SLinus Torvalds #include <net/tcp.h>
7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h>
711da177e4SLinus Torvalds #include <net/ipv6.h>
721da177e4SLinus Torvalds #include <net/inet_common.h>
736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h>
741da177e4SLinus Torvalds #include <net/xfrm.h>
751a2449a8SChris Leech #include <net/netdma.h>
766e5714eaSDavid S. Miller #include <net/secure_seq.h>
77d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h>
781da177e4SLinus Torvalds 
791da177e4SLinus Torvalds #include <linux/inet.h>
801da177e4SLinus Torvalds #include <linux/ipv6.h>
811da177e4SLinus Torvalds #include <linux/stddef.h>
821da177e4SLinus Torvalds #include <linux/proc_fs.h>
831da177e4SLinus Torvalds #include <linux/seq_file.h>
841da177e4SLinus Torvalds 
85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h>
86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h>
87cfb6eeb4SYOSHIFUJI Hideaki 
88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly;
89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly;
904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency);
911da177e4SLinus Torvalds 
921da177e4SLinus Torvalds 
93cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
94a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
96cfb6eeb4SYOSHIFUJI Hideaki #endif
97cfb6eeb4SYOSHIFUJI Hideaki 
985caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo;
994bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo);
1001da177e4SLinus Torvalds 
101cf533ea5SEric Dumazet static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1021da177e4SLinus Torvalds {
103eddc9ec5SArnaldo Carvalho de Melo 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104eddc9ec5SArnaldo Carvalho de Melo 					  ip_hdr(skb)->saddr,
105aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->dest,
106aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->source);
1071da177e4SLinus Torvalds }
1081da177e4SLinus Torvalds 
1096d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
1106d6ee43eSArnaldo Carvalho de Melo {
1116d6ee43eSArnaldo Carvalho de Melo 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
1126d6ee43eSArnaldo Carvalho de Melo 	struct tcp_sock *tp = tcp_sk(sk);
1136d6ee43eSArnaldo Carvalho de Melo 
1146d6ee43eSArnaldo Carvalho de Melo 	/* With PAWS, it is safe from the viewpoint
1156d6ee43eSArnaldo Carvalho de Melo 	   of data integrity. Even without PAWS it is safe provided sequence
1166d6ee43eSArnaldo Carvalho de Melo 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
1176d6ee43eSArnaldo Carvalho de Melo 
1186d6ee43eSArnaldo Carvalho de Melo 	   Actually, the idea is close to VJ's one, only timestamp cache is
1196d6ee43eSArnaldo Carvalho de Melo 	   held not per host, but per port pair and TW bucket is used as state
1206d6ee43eSArnaldo Carvalho de Melo 	   holder.
1216d6ee43eSArnaldo Carvalho de Melo 
1226d6ee43eSArnaldo Carvalho de Melo 	   If TW bucket has been already destroyed we fall back to VJ's scheme
1236d6ee43eSArnaldo Carvalho de Melo 	   and use initial timestamp retrieved from peer table.
1246d6ee43eSArnaldo Carvalho de Melo 	 */
1256d6ee43eSArnaldo Carvalho de Melo 	if (tcptw->tw_ts_recent_stamp &&
1266d6ee43eSArnaldo Carvalho de Melo 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
1279d729f72SJames Morris 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
1286d6ee43eSArnaldo Carvalho de Melo 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1296d6ee43eSArnaldo Carvalho de Melo 		if (tp->write_seq == 0)
1306d6ee43eSArnaldo Carvalho de Melo 			tp->write_seq = 1;
1316d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
1326d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1336d6ee43eSArnaldo Carvalho de Melo 		sock_hold(sktw);
1346d6ee43eSArnaldo Carvalho de Melo 		return 1;
1356d6ee43eSArnaldo Carvalho de Melo 	}
1366d6ee43eSArnaldo Carvalho de Melo 
1376d6ee43eSArnaldo Carvalho de Melo 	return 0;
1386d6ee43eSArnaldo Carvalho de Melo }
1396d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique);
1406d6ee43eSArnaldo Carvalho de Melo 
141ee995283SPavel Emelyanov static int tcp_repair_connect(struct sock *sk)
142ee995283SPavel Emelyanov {
143ee995283SPavel Emelyanov 	tcp_connect_init(sk);
144ee995283SPavel Emelyanov 	tcp_finish_connect(sk, NULL);
145ee995283SPavel Emelyanov 
146ee995283SPavel Emelyanov 	return 0;
147ee995283SPavel Emelyanov }
148ee995283SPavel Emelyanov 
1491da177e4SLinus Torvalds /* This will initiate an outgoing connection. */
1501da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1511da177e4SLinus Torvalds {
1522d7192d6SDavid S. Miller 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1531da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
1541da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
155dca8b089SDavid S. Miller 	__be16 orig_sport, orig_dport;
156bada8adcSAl Viro 	__be32 daddr, nexthop;
157da905bd1SDavid S. Miller 	struct flowi4 *fl4;
1582d7192d6SDavid S. Miller 	struct rtable *rt;
1591da177e4SLinus Torvalds 	int err;
160f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
1611da177e4SLinus Torvalds 
1621da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_in))
1631da177e4SLinus Torvalds 		return -EINVAL;
1641da177e4SLinus Torvalds 
1651da177e4SLinus Torvalds 	if (usin->sin_family != AF_INET)
1661da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
1671da177e4SLinus Torvalds 
1681da177e4SLinus Torvalds 	nexthop = daddr = usin->sin_addr.s_addr;
169f6d8bd05SEric Dumazet 	inet_opt = rcu_dereference_protected(inet->inet_opt,
170f6d8bd05SEric Dumazet 					     sock_owned_by_user(sk));
171f6d8bd05SEric Dumazet 	if (inet_opt && inet_opt->opt.srr) {
1721da177e4SLinus Torvalds 		if (!daddr)
1731da177e4SLinus Torvalds 			return -EINVAL;
174f6d8bd05SEric Dumazet 		nexthop = inet_opt->opt.faddr;
1751da177e4SLinus Torvalds 	}
1761da177e4SLinus Torvalds 
177dca8b089SDavid S. Miller 	orig_sport = inet->inet_sport;
178dca8b089SDavid S. Miller 	orig_dport = usin->sin_port;
179da905bd1SDavid S. Miller 	fl4 = &inet->cork.fl.u.ip4;
180da905bd1SDavid S. Miller 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
1811da177e4SLinus Torvalds 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
1821da177e4SLinus Torvalds 			      IPPROTO_TCP,
183abdf7e72SDavid S. Miller 			      orig_sport, orig_dport, sk, true);
184b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
185b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
186b23dd4feSDavid S. Miller 		if (err == -ENETUNREACH)
1877c73a6faSPavel Emelyanov 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
188b23dd4feSDavid S. Miller 		return err;
189584bdf8cSWei Dong 	}
1901da177e4SLinus Torvalds 
1911da177e4SLinus Torvalds 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
1921da177e4SLinus Torvalds 		ip_rt_put(rt);
1931da177e4SLinus Torvalds 		return -ENETUNREACH;
1941da177e4SLinus Torvalds 	}
1951da177e4SLinus Torvalds 
196f6d8bd05SEric Dumazet 	if (!inet_opt || !inet_opt->opt.srr)
197da905bd1SDavid S. Miller 		daddr = fl4->daddr;
1981da177e4SLinus Torvalds 
199c720c7e8SEric Dumazet 	if (!inet->inet_saddr)
200da905bd1SDavid S. Miller 		inet->inet_saddr = fl4->saddr;
201c720c7e8SEric Dumazet 	inet->inet_rcv_saddr = inet->inet_saddr;
2021da177e4SLinus Torvalds 
203c720c7e8SEric Dumazet 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
2041da177e4SLinus Torvalds 		/* Reset inherited state */
2051da177e4SLinus Torvalds 		tp->rx_opt.ts_recent	   = 0;
2061da177e4SLinus Torvalds 		tp->rx_opt.ts_recent_stamp = 0;
207ee995283SPavel Emelyanov 		if (likely(!tp->repair))
2081da177e4SLinus Torvalds 			tp->write_seq	   = 0;
2091da177e4SLinus Torvalds 	}
2101da177e4SLinus Torvalds 
211295ff7edSArnaldo Carvalho de Melo 	if (tcp_death_row.sysctl_tw_recycle &&
212da905bd1SDavid S. Miller 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
213ed2361e6SDavid S. Miller 		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
2147174259eSArnaldo Carvalho de Melo 		/*
2157174259eSArnaldo Carvalho de Melo 		 * VJ's idea. We save last timestamp seen from
2167174259eSArnaldo Carvalho de Melo 		 * the destination in peer table, when entering state
2177174259eSArnaldo Carvalho de Melo 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
2187174259eSArnaldo Carvalho de Melo 		 * when trying new connection.
2191da177e4SLinus Torvalds 		 */
220317fe0e6SEric Dumazet 		if (peer) {
221317fe0e6SEric Dumazet 			inet_peer_refcheck(peer);
222317fe0e6SEric Dumazet 			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
2231da177e4SLinus Torvalds 				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
2241da177e4SLinus Torvalds 				tp->rx_opt.ts_recent = peer->tcp_ts;
2251da177e4SLinus Torvalds 			}
2261da177e4SLinus Torvalds 		}
227317fe0e6SEric Dumazet 	}
2281da177e4SLinus Torvalds 
229c720c7e8SEric Dumazet 	inet->inet_dport = usin->sin_port;
230c720c7e8SEric Dumazet 	inet->inet_daddr = daddr;
2311da177e4SLinus Torvalds 
232d83d8461SArnaldo Carvalho de Melo 	inet_csk(sk)->icsk_ext_hdr_len = 0;
233f6d8bd05SEric Dumazet 	if (inet_opt)
234f6d8bd05SEric Dumazet 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
2351da177e4SLinus Torvalds 
236bee7ca9eSWilliam Allen Simpson 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
2371da177e4SLinus Torvalds 
2381da177e4SLinus Torvalds 	/* Socket identity is still unknown (sport may be zero).
2391da177e4SLinus Torvalds 	 * However we set state to SYN-SENT and not releasing socket
2401da177e4SLinus Torvalds 	 * lock select source port, enter ourselves into the hash tables and
2411da177e4SLinus Torvalds 	 * complete initialization after this.
2421da177e4SLinus Torvalds 	 */
2431da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_SYN_SENT);
244a7f5e7f1SArnaldo Carvalho de Melo 	err = inet_hash_connect(&tcp_death_row, sk);
2451da177e4SLinus Torvalds 	if (err)
2461da177e4SLinus Torvalds 		goto failure;
2471da177e4SLinus Torvalds 
248da905bd1SDavid S. Miller 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
249c720c7e8SEric Dumazet 			       inet->inet_sport, inet->inet_dport, sk);
250b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
251b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
252b23dd4feSDavid S. Miller 		rt = NULL;
2531da177e4SLinus Torvalds 		goto failure;
254b23dd4feSDavid S. Miller 	}
2551da177e4SLinus Torvalds 	/* OK, now commit destination to socket.  */
256bcd76111SHerbert Xu 	sk->sk_gso_type = SKB_GSO_TCPV4;
257d8d1f30bSChangli Gao 	sk_setup_caps(sk, &rt->dst);
2581da177e4SLinus Torvalds 
259ee995283SPavel Emelyanov 	if (!tp->write_seq && likely(!tp->repair))
260c720c7e8SEric Dumazet 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
261c720c7e8SEric Dumazet 							   inet->inet_daddr,
262c720c7e8SEric Dumazet 							   inet->inet_sport,
2631da177e4SLinus Torvalds 							   usin->sin_port);
2641da177e4SLinus Torvalds 
265c720c7e8SEric Dumazet 	inet->inet_id = tp->write_seq ^ jiffies;
2661da177e4SLinus Torvalds 
267ee995283SPavel Emelyanov 	if (likely(!tp->repair))
2681da177e4SLinus Torvalds 		err = tcp_connect(sk);
269ee995283SPavel Emelyanov 	else
270ee995283SPavel Emelyanov 		err = tcp_repair_connect(sk);
271ee995283SPavel Emelyanov 
2721da177e4SLinus Torvalds 	rt = NULL;
2731da177e4SLinus Torvalds 	if (err)
2741da177e4SLinus Torvalds 		goto failure;
2751da177e4SLinus Torvalds 
2761da177e4SLinus Torvalds 	return 0;
2771da177e4SLinus Torvalds 
2781da177e4SLinus Torvalds failure:
2797174259eSArnaldo Carvalho de Melo 	/*
2807174259eSArnaldo Carvalho de Melo 	 * This unhashes the socket and releases the local port,
2817174259eSArnaldo Carvalho de Melo 	 * if necessary.
2827174259eSArnaldo Carvalho de Melo 	 */
2831da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_CLOSE);
2841da177e4SLinus Torvalds 	ip_rt_put(rt);
2851da177e4SLinus Torvalds 	sk->sk_route_caps = 0;
286c720c7e8SEric Dumazet 	inet->inet_dport = 0;
2871da177e4SLinus Torvalds 	return err;
2881da177e4SLinus Torvalds }
2894bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect);
2901da177e4SLinus Torvalds 
2911da177e4SLinus Torvalds /*
2921da177e4SLinus Torvalds  * This routine does path mtu discovery as defined in RFC1191.
2931da177e4SLinus Torvalds  */
294b71d1d42SEric Dumazet static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
2951da177e4SLinus Torvalds {
2961da177e4SLinus Torvalds 	struct dst_entry *dst;
2971da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
2981da177e4SLinus Torvalds 
2991da177e4SLinus Torvalds 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
3001da177e4SLinus Torvalds 	 * send out by Linux are always <576bytes so they should go through
3011da177e4SLinus Torvalds 	 * unfragmented).
3021da177e4SLinus Torvalds 	 */
3031da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN)
3041da177e4SLinus Torvalds 		return;
3051da177e4SLinus Torvalds 
3061da177e4SLinus Torvalds 	/* We don't check in the destentry if pmtu discovery is forbidden
3071da177e4SLinus Torvalds 	 * on this route. We just assume that no packet_to_big packets
3081da177e4SLinus Torvalds 	 * are send back when pmtu discovery is not active.
3091da177e4SLinus Torvalds 	 * There is a small race when the user changes this flag in the
3101da177e4SLinus Torvalds 	 * route, but I think that's acceptable.
3111da177e4SLinus Torvalds 	 */
3121da177e4SLinus Torvalds 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
3131da177e4SLinus Torvalds 		return;
3141da177e4SLinus Torvalds 
3151da177e4SLinus Torvalds 	dst->ops->update_pmtu(dst, mtu);
3161da177e4SLinus Torvalds 
3171da177e4SLinus Torvalds 	/* Something is about to be wrong... Remember soft error
3181da177e4SLinus Torvalds 	 * for the case, if this connection will not able to recover.
3191da177e4SLinus Torvalds 	 */
3201da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
3211da177e4SLinus Torvalds 		sk->sk_err_soft = EMSGSIZE;
3221da177e4SLinus Torvalds 
3231da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
3241da177e4SLinus Torvalds 
3251da177e4SLinus Torvalds 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
326d83d8461SArnaldo Carvalho de Melo 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
3271da177e4SLinus Torvalds 		tcp_sync_mss(sk, mtu);
3281da177e4SLinus Torvalds 
3291da177e4SLinus Torvalds 		/* Resend the TCP packet because it's
3301da177e4SLinus Torvalds 		 * clear that the old packet has been
3311da177e4SLinus Torvalds 		 * dropped. This is the new "fast" path mtu
3321da177e4SLinus Torvalds 		 * discovery.
3331da177e4SLinus Torvalds 		 */
3341da177e4SLinus Torvalds 		tcp_simple_retransmit(sk);
3351da177e4SLinus Torvalds 	} /* else let the usual retransmit timer handle it */
3361da177e4SLinus Torvalds }
3371da177e4SLinus Torvalds 
3381da177e4SLinus Torvalds /*
3391da177e4SLinus Torvalds  * This routine is called by the ICMP module when it gets some
3401da177e4SLinus Torvalds  * sort of error condition.  If err < 0 then the socket should
3411da177e4SLinus Torvalds  * be closed and the error returned to the user.  If err > 0
3421da177e4SLinus Torvalds  * it's just the icmp type << 8 | icmp code.  After adjustment
3431da177e4SLinus Torvalds  * header points to the first 8 bytes of the tcp header.  We need
3441da177e4SLinus Torvalds  * to find the appropriate port.
3451da177e4SLinus Torvalds  *
3461da177e4SLinus Torvalds  * The locking strategy used here is very "optimistic". When
3471da177e4SLinus Torvalds  * someone else accesses the socket the ICMP is just dropped
3481da177e4SLinus Torvalds  * and for some paths there is no check at all.
3491da177e4SLinus Torvalds  * A more general error queue to queue errors for later handling
3501da177e4SLinus Torvalds  * is probably better.
3511da177e4SLinus Torvalds  *
3521da177e4SLinus Torvalds  */
3531da177e4SLinus Torvalds 
3544d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
3551da177e4SLinus Torvalds {
356b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
3574d1a2d9eSDamian Lukowski 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
358f1ecd5d9SDamian Lukowski 	struct inet_connection_sock *icsk;
3591da177e4SLinus Torvalds 	struct tcp_sock *tp;
3601da177e4SLinus Torvalds 	struct inet_sock *inet;
3614d1a2d9eSDamian Lukowski 	const int type = icmp_hdr(icmp_skb)->type;
3624d1a2d9eSDamian Lukowski 	const int code = icmp_hdr(icmp_skb)->code;
3631da177e4SLinus Torvalds 	struct sock *sk;
364f1ecd5d9SDamian Lukowski 	struct sk_buff *skb;
3651da177e4SLinus Torvalds 	__u32 seq;
366f1ecd5d9SDamian Lukowski 	__u32 remaining;
3671da177e4SLinus Torvalds 	int err;
3684d1a2d9eSDamian Lukowski 	struct net *net = dev_net(icmp_skb->dev);
3691da177e4SLinus Torvalds 
3704d1a2d9eSDamian Lukowski 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
371dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3721da177e4SLinus Torvalds 		return;
3731da177e4SLinus Torvalds 	}
3741da177e4SLinus Torvalds 
375fd54d716SPavel Emelyanov 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
3764d1a2d9eSDamian Lukowski 			iph->saddr, th->source, inet_iif(icmp_skb));
3771da177e4SLinus Torvalds 	if (!sk) {
378dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3791da177e4SLinus Torvalds 		return;
3801da177e4SLinus Torvalds 	}
3811da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT) {
3829469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
3831da177e4SLinus Torvalds 		return;
3841da177e4SLinus Torvalds 	}
3851da177e4SLinus Torvalds 
3861da177e4SLinus Torvalds 	bh_lock_sock(sk);
3871da177e4SLinus Torvalds 	/* If too many ICMPs get dropped on busy
3881da177e4SLinus Torvalds 	 * servers this needs to be solved differently.
3891da177e4SLinus Torvalds 	 */
3901da177e4SLinus Torvalds 	if (sock_owned_by_user(sk))
391de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
3921da177e4SLinus Torvalds 
3931da177e4SLinus Torvalds 	if (sk->sk_state == TCP_CLOSE)
3941da177e4SLinus Torvalds 		goto out;
3951da177e4SLinus Torvalds 
39697e3ecd1Sstephen hemminger 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
39797e3ecd1Sstephen hemminger 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
39897e3ecd1Sstephen hemminger 		goto out;
39997e3ecd1Sstephen hemminger 	}
40097e3ecd1Sstephen hemminger 
401f1ecd5d9SDamian Lukowski 	icsk = inet_csk(sk);
4021da177e4SLinus Torvalds 	tp = tcp_sk(sk);
4031da177e4SLinus Torvalds 	seq = ntohl(th->seq);
4041da177e4SLinus Torvalds 	if (sk->sk_state != TCP_LISTEN &&
4051da177e4SLinus Torvalds 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
406de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
4071da177e4SLinus Torvalds 		goto out;
4081da177e4SLinus Torvalds 	}
4091da177e4SLinus Torvalds 
4101da177e4SLinus Torvalds 	switch (type) {
4111da177e4SLinus Torvalds 	case ICMP_SOURCE_QUENCH:
4121da177e4SLinus Torvalds 		/* Just silently ignore these. */
4131da177e4SLinus Torvalds 		goto out;
4141da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
4151da177e4SLinus Torvalds 		err = EPROTO;
4161da177e4SLinus Torvalds 		break;
4171da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
4181da177e4SLinus Torvalds 		if (code > NR_ICMP_UNREACH)
4191da177e4SLinus Torvalds 			goto out;
4201da177e4SLinus Torvalds 
4211da177e4SLinus Torvalds 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
4221da177e4SLinus Torvalds 			if (!sock_owned_by_user(sk))
4231da177e4SLinus Torvalds 				do_pmtu_discovery(sk, iph, info);
4241da177e4SLinus Torvalds 			goto out;
4251da177e4SLinus Torvalds 		}
4261da177e4SLinus Torvalds 
4271da177e4SLinus Torvalds 		err = icmp_err_convert[code].errno;
428f1ecd5d9SDamian Lukowski 		/* check if icmp_skb allows revert of backoff
429f1ecd5d9SDamian Lukowski 		 * (see draft-zimmermann-tcp-lcd) */
430f1ecd5d9SDamian Lukowski 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
431f1ecd5d9SDamian Lukowski 			break;
432f1ecd5d9SDamian Lukowski 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
433f1ecd5d9SDamian Lukowski 		    !icsk->icsk_backoff)
434f1ecd5d9SDamian Lukowski 			break;
435f1ecd5d9SDamian Lukowski 
4368f49c270SDavid S. Miller 		if (sock_owned_by_user(sk))
4378f49c270SDavid S. Miller 			break;
4388f49c270SDavid S. Miller 
439f1ecd5d9SDamian Lukowski 		icsk->icsk_backoff--;
4409ad7c049SJerry Chu 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
4419ad7c049SJerry Chu 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
442f1ecd5d9SDamian Lukowski 		tcp_bound_rto(sk);
443f1ecd5d9SDamian Lukowski 
444f1ecd5d9SDamian Lukowski 		skb = tcp_write_queue_head(sk);
445f1ecd5d9SDamian Lukowski 		BUG_ON(!skb);
446f1ecd5d9SDamian Lukowski 
447f1ecd5d9SDamian Lukowski 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
448f1ecd5d9SDamian Lukowski 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
449f1ecd5d9SDamian Lukowski 
450f1ecd5d9SDamian Lukowski 		if (remaining) {
451f1ecd5d9SDamian Lukowski 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
452f1ecd5d9SDamian Lukowski 						  remaining, TCP_RTO_MAX);
453f1ecd5d9SDamian Lukowski 		} else {
454f1ecd5d9SDamian Lukowski 			/* RTO revert clocked out retransmission.
455f1ecd5d9SDamian Lukowski 			 * Will retransmit now */
456f1ecd5d9SDamian Lukowski 			tcp_retransmit_timer(sk);
457f1ecd5d9SDamian Lukowski 		}
458f1ecd5d9SDamian Lukowski 
4591da177e4SLinus Torvalds 		break;
4601da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4611da177e4SLinus Torvalds 		err = EHOSTUNREACH;
4621da177e4SLinus Torvalds 		break;
4631da177e4SLinus Torvalds 	default:
4641da177e4SLinus Torvalds 		goto out;
4651da177e4SLinus Torvalds 	}
4661da177e4SLinus Torvalds 
4671da177e4SLinus Torvalds 	switch (sk->sk_state) {
46860236fddSArnaldo Carvalho de Melo 		struct request_sock *req, **prev;
4691da177e4SLinus Torvalds 	case TCP_LISTEN:
4701da177e4SLinus Torvalds 		if (sock_owned_by_user(sk))
4711da177e4SLinus Torvalds 			goto out;
4721da177e4SLinus Torvalds 
473463c84b9SArnaldo Carvalho de Melo 		req = inet_csk_search_req(sk, &prev, th->dest,
4741da177e4SLinus Torvalds 					  iph->daddr, iph->saddr);
4751da177e4SLinus Torvalds 		if (!req)
4761da177e4SLinus Torvalds 			goto out;
4771da177e4SLinus Torvalds 
4781da177e4SLinus Torvalds 		/* ICMPs are not backlogged, hence we cannot get
4791da177e4SLinus Torvalds 		   an established socket here.
4801da177e4SLinus Torvalds 		 */
481547b792cSIlpo Järvinen 		WARN_ON(req->sk);
4821da177e4SLinus Torvalds 
4832e6599cbSArnaldo Carvalho de Melo 		if (seq != tcp_rsk(req)->snt_isn) {
484de0744afSPavel Emelyanov 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
4851da177e4SLinus Torvalds 			goto out;
4861da177e4SLinus Torvalds 		}
4871da177e4SLinus Torvalds 
4881da177e4SLinus Torvalds 		/*
4891da177e4SLinus Torvalds 		 * Still in SYN_RECV, just remove it silently.
4901da177e4SLinus Torvalds 		 * There is no good way to pass the error to the newly
4911da177e4SLinus Torvalds 		 * created socket, and POSIX does not want network
4921da177e4SLinus Torvalds 		 * errors returned from accept().
4931da177e4SLinus Torvalds 		 */
494463c84b9SArnaldo Carvalho de Melo 		inet_csk_reqsk_queue_drop(sk, req, prev);
4951da177e4SLinus Torvalds 		goto out;
4961da177e4SLinus Torvalds 
4971da177e4SLinus Torvalds 	case TCP_SYN_SENT:
4981da177e4SLinus Torvalds 	case TCP_SYN_RECV:  /* Cannot happen.
4991da177e4SLinus Torvalds 			       It can f.e. if SYNs crossed.
5001da177e4SLinus Torvalds 			     */
5011da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk)) {
5021da177e4SLinus Torvalds 			sk->sk_err = err;
5031da177e4SLinus Torvalds 
5041da177e4SLinus Torvalds 			sk->sk_error_report(sk);
5051da177e4SLinus Torvalds 
5061da177e4SLinus Torvalds 			tcp_done(sk);
5071da177e4SLinus Torvalds 		} else {
5081da177e4SLinus Torvalds 			sk->sk_err_soft = err;
5091da177e4SLinus Torvalds 		}
5101da177e4SLinus Torvalds 		goto out;
5111da177e4SLinus Torvalds 	}
5121da177e4SLinus Torvalds 
5131da177e4SLinus Torvalds 	/* If we've already connected we will keep trying
5141da177e4SLinus Torvalds 	 * until we time out, or the user gives up.
5151da177e4SLinus Torvalds 	 *
5161da177e4SLinus Torvalds 	 * rfc1122 4.2.3.9 allows to consider as hard errors
5171da177e4SLinus Torvalds 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
5181da177e4SLinus Torvalds 	 * but it is obsoleted by pmtu discovery).
5191da177e4SLinus Torvalds 	 *
5201da177e4SLinus Torvalds 	 * Note, that in modern internet, where routing is unreliable
5211da177e4SLinus Torvalds 	 * and in each dark corner broken firewalls sit, sending random
5221da177e4SLinus Torvalds 	 * errors ordered by their masters even this two messages finally lose
5231da177e4SLinus Torvalds 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
5241da177e4SLinus Torvalds 	 *
5251da177e4SLinus Torvalds 	 * Now we are in compliance with RFCs.
5261da177e4SLinus Torvalds 	 *							--ANK (980905)
5271da177e4SLinus Torvalds 	 */
5281da177e4SLinus Torvalds 
5291da177e4SLinus Torvalds 	inet = inet_sk(sk);
5301da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk) && inet->recverr) {
5311da177e4SLinus Torvalds 		sk->sk_err = err;
5321da177e4SLinus Torvalds 		sk->sk_error_report(sk);
5331da177e4SLinus Torvalds 	} else	{ /* Only an error on timeout */
5341da177e4SLinus Torvalds 		sk->sk_err_soft = err;
5351da177e4SLinus Torvalds 	}
5361da177e4SLinus Torvalds 
5371da177e4SLinus Torvalds out:
5381da177e4SLinus Torvalds 	bh_unlock_sock(sk);
5391da177e4SLinus Torvalds 	sock_put(sk);
5401da177e4SLinus Torvalds }
5411da177e4SLinus Torvalds 
542419f9f89SHerbert Xu static void __tcp_v4_send_check(struct sk_buff *skb,
543419f9f89SHerbert Xu 				__be32 saddr, __be32 daddr)
5441da177e4SLinus Torvalds {
545aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
5461da177e4SLinus Torvalds 
54784fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
548419f9f89SHerbert Xu 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
549663ead3bSHerbert Xu 		skb->csum_start = skb_transport_header(skb) - skb->head;
550ff1dcadbSAl Viro 		skb->csum_offset = offsetof(struct tcphdr, check);
5511da177e4SLinus Torvalds 	} else {
552419f9f89SHerbert Xu 		th->check = tcp_v4_check(skb->len, saddr, daddr,
55307f0757aSJoe Perches 					 csum_partial(th,
5541da177e4SLinus Torvalds 						      th->doff << 2,
5551da177e4SLinus Torvalds 						      skb->csum));
5561da177e4SLinus Torvalds 	}
5571da177e4SLinus Torvalds }
5581da177e4SLinus Torvalds 
559419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */
560bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
561419f9f89SHerbert Xu {
562cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
563419f9f89SHerbert Xu 
564419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
565419f9f89SHerbert Xu }
5664bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check);
567419f9f89SHerbert Xu 
568a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb)
569a430a43dSHerbert Xu {
570eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
571a430a43dSHerbert Xu 	struct tcphdr *th;
572a430a43dSHerbert Xu 
573a430a43dSHerbert Xu 	if (!pskb_may_pull(skb, sizeof(*th)))
574a430a43dSHerbert Xu 		return -EINVAL;
575a430a43dSHerbert Xu 
576eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
577aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
578a430a43dSHerbert Xu 
579a430a43dSHerbert Xu 	th->check = 0;
58084fa7933SPatrick McHardy 	skb->ip_summed = CHECKSUM_PARTIAL;
581419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
582a430a43dSHerbert Xu 	return 0;
583a430a43dSHerbert Xu }
584a430a43dSHerbert Xu 
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why do we NEVER use socket parameters (TOS, TTL etc.)
 *		      for the reset?
 *	Answer: if a packet caused an RST, it is not for a socket
 *		existing in our system; if it is matched to a socket,
 *		it is just a duplicate segment or a bug in the other side's TCP.
 *		So we build the reply based only on the parameters
 *		that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */
5971da177e4SLinus Torvalds 
598cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
5991da177e4SLinus Torvalds {
600cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
601cfb6eeb4SYOSHIFUJI Hideaki 	struct {
602cfb6eeb4SYOSHIFUJI Hideaki 		struct tcphdr th;
603cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
604714e85beSAl Viro 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
605cfb6eeb4SYOSHIFUJI Hideaki #endif
606cfb6eeb4SYOSHIFUJI Hideaki 	} rep;
6071da177e4SLinus Torvalds 	struct ip_reply_arg arg;
608cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
609cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
610658ddaafSShawn Lu 	const __u8 *hash_location = NULL;
611658ddaafSShawn Lu 	unsigned char newhash[16];
612658ddaafSShawn Lu 	int genhash;
613658ddaafSShawn Lu 	struct sock *sk1 = NULL;
614cfb6eeb4SYOSHIFUJI Hideaki #endif
615a86b1e30SPavel Emelyanov 	struct net *net;
6161da177e4SLinus Torvalds 
6171da177e4SLinus Torvalds 	/* Never send a reset in response to a reset. */
6181da177e4SLinus Torvalds 	if (th->rst)
6191da177e4SLinus Torvalds 		return;
6201da177e4SLinus Torvalds 
621511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
6221da177e4SLinus Torvalds 		return;
6231da177e4SLinus Torvalds 
6241da177e4SLinus Torvalds 	/* Swap the send and the receive. */
625cfb6eeb4SYOSHIFUJI Hideaki 	memset(&rep, 0, sizeof(rep));
626cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.dest   = th->source;
627cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.source = th->dest;
628cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.doff   = sizeof(struct tcphdr) / 4;
629cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.rst    = 1;
6301da177e4SLinus Torvalds 
6311da177e4SLinus Torvalds 	if (th->ack) {
632cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.seq = th->ack_seq;
6331da177e4SLinus Torvalds 	} else {
634cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack = 1;
635cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
6361da177e4SLinus Torvalds 				       skb->len - (th->doff << 2));
6371da177e4SLinus Torvalds 	}
6381da177e4SLinus Torvalds 
6397174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
640cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_base = (unsigned char *)&rep;
641cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_len  = sizeof(rep.th);
642cfb6eeb4SYOSHIFUJI Hideaki 
643cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
644658ddaafSShawn Lu 	hash_location = tcp_parse_md5sig_option(th);
645658ddaafSShawn Lu 	if (!sk && hash_location) {
646658ddaafSShawn Lu 		/*
647658ddaafSShawn Lu 		 * active side is lost. Try to find listening socket through
648658ddaafSShawn Lu 		 * source port, and then find md5 key through listening socket.
649658ddaafSShawn Lu 		 * we are not loose security here:
650658ddaafSShawn Lu 		 * Incoming packet is checked with md5 hash with finding key,
651658ddaafSShawn Lu 		 * no RST generated if md5 hash doesn't match.
652658ddaafSShawn Lu 		 */
653658ddaafSShawn Lu 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
654658ddaafSShawn Lu 					     &tcp_hashinfo, ip_hdr(skb)->daddr,
655658ddaafSShawn Lu 					     ntohs(th->source), inet_iif(skb));
656658ddaafSShawn Lu 		/* don't send rst if it can't find key */
657658ddaafSShawn Lu 		if (!sk1)
658658ddaafSShawn Lu 			return;
659658ddaafSShawn Lu 		rcu_read_lock();
660658ddaafSShawn Lu 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
661658ddaafSShawn Lu 					&ip_hdr(skb)->saddr, AF_INET);
662658ddaafSShawn Lu 		if (!key)
663658ddaafSShawn Lu 			goto release_sk1;
664658ddaafSShawn Lu 
665658ddaafSShawn Lu 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
666658ddaafSShawn Lu 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
667658ddaafSShawn Lu 			goto release_sk1;
668658ddaafSShawn Lu 	} else {
669658ddaafSShawn Lu 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
670658ddaafSShawn Lu 					     &ip_hdr(skb)->saddr,
671a915da9bSEric Dumazet 					     AF_INET) : NULL;
672658ddaafSShawn Lu 	}
673658ddaafSShawn Lu 
674cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
675cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
676cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_NOP << 16) |
677cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_MD5SIG << 8) |
678cfb6eeb4SYOSHIFUJI Hideaki 				   TCPOLEN_MD5SIG);
679cfb6eeb4SYOSHIFUJI Hideaki 		/* Update length and the length the header thinks exists */
680cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
681cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len / 4;
682cfb6eeb4SYOSHIFUJI Hideaki 
68349a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
68478e645cbSIlpo Järvinen 				     key, ip_hdr(skb)->saddr,
68578e645cbSIlpo Järvinen 				     ip_hdr(skb)->daddr, &rep.th);
686cfb6eeb4SYOSHIFUJI Hideaki 	}
687cfb6eeb4SYOSHIFUJI Hideaki #endif
688eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
689eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
69052cd5750SIlpo Järvinen 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
6911da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
69288ef4a5aSKOVACS Krisztian 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
693e2446eaaSShawn Lu 	/* When socket is gone, all binding information is lost.
694e2446eaaSShawn Lu 	 * routing might fail in this case. using iif for oif to
695e2446eaaSShawn Lu 	 * make sure we can deliver it
696e2446eaaSShawn Lu 	 */
697e2446eaaSShawn Lu 	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
6981da177e4SLinus Torvalds 
699adf30907SEric Dumazet 	net = dev_net(skb_dst(skb)->dev);
70066b13d99SEric Dumazet 	arg.tos = ip_hdr(skb)->tos;
7010a5ebb80SDavid S. Miller 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
7027feb49c8SDenis V. Lunev 		      &arg, arg.iov[0].iov_len);
7031da177e4SLinus Torvalds 
70463231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
70563231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
706658ddaafSShawn Lu 
707658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG
708658ddaafSShawn Lu release_sk1:
709658ddaafSShawn Lu 	if (sk1) {
710658ddaafSShawn Lu 		rcu_read_unlock();
711658ddaafSShawn Lu 		sock_put(sk1);
712658ddaafSShawn Lu 	}
713658ddaafSShawn Lu #endif
7141da177e4SLinus Torvalds }
7151da177e4SLinus Torvalds 
7161da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
7171da177e4SLinus Torvalds    outside socket context is ugly, certainly. What can I do?
7181da177e4SLinus Torvalds  */
7191da177e4SLinus Torvalds 
7209501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
7219501f972SYOSHIFUJI Hideaki 			    u32 win, u32 ts, int oif,
72288ef4a5aSKOVACS Krisztian 			    struct tcp_md5sig_key *key,
72366b13d99SEric Dumazet 			    int reply_flags, u8 tos)
7241da177e4SLinus Torvalds {
725cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
7261da177e4SLinus Torvalds 	struct {
7271da177e4SLinus Torvalds 		struct tcphdr th;
728714e85beSAl Viro 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
729cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
730cfb6eeb4SYOSHIFUJI Hideaki 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
731cfb6eeb4SYOSHIFUJI Hideaki #endif
732cfb6eeb4SYOSHIFUJI Hideaki 			];
7331da177e4SLinus Torvalds 	} rep;
7341da177e4SLinus Torvalds 	struct ip_reply_arg arg;
735adf30907SEric Dumazet 	struct net *net = dev_net(skb_dst(skb)->dev);
7361da177e4SLinus Torvalds 
7371da177e4SLinus Torvalds 	memset(&rep.th, 0, sizeof(struct tcphdr));
7387174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
7391da177e4SLinus Torvalds 
7401da177e4SLinus Torvalds 	arg.iov[0].iov_base = (unsigned char *)&rep;
7411da177e4SLinus Torvalds 	arg.iov[0].iov_len  = sizeof(rep.th);
7421da177e4SLinus Torvalds 	if (ts) {
743cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
7441da177e4SLinus Torvalds 				   (TCPOPT_TIMESTAMP << 8) |
7451da177e4SLinus Torvalds 				   TCPOLEN_TIMESTAMP);
746cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[1] = htonl(tcp_time_stamp);
747cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[2] = htonl(ts);
748cb48cfe8SCraig Schlenter 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
7491da177e4SLinus Torvalds 	}
7501da177e4SLinus Torvalds 
7511da177e4SLinus Torvalds 	/* Swap the send and the receive. */
7521da177e4SLinus Torvalds 	rep.th.dest    = th->source;
7531da177e4SLinus Torvalds 	rep.th.source  = th->dest;
7541da177e4SLinus Torvalds 	rep.th.doff    = arg.iov[0].iov_len / 4;
7551da177e4SLinus Torvalds 	rep.th.seq     = htonl(seq);
7561da177e4SLinus Torvalds 	rep.th.ack_seq = htonl(ack);
7571da177e4SLinus Torvalds 	rep.th.ack     = 1;
7581da177e4SLinus Torvalds 	rep.th.window  = htons(win);
7591da177e4SLinus Torvalds 
760cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
761cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
762cfb6eeb4SYOSHIFUJI Hideaki 		int offset = (ts) ? 3 : 0;
763cfb6eeb4SYOSHIFUJI Hideaki 
764cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
765cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_NOP << 16) |
766cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_MD5SIG << 8) |
767cfb6eeb4SYOSHIFUJI Hideaki 					  TCPOLEN_MD5SIG);
768cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
769cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len/4;
770cfb6eeb4SYOSHIFUJI Hideaki 
77149a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
77290b7e112SAdam Langley 				    key, ip_hdr(skb)->saddr,
77390b7e112SAdam Langley 				    ip_hdr(skb)->daddr, &rep.th);
774cfb6eeb4SYOSHIFUJI Hideaki 	}
775cfb6eeb4SYOSHIFUJI Hideaki #endif
77688ef4a5aSKOVACS Krisztian 	arg.flags = reply_flags;
777eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
778eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
7791da177e4SLinus Torvalds 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
7801da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
7819501f972SYOSHIFUJI Hideaki 	if (oif)
7829501f972SYOSHIFUJI Hideaki 		arg.bound_dev_if = oif;
78366b13d99SEric Dumazet 	arg.tos = tos;
7840a5ebb80SDavid S. Miller 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
7857feb49c8SDenis V. Lunev 		      &arg, arg.iov[0].iov_len);
7861da177e4SLinus Torvalds 
78763231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
7881da177e4SLinus Torvalds }
7891da177e4SLinus Torvalds 
7901da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
7911da177e4SLinus Torvalds {
7928feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw = inet_twsk(sk);
793cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
7941da177e4SLinus Torvalds 
7959501f972SYOSHIFUJI Hideaki 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7967174259eSArnaldo Carvalho de Melo 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
7979501f972SYOSHIFUJI Hideaki 			tcptw->tw_ts_recent,
7989501f972SYOSHIFUJI Hideaki 			tw->tw_bound_dev_if,
79988ef4a5aSKOVACS Krisztian 			tcp_twsk_md5_key(tcptw),
80066b13d99SEric Dumazet 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
80166b13d99SEric Dumazet 			tw->tw_tos
8029501f972SYOSHIFUJI Hideaki 			);
8031da177e4SLinus Torvalds 
8048feaf0c0SArnaldo Carvalho de Melo 	inet_twsk_put(tw);
8051da177e4SLinus Torvalds }
8061da177e4SLinus Torvalds 
8076edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
8087174259eSArnaldo Carvalho de Melo 				  struct request_sock *req)
8091da177e4SLinus Torvalds {
8109501f972SYOSHIFUJI Hideaki 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
811cfb6eeb4SYOSHIFUJI Hideaki 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
8129501f972SYOSHIFUJI Hideaki 			req->ts_recent,
8139501f972SYOSHIFUJI Hideaki 			0,
814a915da9bSEric Dumazet 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
815a915da9bSEric Dumazet 					  AF_INET),
81666b13d99SEric Dumazet 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
81766b13d99SEric Dumazet 			ip_hdr(skb)->tos);
8181da177e4SLinus Torvalds }
8191da177e4SLinus Torvalds 
8201da177e4SLinus Torvalds /*
8219bf1d83eSKris Katterjohn  *	Send a SYN-ACK after having received a SYN.
82260236fddSArnaldo Carvalho de Melo  *	This still operates on a request_sock only, not on a big
8231da177e4SLinus Torvalds  *	socket.
8241da177e4SLinus Torvalds  */
82572659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
826e6b4d113SWilliam Allen Simpson 			      struct request_sock *req,
827fff32699SEric Dumazet 			      struct request_values *rvp,
828*7586ecebSEric Dumazet 			      u16 queue_mapping,
829*7586ecebSEric Dumazet 			      bool nocache)
8301da177e4SLinus Torvalds {
8312e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
8326bd023f3SDavid S. Miller 	struct flowi4 fl4;
8331da177e4SLinus Torvalds 	int err = -1;
8341da177e4SLinus Torvalds 	struct sk_buff * skb;
8351da177e4SLinus Torvalds 
8361da177e4SLinus Torvalds 	/* First, grab a route. */
837*7586ecebSEric Dumazet 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL)
838fd80eb94SDenis V. Lunev 		return -1;
8391da177e4SLinus Torvalds 
840e6b4d113SWilliam Allen Simpson 	skb = tcp_make_synack(sk, dst, req, rvp);
8411da177e4SLinus Torvalds 
8421da177e4SLinus Torvalds 	if (skb) {
843419f9f89SHerbert Xu 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
8441da177e4SLinus Torvalds 
845fff32699SEric Dumazet 		skb_set_queue_mapping(skb, queue_mapping);
8462e6599cbSArnaldo Carvalho de Melo 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
8472e6599cbSArnaldo Carvalho de Melo 					    ireq->rmt_addr,
8482e6599cbSArnaldo Carvalho de Melo 					    ireq->opt);
849b9df3cb8SGerrit Renker 		err = net_xmit_eval(err);
8501da177e4SLinus Torvalds 	}
8511da177e4SLinus Torvalds 
8521da177e4SLinus Torvalds 	return err;
8531da177e4SLinus Torvalds }
8541da177e4SLinus Torvalds 
85572659eccSOctavian Purdila static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
856e6b4d113SWilliam Allen Simpson 			      struct request_values *rvp)
857fd80eb94SDenis V. Lunev {
85872659eccSOctavian Purdila 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
859*7586ecebSEric Dumazet 	return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
860fd80eb94SDenis V. Lunev }
861fd80eb94SDenis V. Lunev 
8621da177e4SLinus Torvalds /*
86360236fddSArnaldo Carvalho de Melo  *	IPv4 request_sock destructor.
8641da177e4SLinus Torvalds  */
86560236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req)
8661da177e4SLinus Torvalds {
8672e6599cbSArnaldo Carvalho de Melo 	kfree(inet_rsk(req)->opt);
8681da177e4SLinus Torvalds }
8691da177e4SLinus Torvalds 
870946cedccSEric Dumazet /*
871a2a385d6SEric Dumazet  * Return true if a syncookie should be sent
872946cedccSEric Dumazet  */
873a2a385d6SEric Dumazet bool tcp_syn_flood_action(struct sock *sk,
874946cedccSEric Dumazet 			 const struct sk_buff *skb,
875946cedccSEric Dumazet 			 const char *proto)
8761da177e4SLinus Torvalds {
877946cedccSEric Dumazet 	const char *msg = "Dropping request";
878a2a385d6SEric Dumazet 	bool want_cookie = false;
879946cedccSEric Dumazet 	struct listen_sock *lopt;
880946cedccSEric Dumazet 
881946cedccSEric Dumazet 
8821da177e4SLinus Torvalds 
8832a1d4bd4SFlorian Westphal #ifdef CONFIG_SYN_COOKIES
884946cedccSEric Dumazet 	if (sysctl_tcp_syncookies) {
8852a1d4bd4SFlorian Westphal 		msg = "Sending cookies";
886a2a385d6SEric Dumazet 		want_cookie = true;
887946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
888946cedccSEric Dumazet 	} else
88980e40daaSArnaldo Carvalho de Melo #endif
890946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
8912a1d4bd4SFlorian Westphal 
892946cedccSEric Dumazet 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
893946cedccSEric Dumazet 	if (!lopt->synflood_warned) {
894946cedccSEric Dumazet 		lopt->synflood_warned = 1;
895afd46503SJoe Perches 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
896946cedccSEric Dumazet 			proto, ntohs(tcp_hdr(skb)->dest), msg);
8972a1d4bd4SFlorian Westphal 	}
898946cedccSEric Dumazet 	return want_cookie;
899946cedccSEric Dumazet }
900946cedccSEric Dumazet EXPORT_SYMBOL(tcp_syn_flood_action);
9011da177e4SLinus Torvalds 
9021da177e4SLinus Torvalds /*
90360236fddSArnaldo Carvalho de Melo  * Save and compile IPv4 options into the request_sock if needed.
9041da177e4SLinus Torvalds  */
905f6d8bd05SEric Dumazet static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
9061da177e4SLinus Torvalds 						  struct sk_buff *skb)
9071da177e4SLinus Torvalds {
908f6d8bd05SEric Dumazet 	const struct ip_options *opt = &(IPCB(skb)->opt);
909f6d8bd05SEric Dumazet 	struct ip_options_rcu *dopt = NULL;
9101da177e4SLinus Torvalds 
9111da177e4SLinus Torvalds 	if (opt && opt->optlen) {
912f6d8bd05SEric Dumazet 		int opt_size = sizeof(*dopt) + opt->optlen;
913f6d8bd05SEric Dumazet 
9141da177e4SLinus Torvalds 		dopt = kmalloc(opt_size, GFP_ATOMIC);
9151da177e4SLinus Torvalds 		if (dopt) {
916f6d8bd05SEric Dumazet 			if (ip_options_echo(&dopt->opt, skb)) {
9171da177e4SLinus Torvalds 				kfree(dopt);
9181da177e4SLinus Torvalds 				dopt = NULL;
9191da177e4SLinus Torvalds 			}
9201da177e4SLinus Torvalds 		}
9211da177e4SLinus Torvalds 	}
9221da177e4SLinus Torvalds 	return dopt;
9231da177e4SLinus Torvalds }
9241da177e4SLinus Torvalds 
925cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
926cfb6eeb4SYOSHIFUJI Hideaki /*
927cfb6eeb4SYOSHIFUJI Hideaki  * RFC2385 MD5 checksumming requires a mapping of
928cfb6eeb4SYOSHIFUJI Hideaki  * IP address->MD5 Key.
929cfb6eeb4SYOSHIFUJI Hideaki  * We need to maintain these in the sk structure.
930cfb6eeb4SYOSHIFUJI Hideaki  */
931cfb6eeb4SYOSHIFUJI Hideaki 
932cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address.  */
933a915da9bSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
934a915da9bSEric Dumazet 					 const union tcp_md5_addr *addr,
935a915da9bSEric Dumazet 					 int family)
936cfb6eeb4SYOSHIFUJI Hideaki {
937cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
938a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
939a915da9bSEric Dumazet 	struct hlist_node *pos;
940a915da9bSEric Dumazet 	unsigned int size = sizeof(struct in_addr);
941a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
942cfb6eeb4SYOSHIFUJI Hideaki 
943a8afca03SEric Dumazet 	/* caller either holds rcu_read_lock() or socket lock */
944a8afca03SEric Dumazet 	md5sig = rcu_dereference_check(tp->md5sig_info,
945b4fb05eaSEric Dumazet 				       sock_owned_by_user(sk) ||
946b4fb05eaSEric Dumazet 				       lockdep_is_held(&sk->sk_lock.slock));
947a8afca03SEric Dumazet 	if (!md5sig)
948cfb6eeb4SYOSHIFUJI Hideaki 		return NULL;
949a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
950a915da9bSEric Dumazet 	if (family == AF_INET6)
951a915da9bSEric Dumazet 		size = sizeof(struct in6_addr);
952a915da9bSEric Dumazet #endif
953a8afca03SEric Dumazet 	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
954a915da9bSEric Dumazet 		if (key->family != family)
955a915da9bSEric Dumazet 			continue;
956a915da9bSEric Dumazet 		if (!memcmp(&key->addr, addr, size))
957a915da9bSEric Dumazet 			return key;
958cfb6eeb4SYOSHIFUJI Hideaki 	}
959cfb6eeb4SYOSHIFUJI Hideaki 	return NULL;
960cfb6eeb4SYOSHIFUJI Hideaki }
961a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup);
962cfb6eeb4SYOSHIFUJI Hideaki 
963cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
964cfb6eeb4SYOSHIFUJI Hideaki 					 struct sock *addr_sk)
965cfb6eeb4SYOSHIFUJI Hideaki {
966a915da9bSEric Dumazet 	union tcp_md5_addr *addr;
967a915da9bSEric Dumazet 
968a915da9bSEric Dumazet 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
969a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
970cfb6eeb4SYOSHIFUJI Hideaki }
971cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup);
972cfb6eeb4SYOSHIFUJI Hideaki 
973f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
974cfb6eeb4SYOSHIFUJI Hideaki 						      struct request_sock *req)
975cfb6eeb4SYOSHIFUJI Hideaki {
976a915da9bSEric Dumazet 	union tcp_md5_addr *addr;
977a915da9bSEric Dumazet 
978a915da9bSEric Dumazet 	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
979a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
980cfb6eeb4SYOSHIFUJI Hideaki }
981cfb6eeb4SYOSHIFUJI Hideaki 
982cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */
983a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
984a915da9bSEric Dumazet 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
985cfb6eeb4SYOSHIFUJI Hideaki {
986cfb6eeb4SYOSHIFUJI Hideaki 	/* Add Key to the list */
987b0a713e9SMatthias M. Dellweg 	struct tcp_md5sig_key *key;
988cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
989f6685938SArnaldo Carvalho de Melo 	struct tcp_md5sig_info *md5sig;
990f6685938SArnaldo Carvalho de Melo 
991a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
992a915da9bSEric Dumazet 	if (key) {
993a915da9bSEric Dumazet 		/* Pre-existing entry - just update that one. */
994a915da9bSEric Dumazet 		memcpy(key->key, newkey, newkeylen);
995a915da9bSEric Dumazet 		key->keylen = newkeylen;
996a915da9bSEric Dumazet 		return 0;
997cfb6eeb4SYOSHIFUJI Hideaki 	}
998260fcbebSYan, Zheng 
999a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1000a8afca03SEric Dumazet 					   sock_owned_by_user(sk));
1001a915da9bSEric Dumazet 	if (!md5sig) {
1002a915da9bSEric Dumazet 		md5sig = kmalloc(sizeof(*md5sig), gfp);
1003a915da9bSEric Dumazet 		if (!md5sig)
1004a915da9bSEric Dumazet 			return -ENOMEM;
1005a915da9bSEric Dumazet 
1006a915da9bSEric Dumazet 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1007a915da9bSEric Dumazet 		INIT_HLIST_HEAD(&md5sig->head);
1008a8afca03SEric Dumazet 		rcu_assign_pointer(tp->md5sig_info, md5sig);
1009a915da9bSEric Dumazet 	}
1010a915da9bSEric Dumazet 
10115f3d9cb2SEric Dumazet 	key = sock_kmalloc(sk, sizeof(*key), gfp);
1012a915da9bSEric Dumazet 	if (!key)
1013a915da9bSEric Dumazet 		return -ENOMEM;
1014a915da9bSEric Dumazet 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
10155f3d9cb2SEric Dumazet 		sock_kfree_s(sk, key, sizeof(*key));
1016cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOMEM;
1017cfb6eeb4SYOSHIFUJI Hideaki 	}
1018f6685938SArnaldo Carvalho de Melo 
1019a915da9bSEric Dumazet 	memcpy(key->key, newkey, newkeylen);
1020a915da9bSEric Dumazet 	key->keylen = newkeylen;
1021a915da9bSEric Dumazet 	key->family = family;
1022a915da9bSEric Dumazet 	memcpy(&key->addr, addr,
1023a915da9bSEric Dumazet 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1024a915da9bSEric Dumazet 				      sizeof(struct in_addr));
1025a915da9bSEric Dumazet 	hlist_add_head_rcu(&key->node, &md5sig->head);
1026cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
1027cfb6eeb4SYOSHIFUJI Hideaki }
1028a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add);
1029cfb6eeb4SYOSHIFUJI Hideaki 
1030a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1031cfb6eeb4SYOSHIFUJI Hideaki {
1032cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1033a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
1034a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
1035cfb6eeb4SYOSHIFUJI Hideaki 
1036a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
1037a915da9bSEric Dumazet 	if (!key)
1038cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOENT;
1039a915da9bSEric Dumazet 	hlist_del_rcu(&key->node);
10405f3d9cb2SEric Dumazet 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1041a915da9bSEric Dumazet 	kfree_rcu(key, rcu);
1042a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1043a8afca03SEric Dumazet 					   sock_owned_by_user(sk));
1044a8afca03SEric Dumazet 	if (hlist_empty(&md5sig->head))
1045a915da9bSEric Dumazet 		tcp_free_md5sig_pool();
1046a915da9bSEric Dumazet 	return 0;
1047cfb6eeb4SYOSHIFUJI Hideaki }
1048a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del);
1049cfb6eeb4SYOSHIFUJI Hideaki 
1050a915da9bSEric Dumazet void tcp_clear_md5_list(struct sock *sk)
1051cfb6eeb4SYOSHIFUJI Hideaki {
1052cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1053a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
1054a915da9bSEric Dumazet 	struct hlist_node *pos, *n;
1055a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
1056cfb6eeb4SYOSHIFUJI Hideaki 
1057a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1058a8afca03SEric Dumazet 
1059a8afca03SEric Dumazet 	if (!hlist_empty(&md5sig->head))
1060cfb6eeb4SYOSHIFUJI Hideaki 		tcp_free_md5sig_pool();
1061a8afca03SEric Dumazet 	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1062a915da9bSEric Dumazet 		hlist_del_rcu(&key->node);
10635f3d9cb2SEric Dumazet 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1064a915da9bSEric Dumazet 		kfree_rcu(key, rcu);
1065cfb6eeb4SYOSHIFUJI Hideaki 	}
1066cfb6eeb4SYOSHIFUJI Hideaki }
1067cfb6eeb4SYOSHIFUJI Hideaki 
1068cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1069cfb6eeb4SYOSHIFUJI Hideaki 				 int optlen)
1070cfb6eeb4SYOSHIFUJI Hideaki {
1071cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig cmd;
1072cfb6eeb4SYOSHIFUJI Hideaki 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1073cfb6eeb4SYOSHIFUJI Hideaki 
1074cfb6eeb4SYOSHIFUJI Hideaki 	if (optlen < sizeof(cmd))
1075cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1076cfb6eeb4SYOSHIFUJI Hideaki 
1077cfb6eeb4SYOSHIFUJI Hideaki 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1078cfb6eeb4SYOSHIFUJI Hideaki 		return -EFAULT;
1079cfb6eeb4SYOSHIFUJI Hideaki 
1080cfb6eeb4SYOSHIFUJI Hideaki 	if (sin->sin_family != AF_INET)
1081cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1082cfb6eeb4SYOSHIFUJI Hideaki 
1083a8afca03SEric Dumazet 	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1084a915da9bSEric Dumazet 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1085a915da9bSEric Dumazet 				      AF_INET);
1086cfb6eeb4SYOSHIFUJI Hideaki 
1087cfb6eeb4SYOSHIFUJI Hideaki 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1088cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1089cfb6eeb4SYOSHIFUJI Hideaki 
1090a915da9bSEric Dumazet 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1091a915da9bSEric Dumazet 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1092a915da9bSEric Dumazet 			      GFP_KERNEL);
1093cfb6eeb4SYOSHIFUJI Hideaki }
1094cfb6eeb4SYOSHIFUJI Hideaki 
109549a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
109649a72dfbSAdam Langley 					__be32 daddr, __be32 saddr, int nbytes)
1097cfb6eeb4SYOSHIFUJI Hideaki {
1098cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_pseudohdr *bp;
109949a72dfbSAdam Langley 	struct scatterlist sg;
1100cfb6eeb4SYOSHIFUJI Hideaki 
1101cfb6eeb4SYOSHIFUJI Hideaki 	bp = &hp->md5_blk.ip4;
1102cfb6eeb4SYOSHIFUJI Hideaki 
1103cfb6eeb4SYOSHIFUJI Hideaki 	/*
110449a72dfbSAdam Langley 	 * 1. the TCP pseudo-header (in the order: source IP address,
1105cfb6eeb4SYOSHIFUJI Hideaki 	 * destination IP address, zero-padded protocol number, and
1106cfb6eeb4SYOSHIFUJI Hideaki 	 * segment length)
1107cfb6eeb4SYOSHIFUJI Hideaki 	 */
1108cfb6eeb4SYOSHIFUJI Hideaki 	bp->saddr = saddr;
1109cfb6eeb4SYOSHIFUJI Hideaki 	bp->daddr = daddr;
1110cfb6eeb4SYOSHIFUJI Hideaki 	bp->pad = 0;
1111076fb722SYOSHIFUJI Hideaki 	bp->protocol = IPPROTO_TCP;
111249a72dfbSAdam Langley 	bp->len = cpu_to_be16(nbytes);
1113c7da57a1SDavid S. Miller 
111449a72dfbSAdam Langley 	sg_init_one(&sg, bp, sizeof(*bp));
111549a72dfbSAdam Langley 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
111649a72dfbSAdam Langley }
111749a72dfbSAdam Langley 
1118a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1119318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
112049a72dfbSAdam Langley {
112149a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
112249a72dfbSAdam Langley 	struct hash_desc *desc;
112349a72dfbSAdam Langley 
112449a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
112549a72dfbSAdam Langley 	if (!hp)
112649a72dfbSAdam Langley 		goto clear_hash_noput;
112749a72dfbSAdam Langley 	desc = &hp->md5_desc;
112849a72dfbSAdam Langley 
112949a72dfbSAdam Langley 	if (crypto_hash_init(desc))
113049a72dfbSAdam Langley 		goto clear_hash;
113149a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
113249a72dfbSAdam Langley 		goto clear_hash;
113349a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
113449a72dfbSAdam Langley 		goto clear_hash;
113549a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
113649a72dfbSAdam Langley 		goto clear_hash;
113749a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
1138cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1139cfb6eeb4SYOSHIFUJI Hideaki 
1140cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1141cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
114249a72dfbSAdam Langley 
1143cfb6eeb4SYOSHIFUJI Hideaki clear_hash:
1144cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1145cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput:
1146cfb6eeb4SYOSHIFUJI Hideaki 	memset(md5_hash, 0, 16);
114749a72dfbSAdam Langley 	return 1;
1148cfb6eeb4SYOSHIFUJI Hideaki }
1149cfb6eeb4SYOSHIFUJI Hideaki 
115049a72dfbSAdam Langley int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1151318cf7aaSEric Dumazet 			const struct sock *sk, const struct request_sock *req,
1152318cf7aaSEric Dumazet 			const struct sk_buff *skb)
1153cfb6eeb4SYOSHIFUJI Hideaki {
115449a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
115549a72dfbSAdam Langley 	struct hash_desc *desc;
1156318cf7aaSEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1157cfb6eeb4SYOSHIFUJI Hideaki 	__be32 saddr, daddr;
1158cfb6eeb4SYOSHIFUJI Hideaki 
1159cfb6eeb4SYOSHIFUJI Hideaki 	if (sk) {
1160c720c7e8SEric Dumazet 		saddr = inet_sk(sk)->inet_saddr;
1161c720c7e8SEric Dumazet 		daddr = inet_sk(sk)->inet_daddr;
116249a72dfbSAdam Langley 	} else if (req) {
116349a72dfbSAdam Langley 		saddr = inet_rsk(req)->loc_addr;
116449a72dfbSAdam Langley 		daddr = inet_rsk(req)->rmt_addr;
1165cfb6eeb4SYOSHIFUJI Hideaki 	} else {
116649a72dfbSAdam Langley 		const struct iphdr *iph = ip_hdr(skb);
116749a72dfbSAdam Langley 		saddr = iph->saddr;
116849a72dfbSAdam Langley 		daddr = iph->daddr;
1169cfb6eeb4SYOSHIFUJI Hideaki 	}
1170cfb6eeb4SYOSHIFUJI Hideaki 
117149a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
117249a72dfbSAdam Langley 	if (!hp)
117349a72dfbSAdam Langley 		goto clear_hash_noput;
117449a72dfbSAdam Langley 	desc = &hp->md5_desc;
117549a72dfbSAdam Langley 
117649a72dfbSAdam Langley 	if (crypto_hash_init(desc))
117749a72dfbSAdam Langley 		goto clear_hash;
117849a72dfbSAdam Langley 
117949a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
118049a72dfbSAdam Langley 		goto clear_hash;
118149a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
118249a72dfbSAdam Langley 		goto clear_hash;
118349a72dfbSAdam Langley 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
118449a72dfbSAdam Langley 		goto clear_hash;
118549a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
118649a72dfbSAdam Langley 		goto clear_hash;
118749a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
118849a72dfbSAdam Langley 		goto clear_hash;
118949a72dfbSAdam Langley 
119049a72dfbSAdam Langley 	tcp_put_md5sig_pool();
119149a72dfbSAdam Langley 	return 0;
119249a72dfbSAdam Langley 
119349a72dfbSAdam Langley clear_hash:
119449a72dfbSAdam Langley 	tcp_put_md5sig_pool();
119549a72dfbSAdam Langley clear_hash_noput:
119649a72dfbSAdam Langley 	memset(md5_hash, 0, 16);
119749a72dfbSAdam Langley 	return 1;
119849a72dfbSAdam Langley }
119949a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1200cfb6eeb4SYOSHIFUJI Hideaki 
1201a2a385d6SEric Dumazet static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1202cfb6eeb4SYOSHIFUJI Hideaki {
1203cfb6eeb4SYOSHIFUJI Hideaki 	/*
1204cfb6eeb4SYOSHIFUJI Hideaki 	 * This gets called for each TCP segment that arrives
1205cfb6eeb4SYOSHIFUJI Hideaki 	 * so we want to be efficient.
1206cfb6eeb4SYOSHIFUJI Hideaki 	 * We have 3 drop cases:
1207cfb6eeb4SYOSHIFUJI Hideaki 	 * o No MD5 hash and one expected.
1208cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and we're not expecting one.
1209cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and its wrong.
1210cfb6eeb4SYOSHIFUJI Hideaki 	 */
1211cf533ea5SEric Dumazet 	const __u8 *hash_location = NULL;
1212cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *hash_expected;
1213eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1214cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1215cfb6eeb4SYOSHIFUJI Hideaki 	int genhash;
1216cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char newhash[16];
1217cfb6eeb4SYOSHIFUJI Hideaki 
1218a915da9bSEric Dumazet 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1219a915da9bSEric Dumazet 					  AF_INET);
12207d5d5525SYOSHIFUJI Hideaki 	hash_location = tcp_parse_md5sig_option(th);
1221cfb6eeb4SYOSHIFUJI Hideaki 
1222cfb6eeb4SYOSHIFUJI Hideaki 	/* We've parsed the options - do we have a hash? */
1223cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && !hash_location)
1224a2a385d6SEric Dumazet 		return false;
1225cfb6eeb4SYOSHIFUJI Hideaki 
1226cfb6eeb4SYOSHIFUJI Hideaki 	if (hash_expected && !hash_location) {
1227785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1228a2a385d6SEric Dumazet 		return true;
1229cfb6eeb4SYOSHIFUJI Hideaki 	}
1230cfb6eeb4SYOSHIFUJI Hideaki 
1231cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && hash_location) {
1232785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1233a2a385d6SEric Dumazet 		return true;
1234cfb6eeb4SYOSHIFUJI Hideaki 	}
1235cfb6eeb4SYOSHIFUJI Hideaki 
1236cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, so this is hash_expected and hash_location -
1237cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to calculate the checksum.
1238cfb6eeb4SYOSHIFUJI Hideaki 	 */
123949a72dfbSAdam Langley 	genhash = tcp_v4_md5_hash_skb(newhash,
1240cfb6eeb4SYOSHIFUJI Hideaki 				      hash_expected,
124149a72dfbSAdam Langley 				      NULL, NULL, skb);
1242cfb6eeb4SYOSHIFUJI Hideaki 
1243cfb6eeb4SYOSHIFUJI Hideaki 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1244e87cc472SJoe Perches 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1245673d57e7SHarvey Harrison 				     &iph->saddr, ntohs(th->source),
1246673d57e7SHarvey Harrison 				     &iph->daddr, ntohs(th->dest),
1247e87cc472SJoe Perches 				     genhash ? " tcp_v4_calc_md5_hash failed"
1248e87cc472SJoe Perches 				     : "");
1249a2a385d6SEric Dumazet 		return true;
1250cfb6eeb4SYOSHIFUJI Hideaki 	}
1251a2a385d6SEric Dumazet 	return false;
1252cfb6eeb4SYOSHIFUJI Hideaki }
1253cfb6eeb4SYOSHIFUJI Hideaki 
1254cfb6eeb4SYOSHIFUJI Hideaki #endif
1255cfb6eeb4SYOSHIFUJI Hideaki 
125672a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = {
12571da177e4SLinus Torvalds 	.family		=	PF_INET,
12582e6599cbSArnaldo Carvalho de Melo 	.obj_size	=	sizeof(struct tcp_request_sock),
125972659eccSOctavian Purdila 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
126060236fddSArnaldo Carvalho de Melo 	.send_ack	=	tcp_v4_reqsk_send_ack,
126160236fddSArnaldo Carvalho de Melo 	.destructor	=	tcp_v4_reqsk_destructor,
12621da177e4SLinus Torvalds 	.send_reset	=	tcp_v4_send_reset,
126372659eccSOctavian Purdila 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
12641da177e4SLinus Torvalds };
12651da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 MD5 hooks for request socks; tcp_v4_conn_request() stores a
 * pointer to this table in tcp_rsk(req)->af_specific.
 */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup    = tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash = tcp_v4_md5_hash_skb,
};
#endif
1272cfb6eeb4SYOSHIFUJI Hideaki 
12731da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
12741da177e4SLinus Torvalds {
12754957faadSWilliam Allen Simpson 	struct tcp_extend_values tmp_ext;
12761da177e4SLinus Torvalds 	struct tcp_options_received tmp_opt;
1277cf533ea5SEric Dumazet 	const u8 *hash_location;
127860236fddSArnaldo Carvalho de Melo 	struct request_sock *req;
1279e6b4d113SWilliam Allen Simpson 	struct inet_request_sock *ireq;
12804957faadSWilliam Allen Simpson 	struct tcp_sock *tp = tcp_sk(sk);
1281e6b4d113SWilliam Allen Simpson 	struct dst_entry *dst = NULL;
1282eddc9ec5SArnaldo Carvalho de Melo 	__be32 saddr = ip_hdr(skb)->saddr;
1283eddc9ec5SArnaldo Carvalho de Melo 	__be32 daddr = ip_hdr(skb)->daddr;
12841da177e4SLinus Torvalds 	__u32 isn = TCP_SKB_CB(skb)->when;
1285a2a385d6SEric Dumazet 	bool want_cookie = false;
12861da177e4SLinus Torvalds 
12871da177e4SLinus Torvalds 	/* Never answer to SYNs send to broadcast or multicast */
1288511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
12891da177e4SLinus Torvalds 		goto drop;
12901da177e4SLinus Torvalds 
12911da177e4SLinus Torvalds 	/* TW buckets are converted to open requests without
12921da177e4SLinus Torvalds 	 * limitations, they conserve resources and peer is
12931da177e4SLinus Torvalds 	 * evidently real one.
12941da177e4SLinus Torvalds 	 */
1295463c84b9SArnaldo Carvalho de Melo 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1296946cedccSEric Dumazet 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1297946cedccSEric Dumazet 		if (!want_cookie)
12981da177e4SLinus Torvalds 			goto drop;
12991da177e4SLinus Torvalds 	}
13001da177e4SLinus Torvalds 
13011da177e4SLinus Torvalds 	/* Accept backlog is full. If we have already queued enough
13021da177e4SLinus Torvalds 	 * of warm entries in syn queue, drop request. It is better than
13031da177e4SLinus Torvalds 	 * clogging syn queue with openreqs with exponentially increasing
13041da177e4SLinus Torvalds 	 * timeout.
13051da177e4SLinus Torvalds 	 */
1306463c84b9SArnaldo Carvalho de Melo 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
13071da177e4SLinus Torvalds 		goto drop;
13081da177e4SLinus Torvalds 
1309ce4a7d0dSArnaldo Carvalho de Melo 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
13101da177e4SLinus Torvalds 	if (!req)
13111da177e4SLinus Torvalds 		goto drop;
13121da177e4SLinus Torvalds 
1313cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1314cfb6eeb4SYOSHIFUJI Hideaki 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1315cfb6eeb4SYOSHIFUJI Hideaki #endif
1316cfb6eeb4SYOSHIFUJI Hideaki 
13171da177e4SLinus Torvalds 	tcp_clear_options(&tmp_opt);
1318bee7ca9eSWilliam Allen Simpson 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
13194957faadSWilliam Allen Simpson 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1320bb5b7c11SDavid S. Miller 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
13211da177e4SLinus Torvalds 
13224957faadSWilliam Allen Simpson 	if (tmp_opt.cookie_plus > 0 &&
13234957faadSWilliam Allen Simpson 	    tmp_opt.saw_tstamp &&
13244957faadSWilliam Allen Simpson 	    !tp->rx_opt.cookie_out_never &&
13254957faadSWilliam Allen Simpson 	    (sysctl_tcp_cookie_size > 0 ||
13264957faadSWilliam Allen Simpson 	     (tp->cookie_values != NULL &&
13274957faadSWilliam Allen Simpson 	      tp->cookie_values->cookie_desired > 0))) {
13284957faadSWilliam Allen Simpson 		u8 *c;
13294957faadSWilliam Allen Simpson 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
13304957faadSWilliam Allen Simpson 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
13314957faadSWilliam Allen Simpson 
13324957faadSWilliam Allen Simpson 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
13334957faadSWilliam Allen Simpson 			goto drop_and_release;
13344957faadSWilliam Allen Simpson 
13354957faadSWilliam Allen Simpson 		/* Secret recipe starts with IP addresses */
13360eae88f3SEric Dumazet 		*mess++ ^= (__force u32)daddr;
13370eae88f3SEric Dumazet 		*mess++ ^= (__force u32)saddr;
13384957faadSWilliam Allen Simpson 
13394957faadSWilliam Allen Simpson 		/* plus variable length Initiator Cookie */
13404957faadSWilliam Allen Simpson 		c = (u8 *)mess;
13414957faadSWilliam Allen Simpson 		while (l-- > 0)
13424957faadSWilliam Allen Simpson 			*c++ ^= *hash_location++;
13434957faadSWilliam Allen Simpson 
1344a2a385d6SEric Dumazet 		want_cookie = false;	/* not our kind of cookie */
13454957faadSWilliam Allen Simpson 		tmp_ext.cookie_out_never = 0; /* false */
13464957faadSWilliam Allen Simpson 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
13474957faadSWilliam Allen Simpson 	} else if (!tp->rx_opt.cookie_in_always) {
13484957faadSWilliam Allen Simpson 		/* redundant indications, but ensure initialization. */
13494957faadSWilliam Allen Simpson 		tmp_ext.cookie_out_never = 1; /* true */
13504957faadSWilliam Allen Simpson 		tmp_ext.cookie_plus = 0;
13514957faadSWilliam Allen Simpson 	} else {
13524957faadSWilliam Allen Simpson 		goto drop_and_release;
13534957faadSWilliam Allen Simpson 	}
13544957faadSWilliam Allen Simpson 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
13551da177e4SLinus Torvalds 
13564dfc2817SFlorian Westphal 	if (want_cookie && !tmp_opt.saw_tstamp)
13571da177e4SLinus Torvalds 		tcp_clear_options(&tmp_opt);
13581da177e4SLinus Torvalds 
13591da177e4SLinus Torvalds 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
13601da177e4SLinus Torvalds 	tcp_openreq_init(req, &tmp_opt, skb);
13611da177e4SLinus Torvalds 
1362bb5b7c11SDavid S. Miller 	ireq = inet_rsk(req);
1363bb5b7c11SDavid S. Miller 	ireq->loc_addr = daddr;
1364bb5b7c11SDavid S. Miller 	ireq->rmt_addr = saddr;
1365bb5b7c11SDavid S. Miller 	ireq->no_srccheck = inet_sk(sk)->transparent;
1366bb5b7c11SDavid S. Miller 	ireq->opt = tcp_v4_save_options(sk, skb);
1367bb5b7c11SDavid S. Miller 
1368284904aaSPaul Moore 	if (security_inet_conn_request(sk, skb, req))
1369bb5b7c11SDavid S. Miller 		goto drop_and_free;
1370284904aaSPaul Moore 
1371172d69e6SFlorian Westphal 	if (!want_cookie || tmp_opt.tstamp_ok)
1372bd14b1b2SEric Dumazet 		TCP_ECN_create_request(req, skb);
13731da177e4SLinus Torvalds 
13741da177e4SLinus Torvalds 	if (want_cookie) {
13751da177e4SLinus Torvalds 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1376172d69e6SFlorian Westphal 		req->cookie_ts = tmp_opt.tstamp_ok;
13771da177e4SLinus Torvalds 	} else if (!isn) {
13781da177e4SLinus Torvalds 		struct inet_peer *peer = NULL;
13796bd023f3SDavid S. Miller 		struct flowi4 fl4;
13801da177e4SLinus Torvalds 
13811da177e4SLinus Torvalds 		/* VJ's idea. We save last timestamp seen
13821da177e4SLinus Torvalds 		 * from the destination in peer table, when entering
13831da177e4SLinus Torvalds 		 * state TIME-WAIT, and check against it before
13841da177e4SLinus Torvalds 		 * accepting new connection request.
13851da177e4SLinus Torvalds 		 *
13861da177e4SLinus Torvalds 		 * If "isn" is not zero, this request hit alive
13871da177e4SLinus Torvalds 		 * timewait bucket, so that all the necessary checks
13881da177e4SLinus Torvalds 		 * are made in the function processing timewait state.
13891da177e4SLinus Torvalds 		 */
13901da177e4SLinus Torvalds 		if (tmp_opt.saw_tstamp &&
1391295ff7edSArnaldo Carvalho de Melo 		    tcp_death_row.sysctl_tw_recycle &&
1392*7586ecebSEric Dumazet 		    (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL &&
1393ed2361e6SDavid S. Miller 		    fl4.daddr == saddr &&
1394ed2361e6SDavid S. Miller 		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1395317fe0e6SEric Dumazet 			inet_peer_refcheck(peer);
13962c1409a0SEric Dumazet 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
13971da177e4SLinus Torvalds 			    (s32)(peer->tcp_ts - req->ts_recent) >
13981da177e4SLinus Torvalds 							TCP_PAWS_WINDOW) {
1399de0744afSPavel Emelyanov 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
14007cd04fa7SDenis V. Lunev 				goto drop_and_release;
14011da177e4SLinus Torvalds 			}
14021da177e4SLinus Torvalds 		}
14031da177e4SLinus Torvalds 		/* Kill the following clause, if you dislike this way. */
14041da177e4SLinus Torvalds 		else if (!sysctl_tcp_syncookies &&
1405463c84b9SArnaldo Carvalho de Melo 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
14061da177e4SLinus Torvalds 			  (sysctl_max_syn_backlog >> 2)) &&
14071da177e4SLinus Torvalds 			 (!peer || !peer->tcp_ts_stamp) &&
14081da177e4SLinus Torvalds 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
14091da177e4SLinus Torvalds 			/* Without syncookies last quarter of
14101da177e4SLinus Torvalds 			 * backlog is filled with destinations,
14111da177e4SLinus Torvalds 			 * proven to be alive.
14121da177e4SLinus Torvalds 			 * It means that we continue to communicate
14131da177e4SLinus Torvalds 			 * to destinations, already remembered
14141da177e4SLinus Torvalds 			 * to the moment of synflood.
14151da177e4SLinus Torvalds 			 */
1416afd46503SJoe Perches 			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1417673d57e7SHarvey Harrison 				       &saddr, ntohs(tcp_hdr(skb)->source));
14187cd04fa7SDenis V. Lunev 			goto drop_and_release;
14191da177e4SLinus Torvalds 		}
14201da177e4SLinus Torvalds 
1421a94f723dSGerrit Renker 		isn = tcp_v4_init_sequence(skb);
14221da177e4SLinus Torvalds 	}
14232e6599cbSArnaldo Carvalho de Melo 	tcp_rsk(req)->snt_isn = isn;
14249ad7c049SJerry Chu 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
14251da177e4SLinus Torvalds 
142672659eccSOctavian Purdila 	if (tcp_v4_send_synack(sk, dst, req,
1427fff32699SEric Dumazet 			       (struct request_values *)&tmp_ext,
1428*7586ecebSEric Dumazet 			       skb_get_queue_mapping(skb),
1429*7586ecebSEric Dumazet 			       want_cookie) ||
14304957faadSWilliam Allen Simpson 	    want_cookie)
14311da177e4SLinus Torvalds 		goto drop_and_free;
14321da177e4SLinus Torvalds 
14333f421baaSArnaldo Carvalho de Melo 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
14341da177e4SLinus Torvalds 	return 0;
14351da177e4SLinus Torvalds 
14367cd04fa7SDenis V. Lunev drop_and_release:
14377cd04fa7SDenis V. Lunev 	dst_release(dst);
14381da177e4SLinus Torvalds drop_and_free:
143960236fddSArnaldo Carvalho de Melo 	reqsk_free(req);
14401da177e4SLinus Torvalds drop:
14411da177e4SLinus Torvalds 	return 0;
14421da177e4SLinus Torvalds }
14434bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request);
14441da177e4SLinus Torvalds 
14451da177e4SLinus Torvalds 
14461da177e4SLinus Torvalds /*
14471da177e4SLinus Torvalds  * The three way handshake has completed - we got a valid synack -
14481da177e4SLinus Torvalds  * now create the new socket.
14491da177e4SLinus Torvalds  */
14501da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
145160236fddSArnaldo Carvalho de Melo 				  struct request_sock *req,
14521da177e4SLinus Torvalds 				  struct dst_entry *dst)
14531da177e4SLinus Torvalds {
14542e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
14551da177e4SLinus Torvalds 	struct inet_sock *newinet;
14561da177e4SLinus Torvalds 	struct tcp_sock *newtp;
14571da177e4SLinus Torvalds 	struct sock *newsk;
1458cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1459cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
1460cfb6eeb4SYOSHIFUJI Hideaki #endif
1461f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
14621da177e4SLinus Torvalds 
14631da177e4SLinus Torvalds 	if (sk_acceptq_is_full(sk))
14641da177e4SLinus Torvalds 		goto exit_overflow;
14651da177e4SLinus Torvalds 
14661da177e4SLinus Torvalds 	newsk = tcp_create_openreq_child(sk, req, skb);
14671da177e4SLinus Torvalds 	if (!newsk)
1468093d2823SBalazs Scheidler 		goto exit_nonewsk;
14691da177e4SLinus Torvalds 
1470bcd76111SHerbert Xu 	newsk->sk_gso_type = SKB_GSO_TCPV4;
14711da177e4SLinus Torvalds 
14721da177e4SLinus Torvalds 	newtp		      = tcp_sk(newsk);
14731da177e4SLinus Torvalds 	newinet		      = inet_sk(newsk);
14742e6599cbSArnaldo Carvalho de Melo 	ireq		      = inet_rsk(req);
1475c720c7e8SEric Dumazet 	newinet->inet_daddr   = ireq->rmt_addr;
1476c720c7e8SEric Dumazet 	newinet->inet_rcv_saddr = ireq->loc_addr;
1477c720c7e8SEric Dumazet 	newinet->inet_saddr	      = ireq->loc_addr;
1478f6d8bd05SEric Dumazet 	inet_opt	      = ireq->opt;
1479f6d8bd05SEric Dumazet 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
14802e6599cbSArnaldo Carvalho de Melo 	ireq->opt	      = NULL;
1481463c84b9SArnaldo Carvalho de Melo 	newinet->mc_index     = inet_iif(skb);
1482eddc9ec5SArnaldo Carvalho de Melo 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
14834c507d28SJiri Benc 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1484d83d8461SArnaldo Carvalho de Melo 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1485f6d8bd05SEric Dumazet 	if (inet_opt)
1486f6d8bd05SEric Dumazet 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1487c720c7e8SEric Dumazet 	newinet->inet_id = newtp->write_seq ^ jiffies;
14881da177e4SLinus Torvalds 
1489dfd25fffSEric Dumazet 	if (!dst) {
1490dfd25fffSEric Dumazet 		dst = inet_csk_route_child_sock(sk, newsk, req);
1491dfd25fffSEric Dumazet 		if (!dst)
14920e734419SDavid S. Miller 			goto put_and_exit;
1493dfd25fffSEric Dumazet 	} else {
1494dfd25fffSEric Dumazet 		/* syncookie case : see end of cookie_v4_check() */
1495dfd25fffSEric Dumazet 	}
14960e734419SDavid S. Miller 	sk_setup_caps(newsk, dst);
14970e734419SDavid S. Miller 
14985d424d5aSJohn Heffner 	tcp_mtup_init(newsk);
14991da177e4SLinus Torvalds 	tcp_sync_mss(newsk, dst_mtu(dst));
15000dbaee3bSDavid S. Miller 	newtp->advmss = dst_metric_advmss(dst);
1501f5fff5dcSTom Quetchenbach 	if (tcp_sk(sk)->rx_opt.user_mss &&
1502f5fff5dcSTom Quetchenbach 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1503f5fff5dcSTom Quetchenbach 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1504f5fff5dcSTom Quetchenbach 
15051da177e4SLinus Torvalds 	tcp_initialize_rcv_mss(newsk);
15069ad7c049SJerry Chu 	if (tcp_rsk(req)->snt_synack)
15079ad7c049SJerry Chu 		tcp_valid_rtt_meas(newsk,
15089ad7c049SJerry Chu 		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
15099ad7c049SJerry Chu 	newtp->total_retrans = req->retrans;
15101da177e4SLinus Torvalds 
1511cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1512cfb6eeb4SYOSHIFUJI Hideaki 	/* Copy over the MD5 key from the original socket */
1513a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1514a915da9bSEric Dumazet 				AF_INET);
1515c720c7e8SEric Dumazet 	if (key != NULL) {
1516cfb6eeb4SYOSHIFUJI Hideaki 		/*
1517cfb6eeb4SYOSHIFUJI Hideaki 		 * We're using one, so create a matching key
1518cfb6eeb4SYOSHIFUJI Hideaki 		 * on the newsk structure. If we fail to get
1519cfb6eeb4SYOSHIFUJI Hideaki 		 * memory, then we end up not copying the key
1520cfb6eeb4SYOSHIFUJI Hideaki 		 * across. Shucks.
1521cfb6eeb4SYOSHIFUJI Hideaki 		 */
1522a915da9bSEric Dumazet 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1523a915da9bSEric Dumazet 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1524a465419bSEric Dumazet 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1525cfb6eeb4SYOSHIFUJI Hideaki 	}
1526cfb6eeb4SYOSHIFUJI Hideaki #endif
1527cfb6eeb4SYOSHIFUJI Hideaki 
15280e734419SDavid S. Miller 	if (__inet_inherit_port(sk, newsk) < 0)
15290e734419SDavid S. Miller 		goto put_and_exit;
15309327f705SEric Dumazet 	__inet_hash_nolisten(newsk, NULL);
15311da177e4SLinus Torvalds 
15321da177e4SLinus Torvalds 	return newsk;
15331da177e4SLinus Torvalds 
15341da177e4SLinus Torvalds exit_overflow:
1535de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1536093d2823SBalazs Scheidler exit_nonewsk:
1537093d2823SBalazs Scheidler 	dst_release(dst);
15381da177e4SLinus Torvalds exit:
1539de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
15401da177e4SLinus Torvalds 	return NULL;
15410e734419SDavid S. Miller put_and_exit:
1542709e8697SEric Dumazet 	tcp_clear_xmit_timers(newsk);
1543d8a6e65fSEric Dumazet 	tcp_cleanup_congestion_control(newsk);
1544918eb399SEric Dumazet 	bh_unlock_sock(newsk);
15450e734419SDavid S. Miller 	sock_put(newsk);
15460e734419SDavid S. Miller 	goto exit;
15471da177e4SLinus Torvalds }
15484bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
15491da177e4SLinus Torvalds 
15501da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
15511da177e4SLinus Torvalds {
1552aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
1553eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
15541da177e4SLinus Torvalds 	struct sock *nsk;
155560236fddSArnaldo Carvalho de Melo 	struct request_sock **prev;
15561da177e4SLinus Torvalds 	/* Find possible connection requests. */
1557463c84b9SArnaldo Carvalho de Melo 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
15581da177e4SLinus Torvalds 						       iph->saddr, iph->daddr);
15591da177e4SLinus Torvalds 	if (req)
15601da177e4SLinus Torvalds 		return tcp_check_req(sk, skb, req, prev);
15611da177e4SLinus Torvalds 
15623b1e0a65SYOSHIFUJI Hideaki 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1563c67499c0SPavel Emelyanov 			th->source, iph->daddr, th->dest, inet_iif(skb));
15641da177e4SLinus Torvalds 
15651da177e4SLinus Torvalds 	if (nsk) {
15661da177e4SLinus Torvalds 		if (nsk->sk_state != TCP_TIME_WAIT) {
15671da177e4SLinus Torvalds 			bh_lock_sock(nsk);
15681da177e4SLinus Torvalds 			return nsk;
15691da177e4SLinus Torvalds 		}
15709469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(nsk));
15711da177e4SLinus Torvalds 		return NULL;
15721da177e4SLinus Torvalds 	}
15731da177e4SLinus Torvalds 
15741da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
1575af9b4738SFlorian Westphal 	if (!th->syn)
15761da177e4SLinus Torvalds 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
15771da177e4SLinus Torvalds #endif
15781da177e4SLinus Torvalds 	return sk;
15791da177e4SLinus Torvalds }
15801da177e4SLinus Torvalds 
1581b51655b9SAl Viro static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
15821da177e4SLinus Torvalds {
1583eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1584eddc9ec5SArnaldo Carvalho de Melo 
158584fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1586eddc9ec5SArnaldo Carvalho de Melo 		if (!tcp_v4_check(skb->len, iph->saddr,
1587eddc9ec5SArnaldo Carvalho de Melo 				  iph->daddr, skb->csum)) {
15881da177e4SLinus Torvalds 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1589fb286bb2SHerbert Xu 			return 0;
1590fb286bb2SHerbert Xu 		}
1591fb286bb2SHerbert Xu 	}
1592fb286bb2SHerbert Xu 
1593eddc9ec5SArnaldo Carvalho de Melo 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1594fb286bb2SHerbert Xu 				       skb->len, IPPROTO_TCP, 0);
1595fb286bb2SHerbert Xu 
1596fb286bb2SHerbert Xu 	if (skb->len <= 76) {
1597fb286bb2SHerbert Xu 		return __skb_checksum_complete(skb);
15981da177e4SLinus Torvalds 	}
15991da177e4SLinus Torvalds 	return 0;
16001da177e4SLinus Torvalds }
16011da177e4SLinus Torvalds 
16021da177e4SLinus Torvalds 
16031da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get
16041da177e4SLinus Torvalds  * here.
16051da177e4SLinus Torvalds  *
16061da177e4SLinus Torvalds  * We have a potential double-lock case here, so even when
16071da177e4SLinus Torvalds  * doing backlog processing we use the BH locking scheme.
16081da177e4SLinus Torvalds  * This is because we cannot sleep with the original spinlock
16091da177e4SLinus Torvalds  * held.
16101da177e4SLinus Torvalds  */
16111da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
16121da177e4SLinus Torvalds {
1613cfb6eeb4SYOSHIFUJI Hideaki 	struct sock *rsk;
1614cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1615cfb6eeb4SYOSHIFUJI Hideaki 	/*
1616cfb6eeb4SYOSHIFUJI Hideaki 	 * We really want to reject the packet as early as possible
1617cfb6eeb4SYOSHIFUJI Hideaki 	 * if:
1618cfb6eeb4SYOSHIFUJI Hideaki 	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1619cfb6eeb4SYOSHIFUJI Hideaki 	 *  o There is an MD5 option and we're not expecting one
1620cfb6eeb4SYOSHIFUJI Hideaki 	 */
1621cfb6eeb4SYOSHIFUJI Hideaki 	if (tcp_v4_inbound_md5_hash(sk, skb))
1622cfb6eeb4SYOSHIFUJI Hideaki 		goto discard;
1623cfb6eeb4SYOSHIFUJI Hideaki #endif
1624cfb6eeb4SYOSHIFUJI Hideaki 
16251da177e4SLinus Torvalds 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1626bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1627aa8223c7SArnaldo Carvalho de Melo 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1628cfb6eeb4SYOSHIFUJI Hideaki 			rsk = sk;
16291da177e4SLinus Torvalds 			goto reset;
1630cfb6eeb4SYOSHIFUJI Hideaki 		}
16311da177e4SLinus Torvalds 		return 0;
16321da177e4SLinus Torvalds 	}
16331da177e4SLinus Torvalds 
1634ab6a5bb6SArnaldo Carvalho de Melo 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
16351da177e4SLinus Torvalds 		goto csum_err;
16361da177e4SLinus Torvalds 
16371da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
16381da177e4SLinus Torvalds 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
16391da177e4SLinus Torvalds 		if (!nsk)
16401da177e4SLinus Torvalds 			goto discard;
16411da177e4SLinus Torvalds 
16421da177e4SLinus Torvalds 		if (nsk != sk) {
1643bdeab991STom Herbert 			sock_rps_save_rxhash(nsk, skb);
1644cfb6eeb4SYOSHIFUJI Hideaki 			if (tcp_child_process(sk, nsk, skb)) {
1645cfb6eeb4SYOSHIFUJI Hideaki 				rsk = nsk;
16461da177e4SLinus Torvalds 				goto reset;
1647cfb6eeb4SYOSHIFUJI Hideaki 			}
16481da177e4SLinus Torvalds 			return 0;
16491da177e4SLinus Torvalds 		}
1650ca55158cSEric Dumazet 	} else
1651bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1652ca55158cSEric Dumazet 
1653aa8223c7SArnaldo Carvalho de Melo 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1654cfb6eeb4SYOSHIFUJI Hideaki 		rsk = sk;
16551da177e4SLinus Torvalds 		goto reset;
1656cfb6eeb4SYOSHIFUJI Hideaki 	}
16571da177e4SLinus Torvalds 	return 0;
16581da177e4SLinus Torvalds 
16591da177e4SLinus Torvalds reset:
1660cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_reset(rsk, skb);
16611da177e4SLinus Torvalds discard:
16621da177e4SLinus Torvalds 	kfree_skb(skb);
16631da177e4SLinus Torvalds 	/* Be careful here. If this function gets more complicated and
16641da177e4SLinus Torvalds 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
16651da177e4SLinus Torvalds 	 * might be destroyed here. This current version compiles correctly,
16661da177e4SLinus Torvalds 	 * but you have been warned.
16671da177e4SLinus Torvalds 	 */
16681da177e4SLinus Torvalds 	return 0;
16691da177e4SLinus Torvalds 
16701da177e4SLinus Torvalds csum_err:
167163231bddSPavel Emelyanov 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
16721da177e4SLinus Torvalds 	goto discard;
16731da177e4SLinus Torvalds }
16744bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv);
16751da177e4SLinus Torvalds 
167641063e9dSDavid S. Miller int tcp_v4_early_demux(struct sk_buff *skb)
167741063e9dSDavid S. Miller {
167841063e9dSDavid S. Miller 	struct net *net = dev_net(skb->dev);
167941063e9dSDavid S. Miller 	const struct iphdr *iph;
168041063e9dSDavid S. Miller 	const struct tcphdr *th;
1681fd62e09bSDavid S. Miller 	struct net_device *dev;
168241063e9dSDavid S. Miller 	struct sock *sk;
168341063e9dSDavid S. Miller 	int err;
168441063e9dSDavid S. Miller 
168541063e9dSDavid S. Miller 	err = -ENOENT;
168641063e9dSDavid S. Miller 	if (skb->pkt_type != PACKET_HOST)
168741063e9dSDavid S. Miller 		goto out_err;
168841063e9dSDavid S. Miller 
168941063e9dSDavid S. Miller 	if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
169041063e9dSDavid S. Miller 		goto out_err;
169141063e9dSDavid S. Miller 
169241063e9dSDavid S. Miller 	iph = ip_hdr(skb);
169341063e9dSDavid S. Miller 	th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
169441063e9dSDavid S. Miller 
169541063e9dSDavid S. Miller 	if (th->doff < sizeof(struct tcphdr) / 4)
169641063e9dSDavid S. Miller 		goto out_err;
169741063e9dSDavid S. Miller 
169841063e9dSDavid S. Miller 	if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4))
169941063e9dSDavid S. Miller 		goto out_err;
170041063e9dSDavid S. Miller 
1701fd62e09bSDavid S. Miller 	dev = skb->dev;
170241063e9dSDavid S. Miller 	sk = __inet_lookup_established(net, &tcp_hashinfo,
170341063e9dSDavid S. Miller 				       iph->saddr, th->source,
170441063e9dSDavid S. Miller 				       iph->daddr, th->dest,
1705fd62e09bSDavid S. Miller 				       dev->ifindex);
170641063e9dSDavid S. Miller 	if (sk) {
170741063e9dSDavid S. Miller 		skb->sk = sk;
170841063e9dSDavid S. Miller 		skb->destructor = sock_edemux;
170941063e9dSDavid S. Miller 		if (sk->sk_state != TCP_TIME_WAIT) {
171041063e9dSDavid S. Miller 			struct dst_entry *dst = sk->sk_rx_dst;
171141063e9dSDavid S. Miller 			if (dst)
171241063e9dSDavid S. Miller 				dst = dst_check(dst, 0);
171341063e9dSDavid S. Miller 			if (dst) {
1714fd62e09bSDavid S. Miller 				struct rtable *rt = (struct rtable *) dst;
1715fd62e09bSDavid S. Miller 
1716fd62e09bSDavid S. Miller 				if (rt->rt_iif == dev->ifindex) {
171741063e9dSDavid S. Miller 					skb_dst_set_noref(skb, dst);
171841063e9dSDavid S. Miller 					err = 0;
171941063e9dSDavid S. Miller 				}
172041063e9dSDavid S. Miller 			}
172141063e9dSDavid S. Miller 		}
1722fd62e09bSDavid S. Miller 	}
172341063e9dSDavid S. Miller 
172441063e9dSDavid S. Miller out_err:
172541063e9dSDavid S. Miller 	return err;
172641063e9dSDavid S. Miller }
172741063e9dSDavid S. Miller 
17281da177e4SLinus Torvalds /*
17291da177e4SLinus Torvalds  *	From tcp_input.c
17301da177e4SLinus Torvalds  */
17311da177e4SLinus Torvalds 
/*
 * tcp_v4_rcv - handle one incoming IPv4 TCP segment.
 *
 * Checks the TCP header length and checksum, fills in TCP_SKB_CB(skb),
 * looks up the matching socket and then either processes the segment via
 * tcp_v4_do_rcv(), hands it to tcp_prequeue(), or adds it to the socket
 * backlog when the socket is currently owned by user context.
 *
 * Returns the result of tcp_v4_do_rcv() when that is called here, and 0
 * on every other path.  Every path that ends at discard_it frees the skb
 * with kfree_skb().
 */
17321da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb)
17331da177e4SLinus Torvalds {
1734eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
1735cf533ea5SEric Dumazet 	const struct tcphdr *th;
17361da177e4SLinus Torvalds 	struct sock *sk;
17371da177e4SLinus Torvalds 	int ret;
1738a86b1e30SPavel Emelyanov 	struct net *net = dev_net(skb->dev);
17391da177e4SLinus Torvalds 
	/* Anything not marked PACKET_HOST is dropped without being counted. */
17401da177e4SLinus Torvalds 	if (skb->pkt_type != PACKET_HOST)
17411da177e4SLinus Torvalds 		goto discard_it;
17421da177e4SLinus Torvalds 
17431da177e4SLinus Torvalds 	/* Count it even if it's bad */
174463231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
17451da177e4SLinus Torvalds 
17461da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
17471da177e4SLinus Torvalds 		goto discard_it;
17481da177e4SLinus Torvalds 
1749aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
17501da177e4SLinus Torvalds 
	/* doff is in 32-bit words; it must cover at least a bare tcphdr. */
17511da177e4SLinus Torvalds 	if (th->doff < sizeof(struct tcphdr) / 4)
17521da177e4SLinus Torvalds 		goto bad_packet;
17531da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, th->doff * 4))
17541da177e4SLinus Torvalds 		goto discard_it;
17551da177e4SLinus Torvalds 
17561da177e4SLinus Torvalds 	/* An explanation is required here, I think.
17571da177e4SLinus Torvalds 	 * Packet length and doff are validated by header prediction,
1758caa20d9aSStephen Hemminger 	 * provided case of th->doff==0 is eliminated.
17591da177e4SLinus Torvalds 	 * So, we defer the checks. */
176060476372SHerbert Xu 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
17611da177e4SLinus Torvalds 		goto bad_packet;
17621da177e4SLinus Torvalds 
	/* NOTE(review): th is reloaded here, presumably because the second
	 * pskb_may_pull() may have moved the header data - confirm.
	 */
1763aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
1764eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
17651da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
17661da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
17671da177e4SLinus Torvalds 				    skb->len - th->doff * 4);
17681da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
17691da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->when	 = 0;
1770b82d1bb4SEric Dumazet 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
17711da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->sacked	 = 0;
17721da177e4SLinus Torvalds 
17739a1f27c4SArnaldo Carvalho de Melo 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
17741da177e4SLinus Torvalds 	if (!sk)
17751da177e4SLinus Torvalds 		goto no_tcp_socket;
17761da177e4SLinus Torvalds 
	/* Also re-entered from do_time_wait once sk has been replaced by a
	 * listening socket.
	 */
1777bb134d5dSEric Dumazet process:
1778bb134d5dSEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
1779bb134d5dSEric Dumazet 		goto do_time_wait;
1780bb134d5dSEric Dumazet 
17816cce09f8SEric Dumazet 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
17826cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1783d218d111SStephen Hemminger 		goto discard_and_relse;
17846cce09f8SEric Dumazet 	}
1785d218d111SStephen Hemminger 
17861da177e4SLinus Torvalds 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
17871da177e4SLinus Torvalds 		goto discard_and_relse;
1788b59c2701SPatrick McHardy 	nf_reset(skb);
17891da177e4SLinus Torvalds 
1790fda9ef5dSDmitry Mishin 	if (sk_filter(sk, skb))
17911da177e4SLinus Torvalds 		goto discard_and_relse;
17921da177e4SLinus Torvalds 
17931da177e4SLinus Torvalds 	skb->dev = NULL;
17941da177e4SLinus Torvalds 
	/* With the socket bh-locked: if no user context owns it, process
	 * (or prequeue) the segment now; otherwise queue it on the backlog,
	 * limited to sk_rcvbuf + sk_sndbuf.  A failed backlog add is counted
	 * and the segment is dropped.
	 */
1795c6366184SIngo Molnar 	bh_lock_sock_nested(sk);
17961da177e4SLinus Torvalds 	ret = 0;
17971da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk)) {
17981a2449a8SChris Leech #ifdef CONFIG_NET_DMA
17991a2449a8SChris Leech 		struct tcp_sock *tp = tcp_sk(sk);
18001a2449a8SChris Leech 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1801a2bd1140SDave Jiang 			tp->ucopy.dma_chan = net_dma_find_channel();
18021a2449a8SChris Leech 		if (tp->ucopy.dma_chan)
18031a2449a8SChris Leech 			ret = tcp_v4_do_rcv(sk, skb);
18041a2449a8SChris Leech 		else
18051a2449a8SChris Leech #endif
18061a2449a8SChris Leech 		{
18071da177e4SLinus Torvalds 			if (!tcp_prequeue(sk, skb))
18081da177e4SLinus Torvalds 				ret = tcp_v4_do_rcv(sk, skb);
18091a2449a8SChris Leech 		}
1810da882c1fSEric Dumazet 	} else if (unlikely(sk_add_backlog(sk, skb,
1811da882c1fSEric Dumazet 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
18126b03a53aSZhu Yi 		bh_unlock_sock(sk);
18136cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
18146b03a53aSZhu Yi 		goto discard_and_relse;
18156b03a53aSZhu Yi 	}
18161da177e4SLinus Torvalds 	bh_unlock_sock(sk);
18171da177e4SLinus Torvalds 
18181da177e4SLinus Torvalds 	sock_put(sk);
18191da177e4SLinus Torvalds 
18201da177e4SLinus Torvalds 	return ret;
18211da177e4SLinus Torvalds 
	/* No socket matched, or time-wait processing returned TCP_TW_RST.
	 * A short or badly checksummed segment is only counted as an error;
	 * anything else is answered with a reset.  bad_packet is also the
	 * target of the early header/checksum failures above.
	 */
18221da177e4SLinus Torvalds no_tcp_socket:
18231da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
18241da177e4SLinus Torvalds 		goto discard_it;
18251da177e4SLinus Torvalds 
18261da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
18271da177e4SLinus Torvalds bad_packet:
182863231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
18291da177e4SLinus Torvalds 	} else {
1830cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_send_reset(NULL, skb);
18311da177e4SLinus Torvalds 	}
18321da177e4SLinus Torvalds 
18331da177e4SLinus Torvalds discard_it:
18341da177e4SLinus Torvalds 	/* Discard frame. */
18351da177e4SLinus Torvalds 	kfree_skb(skb);
18361da177e4SLinus Torvalds 	return 0;
18371da177e4SLinus Torvalds 
	/* Drop the socket reference first, then free the skb. */
18381da177e4SLinus Torvalds discard_and_relse:
18391da177e4SLinus Torvalds 	sock_put(sk);
18401da177e4SLinus Torvalds 	goto discard_it;
18411da177e4SLinus Torvalds 
	/* sk is treated as an inet_timewait_sock from here on (inet_twsk(sk));
	 * its reference is released with inet_twsk_put(), not sock_put().
	 */
18421da177e4SLinus Torvalds do_time_wait:
18431da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
18449469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
18451da177e4SLinus Torvalds 		goto discard_it;
18461da177e4SLinus Torvalds 	}
18471da177e4SLinus Torvalds 
18481da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
184963231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
18509469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
18511da177e4SLinus Torvalds 		goto discard_it;
18521da177e4SLinus Torvalds 	}
	/* NOTE(review): the TCP_TW_ACK, TCP_TW_RST and TCP_TW_SUCCESS paths
	 * below do not call inet_twsk_put() here; presumably
	 * tcp_timewait_state_process()/tcp_v4_timewait_ack() release the
	 * reference - confirm against their definitions.
	 */
18539469c7b4SYOSHIFUJI Hideaki 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
18541da177e4SLinus Torvalds 	case TCP_TW_SYN: {
		/* Look for a listener on the destination address/port; if one
		 * exists, retire the time-wait sock and process the segment
		 * against the listener instead.
		 */
1855c346dca1SYOSHIFUJI Hideaki 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1856c67499c0SPavel Emelyanov 							&tcp_hashinfo,
1857eddc9ec5SArnaldo Carvalho de Melo 							iph->daddr, th->dest,
1858463c84b9SArnaldo Carvalho de Melo 							inet_iif(skb));
18591da177e4SLinus Torvalds 		if (sk2) {
18609469c7b4SYOSHIFUJI Hideaki 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
18619469c7b4SYOSHIFUJI Hideaki 			inet_twsk_put(inet_twsk(sk));
18621da177e4SLinus Torvalds 			sk = sk2;
18631da177e4SLinus Torvalds 			goto process;
18641da177e4SLinus Torvalds 		}
18651da177e4SLinus Torvalds 		/* Fall through to ACK */
18661da177e4SLinus Torvalds 	}
18671da177e4SLinus Torvalds 	case TCP_TW_ACK:
18681da177e4SLinus Torvalds 		tcp_v4_timewait_ack(sk, skb);
18691da177e4SLinus Torvalds 		break;
18701da177e4SLinus Torvalds 	case TCP_TW_RST:
18711da177e4SLinus Torvalds 		goto no_tcp_socket;
18721da177e4SLinus Torvalds 	case TCP_TW_SUCCESS:;
18731da177e4SLinus Torvalds 	}
18741da177e4SLinus Torvalds 	goto discard_it;
18751da177e4SLinus Torvalds }
18761da177e4SLinus Torvalds 
18774670fd81SDavid S. Miller struct inet_peer *tcp_v4_get_peer(struct sock *sk)
18781da177e4SLinus Torvalds {
18791da177e4SLinus Torvalds 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
18803f419d2dSDavid S. Miller 	struct inet_sock *inet = inet_sk(sk);
18811da177e4SLinus Torvalds 
18824670fd81SDavid S. Miller 	/* If we don't have a valid cached route, or we're doing IP
18834670fd81SDavid S. Miller 	 * options which make the IPv4 header destination address
18844670fd81SDavid S. Miller 	 * different from our peer's, do not bother with this.
18854670fd81SDavid S. Miller 	 */
18864670fd81SDavid S. Miller 	if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr)
18874670fd81SDavid S. Miller 		return NULL;
18884670fd81SDavid S. Miller 	return rt_get_peer_create(rt, inet->inet_daddr);
18891da177e4SLinus Torvalds }
18903f419d2dSDavid S. Miller EXPORT_SYMBOL(tcp_v4_get_peer);
18911da177e4SLinus Torvalds 
1892ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = {
1893ccb7c410SDavid S. Miller 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1894ccb7c410SDavid S. Miller 	.twsk_unique	= tcp_twsk_unique,
1895ccb7c410SDavid S. Miller 	.twsk_destructor= tcp_twsk_destructor,
1896ccb7c410SDavid S. Miller };
18971da177e4SLinus Torvalds 
18983b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = {
18991da177e4SLinus Torvalds 	.queue_xmit	   = ip_queue_xmit,
19001da177e4SLinus Torvalds 	.send_check	   = tcp_v4_send_check,
190132519f11SArnaldo Carvalho de Melo 	.rebuild_header	   = inet_sk_rebuild_header,
19021da177e4SLinus Torvalds 	.conn_request	   = tcp_v4_conn_request,
19031da177e4SLinus Torvalds 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
19043f419d2dSDavid S. Miller 	.get_peer	   = tcp_v4_get_peer,
19051da177e4SLinus Torvalds 	.net_header_len	   = sizeof(struct iphdr),
19061da177e4SLinus Torvalds 	.setsockopt	   = ip_setsockopt,
19071da177e4SLinus Torvalds 	.getsockopt	   = ip_getsockopt,
1908543d9cfeSArnaldo Carvalho de Melo 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1909543d9cfeSArnaldo Carvalho de Melo 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1910ab1e0a13SArnaldo Carvalho de Melo 	.bind_conflict	   = inet_csk_bind_conflict,
19113fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT
19123fdadf7dSDmitry Mishin 	.compat_setsockopt = compat_ip_setsockopt,
19133fdadf7dSDmitry Mishin 	.compat_getsockopt = compat_ip_getsockopt,
19143fdadf7dSDmitry Mishin #endif
19151da177e4SLinus Torvalds };
19164bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific);
19171da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature callbacks for IPv4 TCP sockets; built only with MD5SIG. */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_parse	= tcp_v4_parse_md5_keys,
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif
1925cfb6eeb4SYOSHIFUJI Hideaki 
19261da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to
19271da177e4SLinus Torvalds  *       sk_alloc() so need not be done here.
19281da177e4SLinus Torvalds  */
19291da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk)
19301da177e4SLinus Torvalds {
19316687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
19321da177e4SLinus Torvalds 
1933900f65d3SNeal Cardwell 	tcp_init_sock(sk);
19341da177e4SLinus Torvalds 
19358292a17aSArnaldo Carvalho de Melo 	icsk->icsk_af_ops = &ipv4_specific;
1936900f65d3SNeal Cardwell 
1937cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1938ac807fa8SDavid S. Miller 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1939cfb6eeb4SYOSHIFUJI Hideaki #endif
19401da177e4SLinus Torvalds 
19411da177e4SLinus Torvalds 	return 0;
19421da177e4SLinus Torvalds }
19431da177e4SLinus Torvalds 
19447d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk)
19451da177e4SLinus Torvalds {
19461da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
19471da177e4SLinus Torvalds 
19481da177e4SLinus Torvalds 	tcp_clear_xmit_timers(sk);
19491da177e4SLinus Torvalds 
19506687e988SArnaldo Carvalho de Melo 	tcp_cleanup_congestion_control(sk);
1951317a76f9SStephen Hemminger 
19521da177e4SLinus Torvalds 	/* Cleanup up the write buffer. */
1953fe067e8aSDavid S. Miller 	tcp_write_queue_purge(sk);
19541da177e4SLinus Torvalds 
19551da177e4SLinus Torvalds 	/* Cleans up our, hopefully empty, out_of_order_queue. */
19561da177e4SLinus Torvalds 	__skb_queue_purge(&tp->out_of_order_queue);
19571da177e4SLinus Torvalds 
1958cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1959cfb6eeb4SYOSHIFUJI Hideaki 	/* Clean up the MD5 key list, if any */
1960cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info) {
1961a915da9bSEric Dumazet 		tcp_clear_md5_list(sk);
1962a8afca03SEric Dumazet 		kfree_rcu(tp->md5sig_info, rcu);
1963cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = NULL;
1964cfb6eeb4SYOSHIFUJI Hideaki 	}
1965cfb6eeb4SYOSHIFUJI Hideaki #endif
1966cfb6eeb4SYOSHIFUJI Hideaki 
19671a2449a8SChris Leech #ifdef CONFIG_NET_DMA
19681a2449a8SChris Leech 	/* Cleans up our sk_async_wait_queue */
19691a2449a8SChris Leech 	__skb_queue_purge(&sk->sk_async_wait_queue);
19701a2449a8SChris Leech #endif
19711a2449a8SChris Leech 
19721da177e4SLinus Torvalds 	/* Clean prequeue, it must be empty really */
19731da177e4SLinus Torvalds 	__skb_queue_purge(&tp->ucopy.prequeue);
19741da177e4SLinus Torvalds 
19751da177e4SLinus Torvalds 	/* Clean up a referenced TCP bind bucket. */
1976463c84b9SArnaldo Carvalho de Melo 	if (inet_csk(sk)->icsk_bind_hash)
1977ab1e0a13SArnaldo Carvalho de Melo 		inet_put_port(sk);
19781da177e4SLinus Torvalds 
19791da177e4SLinus Torvalds 	/*
19801da177e4SLinus Torvalds 	 * If sendmsg cached page exists, toss it.
19811da177e4SLinus Torvalds 	 */
19821da177e4SLinus Torvalds 	if (sk->sk_sndmsg_page) {
19831da177e4SLinus Torvalds 		__free_page(sk->sk_sndmsg_page);
19841da177e4SLinus Torvalds 		sk->sk_sndmsg_page = NULL;
19851da177e4SLinus Torvalds 	}
19861da177e4SLinus Torvalds 
1987435cf559SWilliam Allen Simpson 	/* TCP Cookie Transactions */
1988435cf559SWilliam Allen Simpson 	if (tp->cookie_values != NULL) {
1989435cf559SWilliam Allen Simpson 		kref_put(&tp->cookie_values->kref,
1990435cf559SWilliam Allen Simpson 			 tcp_cookie_values_release);
1991435cf559SWilliam Allen Simpson 		tp->cookie_values = NULL;
1992435cf559SWilliam Allen Simpson 	}
1993435cf559SWilliam Allen Simpson 
1994180d8cd9SGlauber Costa 	sk_sockets_allocated_dec(sk);
1995d1a4c0b3SGlauber Costa 	sock_release_memcg(sk);
19961da177e4SLinus Torvalds }
19971da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock);
19981da177e4SLinus Torvalds 
19991da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
20001da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */
20011da177e4SLinus Torvalds 
20023ab5aee7SEric Dumazet static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
20031da177e4SLinus Torvalds {
20043ab5aee7SEric Dumazet 	return hlist_nulls_empty(head) ? NULL :
20058feaf0c0SArnaldo Carvalho de Melo 		list_entry(head->first, struct inet_timewait_sock, tw_node);
20061da177e4SLinus Torvalds }
20071da177e4SLinus Torvalds 
20088feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
20091da177e4SLinus Torvalds {
20103ab5aee7SEric Dumazet 	return !is_a_nulls(tw->tw_node.next) ?
20113ab5aee7SEric Dumazet 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
20121da177e4SLinus Torvalds }
20131da177e4SLinus Torvalds 
2014a8b690f9STom Herbert /*
2015a8b690f9STom Herbert  * Get next listener socket following cur.  If cur is NULL, get first socket
2016a8b690f9STom Herbert  * starting from bucket given in st->bucket; when st->bucket is zero the
2017a8b690f9STom Herbert  * very first socket in the hash table is returned.
 *
 * The walk yields two kinds of entries, distinguished by st->state:
 *   TCP_SEQ_STATE_LISTENING - a listening struct sock;
 *   TCP_SEQ_STATE_OPENREQ   - a struct request_sock taken from the
 *                             syn_table of st->syn_wait_sk.
 *
 * Locking: a non-NULL return leaves the listening bucket lock
 * (ilb->lock for st->bucket) held; when the entry is a request_sock the
 * syn_wait_lock of st->syn_wait_sk is read-held as well.  Both are
 * released on later calls to this function or in tcp_seq_stop().
 * NULL is returned, with no lock held, once all INET_LHTABLE_SIZE buckets
 * have been walked.
 *
 * When called with a non-NULL cur, st->num and st->offset are incremented.
2018a8b690f9STom Herbert  */
20191da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur)
20201da177e4SLinus Torvalds {
2021463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk;
2022c25eb3bfSEric Dumazet 	struct hlist_nulls_node *node;
20231da177e4SLinus Torvalds 	struct sock *sk = cur;
20245caea4eaSEric Dumazet 	struct inet_listen_hashbucket *ilb;
20251da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2026a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
20271da177e4SLinus Torvalds 
	/* Fresh start: lock the bucket named by st->bucket and scan it. */
20281da177e4SLinus Torvalds 	if (!sk) {
2029a8b690f9STom Herbert 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
20305caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
2031c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
2032a8b690f9STom Herbert 		st->offset = 0;
20331da177e4SLinus Torvalds 		goto get_sk;
20341da177e4SLinus Torvalds 	}
20355caea4eaSEric Dumazet 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
20361da177e4SLinus Torvalds 	++st->num;
2037a8b690f9STom Herbert 	++st->offset;
20381da177e4SLinus Torvalds 
20391da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* cur is a request_sock: continue through the syn_table of
		 * st->syn_wait_sk, one hash slot (st->sbucket) at a time.
		 */
204060236fddSArnaldo Carvalho de Melo 		struct request_sock *req = cur;
20411da177e4SLinus Torvalds 
2042463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(st->syn_wait_sk);
20431da177e4SLinus Torvalds 		req = req->dl_next;
20441da177e4SLinus Torvalds 		while (1) {
20451da177e4SLinus Torvalds 			while (req) {
2046bdccc4caSDaniel Lezcano 				if (req->rsk_ops->family == st->family) {
20471da177e4SLinus Torvalds 					cur = req;
20481da177e4SLinus Torvalds 					goto out;
20491da177e4SLinus Torvalds 				}
20501da177e4SLinus Torvalds 				req = req->dl_next;
20511da177e4SLinus Torvalds 			}
205272a3effaSEric Dumazet 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
20531da177e4SLinus Torvalds 				break;
			/* start_req (below) jumps here, into the loop body, with
			 * st->sbucket == 0 and syn_wait_lock read-held.
			 */
20541da177e4SLinus Torvalds get_req:
2055463c84b9SArnaldo Carvalho de Melo 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
20561da177e4SLinus Torvalds 		}
		/* syn_table exhausted: go back to walking listeners, starting
		 * with the one after st->syn_wait_sk.
		 */
20571bde5ac4SEric Dumazet 		sk	  = sk_nulls_next(st->syn_wait_sk);
20581da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_LISTENING;
2059463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20601da177e4SLinus Torvalds 	} else {
		/* cur is a listener that was already returned: visit its
		 * pending requests (if any) before moving to the next socket.
		 */
2061463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
2062463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2063463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
20641da177e4SLinus Torvalds 			goto start_req;
2065463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20661bde5ac4SEric Dumazet 		sk = sk_nulls_next(sk);
20671da177e4SLinus Torvalds 	}
20681da177e4SLinus Torvalds get_sk:
2069c25eb3bfSEric Dumazet 	sk_nulls_for_each_from(sk, node) {
20708475ef9fSPavel Emelyanov 		if (!net_eq(sock_net(sk), net))
20718475ef9fSPavel Emelyanov 			continue;
20728475ef9fSPavel Emelyanov 		if (sk->sk_family == st->family) {
20731da177e4SLinus Torvalds 			cur = sk;
20741da177e4SLinus Torvalds 			goto out;
20751da177e4SLinus Torvalds 		}
		/* The listener's family does not match, but its pending
		 * requests are still examined; they are filtered by
		 * req->rsk_ops->family above.
		 */
2076463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
2077463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2078463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
20791da177e4SLinus Torvalds start_req:
20801da177e4SLinus Torvalds 			st->uid		= sock_i_uid(sk);
20811da177e4SLinus Torvalds 			st->syn_wait_sk = sk;
20821da177e4SLinus Torvalds 			st->state	= TCP_SEQ_STATE_OPENREQ;
20831da177e4SLinus Torvalds 			st->sbucket	= 0;
20841da177e4SLinus Torvalds 			goto get_req;
20851da177e4SLinus Torvalds 		}
2086463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20871da177e4SLinus Torvalds 	}
	/* Bucket finished: release it and move on to the next one, if any. */
20885caea4eaSEric Dumazet 	spin_unlock_bh(&ilb->lock);
2089a8b690f9STom Herbert 	st->offset = 0;
20900f7ff927SArnaldo Carvalho de Melo 	if (++st->bucket < INET_LHTABLE_SIZE) {
20915caea4eaSEric Dumazet 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
20925caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
2093c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
20941da177e4SLinus Torvalds 		goto get_sk;
20951da177e4SLinus Torvalds 	}
20961da177e4SLinus Torvalds 	cur = NULL;
20971da177e4SLinus Torvalds out:
20981da177e4SLinus Torvalds 	return cur;
20991da177e4SLinus Torvalds }
21001da177e4SLinus Torvalds 
21011da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
21021da177e4SLinus Torvalds {
2103a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2104a8b690f9STom Herbert 	void *rc;
2105a8b690f9STom Herbert 
2106a8b690f9STom Herbert 	st->bucket = 0;
2107a8b690f9STom Herbert 	st->offset = 0;
2108a8b690f9STom Herbert 	rc = listening_get_next(seq, NULL);
21091da177e4SLinus Torvalds 
21101da177e4SLinus Torvalds 	while (rc && *pos) {
21111da177e4SLinus Torvalds 		rc = listening_get_next(seq, rc);
21121da177e4SLinus Torvalds 		--*pos;
21131da177e4SLinus Torvalds 	}
21141da177e4SLinus Torvalds 	return rc;
21151da177e4SLinus Torvalds }
21161da177e4SLinus Torvalds 
2117a2a385d6SEric Dumazet static inline bool empty_bucket(struct tcp_iter_state *st)
21186eac5604SAndi Kleen {
21193ab5aee7SEric Dumazet 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
21203ab5aee7SEric Dumazet 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
21216eac5604SAndi Kleen }
21226eac5604SAndi Kleen 
2123a8b690f9STom Herbert /*
2124a8b690f9STom Herbert  * Get first established socket starting from bucket given in st->bucket.
2125a8b690f9STom Herbert  * If st->bucket is zero, the very first socket in the hash is returned.
 *
 * Each bucket's established chain is scanned first, then its time-wait
 * chain; entries are filtered on st->family and the seq_file's netns.
 * When the match comes from the time-wait chain, st->state is left at
 * TCP_SEQ_STATE_TIME_WAIT and the return value is an inet_timewait_sock.
 *
 * Locking: a non-NULL return leaves the ehash lock for st->bucket held
 * (the "goto out" paths skip the unlock).  NULL is returned with no lock
 * held once st->bucket has passed tcp_hashinfo.ehash_mask.
 * st->offset is reset to 0.
2126a8b690f9STom Herbert  */
21271da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq)
21281da177e4SLinus Torvalds {
21291da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2130a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
21311da177e4SLinus Torvalds 	void *rc = NULL;
21321da177e4SLinus Torvalds 
2133a8b690f9STom Herbert 	st->offset = 0;
2134a8b690f9STom Herbert 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
21351da177e4SLinus Torvalds 		struct sock *sk;
21363ab5aee7SEric Dumazet 		struct hlist_nulls_node *node;
21378feaf0c0SArnaldo Carvalho de Melo 		struct inet_timewait_sock *tw;
21389db66bdcSEric Dumazet 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
21391da177e4SLinus Torvalds 
21406eac5604SAndi Kleen 		/* Lockless fast path for the common case of empty buckets */
21416eac5604SAndi Kleen 		if (empty_bucket(st))
21426eac5604SAndi Kleen 			continue;
21436eac5604SAndi Kleen 
21449db66bdcSEric Dumazet 		spin_lock_bh(lock);
21453ab5aee7SEric Dumazet 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2146f40c8174SDaniel Lezcano 			if (sk->sk_family != st->family ||
2147878628fbSYOSHIFUJI Hideaki 			    !net_eq(sock_net(sk), net)) {
21481da177e4SLinus Torvalds 				continue;
21491da177e4SLinus Torvalds 			}
21501da177e4SLinus Torvalds 			rc = sk;
21511da177e4SLinus Torvalds 			goto out;
21521da177e4SLinus Torvalds 		}
		/* Nothing on the established chain; try the time-wait chain. */
21531da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_TIME_WAIT;
21548feaf0c0SArnaldo Carvalho de Melo 		inet_twsk_for_each(tw, node,
2155dbca9b27SEric Dumazet 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
215628518fc1SPavel Emelyanov 			if (tw->tw_family != st->family ||
2157878628fbSYOSHIFUJI Hideaki 			    !net_eq(twsk_net(tw), net)) {
21581da177e4SLinus Torvalds 				continue;
21591da177e4SLinus Torvalds 			}
21601da177e4SLinus Torvalds 			rc = tw;
21611da177e4SLinus Torvalds 			goto out;
21621da177e4SLinus Torvalds 		}
		/* No match in this bucket: unlock and restore the state for
		 * the next bucket's established chain.
		 */
21639db66bdcSEric Dumazet 		spin_unlock_bh(lock);
21641da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
21651da177e4SLinus Torvalds 	}
21661da177e4SLinus Torvalds out:
21671da177e4SLinus Torvalds 	return rc;
21681da177e4SLinus Torvalds }
21691da177e4SLinus Torvalds 
/*
 * established_get_next - advance the established/time-wait walk past cur.
 *
 * cur is a struct sock when st->state is TCP_SEQ_STATE_ESTABLISHED and an
 * inet_timewait_sock when it is TCP_SEQ_STATE_TIME_WAIT.  Within a bucket
 * the established chain is walked first and then the time-wait chain;
 * after the time-wait chain the walk moves to the next non-empty bucket.
 * Entries are filtered on st->family and the seq_file's netns.
 *
 * Locking: the ehash lock for st->bucket is released in this function
 * without having been taken here, so the caller must enter with it held.
 * A non-NULL return leaves the lock for the (possibly new) st->bucket
 * held; NULL is returned with no lock held once the table is exhausted.
 *
 * st->num and st->offset are incremented on entry; st->offset is reset
 * to 0 when the walk moves to a new bucket.
 */
21701da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur)
21711da177e4SLinus Torvalds {
21721da177e4SLinus Torvalds 	struct sock *sk = cur;
21738feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw;
21743ab5aee7SEric Dumazet 	struct hlist_nulls_node *node;
21751da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2176a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
21771da177e4SLinus Torvalds 
21781da177e4SLinus Torvalds 	++st->num;
2179a8b690f9STom Herbert 	++st->offset;
21801da177e4SLinus Torvalds 
21811da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
21821da177e4SLinus Torvalds 		tw = cur;
21831da177e4SLinus Torvalds 		tw = tw_next(tw);
		/* Also entered from the bottom of the function, with tw set to
		 * the head of the time-wait chain, once the established chain
		 * of the current bucket has been exhausted.
		 */
21841da177e4SLinus Torvalds get_tw:
2185878628fbSYOSHIFUJI Hideaki 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
21861da177e4SLinus Torvalds 			tw = tw_next(tw);
21871da177e4SLinus Torvalds 		}
21881da177e4SLinus Torvalds 		if (tw) {
21891da177e4SLinus Torvalds 			cur = tw;
21901da177e4SLinus Torvalds 			goto out;
21911da177e4SLinus Torvalds 		}
		/* Bucket fully walked: drop its lock before moving on. */
21929db66bdcSEric Dumazet 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
21931da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
21941da177e4SLinus Torvalds 
21956eac5604SAndi Kleen 		/* Look for next non empty bucket */
2196a8b690f9STom Herbert 		st->offset = 0;
2197f373b53bSEric Dumazet 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
21986eac5604SAndi Kleen 				empty_bucket(st))
21996eac5604SAndi Kleen 			;
2200f373b53bSEric Dumazet 		if (st->bucket > tcp_hashinfo.ehash_mask)
22016eac5604SAndi Kleen 			return NULL;
22026eac5604SAndi Kleen 
22039db66bdcSEric Dumazet 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
22043ab5aee7SEric Dumazet 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
22051da177e4SLinus Torvalds 	} else
22063ab5aee7SEric Dumazet 		sk = sk_nulls_next(sk);
22071da177e4SLinus Torvalds 
22083ab5aee7SEric Dumazet 	sk_nulls_for_each_from(sk, node) {
2209878628fbSYOSHIFUJI Hideaki 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
22101da177e4SLinus Torvalds 			goto found;
22111da177e4SLinus Torvalds 	}
22121da177e4SLinus Torvalds 
	/* Established chain exhausted: switch to this bucket's time-wait chain. */
22131da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2214dbca9b27SEric Dumazet 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
22151da177e4SLinus Torvalds 	goto get_tw;
22161da177e4SLinus Torvalds found:
22171da177e4SLinus Torvalds 	cur = sk;
22181da177e4SLinus Torvalds out:
22191da177e4SLinus Torvalds 	return cur;
22201da177e4SLinus Torvalds }
22211da177e4SLinus Torvalds 
22221da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos)
22231da177e4SLinus Torvalds {
2224a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2225a8b690f9STom Herbert 	void *rc;
2226a8b690f9STom Herbert 
2227a8b690f9STom Herbert 	st->bucket = 0;
2228a8b690f9STom Herbert 	rc = established_get_first(seq);
22291da177e4SLinus Torvalds 
22301da177e4SLinus Torvalds 	while (rc && pos) {
22311da177e4SLinus Torvalds 		rc = established_get_next(seq, rc);
22321da177e4SLinus Torvalds 		--pos;
22331da177e4SLinus Torvalds 	}
22341da177e4SLinus Torvalds 	return rc;
22351da177e4SLinus Torvalds }
22361da177e4SLinus Torvalds 
22371da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
22381da177e4SLinus Torvalds {
22391da177e4SLinus Torvalds 	void *rc;
22401da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
22411da177e4SLinus Torvalds 
22421da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
22431da177e4SLinus Torvalds 	rc	  = listening_get_idx(seq, &pos);
22441da177e4SLinus Torvalds 
22451da177e4SLinus Torvalds 	if (!rc) {
22461da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
22471da177e4SLinus Torvalds 		rc	  = established_get_idx(seq, pos);
22481da177e4SLinus Torvalds 	}
22491da177e4SLinus Torvalds 
22501da177e4SLinus Torvalds 	return rc;
22511da177e4SLinus Torvalds }
22521da177e4SLinus Torvalds 
/*
 * tcp_seek_last_pos - resume the walk at the position saved in the
 * iterator state (st->state, st->bucket, st->offset) instead of
 * rescanning from the beginning.
 *
 * Starts at the head of st->bucket in the table selected by st->state and
 * steps forward up to the saved offset.  If the listening table runs out
 * before that, the walk continues in the established table from bucket 0
 * with whatever is left of the offset counter.
 *
 * st->num is saved and restored because the get_next helpers increment
 * it on every step.  Returns the entry found, or NULL.
 */
2253a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq)
2254a8b690f9STom Herbert {
2255a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2256a8b690f9STom Herbert 	int offset = st->offset;
2257a8b690f9STom Herbert 	int orig_num = st->num;
2258a8b690f9STom Herbert 	void *rc = NULL;
2259a8b690f9STom Herbert 
2260a8b690f9STom Herbert 	switch (st->state) {
2261a8b690f9STom Herbert 	case TCP_SEQ_STATE_OPENREQ:
2262a8b690f9STom Herbert 	case TCP_SEQ_STATE_LISTENING:
2263a8b690f9STom Herbert 		if (st->bucket >= INET_LHTABLE_SIZE)
2264a8b690f9STom Herbert 			break;
		/* An OPENREQ position is resumed as a LISTENING walk of the
		 * same bucket.
		 */
2265a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_LISTENING;
2266a8b690f9STom Herbert 		rc = listening_get_next(seq, NULL);
2267a8b690f9STom Herbert 		while (offset-- && rc)
2268a8b690f9STom Herbert 			rc = listening_get_next(seq, rc);
2269a8b690f9STom Herbert 		if (rc)
2270a8b690f9STom Herbert 			break;
2271a8b690f9STom Herbert 		st->bucket = 0;
2272a8b690f9STom Herbert 		/* Fallthrough */
2273a8b690f9STom Herbert 	case TCP_SEQ_STATE_ESTABLISHED:
2274a8b690f9STom Herbert 	case TCP_SEQ_STATE_TIME_WAIT:
2275a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2276a8b690f9STom Herbert 		if (st->bucket > tcp_hashinfo.ehash_mask)
2277a8b690f9STom Herbert 			break;
2278a8b690f9STom Herbert 		rc = established_get_first(seq);
2279a8b690f9STom Herbert 		while (offset-- && rc)
2280a8b690f9STom Herbert 			rc = established_get_next(seq, rc);
2281a8b690f9STom Herbert 	}
2282a8b690f9STom Herbert 
2283a8b690f9STom Herbert 	st->num = orig_num;
2284a8b690f9STom Herbert 
2285a8b690f9STom Herbert 	return rc;
2286a8b690f9STom Herbert }
2287a8b690f9STom Herbert 
22881da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
22891da177e4SLinus Torvalds {
22901da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2291a8b690f9STom Herbert 	void *rc;
2292a8b690f9STom Herbert 
2293a8b690f9STom Herbert 	if (*pos && *pos == st->last_pos) {
2294a8b690f9STom Herbert 		rc = tcp_seek_last_pos(seq);
2295a8b690f9STom Herbert 		if (rc)
2296a8b690f9STom Herbert 			goto out;
2297a8b690f9STom Herbert 	}
2298a8b690f9STom Herbert 
22991da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
23001da177e4SLinus Torvalds 	st->num = 0;
2301a8b690f9STom Herbert 	st->bucket = 0;
2302a8b690f9STom Herbert 	st->offset = 0;
2303a8b690f9STom Herbert 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2304a8b690f9STom Herbert 
2305a8b690f9STom Herbert out:
2306a8b690f9STom Herbert 	st->last_pos = *pos;
2307a8b690f9STom Herbert 	return rc;
23081da177e4SLinus Torvalds }
23091da177e4SLinus Torvalds 
23101da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
23111da177e4SLinus Torvalds {
2312a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
23131da177e4SLinus Torvalds 	void *rc = NULL;
23141da177e4SLinus Torvalds 
23151da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
23161da177e4SLinus Torvalds 		rc = tcp_get_idx(seq, 0);
23171da177e4SLinus Torvalds 		goto out;
23181da177e4SLinus Torvalds 	}
23191da177e4SLinus Torvalds 
23201da177e4SLinus Torvalds 	switch (st->state) {
23211da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
23221da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
23231da177e4SLinus Torvalds 		rc = listening_get_next(seq, v);
23241da177e4SLinus Torvalds 		if (!rc) {
23251da177e4SLinus Torvalds 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2326a8b690f9STom Herbert 			st->bucket = 0;
2327a8b690f9STom Herbert 			st->offset = 0;
23281da177e4SLinus Torvalds 			rc	  = established_get_first(seq);
23291da177e4SLinus Torvalds 		}
23301da177e4SLinus Torvalds 		break;
23311da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
23321da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
23331da177e4SLinus Torvalds 		rc = established_get_next(seq, v);
23341da177e4SLinus Torvalds 		break;
23351da177e4SLinus Torvalds 	}
23361da177e4SLinus Torvalds out:
23371da177e4SLinus Torvalds 	++*pos;
2338a8b690f9STom Herbert 	st->last_pos = *pos;
23391da177e4SLinus Torvalds 	return rc;
23401da177e4SLinus Torvalds }
23411da177e4SLinus Torvalds 
23421da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v)
23431da177e4SLinus Torvalds {
23441da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
23451da177e4SLinus Torvalds 
23461da177e4SLinus Torvalds 	switch (st->state) {
23471da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
23481da177e4SLinus Torvalds 		if (v) {
2349463c84b9SArnaldo Carvalho de Melo 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2350463c84b9SArnaldo Carvalho de Melo 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
23511da177e4SLinus Torvalds 		}
23521da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
23531da177e4SLinus Torvalds 		if (v != SEQ_START_TOKEN)
23545caea4eaSEric Dumazet 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
23551da177e4SLinus Torvalds 		break;
23561da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
23571da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
23581da177e4SLinus Torvalds 		if (v)
23599db66bdcSEric Dumazet 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
23601da177e4SLinus Torvalds 		break;
23611da177e4SLinus Torvalds 	}
23621da177e4SLinus Torvalds }
23631da177e4SLinus Torvalds 
236473cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file)
23651da177e4SLinus Torvalds {
23661da177e4SLinus Torvalds 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
23671da177e4SLinus Torvalds 	struct tcp_iter_state *s;
236852d6f3f1SDenis V. Lunev 	int err;
23691da177e4SLinus Torvalds 
237052d6f3f1SDenis V. Lunev 	err = seq_open_net(inode, file, &afinfo->seq_ops,
237152d6f3f1SDenis V. Lunev 			  sizeof(struct tcp_iter_state));
237252d6f3f1SDenis V. Lunev 	if (err < 0)
237352d6f3f1SDenis V. Lunev 		return err;
2374f40c8174SDaniel Lezcano 
237552d6f3f1SDenis V. Lunev 	s = ((struct seq_file *)file->private_data)->private;
23761da177e4SLinus Torvalds 	s->family		= afinfo->family;
2377a8b690f9STom Herbert 	s->last_pos 		= 0;
2378f40c8174SDaniel Lezcano 	return 0;
2379f40c8174SDaniel Lezcano }
238073cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open);
2381f40c8174SDaniel Lezcano 
23826f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
23831da177e4SLinus Torvalds {
23841da177e4SLinus Torvalds 	int rc = 0;
23851da177e4SLinus Torvalds 	struct proc_dir_entry *p;
23861da177e4SLinus Torvalds 
23879427c4b3SDenis V. Lunev 	afinfo->seq_ops.start		= tcp_seq_start;
23889427c4b3SDenis V. Lunev 	afinfo->seq_ops.next		= tcp_seq_next;
23899427c4b3SDenis V. Lunev 	afinfo->seq_ops.stop		= tcp_seq_stop;
23909427c4b3SDenis V. Lunev 
239184841c3cSDenis V. Lunev 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
239273cb88ecSArjan van de Ven 			     afinfo->seq_fops, afinfo);
239384841c3cSDenis V. Lunev 	if (!p)
23941da177e4SLinus Torvalds 		rc = -ENOMEM;
23951da177e4SLinus Torvalds 	return rc;
23961da177e4SLinus Torvalds }
23974bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register);
23981da177e4SLinus Torvalds 
23996f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
24001da177e4SLinus Torvalds {
24016f8b13bcSDaniel Lezcano 	proc_net_remove(net, afinfo->name);
24021da177e4SLinus Torvalds }
24034bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister);
24041da177e4SLinus Torvalds 
2405cf533ea5SEric Dumazet static void get_openreq4(const struct sock *sk, const struct request_sock *req,
24065e659e4cSPavel Emelyanov 			 struct seq_file *f, int i, int uid, int *len)
24071da177e4SLinus Torvalds {
24082e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
24091da177e4SLinus Torvalds 	int ttd = req->expires - jiffies;
24101da177e4SLinus Torvalds 
24115e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
241271338aa7SDan Rosenberg 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
24131da177e4SLinus Torvalds 		i,
24142e6599cbSArnaldo Carvalho de Melo 		ireq->loc_addr,
2415c720c7e8SEric Dumazet 		ntohs(inet_sk(sk)->inet_sport),
24162e6599cbSArnaldo Carvalho de Melo 		ireq->rmt_addr,
24172e6599cbSArnaldo Carvalho de Melo 		ntohs(ireq->rmt_port),
24181da177e4SLinus Torvalds 		TCP_SYN_RECV,
24191da177e4SLinus Torvalds 		0, 0, /* could print option size, but that is af dependent. */
24201da177e4SLinus Torvalds 		1,    /* timers active (only the expire timer) */
24211da177e4SLinus Torvalds 		jiffies_to_clock_t(ttd),
24221da177e4SLinus Torvalds 		req->retrans,
24231da177e4SLinus Torvalds 		uid,
24241da177e4SLinus Torvalds 		0,  /* non standard timer */
24251da177e4SLinus Torvalds 		0, /* open_requests have no inode */
24261da177e4SLinus Torvalds 		atomic_read(&sk->sk_refcnt),
24275e659e4cSPavel Emelyanov 		req,
24285e659e4cSPavel Emelyanov 		len);
24291da177e4SLinus Torvalds }
24301da177e4SLinus Torvalds 
24315e659e4cSPavel Emelyanov static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
24321da177e4SLinus Torvalds {
24331da177e4SLinus Torvalds 	int timer_active;
24341da177e4SLinus Torvalds 	unsigned long timer_expires;
2435cf533ea5SEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
2436cf4c6bf8SIlpo Järvinen 	const struct inet_connection_sock *icsk = inet_csk(sk);
2437cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
2438c720c7e8SEric Dumazet 	__be32 dest = inet->inet_daddr;
2439c720c7e8SEric Dumazet 	__be32 src = inet->inet_rcv_saddr;
2440c720c7e8SEric Dumazet 	__u16 destp = ntohs(inet->inet_dport);
2441c720c7e8SEric Dumazet 	__u16 srcp = ntohs(inet->inet_sport);
244249d09007SEric Dumazet 	int rx_queue;
24431da177e4SLinus Torvalds 
2444463c84b9SArnaldo Carvalho de Melo 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
24451da177e4SLinus Torvalds 		timer_active	= 1;
2446463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2447463c84b9SArnaldo Carvalho de Melo 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
24481da177e4SLinus Torvalds 		timer_active	= 4;
2449463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2450cf4c6bf8SIlpo Järvinen 	} else if (timer_pending(&sk->sk_timer)) {
24511da177e4SLinus Torvalds 		timer_active	= 2;
2452cf4c6bf8SIlpo Järvinen 		timer_expires	= sk->sk_timer.expires;
24531da177e4SLinus Torvalds 	} else {
24541da177e4SLinus Torvalds 		timer_active	= 0;
24551da177e4SLinus Torvalds 		timer_expires = jiffies;
24561da177e4SLinus Torvalds 	}
24571da177e4SLinus Torvalds 
245849d09007SEric Dumazet 	if (sk->sk_state == TCP_LISTEN)
245949d09007SEric Dumazet 		rx_queue = sk->sk_ack_backlog;
246049d09007SEric Dumazet 	else
246149d09007SEric Dumazet 		/*
246249d09007SEric Dumazet 		 * because we dont lock socket, we might find a transient negative value
246349d09007SEric Dumazet 		 */
246449d09007SEric Dumazet 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
246549d09007SEric Dumazet 
24665e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
246771338aa7SDan Rosenberg 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2468cf4c6bf8SIlpo Järvinen 		i, src, srcp, dest, destp, sk->sk_state,
246947da8ee6SSridhar Samudrala 		tp->write_seq - tp->snd_una,
247049d09007SEric Dumazet 		rx_queue,
24711da177e4SLinus Torvalds 		timer_active,
24721da177e4SLinus Torvalds 		jiffies_to_clock_t(timer_expires - jiffies),
2473463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_retransmits,
2474cf4c6bf8SIlpo Järvinen 		sock_i_uid(sk),
24756687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out,
2476cf4c6bf8SIlpo Järvinen 		sock_i_ino(sk),
2477cf4c6bf8SIlpo Järvinen 		atomic_read(&sk->sk_refcnt), sk,
24787be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_rto),
24797be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2480463c84b9SArnaldo Carvalho de Melo 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
24811da177e4SLinus Torvalds 		tp->snd_cwnd,
24820b6a05c1SIlpo Järvinen 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
24835e659e4cSPavel Emelyanov 		len);
24841da177e4SLinus Torvalds }
24851da177e4SLinus Torvalds 
2486cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw,
24875e659e4cSPavel Emelyanov 			       struct seq_file *f, int i, int *len)
24881da177e4SLinus Torvalds {
248923f33c2dSAl Viro 	__be32 dest, src;
24901da177e4SLinus Torvalds 	__u16 destp, srcp;
24911da177e4SLinus Torvalds 	int ttd = tw->tw_ttd - jiffies;
24921da177e4SLinus Torvalds 
24931da177e4SLinus Torvalds 	if (ttd < 0)
24941da177e4SLinus Torvalds 		ttd = 0;
24951da177e4SLinus Torvalds 
24961da177e4SLinus Torvalds 	dest  = tw->tw_daddr;
24971da177e4SLinus Torvalds 	src   = tw->tw_rcv_saddr;
24981da177e4SLinus Torvalds 	destp = ntohs(tw->tw_dport);
24991da177e4SLinus Torvalds 	srcp  = ntohs(tw->tw_sport);
25001da177e4SLinus Torvalds 
25015e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
250271338aa7SDan Rosenberg 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
25031da177e4SLinus Torvalds 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
25041da177e4SLinus Torvalds 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
25055e659e4cSPavel Emelyanov 		atomic_read(&tw->tw_refcnt), tw, len);
25061da177e4SLinus Torvalds }
25071da177e4SLinus Torvalds 
25081da177e4SLinus Torvalds #define TMPSZ 150
25091da177e4SLinus Torvalds 
25101da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v)
25111da177e4SLinus Torvalds {
25121da177e4SLinus Torvalds 	struct tcp_iter_state *st;
25135e659e4cSPavel Emelyanov 	int len;
25141da177e4SLinus Torvalds 
25151da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
25161da177e4SLinus Torvalds 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
25171da177e4SLinus Torvalds 			   "  sl  local_address rem_address   st tx_queue "
25181da177e4SLinus Torvalds 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
25191da177e4SLinus Torvalds 			   "inode");
25201da177e4SLinus Torvalds 		goto out;
25211da177e4SLinus Torvalds 	}
25221da177e4SLinus Torvalds 	st = seq->private;
25231da177e4SLinus Torvalds 
25241da177e4SLinus Torvalds 	switch (st->state) {
25251da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
25261da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
25275e659e4cSPavel Emelyanov 		get_tcp4_sock(v, seq, st->num, &len);
25281da177e4SLinus Torvalds 		break;
25291da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
25305e659e4cSPavel Emelyanov 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
25311da177e4SLinus Torvalds 		break;
25321da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
25335e659e4cSPavel Emelyanov 		get_timewait4_sock(v, seq, st->num, &len);
25341da177e4SLinus Torvalds 		break;
25351da177e4SLinus Torvalds 	}
25365e659e4cSPavel Emelyanov 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
25371da177e4SLinus Torvalds out:
25381da177e4SLinus Torvalds 	return 0;
25391da177e4SLinus Torvalds }
25401da177e4SLinus Torvalds 
254173cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = {
254273cb88ecSArjan van de Ven 	.owner   = THIS_MODULE,
254373cb88ecSArjan van de Ven 	.open    = tcp_seq_open,
254473cb88ecSArjan van de Ven 	.read    = seq_read,
254573cb88ecSArjan van de Ven 	.llseek  = seq_lseek,
254673cb88ecSArjan van de Ven 	.release = seq_release_net
254773cb88ecSArjan van de Ven };
254873cb88ecSArjan van de Ven 
25491da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = {
25501da177e4SLinus Torvalds 	.name		= "tcp",
25511da177e4SLinus Torvalds 	.family		= AF_INET,
255273cb88ecSArjan van de Ven 	.seq_fops	= &tcp_afinfo_seq_fops,
25539427c4b3SDenis V. Lunev 	.seq_ops	= {
25549427c4b3SDenis V. Lunev 		.show		= tcp4_seq_show,
25559427c4b3SDenis V. Lunev 	},
25561da177e4SLinus Torvalds };
25571da177e4SLinus Torvalds 
25582c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net)
2559757764f6SPavel Emelyanov {
2560757764f6SPavel Emelyanov 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2561757764f6SPavel Emelyanov }
2562757764f6SPavel Emelyanov 
25632c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net)
2564757764f6SPavel Emelyanov {
2565757764f6SPavel Emelyanov 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2566757764f6SPavel Emelyanov }
2567757764f6SPavel Emelyanov 
2568757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = {
2569757764f6SPavel Emelyanov 	.init = tcp4_proc_init_net,
2570757764f6SPavel Emelyanov 	.exit = tcp4_proc_exit_net,
2571757764f6SPavel Emelyanov };
2572757764f6SPavel Emelyanov 
25731da177e4SLinus Torvalds int __init tcp4_proc_init(void)
25741da177e4SLinus Torvalds {
2575757764f6SPavel Emelyanov 	return register_pernet_subsys(&tcp4_net_ops);
25761da177e4SLinus Torvalds }
25771da177e4SLinus Torvalds 
25781da177e4SLinus Torvalds void tcp4_proc_exit(void)
25791da177e4SLinus Torvalds {
2580757764f6SPavel Emelyanov 	unregister_pernet_subsys(&tcp4_net_ops);
25811da177e4SLinus Torvalds }
25821da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
25831da177e4SLinus Torvalds 
2584bf296b12SHerbert Xu struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2585bf296b12SHerbert Xu {
2586b71d1d42SEric Dumazet 	const struct iphdr *iph = skb_gro_network_header(skb);
2587bf296b12SHerbert Xu 
2588bf296b12SHerbert Xu 	switch (skb->ip_summed) {
2589bf296b12SHerbert Xu 	case CHECKSUM_COMPLETE:
259086911732SHerbert Xu 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2591bf296b12SHerbert Xu 				  skb->csum)) {
2592bf296b12SHerbert Xu 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2593bf296b12SHerbert Xu 			break;
2594bf296b12SHerbert Xu 		}
2595bf296b12SHerbert Xu 
2596bf296b12SHerbert Xu 		/* fall through */
2597bf296b12SHerbert Xu 	case CHECKSUM_NONE:
2598bf296b12SHerbert Xu 		NAPI_GRO_CB(skb)->flush = 1;
2599bf296b12SHerbert Xu 		return NULL;
2600bf296b12SHerbert Xu 	}
2601bf296b12SHerbert Xu 
2602bf296b12SHerbert Xu 	return tcp_gro_receive(head, skb);
2603bf296b12SHerbert Xu }
2604bf296b12SHerbert Xu 
2605bf296b12SHerbert Xu int tcp4_gro_complete(struct sk_buff *skb)
2606bf296b12SHerbert Xu {
2607b71d1d42SEric Dumazet 	const struct iphdr *iph = ip_hdr(skb);
2608bf296b12SHerbert Xu 	struct tcphdr *th = tcp_hdr(skb);
2609bf296b12SHerbert Xu 
2610bf296b12SHerbert Xu 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2611bf296b12SHerbert Xu 				  iph->saddr, iph->daddr, 0);
2612bf296b12SHerbert Xu 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2613bf296b12SHerbert Xu 
2614bf296b12SHerbert Xu 	return tcp_gro_complete(skb);
2615bf296b12SHerbert Xu }
2616bf296b12SHerbert Xu 
26171da177e4SLinus Torvalds struct proto tcp_prot = {
26181da177e4SLinus Torvalds 	.name			= "TCP",
26191da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
26201da177e4SLinus Torvalds 	.close			= tcp_close,
26211da177e4SLinus Torvalds 	.connect		= tcp_v4_connect,
26221da177e4SLinus Torvalds 	.disconnect		= tcp_disconnect,
2623463c84b9SArnaldo Carvalho de Melo 	.accept			= inet_csk_accept,
26241da177e4SLinus Torvalds 	.ioctl			= tcp_ioctl,
26251da177e4SLinus Torvalds 	.init			= tcp_v4_init_sock,
26261da177e4SLinus Torvalds 	.destroy		= tcp_v4_destroy_sock,
26271da177e4SLinus Torvalds 	.shutdown		= tcp_shutdown,
26281da177e4SLinus Torvalds 	.setsockopt		= tcp_setsockopt,
26291da177e4SLinus Torvalds 	.getsockopt		= tcp_getsockopt,
26301da177e4SLinus Torvalds 	.recvmsg		= tcp_recvmsg,
26317ba42910SChangli Gao 	.sendmsg		= tcp_sendmsg,
26327ba42910SChangli Gao 	.sendpage		= tcp_sendpage,
26331da177e4SLinus Torvalds 	.backlog_rcv		= tcp_v4_do_rcv,
2634ab1e0a13SArnaldo Carvalho de Melo 	.hash			= inet_hash,
2635ab1e0a13SArnaldo Carvalho de Melo 	.unhash			= inet_unhash,
2636ab1e0a13SArnaldo Carvalho de Melo 	.get_port		= inet_csk_get_port,
26371da177e4SLinus Torvalds 	.enter_memory_pressure	= tcp_enter_memory_pressure,
26381da177e4SLinus Torvalds 	.sockets_allocated	= &tcp_sockets_allocated,
26390a5578cfSArnaldo Carvalho de Melo 	.orphan_count		= &tcp_orphan_count,
26401da177e4SLinus Torvalds 	.memory_allocated	= &tcp_memory_allocated,
26411da177e4SLinus Torvalds 	.memory_pressure	= &tcp_memory_pressure,
26421da177e4SLinus Torvalds 	.sysctl_wmem		= sysctl_tcp_wmem,
26431da177e4SLinus Torvalds 	.sysctl_rmem		= sysctl_tcp_rmem,
26441da177e4SLinus Torvalds 	.max_header		= MAX_TCP_HEADER,
26451da177e4SLinus Torvalds 	.obj_size		= sizeof(struct tcp_sock),
26463ab5aee7SEric Dumazet 	.slab_flags		= SLAB_DESTROY_BY_RCU,
26476d6ee43eSArnaldo Carvalho de Melo 	.twsk_prot		= &tcp_timewait_sock_ops,
264860236fddSArnaldo Carvalho de Melo 	.rsk_prot		= &tcp_request_sock_ops,
264939d8cda7SPavel Emelyanov 	.h.hashinfo		= &tcp_hashinfo,
26507ba42910SChangli Gao 	.no_autobind		= true,
2651543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
2652543d9cfeSArnaldo Carvalho de Melo 	.compat_setsockopt	= compat_tcp_setsockopt,
2653543d9cfeSArnaldo Carvalho de Melo 	.compat_getsockopt	= compat_tcp_getsockopt,
2654543d9cfeSArnaldo Carvalho de Melo #endif
2655d1a4c0b3SGlauber Costa #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2656d1a4c0b3SGlauber Costa 	.init_cgroup		= tcp_init_cgroup,
2657d1a4c0b3SGlauber Costa 	.destroy_cgroup		= tcp_destroy_cgroup,
2658d1a4c0b3SGlauber Costa 	.proto_cgroup		= tcp_proto_cgroup,
2659d1a4c0b3SGlauber Costa #endif
26601da177e4SLinus Torvalds };
26614bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot);
26621da177e4SLinus Torvalds 
2663046ee902SDenis V. Lunev static int __net_init tcp_sk_init(struct net *net)
2664046ee902SDenis V. Lunev {
2665046ee902SDenis V. Lunev 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2666046ee902SDenis V. Lunev 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2667046ee902SDenis V. Lunev }
2668046ee902SDenis V. Lunev 
2669046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net)
2670046ee902SDenis V. Lunev {
2671046ee902SDenis V. Lunev 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2672b099ce26SEric W. Biederman }
2673b099ce26SEric W. Biederman 
2674b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2675b099ce26SEric W. Biederman {
2676b099ce26SEric W. Biederman 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2677046ee902SDenis V. Lunev }
2678046ee902SDenis V. Lunev 
2679046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = {
2680046ee902SDenis V. Lunev        .init	   = tcp_sk_init,
2681046ee902SDenis V. Lunev        .exit	   = tcp_sk_exit,
2682b099ce26SEric W. Biederman        .exit_batch = tcp_sk_exit_batch,
2683046ee902SDenis V. Lunev };
2684046ee902SDenis V. Lunev 
26859b0f976fSDenis V. Lunev void __init tcp_v4_init(void)
26861da177e4SLinus Torvalds {
26875caea4eaSEric Dumazet 	inet_hashinfo_init(&tcp_hashinfo);
26886a1b3054SEric W. Biederman 	if (register_pernet_subsys(&tcp_sk_ops))
26891da177e4SLinus Torvalds 		panic("Failed to create the TCP control socket.\n");
26901da177e4SLinus Torvalds }
2691