xref: /linux/net/ipv4/tcp_ipv4.c (revision b4fb05ea402cb6930b40d3152d8acabc391b23e2)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *		IPv4 specific functions
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *		code split from:
121da177e4SLinus Torvalds  *		linux/ipv4/tcp.c
131da177e4SLinus Torvalds  *		linux/ipv4/tcp_input.c
141da177e4SLinus Torvalds  *		linux/ipv4/tcp_output.c
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *		See tcp.c for author information
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
191da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
201da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
211da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
221da177e4SLinus Torvalds  */
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds /*
251da177e4SLinus Torvalds  * Changes:
261da177e4SLinus Torvalds  *		David S. Miller	:	New socket lookup architecture.
271da177e4SLinus Torvalds  *					This code is dedicated to John Dyson.
281da177e4SLinus Torvalds  *		David S. Miller :	Change semantics of established hash,
291da177e4SLinus Torvalds  *					half is devoted to TIME_WAIT sockets
301da177e4SLinus Torvalds  *					and the rest go in the other half.
311da177e4SLinus Torvalds  *		Andi Kleen :		Add support for syncookies and fixed
321da177e4SLinus Torvalds  *					some bugs: ip options weren't passed to
331da177e4SLinus Torvalds  *					the TCP layer, missed a check for an
341da177e4SLinus Torvalds  *					ACK bit.
351da177e4SLinus Torvalds  *		Andi Kleen :		Implemented fast path mtu discovery.
361da177e4SLinus Torvalds  *	     				Fixed many serious bugs in the
3760236fddSArnaldo Carvalho de Melo  *					request_sock handling and moved
381da177e4SLinus Torvalds  *					most of it into the af independent code.
391da177e4SLinus Torvalds  *					Added tail drop and some other bugfixes.
40caa20d9aSStephen Hemminger  *					Added new listen semantics.
411da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
421da177e4SLinus Torvalds  *	Juan Jose Ciarlante:		ip_dynaddr bits
431da177e4SLinus Torvalds  *		Andi Kleen:		various fixes.
441da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
451da177e4SLinus Torvalds  *					coma.
461da177e4SLinus Torvalds  *	Andi Kleen		:	Fix new listen.
471da177e4SLinus Torvalds  *	Andi Kleen		:	Fix accept error reporting.
481da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
491da177e4SLinus Torvalds  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
501da177e4SLinus Torvalds  *					a single port at the same time.
511da177e4SLinus Torvalds  */
521da177e4SLinus Torvalds 
531da177e4SLinus Torvalds 
54eb4dea58SHerbert Xu #include <linux/bottom_half.h>
551da177e4SLinus Torvalds #include <linux/types.h>
561da177e4SLinus Torvalds #include <linux/fcntl.h>
571da177e4SLinus Torvalds #include <linux/module.h>
581da177e4SLinus Torvalds #include <linux/random.h>
591da177e4SLinus Torvalds #include <linux/cache.h>
601da177e4SLinus Torvalds #include <linux/jhash.h>
611da177e4SLinus Torvalds #include <linux/init.h>
621da177e4SLinus Torvalds #include <linux/times.h>
635a0e3ad6STejun Heo #include <linux/slab.h>
641da177e4SLinus Torvalds 
65457c4cbcSEric W. Biederman #include <net/net_namespace.h>
661da177e4SLinus Torvalds #include <net/icmp.h>
67304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
681da177e4SLinus Torvalds #include <net/tcp.h>
6920380731SArnaldo Carvalho de Melo #include <net/transp_v6.h>
701da177e4SLinus Torvalds #include <net/ipv6.h>
711da177e4SLinus Torvalds #include <net/inet_common.h>
726d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h>
731da177e4SLinus Torvalds #include <net/xfrm.h>
741a2449a8SChris Leech #include <net/netdma.h>
756e5714eaSDavid S. Miller #include <net/secure_seq.h>
76d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h>
771da177e4SLinus Torvalds 
781da177e4SLinus Torvalds #include <linux/inet.h>
791da177e4SLinus Torvalds #include <linux/ipv6.h>
801da177e4SLinus Torvalds #include <linux/stddef.h>
811da177e4SLinus Torvalds #include <linux/proc_fs.h>
821da177e4SLinus Torvalds #include <linux/seq_file.h>
831da177e4SLinus Torvalds 
84cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h>
85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h>
86cfb6eeb4SYOSHIFUJI Hideaki 
87ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly;
88ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly;
894bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency);
901da177e4SLinus Torvalds 
911da177e4SLinus Torvalds 
92cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
93a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
94318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
95cfb6eeb4SYOSHIFUJI Hideaki #endif
96cfb6eeb4SYOSHIFUJI Hideaki 
975caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo;
984bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo);
991da177e4SLinus Torvalds 
100cf533ea5SEric Dumazet static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1011da177e4SLinus Torvalds {
102eddc9ec5SArnaldo Carvalho de Melo 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
103eddc9ec5SArnaldo Carvalho de Melo 					  ip_hdr(skb)->saddr,
104aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->dest,
105aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->source);
1061da177e4SLinus Torvalds }
1071da177e4SLinus Torvalds 
1086d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
1096d6ee43eSArnaldo Carvalho de Melo {
1106d6ee43eSArnaldo Carvalho de Melo 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
1116d6ee43eSArnaldo Carvalho de Melo 	struct tcp_sock *tp = tcp_sk(sk);
1126d6ee43eSArnaldo Carvalho de Melo 
1136d6ee43eSArnaldo Carvalho de Melo 	/* With PAWS, it is safe from the viewpoint
1146d6ee43eSArnaldo Carvalho de Melo 	   of data integrity. Even without PAWS it is safe provided sequence
1156d6ee43eSArnaldo Carvalho de Melo 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
1166d6ee43eSArnaldo Carvalho de Melo 
1176d6ee43eSArnaldo Carvalho de Melo 	   Actually, the idea is close to VJ's one, only timestamp cache is
1186d6ee43eSArnaldo Carvalho de Melo 	   held not per host, but per port pair and TW bucket is used as state
1196d6ee43eSArnaldo Carvalho de Melo 	   holder.
1206d6ee43eSArnaldo Carvalho de Melo 
1216d6ee43eSArnaldo Carvalho de Melo 	   If TW bucket has been already destroyed we fall back to VJ's scheme
1226d6ee43eSArnaldo Carvalho de Melo 	   and use initial timestamp retrieved from peer table.
1236d6ee43eSArnaldo Carvalho de Melo 	 */
1246d6ee43eSArnaldo Carvalho de Melo 	if (tcptw->tw_ts_recent_stamp &&
1256d6ee43eSArnaldo Carvalho de Melo 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
1269d729f72SJames Morris 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
1276d6ee43eSArnaldo Carvalho de Melo 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1286d6ee43eSArnaldo Carvalho de Melo 		if (tp->write_seq == 0)
1296d6ee43eSArnaldo Carvalho de Melo 			tp->write_seq = 1;
1306d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
1316d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1326d6ee43eSArnaldo Carvalho de Melo 		sock_hold(sktw);
1336d6ee43eSArnaldo Carvalho de Melo 		return 1;
1346d6ee43eSArnaldo Carvalho de Melo 	}
1356d6ee43eSArnaldo Carvalho de Melo 
1366d6ee43eSArnaldo Carvalho de Melo 	return 0;
1376d6ee43eSArnaldo Carvalho de Melo }
1386d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique);
1396d6ee43eSArnaldo Carvalho de Melo 
1401da177e4SLinus Torvalds /* This will initiate an outgoing connection. */
/*
 * tcp_v4_connect - start an active open on an IPv4 TCP socket.
 * @sk:       socket to connect
 * @uaddr:    destination address; read as a struct sockaddr_in
 * @addr_len: length of @uaddr in bytes
 *
 * Resolves a route to the destination, fills in the socket's source and
 * destination address/port, moves the socket to TCP_SYN_SENT, lets
 * inet_hash_connect() pick the source port and hash the socket, chooses
 * write_seq if none was inherited, and finally calls tcp_connect().
 *
 * Returns 0 on success.  Returns -EINVAL if @addr_len is shorter than
 * struct sockaddr_in, or if a source-route option is set while the
 * destination address is zero; -EAFNOSUPPORT if sin_family is not AF_INET;
 * -ENETUNREACH if the resolved route is multicast or broadcast; otherwise
 * the error from ip_route_connect(), inet_hash_connect(),
 * ip_route_newports() or tcp_connect().  Failures after the state change
 * go through the "failure" label, which sets the socket back to TCP_CLOSE
 * and clears sk_route_caps and inet_dport.
 */
1411da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1421da177e4SLinus Torvalds {
1432d7192d6SDavid S. Miller 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1441da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
1451da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
146dca8b089SDavid S. Miller 	__be16 orig_sport, orig_dport;
147bada8adcSAl Viro 	__be32 daddr, nexthop;
148da905bd1SDavid S. Miller 	struct flowi4 *fl4;
1492d7192d6SDavid S. Miller 	struct rtable *rt;
1501da177e4SLinus Torvalds 	int err;
151f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
1521da177e4SLinus Torvalds 
1531da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_in))
1541da177e4SLinus Torvalds 		return -EINVAL;
1551da177e4SLinus Torvalds 
1561da177e4SLinus Torvalds 	if (usin->sin_family != AF_INET)
1571da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
1581da177e4SLinus Torvalds 
1591da177e4SLinus Torvalds 	nexthop = daddr = usin->sin_addr.s_addr;
	/* The options pointer is read under the socket lock; the lockdep
	 * condition given here is sock_owned_by_user(sk).
	 */
160f6d8bd05SEric Dumazet 	inet_opt = rcu_dereference_protected(inet->inet_opt,
161f6d8bd05SEric Dumazet 					     sock_owned_by_user(sk));
	/* With a source-route option the route lookup targets opt.faddr
	 * instead of the address supplied by the caller.
	 */
162f6d8bd05SEric Dumazet 	if (inet_opt && inet_opt->opt.srr) {
1631da177e4SLinus Torvalds 		if (!daddr)
1641da177e4SLinus Torvalds 			return -EINVAL;
165f6d8bd05SEric Dumazet 		nexthop = inet_opt->opt.faddr;
1661da177e4SLinus Torvalds 	}
1671da177e4SLinus Torvalds 
	/* Remember the ports used for this lookup; they are handed to
	 * ip_route_newports() once the final source port is known.
	 */
168dca8b089SDavid S. Miller 	orig_sport = inet->inet_sport;
169dca8b089SDavid S. Miller 	orig_dport = usin->sin_port;
170da905bd1SDavid S. Miller 	fl4 = &inet->cork.fl.u.ip4;
171da905bd1SDavid S. Miller 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
1721da177e4SLinus Torvalds 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
1731da177e4SLinus Torvalds 			      IPPROTO_TCP,
174abdf7e72SDavid S. Miller 			      orig_sport, orig_dport, sk, true);
175b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
176b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
		/* Only "network unreachable" is counted as a no-route event. */
177b23dd4feSDavid S. Miller 		if (err == -ENETUNREACH)
1787c73a6faSPavel Emelyanov 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
179b23dd4feSDavid S. Miller 		return err;
180584bdf8cSWei Dong 	}
1811da177e4SLinus Torvalds 
	/* Connecting to a multicast or broadcast route is refused. */
1821da177e4SLinus Torvalds 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
1831da177e4SLinus Torvalds 		ip_rt_put(rt);
1841da177e4SLinus Torvalds 		return -ENETUNREACH;
1851da177e4SLinus Torvalds 	}
1861da177e4SLinus Torvalds 
	/* Without source routing, use the destination address found in the
	 * flow after the route lookup.
	 */
187f6d8bd05SEric Dumazet 	if (!inet_opt || !inet_opt->opt.srr)
188da905bd1SDavid S. Miller 		daddr = fl4->daddr;
1891da177e4SLinus Torvalds 
	/* No source address set yet: adopt the one found in the flow. */
190c720c7e8SEric Dumazet 	if (!inet->inet_saddr)
191da905bd1SDavid S. Miller 		inet->inet_saddr = fl4->saddr;
192c720c7e8SEric Dumazet 	inet->inet_rcv_saddr = inet->inet_saddr;
1931da177e4SLinus Torvalds 
	/* Timestamp state and write_seq left over from an earlier
	 * destination are discarded when the destination address changes.
	 */
194c720c7e8SEric Dumazet 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
1951da177e4SLinus Torvalds 		/* Reset inherited state */
1961da177e4SLinus Torvalds 		tp->rx_opt.ts_recent	   = 0;
1971da177e4SLinus Torvalds 		tp->rx_opt.ts_recent_stamp = 0;
1981da177e4SLinus Torvalds 		tp->write_seq		   = 0;
1991da177e4SLinus Torvalds 	}
2001da177e4SLinus Torvalds 
201295ff7edSArnaldo Carvalho de Melo 	if (tcp_death_row.sysctl_tw_recycle &&
202da905bd1SDavid S. Miller 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
203ed2361e6SDavid S. Miller 		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
2047174259eArnaldo Carvalho de Melo 		/*
2057174259eArnaldo Carvalho de Melo 		 * VJ's idea. We save last timestamp seen from
2067174259eArnaldo Carvalho de Melo 		 * the destination in peer table, when entering state
2077174259eArnaldo Carvalho de Melo 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
2087174259eArnaldo Carvalho de Melo 		 * when trying new connection.
2091da177e4SLinus Torvalds 		 */
210317fe0e6SEric Dumazet 		if (peer) {
211317fe0e6SEric Dumazet 			inet_peer_refcheck(peer);
			/* Only use the cached stamp while it is at most
			 * TCP_PAWS_MSL seconds old.
			 */
212317fe0e6SEric Dumazet 			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
2131da177e4SLinus Torvalds 				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
2141da177e4SLinus Torvalds 				tp->rx_opt.ts_recent = peer->tcp_ts;
2151da177e4SLinus Torvalds 			}
2161da177e4SLinus Torvalds 		}
217317fe0e6SEric Dumazet 	}
2181da177e4SLinus Torvalds 
219c720c7e8SEric Dumazet 	inet->inet_dport = usin->sin_port;
220c720c7e8SEric Dumazet 	inet->inet_daddr = daddr;
2211da177e4SLinus Torvalds 
	/* Extension header length is the IP options length, if any. */
222d83d8461SArnaldo Carvalho de Melo 	inet_csk(sk)->icsk_ext_hdr_len = 0;
223f6d8bd05SEric Dumazet 	if (inet_opt)
224f6d8bd05SEric Dumazet 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
2251da177e4SLinus Torvalds 
226bee7ca9eSWilliam Allen Simpson 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
2271da177e4SLinus Torvalds 
2281da177e4SLinus Torvalds 	/* Socket identity is still unknown (sport may be zero).
2291da177e4SLinus Torvalds 	 * However we set state to SYN-SENT and not releasing socket
2301da177e4SLinus Torvalds 	 * lock select source port, enter ourselves into the hash tables and
2311da177e4SLinus Torvalds 	 * complete initialization after this.
2321da177e4SLinus Torvalds 	 */
2331da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_SYN_SENT);
234a7f5e7f1SArnaldo Carvalho de Melo 	err = inet_hash_connect(&tcp_death_row, sk);
2351da177e4SLinus Torvalds 	if (err)
2361da177e4SLinus Torvalds 		goto failure;
2371da177e4SLinus Torvalds 
	/* Redo the route with the ports now stored in the socket. */
238da905bd1SDavid S. Miller 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
239c720c7e8SEric Dumazet 			       inet->inet_sport, inet->inet_dport, sk);
240b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
241b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
		/* Do not hand the error pointer to ip_rt_put() below. */
242b23dd4feSDavid S. Miller 		rt = NULL;
2431da177e4SLinus Torvalds 		goto failure;
244b23dd4feSDavid S. Miller 	}
2451da177e4SLinus Torvalds 	/* OK, now commit destination to socket.  */
246bcd76111SHerbert Xu 	sk->sk_gso_type = SKB_GSO_TCPV4;
247d8d1f30bSChangli Gao 	sk_setup_caps(sk, &rt->dst);
2481da177e4SLinus Torvalds 
	/* Pick an initial sequence number unless one was inherited. */
2491da177e4SLinus Torvalds 	if (!tp->write_seq)
250c720c7e8SEric Dumazet 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
251c720c7e8SEric Dumazet 							   inet->inet_daddr,
252c720c7e8SEric Dumazet 							   inet->inet_sport,
2531da177e4SLinus Torvalds 							   usin->sin_port);
2541da177e4SLinus Torvalds 
255c720c7e8SEric Dumazet 	inet->inet_id = tp->write_seq ^ jiffies;
2561da177e4SLinus Torvalds 
2571da177e4SLinus Torvalds 	err = tcp_connect(sk);
	/* From here on the failure path passes NULL to ip_rt_put().
	 * NOTE(review): presumably the route now belongs to the socket
	 * after sk_setup_caps() - confirm.
	 */
2581da177e4SLinus Torvalds 	rt = NULL;
2591da177e4SLinus Torvalds 	if (err)
2601da177e4SLinus Torvalds 		goto failure;
2611da177e4SLinus Torvalds 
2621da177e4SLinus Torvalds 	return 0;
2631da177e4SLinus Torvalds 
2641da177e4SLinus Torvalds failure:
2657174259eArnaldo Carvalho de Melo 	/*
2667174259eArnaldo Carvalho de Melo 	 * This unhashes the socket and releases the local port,
2677174259eArnaldo Carvalho de Melo 	 * if necessary.
2687174259eArnaldo Carvalho de Melo 	 */
2691da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_CLOSE);
2701da177e4SLinus Torvalds 	ip_rt_put(rt);
2711da177e4SLinus Torvalds 	sk->sk_route_caps = 0;
272c720c7e8SEric Dumazet 	inet->inet_dport = 0;
2731da177e4SLinus Torvalds 	return err;
2741da177e4SLinus Torvalds }
2754bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect);
2761da177e4SLinus Torvalds 
2771da177e4SLinus Torvalds /*
2781da177e4SLinus Torvalds  * This routine does path mtu discovery as defined in RFC1191.
2791da177e4SLinus Torvalds  */
280b71d1d42SEric Dumazet static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
2811da177e4SLinus Torvalds {
2821da177e4SLinus Torvalds 	struct dst_entry *dst;
2831da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
2841da177e4SLinus Torvalds 
2851da177e4SLinus Torvalds 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
2861da177e4SLinus Torvalds 	 * send out by Linux are always <576bytes so they should go through
2871da177e4SLinus Torvalds 	 * unfragmented).
2881da177e4SLinus Torvalds 	 */
2891da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN)
2901da177e4SLinus Torvalds 		return;
2911da177e4SLinus Torvalds 
2921da177e4SLinus Torvalds 	/* We don't check in the destentry if pmtu discovery is forbidden
2931da177e4SLinus Torvalds 	 * on this route. We just assume that no packet_to_big packets
2941da177e4SLinus Torvalds 	 * are send back when pmtu discovery is not active.
2951da177e4SLinus Torvalds 	 * There is a small race when the user changes this flag in the
2961da177e4SLinus Torvalds 	 * route, but I think that's acceptable.
2971da177e4SLinus Torvalds 	 */
2981da177e4SLinus Torvalds 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
2991da177e4SLinus Torvalds 		return;
3001da177e4SLinus Torvalds 
3011da177e4SLinus Torvalds 	dst->ops->update_pmtu(dst, mtu);
3021da177e4SLinus Torvalds 
3031da177e4SLinus Torvalds 	/* Something is about to be wrong... Remember soft error
3041da177e4SLinus Torvalds 	 * for the case, if this connection will not able to recover.
3051da177e4SLinus Torvalds 	 */
3061da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
3071da177e4SLinus Torvalds 		sk->sk_err_soft = EMSGSIZE;
3081da177e4SLinus Torvalds 
3091da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
3101da177e4SLinus Torvalds 
3111da177e4SLinus Torvalds 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
312d83d8461SArnaldo Carvalho de Melo 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
3131da177e4SLinus Torvalds 		tcp_sync_mss(sk, mtu);
3141da177e4SLinus Torvalds 
3151da177e4SLinus Torvalds 		/* Resend the TCP packet because it's
3161da177e4SLinus Torvalds 		 * clear that the old packet has been
3171da177e4SLinus Torvalds 		 * dropped. This is the new "fast" path mtu
3181da177e4SLinus Torvalds 		 * discovery.
3191da177e4SLinus Torvalds 		 */
3201da177e4SLinus Torvalds 		tcp_simple_retransmit(sk);
3211da177e4SLinus Torvalds 	} /* else let the usual retransmit timer handle it */
3221da177e4SLinus Torvalds }
3231da177e4SLinus Torvalds 
3241da177e4SLinus Torvalds /*
3251da177e4SLinus Torvalds  * This routine is called by the ICMP module when it gets some
3261da177e4SLinus Torvalds  * sort of error condition.  If err < 0 then the socket should
3271da177e4SLinus Torvalds  * be closed and the error returned to the user.  If err > 0
3281da177e4SLinus Torvalds  * it's just the icmp type << 8 | icmp code.  After adjustment
3291da177e4SLinus Torvalds  * header points to the first 8 bytes of the tcp header.  We need
3301da177e4SLinus Torvalds  * to find the appropriate port.
3311da177e4SLinus Torvalds  *
3321da177e4SLinus Torvalds  * The locking strategy used here is very "optimistic". When
3331da177e4SLinus Torvalds  * someone else accesses the socket the ICMP is just dropped
3341da177e4SLinus Torvalds  * and for some paths there is no check at all.
3351da177e4SLinus Torvalds  * A more general error queue to queue errors for later handling
3361da177e4SLinus Torvalds  * is probably better.
3371da177e4SLinus Torvalds  *
3381da177e4SLinus Torvalds  */
3391da177e4SLinus Torvalds 
3404d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
3411da177e4SLinus Torvalds {
342b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
3434d1a2d9eSDamian Lukowski 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
344f1ecd5d9SDamian Lukowski 	struct inet_connection_sock *icsk;
3451da177e4SLinus Torvalds 	struct tcp_sock *tp;
3461da177e4SLinus Torvalds 	struct inet_sock *inet;
3474d1a2d9eSDamian Lukowski 	const int type = icmp_hdr(icmp_skb)->type;
3484d1a2d9eSDamian Lukowski 	const int code = icmp_hdr(icmp_skb)->code;
3491da177e4SLinus Torvalds 	struct sock *sk;
350f1ecd5d9SDamian Lukowski 	struct sk_buff *skb;
3511da177e4SLinus Torvalds 	__u32 seq;
352f1ecd5d9SDamian Lukowski 	__u32 remaining;
3531da177e4SLinus Torvalds 	int err;
3544d1a2d9eSDamian Lukowski 	struct net *net = dev_net(icmp_skb->dev);
3551da177e4SLinus Torvalds 
3564d1a2d9eSDamian Lukowski 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
357dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3581da177e4SLinus Torvalds 		return;
3591da177e4SLinus Torvalds 	}
3601da177e4SLinus Torvalds 
361fd54d716SPavel Emelyanov 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
3624d1a2d9eSDamian Lukowski 			iph->saddr, th->source, inet_iif(icmp_skb));
3631da177e4SLinus Torvalds 	if (!sk) {
364dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3651da177e4SLinus Torvalds 		return;
3661da177e4SLinus Torvalds 	}
3671da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT) {
3689469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
3691da177e4SLinus Torvalds 		return;
3701da177e4SLinus Torvalds 	}
3711da177e4SLinus Torvalds 
3721da177e4SLinus Torvalds 	bh_lock_sock(sk);
3731da177e4SLinus Torvalds 	/* If too many ICMPs get dropped on busy
3741da177e4SLinus Torvalds 	 * servers this needs to be solved differently.
3751da177e4SLinus Torvalds 	 */
3761da177e4SLinus Torvalds 	if (sock_owned_by_user(sk))
377de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
3781da177e4SLinus Torvalds 
3791da177e4SLinus Torvalds 	if (sk->sk_state == TCP_CLOSE)
3801da177e4SLinus Torvalds 		goto out;
3811da177e4SLinus Torvalds 
38297e3ecd1Sstephen hemminger 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
38397e3ecd1Sstephen hemminger 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
38497e3ecd1Sstephen hemminger 		goto out;
38597e3ecd1Sstephen hemminger 	}
38697e3ecd1Sstephen hemminger 
387f1ecd5d9SDamian Lukowski 	icsk = inet_csk(sk);
3881da177e4SLinus Torvalds 	tp = tcp_sk(sk);
3891da177e4SLinus Torvalds 	seq = ntohl(th->seq);
3901da177e4SLinus Torvalds 	if (sk->sk_state != TCP_LISTEN &&
3911da177e4SLinus Torvalds 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
392de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
3931da177e4SLinus Torvalds 		goto out;
3941da177e4SLinus Torvalds 	}
3951da177e4SLinus Torvalds 
3961da177e4SLinus Torvalds 	switch (type) {
3971da177e4SLinus Torvalds 	case ICMP_SOURCE_QUENCH:
3981da177e4SLinus Torvalds 		/* Just silently ignore these. */
3991da177e4SLinus Torvalds 		goto out;
4001da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
4011da177e4SLinus Torvalds 		err = EPROTO;
4021da177e4SLinus Torvalds 		break;
4031da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
4041da177e4SLinus Torvalds 		if (code > NR_ICMP_UNREACH)
4051da177e4SLinus Torvalds 			goto out;
4061da177e4SLinus Torvalds 
4071da177e4SLinus Torvalds 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
4081da177e4SLinus Torvalds 			if (!sock_owned_by_user(sk))
4091da177e4SLinus Torvalds 				do_pmtu_discovery(sk, iph, info);
4101da177e4SLinus Torvalds 			goto out;
4111da177e4SLinus Torvalds 		}
4121da177e4SLinus Torvalds 
4131da177e4SLinus Torvalds 		err = icmp_err_convert[code].errno;
414f1ecd5d9SDamian Lukowski 		/* check if icmp_skb allows revert of backoff
415f1ecd5d9SDamian Lukowski 		 * (see draft-zimmermann-tcp-lcd) */
416f1ecd5d9SDamian Lukowski 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
417f1ecd5d9SDamian Lukowski 			break;
418f1ecd5d9SDamian Lukowski 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
419f1ecd5d9SDamian Lukowski 		    !icsk->icsk_backoff)
420f1ecd5d9SDamian Lukowski 			break;
421f1ecd5d9SDamian Lukowski 
4228f49c270SDavid S. Miller 		if (sock_owned_by_user(sk))
4238f49c270SDavid S. Miller 			break;
4248f49c270SDavid S. Miller 
425f1ecd5d9SDamian Lukowski 		icsk->icsk_backoff--;
4269ad7c049SJerry Chu 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
4279ad7c049SJerry Chu 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
428f1ecd5d9SDamian Lukowski 		tcp_bound_rto(sk);
429f1ecd5d9SDamian Lukowski 
430f1ecd5d9SDamian Lukowski 		skb = tcp_write_queue_head(sk);
431f1ecd5d9SDamian Lukowski 		BUG_ON(!skb);
432f1ecd5d9SDamian Lukowski 
433f1ecd5d9SDamian Lukowski 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
434f1ecd5d9SDamian Lukowski 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
435f1ecd5d9SDamian Lukowski 
436f1ecd5d9SDamian Lukowski 		if (remaining) {
437f1ecd5d9SDamian Lukowski 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
438f1ecd5d9SDamian Lukowski 						  remaining, TCP_RTO_MAX);
439f1ecd5d9SDamian Lukowski 		} else {
440f1ecd5d9SDamian Lukowski 			/* RTO revert clocked out retransmission.
441f1ecd5d9SDamian Lukowski 			 * Will retransmit now */
442f1ecd5d9SDamian Lukowski 			tcp_retransmit_timer(sk);
443f1ecd5d9SDamian Lukowski 		}
444f1ecd5d9SDamian Lukowski 
4451da177e4SLinus Torvalds 		break;
4461da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4471da177e4SLinus Torvalds 		err = EHOSTUNREACH;
4481da177e4SLinus Torvalds 		break;
4491da177e4SLinus Torvalds 	default:
4501da177e4SLinus Torvalds 		goto out;
4511da177e4SLinus Torvalds 	}
4521da177e4SLinus Torvalds 
4531da177e4SLinus Torvalds 	switch (sk->sk_state) {
45460236fddSArnaldo Carvalho de Melo 		struct request_sock *req, **prev;
4551da177e4SLinus Torvalds 	case TCP_LISTEN:
4561da177e4SLinus Torvalds 		if (sock_owned_by_user(sk))
4571da177e4SLinus Torvalds 			goto out;
4581da177e4SLinus Torvalds 
459463c84b9SArnaldo Carvalho de Melo 		req = inet_csk_search_req(sk, &prev, th->dest,
4601da177e4SLinus Torvalds 					  iph->daddr, iph->saddr);
4611da177e4SLinus Torvalds 		if (!req)
4621da177e4SLinus Torvalds 			goto out;
4631da177e4SLinus Torvalds 
4641da177e4SLinus Torvalds 		/* ICMPs are not backlogged, hence we cannot get
4651da177e4SLinus Torvalds 		   an established socket here.
4661da177e4SLinus Torvalds 		 */
467547b792cSIlpo Järvinen 		WARN_ON(req->sk);
4681da177e4SLinus Torvalds 
4692e6599cbSArnaldo Carvalho de Melo 		if (seq != tcp_rsk(req)->snt_isn) {
470de0744afSPavel Emelyanov 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
4711da177e4SLinus Torvalds 			goto out;
4721da177e4SLinus Torvalds 		}
4731da177e4SLinus Torvalds 
4741da177e4SLinus Torvalds 		/*
4751da177e4SLinus Torvalds 		 * Still in SYN_RECV, just remove it silently.
4761da177e4SLinus Torvalds 		 * There is no good way to pass the error to the newly
4771da177e4SLinus Torvalds 		 * created socket, and POSIX does not want network
4781da177e4SLinus Torvalds 		 * errors returned from accept().
4791da177e4SLinus Torvalds 		 */
480463c84b9SArnaldo Carvalho de Melo 		inet_csk_reqsk_queue_drop(sk, req, prev);
4811da177e4SLinus Torvalds 		goto out;
4821da177e4SLinus Torvalds 
4831da177e4SLinus Torvalds 	case TCP_SYN_SENT:
4841da177e4SLinus Torvalds 	case TCP_SYN_RECV:  /* Cannot happen.
4851da177e4SLinus Torvalds 			       It can f.e. if SYNs crossed.
4861da177e4SLinus Torvalds 			     */
4871da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk)) {
4881da177e4SLinus Torvalds 			sk->sk_err = err;
4891da177e4SLinus Torvalds 
4901da177e4SLinus Torvalds 			sk->sk_error_report(sk);
4911da177e4SLinus Torvalds 
4921da177e4SLinus Torvalds 			tcp_done(sk);
4931da177e4SLinus Torvalds 		} else {
4941da177e4SLinus Torvalds 			sk->sk_err_soft = err;
4951da177e4SLinus Torvalds 		}
4961da177e4SLinus Torvalds 		goto out;
4971da177e4SLinus Torvalds 	}
4981da177e4SLinus Torvalds 
4991da177e4SLinus Torvalds 	/* If we've already connected we will keep trying
5001da177e4SLinus Torvalds 	 * until we time out, or the user gives up.
5011da177e4SLinus Torvalds 	 *
5021da177e4SLinus Torvalds 	 * rfc1122 4.2.3.9 allows to consider as hard errors
5031da177e4SLinus Torvalds 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
5041da177e4SLinus Torvalds 	 * but it is obsoleted by pmtu discovery).
5051da177e4SLinus Torvalds 	 *
5061da177e4SLinus Torvalds 	 * Note, that in modern internet, where routing is unreliable
5071da177e4SLinus Torvalds 	 * and in each dark corner broken firewalls sit, sending random
5081da177e4SLinus Torvalds 	 * errors ordered by their masters even this two messages finally lose
5091da177e4SLinus Torvalds 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
5101da177e4SLinus Torvalds 	 *
5111da177e4SLinus Torvalds 	 * Now we are in compliance with RFCs.
5121da177e4SLinus Torvalds 	 *							--ANK (980905)
5131da177e4SLinus Torvalds 	 */
5141da177e4SLinus Torvalds 
5151da177e4SLinus Torvalds 	inet = inet_sk(sk);
5161da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk) && inet->recverr) {
5171da177e4SLinus Torvalds 		sk->sk_err = err;
5181da177e4SLinus Torvalds 		sk->sk_error_report(sk);
5191da177e4SLinus Torvalds 	} else	{ /* Only an error on timeout */
5201da177e4SLinus Torvalds 		sk->sk_err_soft = err;
5211da177e4SLinus Torvalds 	}
5221da177e4SLinus Torvalds 
5231da177e4SLinus Torvalds out:
5241da177e4SLinus Torvalds 	bh_unlock_sock(sk);
5251da177e4SLinus Torvalds 	sock_put(sk);
5261da177e4SLinus Torvalds }
5271da177e4SLinus Torvalds 
528419f9f89SHerbert Xu static void __tcp_v4_send_check(struct sk_buff *skb,
529419f9f89SHerbert Xu 				__be32 saddr, __be32 daddr)
5301da177e4SLinus Torvalds {
531aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
5321da177e4SLinus Torvalds 
53384fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
534419f9f89SHerbert Xu 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
535663ead3bSHerbert Xu 		skb->csum_start = skb_transport_header(skb) - skb->head;
536ff1dcadbSAl Viro 		skb->csum_offset = offsetof(struct tcphdr, check);
5371da177e4SLinus Torvalds 	} else {
538419f9f89SHerbert Xu 		th->check = tcp_v4_check(skb->len, saddr, daddr,
53907f0757aSJoe Perches 					 csum_partial(th,
5401da177e4SLinus Torvalds 						      th->doff << 2,
5411da177e4SLinus Torvalds 						      skb->csum));
5421da177e4SLinus Torvalds 	}
5431da177e4SLinus Torvalds }
5441da177e4SLinus Torvalds 
545419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */
546bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
547419f9f89SHerbert Xu {
548cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
549419f9f89SHerbert Xu 
550419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
551419f9f89SHerbert Xu }
5524bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check);
553419f9f89SHerbert Xu 
554a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb)
555a430a43dSHerbert Xu {
556eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
557a430a43dSHerbert Xu 	struct tcphdr *th;
558a430a43dSHerbert Xu 
559a430a43dSHerbert Xu 	if (!pskb_may_pull(skb, sizeof(*th)))
560a430a43dSHerbert Xu 		return -EINVAL;
561a430a43dSHerbert Xu 
562eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
563aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
564a430a43dSHerbert Xu 
565a430a43dSHerbert Xu 	th->check = 0;
56684fa7933SPatrick McHardy 	skb->ip_summed = CHECKSUM_PARTIAL;
567419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
568a430a43dSHerbert Xu 	return 0;
569a430a43dSHerbert Xu }
570a430a43dSHerbert Xu 
5711da177e4SLinus Torvalds /*
5721da177e4SLinus Torvalds  *	This routine will send an RST to the other tcp.
5731da177e4SLinus Torvalds  *
5741da177e4SLinus Torvalds  *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
5751da177e4SLinus Torvalds  *		      for reset.
5761da177e4SLinus Torvalds  *	Answer: if a packet caused RST, it is not for a socket
5771da177e4SLinus Torvalds  *		existing in our system, if it is matched to a socket,
5781da177e4SLinus Torvalds  *		it is just duplicate segment or bug in other side's TCP.
5791da177e4SLinus Torvalds  *		So that we build reply only basing on parameters
5801da177e4SLinus Torvalds  *		arrived with segment.
5811da177e4SLinus Torvalds  *	Exception: precedence violation. We do not implement it in any case.
5821da177e4SLinus Torvalds  */
5831da177e4SLinus Torvalds 
584cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
5851da177e4SLinus Torvalds {
586cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
587cfb6eeb4SYOSHIFUJI Hideaki 	struct {
588cfb6eeb4SYOSHIFUJI Hideaki 		struct tcphdr th;
589cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
590714e85beSAl Viro 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
591cfb6eeb4SYOSHIFUJI Hideaki #endif
592cfb6eeb4SYOSHIFUJI Hideaki 	} rep;
5931da177e4SLinus Torvalds 	struct ip_reply_arg arg;
594cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
595cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
596658ddaafSShawn Lu 	const __u8 *hash_location = NULL;
597658ddaafSShawn Lu 	unsigned char newhash[16];
598658ddaafSShawn Lu 	int genhash;
599658ddaafSShawn Lu 	struct sock *sk1 = NULL;
600cfb6eeb4SYOSHIFUJI Hideaki #endif
601a86b1e30SPavel Emelyanov 	struct net *net;
6021da177e4SLinus Torvalds 
6031da177e4SLinus Torvalds 	/* Never send a reset in response to a reset. */
6041da177e4SLinus Torvalds 	if (th->rst)
6051da177e4SLinus Torvalds 		return;
6061da177e4SLinus Torvalds 
607511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
6081da177e4SLinus Torvalds 		return;
6091da177e4SLinus Torvalds 
6101da177e4SLinus Torvalds 	/* Swap the send and the receive. */
611cfb6eeb4SYOSHIFUJI Hideaki 	memset(&rep, 0, sizeof(rep));
612cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.dest   = th->source;
613cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.source = th->dest;
614cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.doff   = sizeof(struct tcphdr) / 4;
615cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.rst    = 1;
6161da177e4SLinus Torvalds 
6171da177e4SLinus Torvalds 	if (th->ack) {
618cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.seq = th->ack_seq;
6191da177e4SLinus Torvalds 	} else {
620cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack = 1;
621cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
6221da177e4SLinus Torvalds 				       skb->len - (th->doff << 2));
6231da177e4SLinus Torvalds 	}
6241da177e4SLinus Torvalds 
6257174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
626cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_base = (unsigned char *)&rep;
627cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_len  = sizeof(rep.th);
628cfb6eeb4SYOSHIFUJI Hideaki 
629cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
630658ddaafSShawn Lu 	hash_location = tcp_parse_md5sig_option(th);
631658ddaafSShawn Lu 	if (!sk && hash_location) {
632658ddaafSShawn Lu 		/*
633658ddaafSShawn Lu 		 * active side is lost. Try to find listening socket through
634658ddaafSShawn Lu 		 * source port, and then find md5 key through listening socket.
635658ddaafSShawn Lu 		 * we are not loose security here:
636658ddaafSShawn Lu 		 * Incoming packet is checked with md5 hash with finding key,
637658ddaafSShawn Lu 		 * no RST generated if md5 hash doesn't match.
638658ddaafSShawn Lu 		 */
639658ddaafSShawn Lu 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
640658ddaafSShawn Lu 					     &tcp_hashinfo, ip_hdr(skb)->daddr,
641658ddaafSShawn Lu 					     ntohs(th->source), inet_iif(skb));
642658ddaafSShawn Lu 		/* don't send rst if it can't find key */
643658ddaafSShawn Lu 		if (!sk1)
644658ddaafSShawn Lu 			return;
645658ddaafSShawn Lu 		rcu_read_lock();
646658ddaafSShawn Lu 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
647658ddaafSShawn Lu 					&ip_hdr(skb)->saddr, AF_INET);
648658ddaafSShawn Lu 		if (!key)
649658ddaafSShawn Lu 			goto release_sk1;
650658ddaafSShawn Lu 
651658ddaafSShawn Lu 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
652658ddaafSShawn Lu 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
653658ddaafSShawn Lu 			goto release_sk1;
654658ddaafSShawn Lu 	} else {
655658ddaafSShawn Lu 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
656658ddaafSShawn Lu 					     &ip_hdr(skb)->saddr,
657a915da9bSEric Dumazet 					     AF_INET) : NULL;
658658ddaafSShawn Lu 	}
659658ddaafSShawn Lu 
660cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
661cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
662cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_NOP << 16) |
663cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_MD5SIG << 8) |
664cfb6eeb4SYOSHIFUJI Hideaki 				   TCPOLEN_MD5SIG);
665cfb6eeb4SYOSHIFUJI Hideaki 		/* Update length and the length the header thinks exists */
666cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
667cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len / 4;
668cfb6eeb4SYOSHIFUJI Hideaki 
66949a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
67078e645cbSIlpo Järvinen 				     key, ip_hdr(skb)->saddr,
67178e645cbSIlpo Järvinen 				     ip_hdr(skb)->daddr, &rep.th);
672cfb6eeb4SYOSHIFUJI Hideaki 	}
673cfb6eeb4SYOSHIFUJI Hideaki #endif
674eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
675eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
67652cd5750SIlpo Järvinen 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
6771da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
67888ef4a5aSKOVACS Krisztian 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
679e2446eaaSShawn Lu 	/* When socket is gone, all binding information is lost.
680e2446eaaSShawn Lu 	 * routing might fail in this case. using iif for oif to
681e2446eaaSShawn Lu 	 * make sure we can deliver it
682e2446eaaSShawn Lu 	 */
683e2446eaaSShawn Lu 	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
6841da177e4SLinus Torvalds 
685adf30907SEric Dumazet 	net = dev_net(skb_dst(skb)->dev);
68666b13d99SEric Dumazet 	arg.tos = ip_hdr(skb)->tos;
6870a5ebb80SDavid S. Miller 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
6887feb49c8SDenis V. Lunev 		      &arg, arg.iov[0].iov_len);
6891da177e4SLinus Torvalds 
69063231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
69163231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
692658ddaafSShawn Lu 
693658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG
694658ddaafSShawn Lu release_sk1:
695658ddaafSShawn Lu 	if (sk1) {
696658ddaafSShawn Lu 		rcu_read_unlock();
697658ddaafSShawn Lu 		sock_put(sk1);
698658ddaafSShawn Lu 	}
699658ddaafSShawn Lu #endif
7001da177e4SLinus Torvalds }
7011da177e4SLinus Torvalds 
7021da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
7031da177e4SLinus Torvalds    outside socket context is ugly, certainly. What can I do?
7041da177e4SLinus Torvalds  */
7051da177e4SLinus Torvalds 
7069501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
7079501f972SYOSHIFUJI Hideaki 			    u32 win, u32 ts, int oif,
70888ef4a5aSKOVACS Krisztian 			    struct tcp_md5sig_key *key,
70966b13d99SEric Dumazet 			    int reply_flags, u8 tos)
7101da177e4SLinus Torvalds {
711cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
7121da177e4SLinus Torvalds 	struct {
7131da177e4SLinus Torvalds 		struct tcphdr th;
714714e85beSAl Viro 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
715cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
716cfb6eeb4SYOSHIFUJI Hideaki 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
717cfb6eeb4SYOSHIFUJI Hideaki #endif
718cfb6eeb4SYOSHIFUJI Hideaki 			];
7191da177e4SLinus Torvalds 	} rep;
7201da177e4SLinus Torvalds 	struct ip_reply_arg arg;
721adf30907SEric Dumazet 	struct net *net = dev_net(skb_dst(skb)->dev);
7221da177e4SLinus Torvalds 
7231da177e4SLinus Torvalds 	memset(&rep.th, 0, sizeof(struct tcphdr));
7247174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
7251da177e4SLinus Torvalds 
7261da177e4SLinus Torvalds 	arg.iov[0].iov_base = (unsigned char *)&rep;
7271da177e4SLinus Torvalds 	arg.iov[0].iov_len  = sizeof(rep.th);
7281da177e4SLinus Torvalds 	if (ts) {
729cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
7301da177e4SLinus Torvalds 				   (TCPOPT_TIMESTAMP << 8) |
7311da177e4SLinus Torvalds 				   TCPOLEN_TIMESTAMP);
732cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[1] = htonl(tcp_time_stamp);
733cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[2] = htonl(ts);
734cb48cfe8SCraig Schlenter 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
7351da177e4SLinus Torvalds 	}
7361da177e4SLinus Torvalds 
7371da177e4SLinus Torvalds 	/* Swap the send and the receive. */
7381da177e4SLinus Torvalds 	rep.th.dest    = th->source;
7391da177e4SLinus Torvalds 	rep.th.source  = th->dest;
7401da177e4SLinus Torvalds 	rep.th.doff    = arg.iov[0].iov_len / 4;
7411da177e4SLinus Torvalds 	rep.th.seq     = htonl(seq);
7421da177e4SLinus Torvalds 	rep.th.ack_seq = htonl(ack);
7431da177e4SLinus Torvalds 	rep.th.ack     = 1;
7441da177e4SLinus Torvalds 	rep.th.window  = htons(win);
7451da177e4SLinus Torvalds 
746cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
747cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
748cfb6eeb4SYOSHIFUJI Hideaki 		int offset = (ts) ? 3 : 0;
749cfb6eeb4SYOSHIFUJI Hideaki 
750cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
751cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_NOP << 16) |
752cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_MD5SIG << 8) |
753cfb6eeb4SYOSHIFUJI Hideaki 					  TCPOLEN_MD5SIG);
754cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
755cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len/4;
756cfb6eeb4SYOSHIFUJI Hideaki 
75749a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
75890b7e112SAdam Langley 				    key, ip_hdr(skb)->saddr,
75990b7e112SAdam Langley 				    ip_hdr(skb)->daddr, &rep.th);
760cfb6eeb4SYOSHIFUJI Hideaki 	}
761cfb6eeb4SYOSHIFUJI Hideaki #endif
76288ef4a5aSKOVACS Krisztian 	arg.flags = reply_flags;
763eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
764eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
7651da177e4SLinus Torvalds 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
7661da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
7679501f972SYOSHIFUJI Hideaki 	if (oif)
7689501f972SYOSHIFUJI Hideaki 		arg.bound_dev_if = oif;
76966b13d99SEric Dumazet 	arg.tos = tos;
7700a5ebb80SDavid S. Miller 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
7717feb49c8SDenis V. Lunev 		      &arg, arg.iov[0].iov_len);
7721da177e4SLinus Torvalds 
77363231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
7741da177e4SLinus Torvalds }
7751da177e4SLinus Torvalds 
7761da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
7771da177e4SLinus Torvalds {
7788feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw = inet_twsk(sk);
779cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
7801da177e4SLinus Torvalds 
7819501f972SYOSHIFUJI Hideaki 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7827174259eSArnaldo Carvalho de Melo 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
7839501f972SYOSHIFUJI Hideaki 			tcptw->tw_ts_recent,
7849501f972SYOSHIFUJI Hideaki 			tw->tw_bound_dev_if,
78588ef4a5aSKOVACS Krisztian 			tcp_twsk_md5_key(tcptw),
78666b13d99SEric Dumazet 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
78766b13d99SEric Dumazet 			tw->tw_tos
7889501f972SYOSHIFUJI Hideaki 			);
7891da177e4SLinus Torvalds 
7908feaf0c0SArnaldo Carvalho de Melo 	inet_twsk_put(tw);
7911da177e4SLinus Torvalds }
7921da177e4SLinus Torvalds 
7936edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
7947174259eSArnaldo Carvalho de Melo 				  struct request_sock *req)
7951da177e4SLinus Torvalds {
7969501f972SYOSHIFUJI Hideaki 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
797cfb6eeb4SYOSHIFUJI Hideaki 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
7989501f972SYOSHIFUJI Hideaki 			req->ts_recent,
7999501f972SYOSHIFUJI Hideaki 			0,
800a915da9bSEric Dumazet 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
801a915da9bSEric Dumazet 					  AF_INET),
80266b13d99SEric Dumazet 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
80366b13d99SEric Dumazet 			ip_hdr(skb)->tos);
8041da177e4SLinus Torvalds }
8051da177e4SLinus Torvalds 
8061da177e4SLinus Torvalds /*
8079bf1d83eSKris Katterjohn  *	Send a SYN-ACK after having received a SYN.
80860236fddSArnaldo Carvalho de Melo  *	This still operates on a request_sock only, not on a big
8091da177e4SLinus Torvalds  *	socket.
8101da177e4SLinus Torvalds  */
81172659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
812e6b4d113SWilliam Allen Simpson 			      struct request_sock *req,
813e6b4d113SWilliam Allen Simpson 			      struct request_values *rvp)
8141da177e4SLinus Torvalds {
8152e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
8166bd023f3SDavid S. Miller 	struct flowi4 fl4;
8171da177e4SLinus Torvalds 	int err = -1;
8181da177e4SLinus Torvalds 	struct sk_buff * skb;
8191da177e4SLinus Torvalds 
8201da177e4SLinus Torvalds 	/* First, grab a route. */
8216bd023f3SDavid S. Miller 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
822fd80eb94SDenis V. Lunev 		return -1;
8231da177e4SLinus Torvalds 
824e6b4d113SWilliam Allen Simpson 	skb = tcp_make_synack(sk, dst, req, rvp);
8251da177e4SLinus Torvalds 
8261da177e4SLinus Torvalds 	if (skb) {
827419f9f89SHerbert Xu 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
8281da177e4SLinus Torvalds 
8292e6599cbSArnaldo Carvalho de Melo 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
8302e6599cbSArnaldo Carvalho de Melo 					    ireq->rmt_addr,
8312e6599cbSArnaldo Carvalho de Melo 					    ireq->opt);
832b9df3cb8SGerrit Renker 		err = net_xmit_eval(err);
8331da177e4SLinus Torvalds 	}
8341da177e4SLinus Torvalds 
8351da177e4SLinus Torvalds 	dst_release(dst);
8361da177e4SLinus Torvalds 	return err;
8371da177e4SLinus Torvalds }
8381da177e4SLinus Torvalds 
83972659eccSOctavian Purdila static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
840e6b4d113SWilliam Allen Simpson 			      struct request_values *rvp)
841fd80eb94SDenis V. Lunev {
84272659eccSOctavian Purdila 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
84372659eccSOctavian Purdila 	return tcp_v4_send_synack(sk, NULL, req, rvp);
844fd80eb94SDenis V. Lunev }
845fd80eb94SDenis V. Lunev 
8461da177e4SLinus Torvalds /*
84760236fddSArnaldo Carvalho de Melo  *	IPv4 request_sock destructor.
8481da177e4SLinus Torvalds  */
84960236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req)
8501da177e4SLinus Torvalds {
8512e6599cbSArnaldo Carvalho de Melo 	kfree(inet_rsk(req)->opt);
8521da177e4SLinus Torvalds }
8531da177e4SLinus Torvalds 
854946cedccSEric Dumazet /*
855946cedccSEric Dumazet  * Return 1 if a syncookie should be sent
856946cedccSEric Dumazet  */
857946cedccSEric Dumazet int tcp_syn_flood_action(struct sock *sk,
858946cedccSEric Dumazet 			 const struct sk_buff *skb,
859946cedccSEric Dumazet 			 const char *proto)
8601da177e4SLinus Torvalds {
861946cedccSEric Dumazet 	const char *msg = "Dropping request";
862946cedccSEric Dumazet 	int want_cookie = 0;
863946cedccSEric Dumazet 	struct listen_sock *lopt;
864946cedccSEric Dumazet 
865946cedccSEric Dumazet 
8661da177e4SLinus Torvalds 
8672a1d4bd4SFlorian Westphal #ifdef CONFIG_SYN_COOKIES
868946cedccSEric Dumazet 	if (sysctl_tcp_syncookies) {
8692a1d4bd4SFlorian Westphal 		msg = "Sending cookies";
870946cedccSEric Dumazet 		want_cookie = 1;
871946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
872946cedccSEric Dumazet 	} else
87380e40daaSArnaldo Carvalho de Melo #endif
874946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
8752a1d4bd4SFlorian Westphal 
876946cedccSEric Dumazet 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
877946cedccSEric Dumazet 	if (!lopt->synflood_warned) {
878946cedccSEric Dumazet 		lopt->synflood_warned = 1;
879946cedccSEric Dumazet 		pr_info("%s: Possible SYN flooding on port %d. %s. "
880946cedccSEric Dumazet 			" Check SNMP counters.\n",
881946cedccSEric Dumazet 			proto, ntohs(tcp_hdr(skb)->dest), msg);
8822a1d4bd4SFlorian Westphal 	}
883946cedccSEric Dumazet 	return want_cookie;
884946cedccSEric Dumazet }
885946cedccSEric Dumazet EXPORT_SYMBOL(tcp_syn_flood_action);
8861da177e4SLinus Torvalds 
8871da177e4SLinus Torvalds /*
88860236fddSArnaldo Carvalho de Melo  * Save and compile IPv4 options into the request_sock if needed.
8891da177e4SLinus Torvalds  */
890f6d8bd05SEric Dumazet static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
8911da177e4SLinus Torvalds 						  struct sk_buff *skb)
8921da177e4SLinus Torvalds {
893f6d8bd05SEric Dumazet 	const struct ip_options *opt = &(IPCB(skb)->opt);
894f6d8bd05SEric Dumazet 	struct ip_options_rcu *dopt = NULL;
8951da177e4SLinus Torvalds 
8961da177e4SLinus Torvalds 	if (opt && opt->optlen) {
897f6d8bd05SEric Dumazet 		int opt_size = sizeof(*dopt) + opt->optlen;
898f6d8bd05SEric Dumazet 
8991da177e4SLinus Torvalds 		dopt = kmalloc(opt_size, GFP_ATOMIC);
9001da177e4SLinus Torvalds 		if (dopt) {
901f6d8bd05SEric Dumazet 			if (ip_options_echo(&dopt->opt, skb)) {
9021da177e4SLinus Torvalds 				kfree(dopt);
9031da177e4SLinus Torvalds 				dopt = NULL;
9041da177e4SLinus Torvalds 			}
9051da177e4SLinus Torvalds 		}
9061da177e4SLinus Torvalds 	}
9071da177e4SLinus Torvalds 	return dopt;
9081da177e4SLinus Torvalds }
9091da177e4SLinus Torvalds 
910cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
911cfb6eeb4SYOSHIFUJI Hideaki /*
912cfb6eeb4SYOSHIFUJI Hideaki  * RFC2385 MD5 checksumming requires a mapping of
913cfb6eeb4SYOSHIFUJI Hideaki  * IP address->MD5 Key.
914cfb6eeb4SYOSHIFUJI Hideaki  * We need to maintain these in the sk structure.
915cfb6eeb4SYOSHIFUJI Hideaki  */
916cfb6eeb4SYOSHIFUJI Hideaki 
917cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address.  */
918a915da9bSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
919a915da9bSEric Dumazet 					 const union tcp_md5_addr *addr,
920a915da9bSEric Dumazet 					 int family)
921cfb6eeb4SYOSHIFUJI Hideaki {
922cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
923a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
924a915da9bSEric Dumazet 	struct hlist_node *pos;
925a915da9bSEric Dumazet 	unsigned int size = sizeof(struct in_addr);
926a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
927cfb6eeb4SYOSHIFUJI Hideaki 
928a8afca03SEric Dumazet 	/* caller either holds rcu_read_lock() or socket lock */
929a8afca03SEric Dumazet 	md5sig = rcu_dereference_check(tp->md5sig_info,
930*b4fb05eaSEric Dumazet 				       sock_owned_by_user(sk) ||
931*b4fb05eaSEric Dumazet 				       lockdep_is_held(&sk->sk_lock.slock));
932a8afca03SEric Dumazet 	if (!md5sig)
933cfb6eeb4SYOSHIFUJI Hideaki 		return NULL;
934a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
935a915da9bSEric Dumazet 	if (family == AF_INET6)
936a915da9bSEric Dumazet 		size = sizeof(struct in6_addr);
937a915da9bSEric Dumazet #endif
938a8afca03SEric Dumazet 	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
939a915da9bSEric Dumazet 		if (key->family != family)
940a915da9bSEric Dumazet 			continue;
941a915da9bSEric Dumazet 		if (!memcmp(&key->addr, addr, size))
942a915da9bSEric Dumazet 			return key;
943cfb6eeb4SYOSHIFUJI Hideaki 	}
944cfb6eeb4SYOSHIFUJI Hideaki 	return NULL;
945cfb6eeb4SYOSHIFUJI Hideaki }
946a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup);
947cfb6eeb4SYOSHIFUJI Hideaki 
948cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
949cfb6eeb4SYOSHIFUJI Hideaki 					 struct sock *addr_sk)
950cfb6eeb4SYOSHIFUJI Hideaki {
951a915da9bSEric Dumazet 	union tcp_md5_addr *addr;
952a915da9bSEric Dumazet 
953a915da9bSEric Dumazet 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
954a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
955cfb6eeb4SYOSHIFUJI Hideaki }
956cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup);
957cfb6eeb4SYOSHIFUJI Hideaki 
958f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
959cfb6eeb4SYOSHIFUJI Hideaki 						      struct request_sock *req)
960cfb6eeb4SYOSHIFUJI Hideaki {
961a915da9bSEric Dumazet 	union tcp_md5_addr *addr;
962a915da9bSEric Dumazet 
963a915da9bSEric Dumazet 	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
964a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
965cfb6eeb4SYOSHIFUJI Hideaki }
966cfb6eeb4SYOSHIFUJI Hideaki 
967cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */
968a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
969a915da9bSEric Dumazet 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
970cfb6eeb4SYOSHIFUJI Hideaki {
971cfb6eeb4SYOSHIFUJI Hideaki 	/* Add Key to the list */
972b0a713e9SMatthias M. Dellweg 	struct tcp_md5sig_key *key;
973cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
974f6685938SArnaldo Carvalho de Melo 	struct tcp_md5sig_info *md5sig;
975f6685938SArnaldo Carvalho de Melo 
976a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
977a915da9bSEric Dumazet 	if (key) {
978a915da9bSEric Dumazet 		/* Pre-existing entry - just update that one. */
979a915da9bSEric Dumazet 		memcpy(key->key, newkey, newkeylen);
980a915da9bSEric Dumazet 		key->keylen = newkeylen;
981a915da9bSEric Dumazet 		return 0;
982cfb6eeb4SYOSHIFUJI Hideaki 	}
983260fcbebSYan, Zheng 
984a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
985a8afca03SEric Dumazet 					   sock_owned_by_user(sk));
986a915da9bSEric Dumazet 	if (!md5sig) {
987a915da9bSEric Dumazet 		md5sig = kmalloc(sizeof(*md5sig), gfp);
988a915da9bSEric Dumazet 		if (!md5sig)
989a915da9bSEric Dumazet 			return -ENOMEM;
990a915da9bSEric Dumazet 
991a915da9bSEric Dumazet 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
992a915da9bSEric Dumazet 		INIT_HLIST_HEAD(&md5sig->head);
993a8afca03SEric Dumazet 		rcu_assign_pointer(tp->md5sig_info, md5sig);
994a915da9bSEric Dumazet 	}
995a915da9bSEric Dumazet 
9965f3d9cb2SEric Dumazet 	key = sock_kmalloc(sk, sizeof(*key), gfp);
997a915da9bSEric Dumazet 	if (!key)
998a915da9bSEric Dumazet 		return -ENOMEM;
999a915da9bSEric Dumazet 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
10005f3d9cb2SEric Dumazet 		sock_kfree_s(sk, key, sizeof(*key));
1001cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOMEM;
1002cfb6eeb4SYOSHIFUJI Hideaki 	}
1003f6685938SArnaldo Carvalho de Melo 
1004a915da9bSEric Dumazet 	memcpy(key->key, newkey, newkeylen);
1005a915da9bSEric Dumazet 	key->keylen = newkeylen;
1006a915da9bSEric Dumazet 	key->family = family;
1007a915da9bSEric Dumazet 	memcpy(&key->addr, addr,
1008a915da9bSEric Dumazet 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1009a915da9bSEric Dumazet 				      sizeof(struct in_addr));
1010a915da9bSEric Dumazet 	hlist_add_head_rcu(&key->node, &md5sig->head);
1011cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
1012cfb6eeb4SYOSHIFUJI Hideaki }
1013a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add);
1014cfb6eeb4SYOSHIFUJI Hideaki 
1015a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1016cfb6eeb4SYOSHIFUJI Hideaki {
1017cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1018a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
1019a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
1020cfb6eeb4SYOSHIFUJI Hideaki 
1021a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
1022a915da9bSEric Dumazet 	if (!key)
1023cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOENT;
1024a915da9bSEric Dumazet 	hlist_del_rcu(&key->node);
10255f3d9cb2SEric Dumazet 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1026a915da9bSEric Dumazet 	kfree_rcu(key, rcu);
1027a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1028a8afca03SEric Dumazet 					   sock_owned_by_user(sk));
1029a8afca03SEric Dumazet 	if (hlist_empty(&md5sig->head))
1030a915da9bSEric Dumazet 		tcp_free_md5sig_pool();
1031a915da9bSEric Dumazet 	return 0;
1032cfb6eeb4SYOSHIFUJI Hideaki }
1033a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del);
1034cfb6eeb4SYOSHIFUJI Hideaki 
1035a915da9bSEric Dumazet void tcp_clear_md5_list(struct sock *sk)
1036cfb6eeb4SYOSHIFUJI Hideaki {
1037cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1038a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
1039a915da9bSEric Dumazet 	struct hlist_node *pos, *n;
1040a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
1041cfb6eeb4SYOSHIFUJI Hideaki 
1042a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1043a8afca03SEric Dumazet 
1044a8afca03SEric Dumazet 	if (!hlist_empty(&md5sig->head))
1045cfb6eeb4SYOSHIFUJI Hideaki 		tcp_free_md5sig_pool();
1046a8afca03SEric Dumazet 	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1047a915da9bSEric Dumazet 		hlist_del_rcu(&key->node);
10485f3d9cb2SEric Dumazet 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1049a915da9bSEric Dumazet 		kfree_rcu(key, rcu);
1050cfb6eeb4SYOSHIFUJI Hideaki 	}
1051cfb6eeb4SYOSHIFUJI Hideaki }
1052cfb6eeb4SYOSHIFUJI Hideaki 
1053cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1054cfb6eeb4SYOSHIFUJI Hideaki 				 int optlen)
1055cfb6eeb4SYOSHIFUJI Hideaki {
1056cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig cmd;
1057cfb6eeb4SYOSHIFUJI Hideaki 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1058cfb6eeb4SYOSHIFUJI Hideaki 
1059cfb6eeb4SYOSHIFUJI Hideaki 	if (optlen < sizeof(cmd))
1060cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1061cfb6eeb4SYOSHIFUJI Hideaki 
1062cfb6eeb4SYOSHIFUJI Hideaki 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1063cfb6eeb4SYOSHIFUJI Hideaki 		return -EFAULT;
1064cfb6eeb4SYOSHIFUJI Hideaki 
1065cfb6eeb4SYOSHIFUJI Hideaki 	if (sin->sin_family != AF_INET)
1066cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1067cfb6eeb4SYOSHIFUJI Hideaki 
1068a8afca03SEric Dumazet 	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1069a915da9bSEric Dumazet 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1070a915da9bSEric Dumazet 				      AF_INET);
1071cfb6eeb4SYOSHIFUJI Hideaki 
1072cfb6eeb4SYOSHIFUJI Hideaki 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1073cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1074cfb6eeb4SYOSHIFUJI Hideaki 
1075a915da9bSEric Dumazet 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1076a915da9bSEric Dumazet 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1077a915da9bSEric Dumazet 			      GFP_KERNEL);
1078cfb6eeb4SYOSHIFUJI Hideaki }
1079cfb6eeb4SYOSHIFUJI Hideaki 
108049a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
108149a72dfbSAdam Langley 					__be32 daddr, __be32 saddr, int nbytes)
1082cfb6eeb4SYOSHIFUJI Hideaki {
1083cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_pseudohdr *bp;
108449a72dfbSAdam Langley 	struct scatterlist sg;
1085cfb6eeb4SYOSHIFUJI Hideaki 
1086cfb6eeb4SYOSHIFUJI Hideaki 	bp = &hp->md5_blk.ip4;
1087cfb6eeb4SYOSHIFUJI Hideaki 
1088cfb6eeb4SYOSHIFUJI Hideaki 	/*
108949a72dfbSAdam Langley 	 * 1. the TCP pseudo-header (in the order: source IP address,
1090cfb6eeb4SYOSHIFUJI Hideaki 	 * destination IP address, zero-padded protocol number, and
1091cfb6eeb4SYOSHIFUJI Hideaki 	 * segment length)
1092cfb6eeb4SYOSHIFUJI Hideaki 	 */
1093cfb6eeb4SYOSHIFUJI Hideaki 	bp->saddr = saddr;
1094cfb6eeb4SYOSHIFUJI Hideaki 	bp->daddr = daddr;
1095cfb6eeb4SYOSHIFUJI Hideaki 	bp->pad = 0;
1096076fb722SYOSHIFUJI Hideaki 	bp->protocol = IPPROTO_TCP;
109749a72dfbSAdam Langley 	bp->len = cpu_to_be16(nbytes);
1098c7da57a1SDavid S. Miller 
109949a72dfbSAdam Langley 	sg_init_one(&sg, bp, sizeof(*bp));
110049a72dfbSAdam Langley 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
110149a72dfbSAdam Langley }
110249a72dfbSAdam Langley 
1103a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1104318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
110549a72dfbSAdam Langley {
110649a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
110749a72dfbSAdam Langley 	struct hash_desc *desc;
110849a72dfbSAdam Langley 
110949a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
111049a72dfbSAdam Langley 	if (!hp)
111149a72dfbSAdam Langley 		goto clear_hash_noput;
111249a72dfbSAdam Langley 	desc = &hp->md5_desc;
111349a72dfbSAdam Langley 
111449a72dfbSAdam Langley 	if (crypto_hash_init(desc))
111549a72dfbSAdam Langley 		goto clear_hash;
111649a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
111749a72dfbSAdam Langley 		goto clear_hash;
111849a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
111949a72dfbSAdam Langley 		goto clear_hash;
112049a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
112149a72dfbSAdam Langley 		goto clear_hash;
112249a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
1123cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1124cfb6eeb4SYOSHIFUJI Hideaki 
1125cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1126cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
112749a72dfbSAdam Langley 
1128cfb6eeb4SYOSHIFUJI Hideaki clear_hash:
1129cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1130cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput:
1131cfb6eeb4SYOSHIFUJI Hideaki 	memset(md5_hash, 0, 16);
113249a72dfbSAdam Langley 	return 1;
1133cfb6eeb4SYOSHIFUJI Hideaki }
1134cfb6eeb4SYOSHIFUJI Hideaki 
113549a72dfbSAdam Langley int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1136318cf7aaSEric Dumazet 			const struct sock *sk, const struct request_sock *req,
1137318cf7aaSEric Dumazet 			const struct sk_buff *skb)
1138cfb6eeb4SYOSHIFUJI Hideaki {
113949a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
114049a72dfbSAdam Langley 	struct hash_desc *desc;
1141318cf7aaSEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1142cfb6eeb4SYOSHIFUJI Hideaki 	__be32 saddr, daddr;
1143cfb6eeb4SYOSHIFUJI Hideaki 
1144cfb6eeb4SYOSHIFUJI Hideaki 	if (sk) {
1145c720c7e8SEric Dumazet 		saddr = inet_sk(sk)->inet_saddr;
1146c720c7e8SEric Dumazet 		daddr = inet_sk(sk)->inet_daddr;
114749a72dfbSAdam Langley 	} else if (req) {
114849a72dfbSAdam Langley 		saddr = inet_rsk(req)->loc_addr;
114949a72dfbSAdam Langley 		daddr = inet_rsk(req)->rmt_addr;
1150cfb6eeb4SYOSHIFUJI Hideaki 	} else {
115149a72dfbSAdam Langley 		const struct iphdr *iph = ip_hdr(skb);
115249a72dfbSAdam Langley 		saddr = iph->saddr;
115349a72dfbSAdam Langley 		daddr = iph->daddr;
1154cfb6eeb4SYOSHIFUJI Hideaki 	}
1155cfb6eeb4SYOSHIFUJI Hideaki 
115649a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
115749a72dfbSAdam Langley 	if (!hp)
115849a72dfbSAdam Langley 		goto clear_hash_noput;
115949a72dfbSAdam Langley 	desc = &hp->md5_desc;
116049a72dfbSAdam Langley 
116149a72dfbSAdam Langley 	if (crypto_hash_init(desc))
116249a72dfbSAdam Langley 		goto clear_hash;
116349a72dfbSAdam Langley 
116449a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
116549a72dfbSAdam Langley 		goto clear_hash;
116649a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
116749a72dfbSAdam Langley 		goto clear_hash;
116849a72dfbSAdam Langley 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
116949a72dfbSAdam Langley 		goto clear_hash;
117049a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
117149a72dfbSAdam Langley 		goto clear_hash;
117249a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
117349a72dfbSAdam Langley 		goto clear_hash;
117449a72dfbSAdam Langley 
117549a72dfbSAdam Langley 	tcp_put_md5sig_pool();
117649a72dfbSAdam Langley 	return 0;
117749a72dfbSAdam Langley 
117849a72dfbSAdam Langley clear_hash:
117949a72dfbSAdam Langley 	tcp_put_md5sig_pool();
118049a72dfbSAdam Langley clear_hash_noput:
118149a72dfbSAdam Langley 	memset(md5_hash, 0, 16);
118249a72dfbSAdam Langley 	return 1;
118349a72dfbSAdam Langley }
118449a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1185cfb6eeb4SYOSHIFUJI Hideaki 
1186318cf7aaSEric Dumazet static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1187cfb6eeb4SYOSHIFUJI Hideaki {
1188cfb6eeb4SYOSHIFUJI Hideaki 	/*
1189cfb6eeb4SYOSHIFUJI Hideaki 	 * This gets called for each TCP segment that arrives
1190cfb6eeb4SYOSHIFUJI Hideaki 	 * so we want to be efficient.
1191cfb6eeb4SYOSHIFUJI Hideaki 	 * We have 3 drop cases:
1192cfb6eeb4SYOSHIFUJI Hideaki 	 * o No MD5 hash and one expected.
1193cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and we're not expecting one.
1194cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and its wrong.
1195cfb6eeb4SYOSHIFUJI Hideaki 	 */
1196cf533ea5SEric Dumazet 	const __u8 *hash_location = NULL;
1197cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *hash_expected;
1198eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1199cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1200cfb6eeb4SYOSHIFUJI Hideaki 	int genhash;
1201cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char newhash[16];
1202cfb6eeb4SYOSHIFUJI Hideaki 
1203a915da9bSEric Dumazet 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1204a915da9bSEric Dumazet 					  AF_INET);
12057d5d5525SYOSHIFUJI Hideaki 	hash_location = tcp_parse_md5sig_option(th);
1206cfb6eeb4SYOSHIFUJI Hideaki 
1207cfb6eeb4SYOSHIFUJI Hideaki 	/* We've parsed the options - do we have a hash? */
1208cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && !hash_location)
1209cfb6eeb4SYOSHIFUJI Hideaki 		return 0;
1210cfb6eeb4SYOSHIFUJI Hideaki 
1211cfb6eeb4SYOSHIFUJI Hideaki 	if (hash_expected && !hash_location) {
1212785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1213cfb6eeb4SYOSHIFUJI Hideaki 		return 1;
1214cfb6eeb4SYOSHIFUJI Hideaki 	}
1215cfb6eeb4SYOSHIFUJI Hideaki 
1216cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && hash_location) {
1217785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1218cfb6eeb4SYOSHIFUJI Hideaki 		return 1;
1219cfb6eeb4SYOSHIFUJI Hideaki 	}
1220cfb6eeb4SYOSHIFUJI Hideaki 
1221cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, so this is hash_expected and hash_location -
1222cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to calculate the checksum.
1223cfb6eeb4SYOSHIFUJI Hideaki 	 */
122449a72dfbSAdam Langley 	genhash = tcp_v4_md5_hash_skb(newhash,
1225cfb6eeb4SYOSHIFUJI Hideaki 				      hash_expected,
122649a72dfbSAdam Langley 				      NULL, NULL, skb);
1227cfb6eeb4SYOSHIFUJI Hideaki 
1228cfb6eeb4SYOSHIFUJI Hideaki 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1229cfb6eeb4SYOSHIFUJI Hideaki 		if (net_ratelimit()) {
1230673d57e7SHarvey Harrison 			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1231673d57e7SHarvey Harrison 			       &iph->saddr, ntohs(th->source),
1232673d57e7SHarvey Harrison 			       &iph->daddr, ntohs(th->dest),
1233cfb6eeb4SYOSHIFUJI Hideaki 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1234cfb6eeb4SYOSHIFUJI Hideaki 		}
1235cfb6eeb4SYOSHIFUJI Hideaki 		return 1;
1236cfb6eeb4SYOSHIFUJI Hideaki 	}
1237cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
1238cfb6eeb4SYOSHIFUJI Hideaki }
1239cfb6eeb4SYOSHIFUJI Hideaki 
1240cfb6eeb4SYOSHIFUJI Hideaki #endif
1241cfb6eeb4SYOSHIFUJI Hideaki 
124272a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = {
12431da177e4SLinus Torvalds 	.family		=	PF_INET,
12442e6599cbSArnaldo Carvalho de Melo 	.obj_size	=	sizeof(struct tcp_request_sock),
124572659eccSOctavian Purdila 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
124660236fddSArnaldo Carvalho de Melo 	.send_ack	=	tcp_v4_reqsk_send_ack,
124760236fddSArnaldo Carvalho de Melo 	.destructor	=	tcp_v4_reqsk_destructor,
12481da177e4SLinus Torvalds 	.send_reset	=	tcp_v4_send_reset,
124972659eccSOctavian Purdila 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
12501da177e4SLinus Torvalds };
12511da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 MD5 signature hooks for request socks.  tcp_v4_conn_request()
 * stores a pointer to this table in tcp_rsk(req)->af_specific.
 */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup    = tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash = tcp_v4_md5_hash_skb,
};
#endif
1258cfb6eeb4SYOSHIFUJI Hideaki 
/*
 * Handle a connection request (SYN) received on listening socket @sk.
 *
 * @sk:  the listening socket.
 * @skb: the received segment.
 *
 * Allocates a request sock, parses the TCP options of @skb, chooses the
 * initial sequence number, sends the SYN-ACK and, unless a syncookie was
 * used, adds the request to the listener's request queue.
 *
 * Every path returns 0, including the paths that drop the request.
 */
12591da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
12601da177e4SLinus Torvalds {
12614957faadSWilliam Allen Simpson 	struct tcp_extend_values tmp_ext;
12621da177e4SLinus Torvalds 	struct tcp_options_received tmp_opt;
1263cf533ea5SEric Dumazet 	const u8 *hash_location;
126460236fddSArnaldo Carvalho de Melo 	struct request_sock *req;
1265e6b4d113SWilliam Allen Simpson 	struct inet_request_sock *ireq;
12664957faadSWilliam Allen Simpson 	struct tcp_sock *tp = tcp_sk(sk);
1267e6b4d113SWilliam Allen Simpson 	struct dst_entry *dst = NULL;
1268eddc9ec5SArnaldo Carvalho de Melo 	__be32 saddr = ip_hdr(skb)->saddr;
1269eddc9ec5SArnaldo Carvalho de Melo 	__be32 daddr = ip_hdr(skb)->daddr;
	/* A non-zero value here means the ISN was chosen before we were
	 * called; per the comment further down, that is the case where the
	 * SYN hit a live timewait bucket.
	 */
12701da177e4SLinus Torvalds 	__u32 isn = TCP_SKB_CB(skb)->when;
12711da177e4SLinus Torvalds 	int want_cookie = 0;
12721da177e4SLinus Torvalds 
12731da177e4SLinus Torvalds 	/* Never answer to SYNs send to broadcast or multicast */
1274511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
12751da177e4SLinus Torvalds 		goto drop;
12761da177e4SLinus Torvalds 
12771da177e4SLinus Torvalds 	/* TW buckets are converted to open requests without
12781da177e4SLinus Torvalds 	 * limitations, they conserve resources and peer is
12791da177e4SLinus Torvalds 	 * evidently real one.
12801da177e4SLinus Torvalds 	 */
1281463c84b9SArnaldo Carvalho de Melo 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1282946cedccSEric Dumazet 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1283946cedccSEric Dumazet 		if (!want_cookie)
12841da177e4SLinus Torvalds 			goto drop;
12851da177e4SLinus Torvalds 	}
12861da177e4SLinus Torvalds 
12871da177e4SLinus Torvalds 	/* Accept backlog is full. If we have already queued enough
12881da177e4SLinus Torvalds 	 * of warm entries in syn queue, drop request. It is better than
12891da177e4SLinus Torvalds 	 * clogging syn queue with openreqs with exponentially increasing
12901da177e4SLinus Torvalds 	 * timeout.
12911da177e4SLinus Torvalds 	 */
1292463c84b9SArnaldo Carvalho de Melo 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
12931da177e4SLinus Torvalds 		goto drop;
12941da177e4SLinus Torvalds 
1295ce4a7d0dSArnaldo Carvalho de Melo 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
12961da177e4SLinus Torvalds 	if (!req)
12971da177e4SLinus Torvalds 		goto drop;
12981da177e4SLinus Torvalds 
1299cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1300cfb6eeb4SYOSHIFUJI Hideaki 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1301cfb6eeb4SYOSHIFUJI Hideaki #endif
1302cfb6eeb4SYOSHIFUJI Hideaki 
	/* Parse the options of the SYN into a scratch copy, seeded with the
	 * default MSS clamp and the listener's user_mss.
	 */
13031da177e4SLinus Torvalds 	tcp_clear_options(&tmp_opt);
1304bee7ca9eSWilliam Allen Simpson 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
13054957faadSWilliam Allen Simpson 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1306bb5b7c11SDavid S. Miller 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
13071da177e4SLinus Torvalds 
	/* hash_location is only read inside this branch, i.e. when the
	 * parser reported a cookie option (cookie_plus > 0).
	 */
13084957faadSWilliam Allen Simpson 	if (tmp_opt.cookie_plus > 0 &&
13094957faadSWilliam Allen Simpson 	    tmp_opt.saw_tstamp &&
13104957faadSWilliam Allen Simpson 	    !tp->rx_opt.cookie_out_never &&
13114957faadSWilliam Allen Simpson 	    (sysctl_tcp_cookie_size > 0 ||
13124957faadSWilliam Allen Simpson 	     (tp->cookie_values != NULL &&
13134957faadSWilliam Allen Simpson 	      tp->cookie_values->cookie_desired > 0))) {
13144957faadSWilliam Allen Simpson 		u8 *c;
13154957faadSWilliam Allen Simpson 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
13164957faadSWilliam Allen Simpson 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
13174957faadSWilliam Allen Simpson 
13184957faadSWilliam Allen Simpson 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
13194957faadSWilliam Allen Simpson 			goto drop_and_release;
13204957faadSWilliam Allen Simpson 
13214957faadSWilliam Allen Simpson 		/* Secret recipe starts with IP addresses */
13220eae88f3SEric Dumazet 		*mess++ ^= (__force u32)daddr;
13230eae88f3SEric Dumazet 		*mess++ ^= (__force u32)saddr;
13244957faadSWilliam Allen Simpson 
13254957faadSWilliam Allen Simpson 		/* plus variable length Initiator Cookie */
13264957faadSWilliam Allen Simpson 		c = (u8 *)mess;
13274957faadSWilliam Allen Simpson 		while (l-- > 0)
13284957faadSWilliam Allen Simpson 			*c++ ^= *hash_location++;
13294957faadSWilliam Allen Simpson 
13304957faadSWilliam Allen Simpson 		want_cookie = 0;	/* not our kind of cookie */
13314957faadSWilliam Allen Simpson 		tmp_ext.cookie_out_never = 0; /* false */
13324957faadSWilliam Allen Simpson 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
13334957faadSWilliam Allen Simpson 	} else if (!tp->rx_opt.cookie_in_always) {
13344957faadSWilliam Allen Simpson 		/* redundant indications, but ensure initialization. */
13354957faadSWilliam Allen Simpson 		tmp_ext.cookie_out_never = 1; /* true */
13364957faadSWilliam Allen Simpson 		tmp_ext.cookie_plus = 0;
13374957faadSWilliam Allen Simpson 	} else {
13384957faadSWilliam Allen Simpson 		goto drop_and_release;
13394957faadSWilliam Allen Simpson 	}
13404957faadSWilliam Allen Simpson 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
13411da177e4SLinus Torvalds 
	/* NOTE(review): with a syncookie and no timestamp the parsed options
	 * are thrown away, presumably because there is nowhere to carry them
	 * in the cookie; confirm against the syncookie code.
	 */
13424dfc2817SFlorian Westphal 	if (want_cookie && !tmp_opt.saw_tstamp)
13431da177e4SLinus Torvalds 		tcp_clear_options(&tmp_opt);
13441da177e4SLinus Torvalds 
13451da177e4SLinus Torvalds 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
13461da177e4SLinus Torvalds 	tcp_openreq_init(req, &tmp_opt, skb);
13471da177e4SLinus Torvalds 
	/* The request is described from our point of view: the packet's
	 * destination is our local address, its source is the remote one.
	 */
1348bb5b7c11SDavid S. Miller 	ireq = inet_rsk(req);
1349bb5b7c11SDavid S. Miller 	ireq->loc_addr = daddr;
1350bb5b7c11SDavid S. Miller 	ireq->rmt_addr = saddr;
1351bb5b7c11SDavid S. Miller 	ireq->no_srccheck = inet_sk(sk)->transparent;
1352bb5b7c11SDavid S. Miller 	ireq->opt = tcp_v4_save_options(sk, skb);
1353bb5b7c11SDavid S. Miller 
1354284904aaSPaul Moore 	if (security_inet_conn_request(sk, skb, req))
1355bb5b7c11SDavid S. Miller 		goto drop_and_free;
1356284904aaSPaul Moore 
1357172d69e6SFlorian Westphal 	if (!want_cookie || tmp_opt.tstamp_ok)
1358aa8223c7SArnaldo Carvalho de Melo 		TCP_ECN_create_request(req, tcp_hdr(skb));
13591da177e4SLinus Torvalds 
	/* Pick the initial sequence number: a syncookie, a value computed
	 * here after the PAWS and backlog checks below, or the non-zero
	 * value that came in with the skb.
	 */
13601da177e4SLinus Torvalds 	if (want_cookie) {
13611da177e4SLinus Torvalds 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1362172d69e6SFlorian Westphal 		req->cookie_ts = tmp_opt.tstamp_ok;
13631da177e4SLinus Torvalds 	} else if (!isn) {
13641da177e4SLinus Torvalds 		struct inet_peer *peer = NULL;
13656bd023f3SDavid S. Miller 		struct flowi4 fl4;
13661da177e4SLinus Torvalds 
13671da177e4SLinus Torvalds 		/* VJ's idea. We save last timestamp seen
13681da177e4SLinus Torvalds 		 * from the destination in peer table, when entering
13691da177e4SLinus Torvalds 		 * state TIME-WAIT, and check against it before
13701da177e4SLinus Torvalds 		 * accepting new connection request.
13711da177e4SLinus Torvalds 		 *
13721da177e4SLinus Torvalds 		 * If "isn" is not zero, this request hit alive
13731da177e4SLinus Torvalds 		 * timewait bucket, so that all the necessary checks
13741da177e4SLinus Torvalds 		 * are made in the function processing timewait state.
13751da177e4SLinus Torvalds 		 */
13761da177e4SLinus Torvalds 		if (tmp_opt.saw_tstamp &&
1377295ff7edSArnaldo Carvalho de Melo 		    tcp_death_row.sysctl_tw_recycle &&
13786bd023f3SDavid S. Miller 		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1379ed2361e6SDavid S. Miller 		    fl4.daddr == saddr &&
1380ed2361e6SDavid S. Miller 		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1381317fe0e6SEric Dumazet 			inet_peer_refcheck(peer);
13822c1409a0SEric Dumazet 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
13831da177e4SLinus Torvalds 			    (s32)(peer->tcp_ts - req->ts_recent) >
13841da177e4SLinus Torvalds 							TCP_PAWS_WINDOW) {
1385de0744afSPavel Emelyanov 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
13867cd04fa7SDenis V. Lunev 				goto drop_and_release;
13871da177e4SLinus Torvalds 			}
13881da177e4SLinus Torvalds 		}
13891da177e4SLinus Torvalds 		/* Kill the following clause, if you dislike this way. */
13901da177e4SLinus Torvalds 		else if (!sysctl_tcp_syncookies &&
1391463c84b9SArnaldo Carvalho de Melo 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
13921da177e4SLinus Torvalds 			  (sysctl_max_syn_backlog >> 2)) &&
13931da177e4SLinus Torvalds 			 (!peer || !peer->tcp_ts_stamp) &&
13941da177e4SLinus Torvalds 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
13951da177e4SLinus Torvalds 			/* Without syncookies last quarter of
13961da177e4SLinus Torvalds 			 * backlog is filled with destinations,
13971da177e4SLinus Torvalds 			 * proven to be alive.
13981da177e4SLinus Torvalds 			 * It means that we continue to communicate
13991da177e4SLinus Torvalds 			 * to destinations, already remembered
14001da177e4SLinus Torvalds 			 * to the moment of synflood.
14011da177e4SLinus Torvalds 			 */
1402673d57e7SHarvey Harrison 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1403673d57e7SHarvey Harrison 				       &saddr, ntohs(tcp_hdr(skb)->source));
14047cd04fa7SDenis V. Lunev 			goto drop_and_release;
14051da177e4SLinus Torvalds 		}
14061da177e4SLinus Torvalds 
1407a94f723dSGerrit Renker 		isn = tcp_v4_init_sequence(skb);
14081da177e4SLinus Torvalds 	}
14092e6599cbSArnaldo Carvalho de Melo 	tcp_rsk(req)->snt_isn = isn;
14109ad7c049SJerry Chu 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
14111da177e4SLinus Torvalds 
	/* A syncookie request is never queued: once the SYN-ACK has been
	 * attempted the request is freed.  The same path handles a failed
	 * send.  This jump skips dst_release(); NOTE(review): that assumes
	 * tcp_v4_send_synack() disposes of dst itself; confirm there.
	 */
141272659eccSOctavian Purdila 	if (tcp_v4_send_synack(sk, dst, req,
14134957faadSWilliam Allen Simpson 			       (struct request_values *)&tmp_ext) ||
14144957faadSWilliam Allen Simpson 	    want_cookie)
14151da177e4SLinus Torvalds 		goto drop_and_free;
14161da177e4SLinus Torvalds 
14173f421baaSArnaldo Carvalho de Melo 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
14181da177e4SLinus Torvalds 	return 0;
14191da177e4SLinus Torvalds 
	/* Unwind labels fall through: release the route (dst may still be
	 * NULL here), then free the request, then return.
	 */
14207cd04fa7SDenis V. Lunev drop_and_release:
14217cd04fa7SDenis V. Lunev 	dst_release(dst);
14221da177e4SLinus Torvalds drop_and_free:
142360236fddSArnaldo Carvalho de Melo 	reqsk_free(req);
14241da177e4SLinus Torvalds drop:
14251da177e4SLinus Torvalds 	return 0;
14261da177e4SLinus Torvalds }
14274bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request);
14281da177e4SLinus Torvalds 
14291da177e4SLinus Torvalds 
14301da177e4SLinus Torvalds /*
14311da177e4SLinus Torvalds  * The three way handshake has completed - we got a valid synack -
14321da177e4SLinus Torvalds  * now create the new socket.
 *
 * @sk:  the listening socket.
 * @skb: the segment being processed.
 * @req: the request sock the child is built from.
 * @dst: route for the child; when NULL one is looked up with
 *       inet_csk_route_child_sock().
 *
 * Returns the new child socket, or NULL when the accept queue is full,
 * the child cannot be allocated, no route is found or the local port
 * cannot be inherited.  Every NULL return bumps LINUX_MIB_LISTENDROPS;
 * the accept-queue-full case also bumps LINUX_MIB_LISTENOVERFLOWS.
14331da177e4SLinus Torvalds  */
14341da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
143560236fddSArnaldo Carvalho de Melo 				  struct request_sock *req,
14361da177e4SLinus Torvalds 				  struct dst_entry *dst)
14371da177e4SLinus Torvalds {
14382e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
14391da177e4SLinus Torvalds 	struct inet_sock *newinet;
14401da177e4SLinus Torvalds 	struct tcp_sock *newtp;
14411da177e4SLinus Torvalds 	struct sock *newsk;
1442cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1443cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
1444cfb6eeb4SYOSHIFUJI Hideaki #endif
1445f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
14461da177e4SLinus Torvalds 
14471da177e4SLinus Torvalds 	if (sk_acceptq_is_full(sk))
14481da177e4SLinus Torvalds 		goto exit_overflow;
14491da177e4SLinus Torvalds 
14501da177e4SLinus Torvalds 	newsk = tcp_create_openreq_child(sk, req, skb);
14511da177e4SLinus Torvalds 	if (!newsk)
1452093d2823SBalazs Scheidler 		goto exit_nonewsk;
14531da177e4SLinus Torvalds 
1454bcd76111SHerbert Xu 	newsk->sk_gso_type = SKB_GSO_TCPV4;
14551da177e4SLinus Torvalds 
	/* Fill in the child's addresses from the request and its multicast
	 * index, TTL and TOS from the segment.
	 */
14561da177e4SLinus Torvalds 	newtp		      = tcp_sk(newsk);
14571da177e4SLinus Torvalds 	newinet		      = inet_sk(newsk);
14582e6599cbSArnaldo Carvalho de Melo 	ireq		      = inet_rsk(req);
1459c720c7e8SEric Dumazet 	newinet->inet_daddr   = ireq->rmt_addr;
1460c720c7e8SEric Dumazet 	newinet->inet_rcv_saddr = ireq->loc_addr;
1461c720c7e8SEric Dumazet 	newinet->inet_saddr	      = ireq->loc_addr;
	/* The saved IP options move from the request to the child; the
	 * request's pointer is cleared so that only the child refers to them.
	 */
1462f6d8bd05SEric Dumazet 	inet_opt	      = ireq->opt;
1463f6d8bd05SEric Dumazet 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
14642e6599cbSArnaldo Carvalho de Melo 	ireq->opt	      = NULL;
1465463c84b9SArnaldo Carvalho de Melo 	newinet->mc_index     = inet_iif(skb);
1466eddc9ec5SArnaldo Carvalho de Melo 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
14674c507d28SJiri Benc 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1468d83d8461SArnaldo Carvalho de Melo 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1469f6d8bd05SEric Dumazet 	if (inet_opt)
1470f6d8bd05SEric Dumazet 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1471c720c7e8SEric Dumazet 	newinet->inet_id = newtp->write_seq ^ jiffies;
14721da177e4SLinus Torvalds 
	/* Only look up a route when the caller did not supply one. */
14730e734419SDavid S. Miller 	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
14740e734419SDavid S. Miller 		goto put_and_exit;
14750e734419SDavid S. Miller 
14760e734419SDavid S. Miller 	sk_setup_caps(newsk, dst);
14770e734419SDavid S. Miller 
14785d424d5aSJohn Heffner 	tcp_mtup_init(newsk);
14791da177e4SLinus Torvalds 	tcp_sync_mss(newsk, dst_mtu(dst));
14800dbaee3bSDavid S. Miller 	newtp->advmss = dst_metric_advmss(dst);
	/* A user_mss set on the listener caps the advertised MSS. */
1481f5fff5dcSTom Quetchenbach 	if (tcp_sk(sk)->rx_opt.user_mss &&
1482f5fff5dcSTom Quetchenbach 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1483f5fff5dcSTom Quetchenbach 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1484f5fff5dcSTom Quetchenbach 
14851da177e4SLinus Torvalds 	tcp_initialize_rcv_mss(newsk);
	/* Feed the time elapsed since the SYN-ACK was stamped into the RTT
	 * estimator, if a stamp was recorded.
	 */
14869ad7c049SJerry Chu 	if (tcp_rsk(req)->snt_synack)
14879ad7c049SJerry Chu 		tcp_valid_rtt_meas(newsk,
14889ad7c049SJerry Chu 		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
14899ad7c049SJerry Chu 	newtp->total_retrans = req->retrans;
14901da177e4SLinus Torvalds 
1491cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1492cfb6eeb4SYOSHIFUJI Hideaki 	/* Copy over the MD5 key from the original socket */
1493a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1494a915da9bSEric Dumazet 				AF_INET);
1495c720c7e8SEric Dumazet 	if (key != NULL) {
1496cfb6eeb4SYOSHIFUJI Hideaki 		/*
1497cfb6eeb4SYOSHIFUJI Hideaki 		 * We're using one, so create a matching key
1498cfb6eeb4SYOSHIFUJI Hideaki 		 * on the newsk structure. If we fail to get
1499cfb6eeb4SYOSHIFUJI Hideaki 		 * memory, then we end up not copying the key
1500cfb6eeb4SYOSHIFUJI Hideaki 		 * across. Shucks.
1501cfb6eeb4SYOSHIFUJI Hideaki 		 */
1502a915da9bSEric Dumazet 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1503a915da9bSEric Dumazet 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		/* NOTE(review): GSO is masked off for MD5-signed sockets,
		 * presumably because each segment needs its own signature;
		 * confirm.
		 */
1504a465419bSEric Dumazet 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1505cfb6eeb4SYOSHIFUJI Hideaki 	}
1506cfb6eeb4SYOSHIFUJI Hideaki #endif
1507cfb6eeb4SYOSHIFUJI Hideaki 
15080e734419SDavid S. Miller 	if (__inet_inherit_port(sk, newsk) < 0)
15090e734419SDavid S. Miller 		goto put_and_exit;
15109327f705SEric Dumazet 	__inet_hash_nolisten(newsk, NULL);
15111da177e4SLinus Torvalds 
15121da177e4SLinus Torvalds 	return newsk;
15131da177e4SLinus Torvalds 
	/* Failure before a child exists: count the overflow if that was the
	 * cause, drop the caller's route reference, then count the drop.
	 */
15141da177e4SLinus Torvalds exit_overflow:
1515de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1516093d2823SBalazs Scheidler exit_nonewsk:
1517093d2823SBalazs Scheidler 	dst_release(dst);
15181da177e4SLinus Torvalds exit:
1519de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
15201da177e4SLinus Torvalds 	return NULL;
	/* Failure after the child was created: stop its timers, release its
	 * congestion control state, unlock it and drop our reference.
	 * NOTE(review): the unlock assumes tcp_create_openreq_child() hands
	 * back newsk locked; confirm.  dst is not released on this path.
	 */
15210e734419SDavid S. Miller put_and_exit:
1522709e8697SEric Dumazet 	tcp_clear_xmit_timers(newsk);
1523d8a6e65fSEric Dumazet 	tcp_cleanup_congestion_control(newsk);
1524918eb399SEric Dumazet 	bh_unlock_sock(newsk);
15250e734419SDavid S. Miller 	sock_put(newsk);
15260e734419SDavid S. Miller 	goto exit;
15271da177e4SLinus Torvalds }
15284bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
15291da177e4SLinus Torvalds 
15301da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
15311da177e4SLinus Torvalds {
1532aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
1533eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
15341da177e4SLinus Torvalds 	struct sock *nsk;
153560236fddSArnaldo Carvalho de Melo 	struct request_sock **prev;
15361da177e4SLinus Torvalds 	/* Find possible connection requests. */
1537463c84b9SArnaldo Carvalho de Melo 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
15381da177e4SLinus Torvalds 						       iph->saddr, iph->daddr);
15391da177e4SLinus Torvalds 	if (req)
15401da177e4SLinus Torvalds 		return tcp_check_req(sk, skb, req, prev);
15411da177e4SLinus Torvalds 
15423b1e0a65SYOSHIFUJI Hideaki 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1543c67499c0SPavel Emelyanov 			th->source, iph->daddr, th->dest, inet_iif(skb));
15441da177e4SLinus Torvalds 
15451da177e4SLinus Torvalds 	if (nsk) {
15461da177e4SLinus Torvalds 		if (nsk->sk_state != TCP_TIME_WAIT) {
15471da177e4SLinus Torvalds 			bh_lock_sock(nsk);
15481da177e4SLinus Torvalds 			return nsk;
15491da177e4SLinus Torvalds 		}
15509469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(nsk));
15511da177e4SLinus Torvalds 		return NULL;
15521da177e4SLinus Torvalds 	}
15531da177e4SLinus Torvalds 
15541da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
1555af9b4738SFlorian Westphal 	if (!th->syn)
15561da177e4SLinus Torvalds 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
15571da177e4SLinus Torvalds #endif
15581da177e4SLinus Torvalds 	return sk;
15591da177e4SLinus Torvalds }
15601da177e4SLinus Torvalds 
1561b51655b9SAl Viro static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
15621da177e4SLinus Torvalds {
1563eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1564eddc9ec5SArnaldo Carvalho de Melo 
156584fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1566eddc9ec5SArnaldo Carvalho de Melo 		if (!tcp_v4_check(skb->len, iph->saddr,
1567eddc9ec5SArnaldo Carvalho de Melo 				  iph->daddr, skb->csum)) {
15681da177e4SLinus Torvalds 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1569fb286bb2SHerbert Xu 			return 0;
1570fb286bb2SHerbert Xu 		}
1571fb286bb2SHerbert Xu 	}
1572fb286bb2SHerbert Xu 
1573eddc9ec5SArnaldo Carvalho de Melo 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1574fb286bb2SHerbert Xu 				       skb->len, IPPROTO_TCP, 0);
1575fb286bb2SHerbert Xu 
1576fb286bb2SHerbert Xu 	if (skb->len <= 76) {
1577fb286bb2SHerbert Xu 		return __skb_checksum_complete(skb);
15781da177e4SLinus Torvalds 	}
15791da177e4SLinus Torvalds 	return 0;
15801da177e4SLinus Torvalds }
15811da177e4SLinus Torvalds 
15821da177e4SLinus Torvalds 
15831da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get
15841da177e4SLinus Torvalds  * here.
15851da177e4SLinus Torvalds  *
15861da177e4SLinus Torvalds  * We have a potential double-lock case here, so even when
15871da177e4SLinus Torvalds  * doing backlog processing we use the BH locking scheme.
15881da177e4SLinus Torvalds  * This is because we cannot sleep with the original spinlock
15891da177e4SLinus Torvalds  * held.
15901da177e4SLinus Torvalds  */
15911da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
15921da177e4SLinus Torvalds {
1593cfb6eeb4SYOSHIFUJI Hideaki 	struct sock *rsk;
1594cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1595cfb6eeb4SYOSHIFUJI Hideaki 	/*
1596cfb6eeb4SYOSHIFUJI Hideaki 	 * We really want to reject the packet as early as possible
1597cfb6eeb4SYOSHIFUJI Hideaki 	 * if:
1598cfb6eeb4SYOSHIFUJI Hideaki 	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1599cfb6eeb4SYOSHIFUJI Hideaki 	 *  o There is an MD5 option and we're not expecting one
1600cfb6eeb4SYOSHIFUJI Hideaki 	 */
1601cfb6eeb4SYOSHIFUJI Hideaki 	if (tcp_v4_inbound_md5_hash(sk, skb))
1602cfb6eeb4SYOSHIFUJI Hideaki 		goto discard;
1603cfb6eeb4SYOSHIFUJI Hideaki #endif
1604cfb6eeb4SYOSHIFUJI Hideaki 
16051da177e4SLinus Torvalds 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1606bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1607aa8223c7SArnaldo Carvalho de Melo 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1608cfb6eeb4SYOSHIFUJI Hideaki 			rsk = sk;
16091da177e4SLinus Torvalds 			goto reset;
1610cfb6eeb4SYOSHIFUJI Hideaki 		}
16111da177e4SLinus Torvalds 		return 0;
16121da177e4SLinus Torvalds 	}
16131da177e4SLinus Torvalds 
1614ab6a5bb6SArnaldo Carvalho de Melo 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
16151da177e4SLinus Torvalds 		goto csum_err;
16161da177e4SLinus Torvalds 
16171da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
16181da177e4SLinus Torvalds 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
16191da177e4SLinus Torvalds 		if (!nsk)
16201da177e4SLinus Torvalds 			goto discard;
16211da177e4SLinus Torvalds 
16221da177e4SLinus Torvalds 		if (nsk != sk) {
1623bdeab991STom Herbert 			sock_rps_save_rxhash(nsk, skb);
1624cfb6eeb4SYOSHIFUJI Hideaki 			if (tcp_child_process(sk, nsk, skb)) {
1625cfb6eeb4SYOSHIFUJI Hideaki 				rsk = nsk;
16261da177e4SLinus Torvalds 				goto reset;
1627cfb6eeb4SYOSHIFUJI Hideaki 			}
16281da177e4SLinus Torvalds 			return 0;
16291da177e4SLinus Torvalds 		}
1630ca55158cSEric Dumazet 	} else
1631bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1632ca55158cSEric Dumazet 
1633aa8223c7SArnaldo Carvalho de Melo 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1634cfb6eeb4SYOSHIFUJI Hideaki 		rsk = sk;
16351da177e4SLinus Torvalds 		goto reset;
1636cfb6eeb4SYOSHIFUJI Hideaki 	}
16371da177e4SLinus Torvalds 	return 0;
16381da177e4SLinus Torvalds 
16391da177e4SLinus Torvalds reset:
1640cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_reset(rsk, skb);
16411da177e4SLinus Torvalds discard:
16421da177e4SLinus Torvalds 	kfree_skb(skb);
16431da177e4SLinus Torvalds 	/* Be careful here. If this function gets more complicated and
16441da177e4SLinus Torvalds 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
16451da177e4SLinus Torvalds 	 * might be destroyed here. This current version compiles correctly,
16461da177e4SLinus Torvalds 	 * but you have been warned.
16471da177e4SLinus Torvalds 	 */
16481da177e4SLinus Torvalds 	return 0;
16491da177e4SLinus Torvalds 
16501da177e4SLinus Torvalds csum_err:
165163231bddSPavel Emelyanov 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
16521da177e4SLinus Torvalds 	goto discard;
16531da177e4SLinus Torvalds }
16544bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv);
16551da177e4SLinus Torvalds 
16561da177e4SLinus Torvalds /*
16571da177e4SLinus Torvalds  *	From tcp_input.c
16581da177e4SLinus Torvalds  */
16591da177e4SLinus Torvalds 
16601da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb)
16611da177e4SLinus Torvalds {
1662eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
1663cf533ea5SEric Dumazet 	const struct tcphdr *th;
16641da177e4SLinus Torvalds 	struct sock *sk;
16651da177e4SLinus Torvalds 	int ret;
1666a86b1e30SPavel Emelyanov 	struct net *net = dev_net(skb->dev);
16671da177e4SLinus Torvalds 
16681da177e4SLinus Torvalds 	if (skb->pkt_type != PACKET_HOST)
16691da177e4SLinus Torvalds 		goto discard_it;
16701da177e4SLinus Torvalds 
16711da177e4SLinus Torvalds 	/* Count it even if it's bad */
167263231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
16731da177e4SLinus Torvalds 
16741da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
16751da177e4SLinus Torvalds 		goto discard_it;
16761da177e4SLinus Torvalds 
1677aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
16781da177e4SLinus Torvalds 
16791da177e4SLinus Torvalds 	if (th->doff < sizeof(struct tcphdr) / 4)
16801da177e4SLinus Torvalds 		goto bad_packet;
16811da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, th->doff * 4))
16821da177e4SLinus Torvalds 		goto discard_it;
16831da177e4SLinus Torvalds 
16841da177e4SLinus Torvalds 	/* An explanation is required here, I think.
16851da177e4SLinus Torvalds 	 * Packet length and doff are validated by header prediction,
1686caa20d9aSStephen Hemminger 	 * provided case of th->doff==0 is eliminated.
16871da177e4SLinus Torvalds 	 * So, we defer the checks. */
168860476372SHerbert Xu 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
16891da177e4SLinus Torvalds 		goto bad_packet;
16901da177e4SLinus Torvalds 
1691aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
1692eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
16931da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
16941da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
16951da177e4SLinus Torvalds 				    skb->len - th->doff * 4);
16961da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
16971da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->when	 = 0;
1698b82d1bb4SEric Dumazet 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
16991da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->sacked	 = 0;
17001da177e4SLinus Torvalds 
17019a1f27c4SArnaldo Carvalho de Melo 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
17021da177e4SLinus Torvalds 	if (!sk)
17031da177e4SLinus Torvalds 		goto no_tcp_socket;
17041da177e4SLinus Torvalds 
1705bb134d5dSEric Dumazet process:
1706bb134d5dSEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
1707bb134d5dSEric Dumazet 		goto do_time_wait;
1708bb134d5dSEric Dumazet 
17096cce09f8SEric Dumazet 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
17106cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1711d218d111SStephen Hemminger 		goto discard_and_relse;
17126cce09f8SEric Dumazet 	}
1713d218d111SStephen Hemminger 
17141da177e4SLinus Torvalds 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
17151da177e4SLinus Torvalds 		goto discard_and_relse;
1716b59c2701SPatrick McHardy 	nf_reset(skb);
17171da177e4SLinus Torvalds 
1718fda9ef5dSDmitry Mishin 	if (sk_filter(sk, skb))
17191da177e4SLinus Torvalds 		goto discard_and_relse;
17201da177e4SLinus Torvalds 
17211da177e4SLinus Torvalds 	skb->dev = NULL;
17221da177e4SLinus Torvalds 
1723c6366184SIngo Molnar 	bh_lock_sock_nested(sk);
17241da177e4SLinus Torvalds 	ret = 0;
17251da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk)) {
17261a2449a8SChris Leech #ifdef CONFIG_NET_DMA
17271a2449a8SChris Leech 		struct tcp_sock *tp = tcp_sk(sk);
17281a2449a8SChris Leech 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1729f67b4599SDan Williams 			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
17301a2449a8SChris Leech 		if (tp->ucopy.dma_chan)
17311a2449a8SChris Leech 			ret = tcp_v4_do_rcv(sk, skb);
17321a2449a8SChris Leech 		else
17331a2449a8SChris Leech #endif
17341a2449a8SChris Leech 		{
17351da177e4SLinus Torvalds 			if (!tcp_prequeue(sk, skb))
17361da177e4SLinus Torvalds 				ret = tcp_v4_do_rcv(sk, skb);
17371a2449a8SChris Leech 		}
17386cce09f8SEric Dumazet 	} else if (unlikely(sk_add_backlog(sk, skb))) {
17396b03a53aSZhu Yi 		bh_unlock_sock(sk);
17406cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
17416b03a53aSZhu Yi 		goto discard_and_relse;
17426b03a53aSZhu Yi 	}
17431da177e4SLinus Torvalds 	bh_unlock_sock(sk);
17441da177e4SLinus Torvalds 
17451da177e4SLinus Torvalds 	sock_put(sk);
17461da177e4SLinus Torvalds 
17471da177e4SLinus Torvalds 	return ret;
17481da177e4SLinus Torvalds 
17491da177e4SLinus Torvalds no_tcp_socket:
17501da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
17511da177e4SLinus Torvalds 		goto discard_it;
17521da177e4SLinus Torvalds 
17531da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
17541da177e4SLinus Torvalds bad_packet:
175563231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
17561da177e4SLinus Torvalds 	} else {
1757cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_send_reset(NULL, skb);
17581da177e4SLinus Torvalds 	}
17591da177e4SLinus Torvalds 
17601da177e4SLinus Torvalds discard_it:
17611da177e4SLinus Torvalds 	/* Discard frame. */
17621da177e4SLinus Torvalds 	kfree_skb(skb);
17631da177e4SLinus Torvalds 	return 0;
17641da177e4SLinus Torvalds 
17651da177e4SLinus Torvalds discard_and_relse:
17661da177e4SLinus Torvalds 	sock_put(sk);
17671da177e4SLinus Torvalds 	goto discard_it;
17681da177e4SLinus Torvalds 
17691da177e4SLinus Torvalds do_time_wait:
17701da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
17719469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
17721da177e4SLinus Torvalds 		goto discard_it;
17731da177e4SLinus Torvalds 	}
17741da177e4SLinus Torvalds 
17751da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
177663231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
17779469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
17781da177e4SLinus Torvalds 		goto discard_it;
17791da177e4SLinus Torvalds 	}
17809469c7b4SYOSHIFUJI Hideaki 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
17811da177e4SLinus Torvalds 	case TCP_TW_SYN: {
1782c346dca1SYOSHIFUJI Hideaki 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1783c67499c0SPavel Emelyanov 							&tcp_hashinfo,
1784eddc9ec5SArnaldo Carvalho de Melo 							iph->daddr, th->dest,
1785463c84b9SArnaldo Carvalho de Melo 							inet_iif(skb));
17861da177e4SLinus Torvalds 		if (sk2) {
17879469c7b4SYOSHIFUJI Hideaki 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
17889469c7b4SYOSHIFUJI Hideaki 			inet_twsk_put(inet_twsk(sk));
17891da177e4SLinus Torvalds 			sk = sk2;
17901da177e4SLinus Torvalds 			goto process;
17911da177e4SLinus Torvalds 		}
17921da177e4SLinus Torvalds 		/* Fall through to ACK */
17931da177e4SLinus Torvalds 	}
17941da177e4SLinus Torvalds 	case TCP_TW_ACK:
17951da177e4SLinus Torvalds 		tcp_v4_timewait_ack(sk, skb);
17961da177e4SLinus Torvalds 		break;
17971da177e4SLinus Torvalds 	case TCP_TW_RST:
17981da177e4SLinus Torvalds 		goto no_tcp_socket;
17991da177e4SLinus Torvalds 	case TCP_TW_SUCCESS:;
18001da177e4SLinus Torvalds 	}
18011da177e4SLinus Torvalds 	goto discard_it;
18021da177e4SLinus Torvalds }
18031da177e4SLinus Torvalds 
/*
 * Return the inet_peer entry for the destination address of @sk.
 *
 * If the socket has no cached route, or the destination recorded in the
 * corked flow differs from inet->inet_daddr, the peer is looked up directly
 * with inet_getpeer_v4() and *release_it is set to true.  Otherwise the peer
 * attached to the cached route is used (one is bound first if the route has
 * none yet) and *release_it is set to false.
 *
 * NOTE(review): *release_it presumably tells the caller whether it owns a
 * reference it must drop afterwards -- confirm against the callers.
 */
18043f419d2dSDavid S. Miller struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
18051da177e4SLinus Torvalds {
18061da177e4SLinus Torvalds 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
18073f419d2dSDavid S. Miller 	struct inet_sock *inet = inet_sk(sk);
18083f419d2dSDavid S. Miller 	struct inet_peer *peer;
18091da177e4SLinus Torvalds 
	/* Cached route is unusable: absent, or the flow points elsewhere. */
1810c5216cc7SDavid S. Miller 	if (!rt ||
1811c5216cc7SDavid S. Miller 	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1812b534ecf1SDavid S. Miller 		peer = inet_getpeer_v4(inet->inet_daddr, 1);
18133f419d2dSDavid S. Miller 		*release_it = true;
18141da177e4SLinus Torvalds 	} else {
		/* Attach a peer to the route on first use, then reuse it. */
18151da177e4SLinus Torvalds 		if (!rt->peer)
1816a48eff12SDavid S. Miller 			rt_bind_peer(rt, inet->inet_daddr, 1);
18171da177e4SLinus Torvalds 		peer = rt->peer;
18183f419d2dSDavid S. Miller 		*release_it = false;
18191da177e4SLinus Torvalds 	}
18201da177e4SLinus Torvalds 
18213f419d2dSDavid S. Miller 	return peer;
18221da177e4SLinus Torvalds }
18233f419d2dSDavid S. Miller EXPORT_SYMBOL(tcp_v4_get_peer);
18241da177e4SLinus Torvalds 
/*
 * Look up the inet_peer entry for the remote address (tw_daddr) of a
 * time-wait socket.  @sk is reinterpreted as an inet_timewait_sock via
 * inet_twsk().  Returns whatever inet_getpeer_v4() returns.
 */
1825ccb7c410SDavid S. Miller void *tcp_v4_tw_get_peer(struct sock *sk)
18261da177e4SLinus Torvalds {
1827cf533ea5SEric Dumazet 	const struct inet_timewait_sock *tw = inet_twsk(sk);
18281da177e4SLinus Torvalds 
1829ccb7c410SDavid S. Miller 	return inet_getpeer_v4(tw->tw_daddr, 1);
1830ccb7c410SDavid S. Miller }
1831ccb7c410SDavid S. Miller EXPORT_SYMBOL(tcp_v4_tw_get_peer);
18328feaf0c0SArnaldo Carvalho de Melo 
/*
 * Time-wait socket operations for TCP over IPv4: object size of the
 * time-wait socket plus the uniqueness check, destructor and peer lookup
 * callbacks.
 */
1833ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = {
1834ccb7c410SDavid S. Miller 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1835ccb7c410SDavid S. Miller 	.twsk_unique	= tcp_twsk_unique,
1836ccb7c410SDavid S. Miller 	.twsk_destructor= tcp_twsk_destructor,
1837ccb7c410SDavid S. Miller 	.twsk_getpeer	= tcp_v4_tw_get_peer,
1838ccb7c410SDavid S. Miller };
18391da177e4SLinus Torvalds 
/*
 * Address-family specific connection-socket operations for IPv4.
 * tcp_v4_init_sock() installs this table as icsk->icsk_af_ops.
 * The compat_* socket option handlers exist only with CONFIG_COMPAT.
 */
18403b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = {
18411da177e4SLinus Torvalds 	.queue_xmit	   = ip_queue_xmit,
18421da177e4SLinus Torvalds 	.send_check	   = tcp_v4_send_check,
184332519f11SArnaldo Carvalho de Melo 	.rebuild_header	   = inet_sk_rebuild_header,
18441da177e4SLinus Torvalds 	.conn_request	   = tcp_v4_conn_request,
18451da177e4SLinus Torvalds 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
18463f419d2dSDavid S. Miller 	.get_peer	   = tcp_v4_get_peer,
18471da177e4SLinus Torvalds 	.net_header_len	   = sizeof(struct iphdr),
18481da177e4SLinus Torvalds 	.setsockopt	   = ip_setsockopt,
18491da177e4SLinus Torvalds 	.getsockopt	   = ip_getsockopt,
1850543d9cfeSArnaldo Carvalho de Melo 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1851543d9cfeSArnaldo Carvalho de Melo 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1852ab1e0a13SArnaldo Carvalho de Melo 	.bind_conflict	   = inet_csk_bind_conflict,
18533fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT
18543fdadf7dSDmitry Mishin 	.compat_setsockopt = compat_ip_setsockopt,
18553fdadf7dSDmitry Mishin 	.compat_getsockopt = compat_ip_getsockopt,
18563fdadf7dSDmitry Mishin #endif
18571da177e4SLinus Torvalds };
18584bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific);
18591da177e4SLinus Torvalds 
1860cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 handlers for TCP MD5 signatures: key lookup, hash calculation over an
 * skb, and parsing of keys.  Only built with CONFIG_TCP_MD5SIG;
 * tcp_v4_init_sock() installs it as tp->af_specific.
 */
1861b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1862cfb6eeb4SYOSHIFUJI Hideaki 	.md5_lookup		= tcp_v4_md5_lookup,
186349a72dfbSAdam Langley 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1864cfb6eeb4SYOSHIFUJI Hideaki 	.md5_parse		= tcp_v4_parse_md5_keys,
1865cfb6eeb4SYOSHIFUJI Hideaki };
1866b6332e6cSAndrew Morton #endif
1867cfb6eeb4SYOSHIFUJI Hideaki 
/*
 * Initialise the TCP/IPv4 specific state of a freshly allocated socket:
 * queues and timers, initial RTO and congestion parameters, socket state,
 * the IPv4 operation tables, optional TCP cookie storage, and the default
 * send/receive buffer sizes.  Finally accounts the socket as allocated.
 *
 * Always returns 0.  A failed cookie allocation is not treated as an error;
 * tp->cookie_values is simply left NULL.
 */
18681da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to
18691da177e4SLinus Torvalds  *       sk_alloc() so need not be done here.
18701da177e4SLinus Torvalds  */
18711da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk)
18721da177e4SLinus Torvalds {
18736687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
18741da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
18751da177e4SLinus Torvalds 
18761da177e4SLinus Torvalds 	skb_queue_head_init(&tp->out_of_order_queue);
18771da177e4SLinus Torvalds 	tcp_init_xmit_timers(sk);
18781da177e4SLinus Torvalds 	tcp_prequeue_init(tp);
18791da177e4SLinus Torvalds 
	/* No RTT sample yet: start RTO and mdev at the initial timeout. */
18806687e988SArnaldo Carvalho de Melo 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
18811da177e4SLinus Torvalds 	tp->mdev = TCP_TIMEOUT_INIT;
18821da177e4SLinus Torvalds 
18831da177e4SLinus Torvalds 	/* So many TCP implementations out there (incorrectly) count the
18841da177e4SLinus Torvalds 	 * initial SYN frame in their delayed-ACK and congestion control
18851da177e4SLinus Torvalds 	 * algorithms that we must have the following bandaid to talk
18861da177e4SLinus Torvalds 	 * efficiently to them.  -DaveM
18871da177e4SLinus Torvalds 	 */
18889ad7c049SJerry Chu 	tp->snd_cwnd = TCP_INIT_CWND;
18891da177e4SLinus Torvalds 
18901da177e4SLinus Torvalds 	/* See draft-stevens-tcpca-spec-01 for discussion of the
18911da177e4SLinus Torvalds 	 * initialization of these values.
18921da177e4SLinus Torvalds 	 */
18930b6a05c1SIlpo Järvinen 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
18941da177e4SLinus Torvalds 	tp->snd_cwnd_clamp = ~0;
1895bee7ca9eSWilliam Allen Simpson 	tp->mss_cache = TCP_MSS_DEFAULT;
18961da177e4SLinus Torvalds 
18971da177e4SLinus Torvalds 	tp->reordering = sysctl_tcp_reordering;
18986687e988SArnaldo Carvalho de Melo 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
18991da177e4SLinus Torvalds 
19001da177e4SLinus Torvalds 	sk->sk_state = TCP_CLOSE;
19011da177e4SLinus Torvalds 
19021da177e4SLinus Torvalds 	sk->sk_write_space = sk_stream_write_space;
19031da177e4SLinus Torvalds 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
19041da177e4SLinus Torvalds 
	/* Hook up the IPv4 flavoured operation tables defined in this file. */
19058292a17aSArnaldo Carvalho de Melo 	icsk->icsk_af_ops = &ipv4_specific;
1906d83d8461SArnaldo Carvalho de Melo 	icsk->icsk_sync_mss = tcp_sync_mss;
1907cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1908cfb6eeb4SYOSHIFUJI Hideaki 	tp->af_specific = &tcp_sock_ipv4_specific;
1909cfb6eeb4SYOSHIFUJI Hideaki #endif
19101da177e4SLinus Torvalds 
1911435cf559SWilliam Allen Simpson 	/* TCP Cookie Transactions */
1912435cf559SWilliam Allen Simpson 	if (sysctl_tcp_cookie_size > 0) {
1913435cf559SWilliam Allen Simpson 		/* Default, cookies without s_data_payload. */
1914435cf559SWilliam Allen Simpson 		tp->cookie_values =
1915435cf559SWilliam Allen Simpson 			kzalloc(sizeof(*tp->cookie_values),
1916435cf559SWilliam Allen Simpson 				sk->sk_allocation);
		/* Allocation failure is tolerated; the pointer stays NULL. */
1917435cf559SWilliam Allen Simpson 		if (tp->cookie_values != NULL)
1918435cf559SWilliam Allen Simpson 			kref_init(&tp->cookie_values->kref);
1919435cf559SWilliam Allen Simpson 	}
1920435cf559SWilliam Allen Simpson 	/* Presumed zeroed, in order of appearance:
1921435cf559SWilliam Allen Simpson 	 *	cookie_in_always, cookie_out_never,
1922435cf559SWilliam Allen Simpson 	 *	s_data_constant, s_data_in, s_data_out
1923435cf559SWilliam Allen Simpson 	 */
	/* Index 1 of the wmem/rmem sysctl arrays is used as the default size. */
19241da177e4SLinus Torvalds 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
19251da177e4SLinus Torvalds 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
19261da177e4SLinus Torvalds 
	/*
	 * Accounting is done with bottom halves disabled.
	 * NOTE(review): presumably because the counters are also touched
	 * from softirq context -- confirm.
	 */
1927eb4dea58SHerbert Xu 	local_bh_disable();
1928d1a4c0b3SGlauber Costa 	sock_update_memcg(sk);
1929180d8cd9SGlauber Costa 	sk_sockets_allocated_inc(sk);
1930eb4dea58SHerbert Xu 	local_bh_enable();
19311da177e4SLinus Torvalds 
19321da177e4SLinus Torvalds 	return 0;
19331da177e4SLinus Torvalds }
19341da177e4SLinus Torvalds 
/*
 * Release the TCP specific resources of @sk: timers, congestion control
 * state, queued skbs (write, out-of-order, async-wait and prequeue), MD5
 * keys, the bound port, the cached sendmsg page and the cookie storage.
 * Ends by undoing the allocation accounting done in tcp_v4_init_sock().
 */
19357d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk)
19361da177e4SLinus Torvalds {
19371da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
19381da177e4SLinus Torvalds 
19391da177e4SLinus Torvalds 	tcp_clear_xmit_timers(sk);
19401da177e4SLinus Torvalds 
19416687e988SArnaldo Carvalho de Melo 	tcp_cleanup_congestion_control(sk);
1942317a76f9SStephen Hemminger 
19431da177e4SLinus Torvalds 	/* Clean up the write buffer. */
1944fe067e8aSDavid S. Miller 	tcp_write_queue_purge(sk);
19451da177e4SLinus Torvalds 
19461da177e4SLinus Torvalds 	/* Cleans up our, hopefully empty, out_of_order_queue. */
19471da177e4SLinus Torvalds 	__skb_queue_purge(&tp->out_of_order_queue);
19481da177e4SLinus Torvalds 
1949cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1950cfb6eeb4SYOSHIFUJI Hideaki 	/* Clean up the MD5 key list, if any */
1951cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info) {
1952a915da9bSEric Dumazet 		tcp_clear_md5_list(sk);
		/* The container itself is freed via kfree_rcu(). */
1953a8afca03SEric Dumazet 		kfree_rcu(tp->md5sig_info, rcu);
1954cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = NULL;
1955cfb6eeb4SYOSHIFUJI Hideaki 	}
1956cfb6eeb4SYOSHIFUJI Hideaki #endif
1957cfb6eeb4SYOSHIFUJI Hideaki 
19581a2449a8SChris Leech #ifdef CONFIG_NET_DMA
19591a2449a8SChris Leech 	/* Cleans up our sk_async_wait_queue */
19601a2449a8SChris Leech 	__skb_queue_purge(&sk->sk_async_wait_queue);
19611a2449a8SChris Leech #endif
19621a2449a8SChris Leech 
19631da177e4SLinus Torvalds 	/* Clean prequeue, it must be empty really */
19641da177e4SLinus Torvalds 	__skb_queue_purge(&tp->ucopy.prequeue);
19651da177e4SLinus Torvalds 
19661da177e4SLinus Torvalds 	/* Clean up a referenced TCP bind bucket. */
1967463c84b9SArnaldo Carvalho de Melo 	if (inet_csk(sk)->icsk_bind_hash)
1968ab1e0a13SArnaldo Carvalho de Melo 		inet_put_port(sk);
19691da177e4SLinus Torvalds 
19701da177e4SLinus Torvalds 	/*
19711da177e4SLinus Torvalds 	 * If sendmsg cached page exists, toss it.
19721da177e4SLinus Torvalds 	 */
19731da177e4SLinus Torvalds 	if (sk->sk_sndmsg_page) {
19741da177e4SLinus Torvalds 		__free_page(sk->sk_sndmsg_page);
19751da177e4SLinus Torvalds 		sk->sk_sndmsg_page = NULL;
19761da177e4SLinus Torvalds 	}
19771da177e4SLinus Torvalds 
1978435cf559SWilliam Allen Simpson 	/* TCP Cookie Transactions */
1979435cf559SWilliam Allen Simpson 	if (tp->cookie_values != NULL) {
		/* Drop our reference; the release callback runs on the last put. */
1980435cf559SWilliam Allen Simpson 		kref_put(&tp->cookie_values->kref,
1981435cf559SWilliam Allen Simpson 			 tcp_cookie_values_release);
1982435cf559SWilliam Allen Simpson 		tp->cookie_values = NULL;
1983435cf559SWilliam Allen Simpson 	}
1984435cf559SWilliam Allen Simpson 
	/* Counterpart of the accounting performed in tcp_v4_init_sock(). */
1985180d8cd9SGlauber Costa 	sk_sockets_allocated_dec(sk);
1986d1a4c0b3SGlauber Costa 	sock_release_memcg(sk);
19871da177e4SLinus Torvalds }
19881da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock);
19891da177e4SLinus Torvalds 
19901da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
19911da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */
19921da177e4SLinus Torvalds 
/*
 * Return the first time-wait socket on a nulls hash chain, or NULL if the
 * chain is empty.
 */
19933ab5aee7SEric Dumazet static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
19941da177e4SLinus Torvalds {
19953ab5aee7SEric Dumazet 	return hlist_nulls_empty(head) ? NULL :
19968feaf0c0SArnaldo Carvalho de Melo 		list_entry(head->first, struct inet_timewait_sock, tw_node);
19971da177e4SLinus Torvalds }
19981da177e4SLinus Torvalds 
/*
 * Return the time-wait socket that follows @tw on its hash chain, or NULL
 * when the next pointer is the chain's nulls end marker.
 */
19998feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
20001da177e4SLinus Torvalds {
20013ab5aee7SEric Dumazet 	return !is_a_nulls(tw->tw_node.next) ?
20023ab5aee7SEric Dumazet 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
20031da177e4SLinus Torvalds }
20041da177e4SLinus Torvalds 
2005a8b690f9STom Herbert /*
2006a8b690f9STom Herbert  * Get the next listener socket following cur.  If cur is NULL, get first socket
2007a8b690f9STom Herbert  * starting from bucket given in st->bucket; when st->bucket is zero the
2008a8b690f9STom Herbert  * very first socket in the hash table is returned.
 *
 * The returned entry is either a listening socket (st->state ==
 * TCP_SEQ_STATE_LISTENING) or a pending request_sock of a listener
 * (st->state == TCP_SEQ_STATE_OPENREQ, listener kept in st->syn_wait_sk).
 *
 * Locking: whenever a non-NULL entry is returned, the lock of listening
 * bucket st->bucket is held; for a request_sock the listener's
 * syn_wait_lock is additionally held for reading.  When NULL is returned
 * every lock taken here has been dropped.  tcp_seq_stop() releases whatever
 * is still held.
2009a8b690f9STom Herbert  */
20101da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur)
20111da177e4SLinus Torvalds {
2012463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk;
2013c25eb3bfSEric Dumazet 	struct hlist_nulls_node *node;
20141da177e4SLinus Torvalds 	struct sock *sk = cur;
20155caea4eaSEric Dumazet 	struct inet_listen_hashbucket *ilb;
20161da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2017a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
20181da177e4SLinus Torvalds 
	/* Fresh start: lock bucket st->bucket and begin at its head. */
20191da177e4SLinus Torvalds 	if (!sk) {
2020a8b690f9STom Herbert 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
20215caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
2022c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
2023a8b690f9STom Herbert 		st->offset = 0;
20241da177e4SLinus Torvalds 		goto get_sk;
20251da177e4SLinus Torvalds 	}
	/* Continuing: the bucket lock is already held from the previous call. */
20265caea4eaSEric Dumazet 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
20271da177e4SLinus Torvalds 	++st->num;
2028a8b690f9STom Herbert 	++st->offset;
20291da177e4SLinus Torvalds 
20301da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* cur is a request_sock of listener st->syn_wait_sk. */
203160236fddSArnaldo Carvalho de Melo 		struct request_sock *req = cur;
20321da177e4SLinus Torvalds 
2033463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(st->syn_wait_sk);
20341da177e4SLinus Torvalds 		req = req->dl_next;
		/*
		 * Walk the rest of the current syn_table chain, then each
		 * following slot, until a request of the wanted family turns
		 * up or the table is exhausted.
		 */
20351da177e4SLinus Torvalds 		while (1) {
20361da177e4SLinus Torvalds 			while (req) {
2037bdccc4caSDaniel Lezcano 				if (req->rsk_ops->family == st->family) {
20381da177e4SLinus Torvalds 					cur = req;
20391da177e4SLinus Torvalds 					goto out;
20401da177e4SLinus Torvalds 				}
20411da177e4SLinus Torvalds 				req = req->dl_next;
20421da177e4SLinus Torvalds 			}
204372a3effaSEric Dumazet 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
20441da177e4SLinus Torvalds 				break;
			/* Also entered from start_req below, with st->sbucket == 0. */
20451da177e4SLinus Torvalds get_req:
2046463c84b9SArnaldo Carvalho de Melo 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
20471da177e4SLinus Torvalds 		}
		/* Requests exhausted: resume with the socket after the listener. */
20481bde5ac4SEric Dumazet 		sk	  = sk_nulls_next(st->syn_wait_sk);
20491da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_LISTENING;
2050463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20511da177e4SLinus Torvalds 	} else {
		/*
		 * cur is the listening socket returned last time; its pending
		 * requests, if any, are reported before moving on.
		 */
2052463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
2053463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2054463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
20551da177e4SLinus Torvalds 			goto start_req;
2056463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20571bde5ac4SEric Dumazet 		sk = sk_nulls_next(sk);
20581da177e4SLinus Torvalds 	}
20591da177e4SLinus Torvalds get_sk:
2060c25eb3bfSEric Dumazet 	sk_nulls_for_each_from(sk, node) {
20618475ef9fSPavel Emelyanov 		if (!net_eq(sock_net(sk), net))
20628475ef9fSPavel Emelyanov 			continue;
20638475ef9fSPavel Emelyanov 		if (sk->sk_family == st->family) {
20641da177e4SLinus Torvalds 			cur = sk;
20651da177e4SLinus Torvalds 			goto out;
20661da177e4SLinus Torvalds 		}
		/*
		 * Listener of a different family: it is not reported itself,
		 * but its request queue is still scanned for requests of the
		 * wanted family.
		 */
2067463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
2068463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2069463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
			/* Reached with syn_wait_lock read-held on sk's queue. */
20701da177e4SLinus Torvalds start_req:
20711da177e4SLinus Torvalds 			st->uid		= sock_i_uid(sk);
20721da177e4SLinus Torvalds 			st->syn_wait_sk = sk;
20731da177e4SLinus Torvalds 			st->state	= TCP_SEQ_STATE_OPENREQ;
20741da177e4SLinus Torvalds 			st->sbucket	= 0;
20751da177e4SLinus Torvalds 			goto get_req;
20761da177e4SLinus Torvalds 		}
2077463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20781da177e4SLinus Torvalds 	}
	/* End of this bucket: drop its lock and move on to the next one. */
20795caea4eaSEric Dumazet 	spin_unlock_bh(&ilb->lock);
2080a8b690f9STom Herbert 	st->offset = 0;
20810f7ff927SArnaldo Carvalho de Melo 	if (++st->bucket < INET_LHTABLE_SIZE) {
20825caea4eaSEric Dumazet 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
20835caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
2084c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
20851da177e4SLinus Torvalds 		goto get_sk;
20861da177e4SLinus Torvalds 	}
	/* Listening table exhausted; no lock is held at this point. */
20871da177e4SLinus Torvalds 	cur = NULL;
20881da177e4SLinus Torvalds out:
20891da177e4SLinus Torvalds 	return cur;
20901da177e4SLinus Torvalds }
20911da177e4SLinus Torvalds 
/*
 * Return the listening-table entry at index *pos, counting from the start
 * of the table, or NULL if the table holds fewer entries.
 *
 * *pos is decremented once per entry stepped over, so on a NULL return it
 * holds the part of the index that was not consumed; tcp_get_idx() passes
 * that remainder on to the established table.
 */
20921da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
20931da177e4SLinus Torvalds {
2094a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2095a8b690f9STom Herbert 	void *rc;
2096a8b690f9STom Herbert 
2097a8b690f9STom Herbert 	st->bucket = 0;
2098a8b690f9STom Herbert 	st->offset = 0;
2099a8b690f9STom Herbert 	rc = listening_get_next(seq, NULL);
21001da177e4SLinus Torvalds 
21011da177e4SLinus Torvalds 	while (rc && *pos) {
21021da177e4SLinus Torvalds 		rc = listening_get_next(seq, rc);
21031da177e4SLinus Torvalds 		--*pos;
21041da177e4SLinus Torvalds 	}
21051da177e4SLinus Torvalds 	return rc;
21061da177e4SLinus Torvalds }
21071da177e4SLinus Torvalds 
/*
 * Return non-zero when both the established chain and the time-wait chain
 * of ehash bucket st->bucket are empty.  No lock is taken here.
 */
21086eac5604SAndi Kleen static inline int empty_bucket(struct tcp_iter_state *st)
21096eac5604SAndi Kleen {
21103ab5aee7SEric Dumazet 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
21113ab5aee7SEric Dumazet 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
21126eac5604SAndi Kleen }
21136eac5604SAndi Kleen 
2114a8b690f9STom Herbert /*
2115a8b690f9STom Herbert  * Get first established socket starting from bucket given in st->bucket.
2116a8b690f9STom Herbert  * If st->bucket is zero, the very first socket in the hash is returned.
 *
 * Within a bucket the established chain is scanned before the time-wait
 * chain; only entries of family st->family in the seq_file's namespace
 * match.  st->state is set to TCP_SEQ_STATE_TIME_WAIT when a time-wait
 * socket is returned; it is not set on entry, so the caller is expected to
 * have set TCP_SEQ_STATE_ESTABLISHED beforehand.
 *
 * Locking: on a non-NULL return the lock of ehash bucket st->bucket is
 * held; on a NULL return no lock is held.
2117a8b690f9STom Herbert  */
21181da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq)
21191da177e4SLinus Torvalds {
21201da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2121a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
21221da177e4SLinus Torvalds 	void *rc = NULL;
21231da177e4SLinus Torvalds 
2124a8b690f9STom Herbert 	st->offset = 0;
2125a8b690f9STom Herbert 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
21261da177e4SLinus Torvalds 		struct sock *sk;
21273ab5aee7SEric Dumazet 		struct hlist_nulls_node *node;
21288feaf0c0SArnaldo Carvalho de Melo 		struct inet_timewait_sock *tw;
21299db66bdcSEric Dumazet 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
21301da177e4SLinus Torvalds 
21316eac5604SAndi Kleen 		/* Lockless fast path for the common case of empty buckets */
21326eac5604SAndi Kleen 		if (empty_bucket(st))
21336eac5604SAndi Kleen 			continue;
21346eac5604SAndi Kleen 
21359db66bdcSEric Dumazet 		spin_lock_bh(lock);
21363ab5aee7SEric Dumazet 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2137f40c8174SDaniel Lezcano 			if (sk->sk_family != st->family ||
2138878628fbSYOSHIFUJI Hideaki 			    !net_eq(sock_net(sk), net)) {
21391da177e4SLinus Torvalds 				continue;
21401da177e4SLinus Torvalds 			}
21411da177e4SLinus Torvalds 			rc = sk;
21421da177e4SLinus Torvalds 			goto out;
21431da177e4SLinus Torvalds 		}
		/* No match on the established chain; try the time-wait chain. */
21441da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_TIME_WAIT;
21458feaf0c0SArnaldo Carvalho de Melo 		inet_twsk_for_each(tw, node,
2146dbca9b27SEric Dumazet 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
214728518fc1SPavel Emelyanov 			if (tw->tw_family != st->family ||
2148878628fbSYOSHIFUJI Hideaki 			    !net_eq(twsk_net(tw), net)) {
21491da177e4SLinus Torvalds 				continue;
21501da177e4SLinus Torvalds 			}
21511da177e4SLinus Torvalds 			rc = tw;
21521da177e4SLinus Torvalds 			goto out;
21531da177e4SLinus Torvalds 		}
		/* Nothing in this bucket: unlock and restore the state. */
21549db66bdcSEric Dumazet 		spin_unlock_bh(lock);
21551da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
21561da177e4SLinus Torvalds 	}
21571da177e4SLinus Torvalds out:
21581da177e4SLinus Torvalds 	return rc;
21591da177e4SLinus Torvalds }
21601da177e4SLinus Torvalds 
/*
 * Return the entry that follows @cur in the established hash table, or NULL
 * at the end of the table.
 *
 * @cur is a struct sock when st->state is TCP_SEQ_STATE_ESTABLISHED and a
 * struct inet_timewait_sock when it is TCP_SEQ_STATE_TIME_WAIT.  Within a
 * bucket the established chain is walked first, then the time-wait chain;
 * after that the next non-empty bucket is entered.  st->num and st->offset
 * are incremented, and st->offset restarts at 0 in a new bucket.
 *
 * Locking: called with the lock of bucket st->bucket held.  On a non-NULL
 * return the lock of the (possibly new) st->bucket is held; on a NULL
 * return it has been released.
 */
21611da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur)
21621da177e4SLinus Torvalds {
21631da177e4SLinus Torvalds 	struct sock *sk = cur;
21648feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw;
21653ab5aee7SEric Dumazet 	struct hlist_nulls_node *node;
21661da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2167a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
21681da177e4SLinus Torvalds 
21691da177e4SLinus Torvalds 	++st->num;
2170a8b690f9STom Herbert 	++st->offset;
21711da177e4SLinus Torvalds 
21721da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
21731da177e4SLinus Torvalds 		tw = cur;
21741da177e4SLinus Torvalds 		tw = tw_next(tw);
		/* Also entered from below with the head of the time-wait chain. */
21751da177e4SLinus Torvalds get_tw:
2176878628fbSYOSHIFUJI Hideaki 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
21771da177e4SLinus Torvalds 			tw = tw_next(tw);
21781da177e4SLinus Torvalds 		}
21791da177e4SLinus Torvalds 		if (tw) {
21801da177e4SLinus Torvalds 			cur = tw;
21811da177e4SLinus Torvalds 			goto out;
21821da177e4SLinus Torvalds 		}
		/* Bucket finished: release it before looking for the next one. */
21839db66bdcSEric Dumazet 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
21841da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
21851da177e4SLinus Torvalds 
21866eac5604SAndi Kleen 		/* Look for next non empty bucket */
2187a8b690f9STom Herbert 		st->offset = 0;
2188f373b53bSEric Dumazet 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
21896eac5604SAndi Kleen 				empty_bucket(st))
21906eac5604SAndi Kleen 			;
2191f373b53bSEric Dumazet 		if (st->bucket > tcp_hashinfo.ehash_mask)
21926eac5604SAndi Kleen 			return NULL;
21936eac5604SAndi Kleen 
21949db66bdcSEric Dumazet 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
21953ab5aee7SEric Dumazet 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
21961da177e4SLinus Torvalds 	} else
21973ab5aee7SEric Dumazet 		sk = sk_nulls_next(sk);
21981da177e4SLinus Torvalds 
21993ab5aee7SEric Dumazet 	sk_nulls_for_each_from(sk, node) {
2200878628fbSYOSHIFUJI Hideaki 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
22011da177e4SLinus Torvalds 			goto found;
22021da177e4SLinus Torvalds 	}
22031da177e4SLinus Torvalds 
	/* Established chain exhausted: switch to this bucket's time-wait chain. */
22041da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2205dbca9b27SEric Dumazet 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
22061da177e4SLinus Torvalds 	goto get_tw;
22071da177e4SLinus Torvalds found:
22081da177e4SLinus Torvalds 	cur = sk;
22091da177e4SLinus Torvalds out:
22101da177e4SLinus Torvalds 	return cur;
22111da177e4SLinus Torvalds }
22121da177e4SLinus Torvalds 
/*
 * Return the entry at index @pos of the established hash table, counting
 * from bucket 0, or NULL if the table holds fewer matching entries.
 */
22131da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos)
22141da177e4SLinus Torvalds {
2215a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2216a8b690f9STom Herbert 	void *rc;
2217a8b690f9STom Herbert 
2218a8b690f9STom Herbert 	st->bucket = 0;
2219a8b690f9STom Herbert 	rc = established_get_first(seq);
22201da177e4SLinus Torvalds 
22211da177e4SLinus Torvalds 	while (rc && pos) {
22221da177e4SLinus Torvalds 		rc = established_get_next(seq, rc);
22231da177e4SLinus Torvalds 		--pos;
22241da177e4SLinus Torvalds 	}
22251da177e4SLinus Torvalds 	return rc;
22261da177e4SLinus Torvalds }
22271da177e4SLinus Torvalds 
/*
 * Return the entry at overall index @pos, where listening-table entries
 * come first and established-table entries follow.  Returns NULL when @pos
 * is past the end of both tables.
 */
22281da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
22291da177e4SLinus Torvalds {
22301da177e4SLinus Torvalds 	void *rc;
22311da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
22321da177e4SLinus Torvalds 
22331da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
	/* pos is passed by address: it comes back reduced by the entries walked. */
22341da177e4SLinus Torvalds 	rc	  = listening_get_idx(seq, &pos);
22351da177e4SLinus Torvalds 
22361da177e4SLinus Torvalds 	if (!rc) {
22371da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
22381da177e4SLinus Torvalds 		rc	  = established_get_idx(seq, pos);
22391da177e4SLinus Torvalds 	}
22401da177e4SLinus Torvalds 
22411da177e4SLinus Torvalds 	return rc;
22421da177e4SLinus Torvalds }
22431da177e4SLinus Torvalds 
/*
 * Try to resume the iteration at the position remembered in the iterator
 * state (st->state, st->bucket, st->offset) instead of re-walking both hash
 * tables from the start.
 *
 * st->offset counts entries inside st->bucket only.  The skip loops
 * therefore stop as soon as the iterator leaves the bucket it started in:
 * if that bucket now holds fewer entries than before, the first entry of
 * the next non-empty bucket is returned rather than continuing to skip
 * entries there.  For the same reason the offset is discarded when the
 * listening table is exhausted and the search moves on to the established
 * table.
 *
 * Returns the entry to show next, or NULL if there is none at or after the
 * remembered position.  st->num is restored to its value on entry.  Locks
 * held on return are those left held by listening_get_next() and
 * established_get_first()/established_get_next() for the returned entry.
 */
2244a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq)
2245a8b690f9STom Herbert {
2246a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
	/* The bucket that st->offset refers to. */
	int bucket = st->bucket;
2247a8b690f9STom Herbert 	int offset = st->offset;
2248a8b690f9STom Herbert 	int orig_num = st->num;
2249a8b690f9STom Herbert 	void *rc = NULL;
2250a8b690f9STom Herbert 
2251a8b690f9STom Herbert 	switch (st->state) {
2252a8b690f9STom Herbert 	case TCP_SEQ_STATE_OPENREQ:
2253a8b690f9STom Herbert 	case TCP_SEQ_STATE_LISTENING:
2254a8b690f9STom Herbert 		if (st->bucket >= INET_LHTABLE_SIZE)
2255a8b690f9STom Herbert 			break;
2256a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_LISTENING;
2257a8b690f9STom Herbert 		rc = listening_get_next(seq, NULL);
		/* Skip only while still inside the remembered bucket. */
		while (offset-- && rc && bucket == st->bucket)
2259a8b690f9STom Herbert 			rc = listening_get_next(seq, rc);
2260a8b690f9STom Herbert 		if (rc)
2261a8b690f9STom Herbert 			break;
2262a8b690f9STom Herbert 		st->bucket = 0;
		/* The offset belonged to a listening bucket; nothing to skip now. */
		offset = 0;
2263a8b690f9STom Herbert 		/* Fallthrough */
2264a8b690f9STom Herbert 	case TCP_SEQ_STATE_ESTABLISHED:
2265a8b690f9STom Herbert 	case TCP_SEQ_STATE_TIME_WAIT:
2266a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2267a8b690f9STom Herbert 		if (st->bucket > tcp_hashinfo.ehash_mask)
2268a8b690f9STom Herbert 			break;
2269a8b690f9STom Herbert 		rc = established_get_first(seq);
		/* Skip only while still inside the remembered bucket. */
		while (offset-- && rc && bucket == st->bucket)
2271a8b690f9STom Herbert 			rc = established_get_next(seq, rc);
2272a8b690f9STom Herbert 	}
2273a8b690f9STom Herbert 
	/* The get_next() calls above bumped st->num; undo that. */
2274a8b690f9STom Herbert 	st->num = orig_num;
2275a8b690f9STom Herbert 
2276a8b690f9STom Herbert 	return rc;
2277a8b690f9STom Herbert }
2278a8b690f9STom Herbert 
/*
 * seq_file ->start callback.
 *
 * When *pos is non-zero and equals the position recorded by the previous
 * start/next call, iteration is resumed from the saved iterator state via
 * tcp_seek_last_pos().  If that yields nothing, or the position differs, the
 * iterator is reset and the entry is located by index.  Position 0 yields
 * SEQ_START_TOKEN; position N > 0 maps to entry index N - 1.
 *
 * *pos is recorded in st->last_pos before returning.
 */
22791da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
22801da177e4SLinus Torvalds {
22811da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2282a8b690f9STom Herbert 	void *rc;
2283a8b690f9STom Herbert 
2284a8b690f9STom Herbert 	if (*pos && *pos == st->last_pos) {
2285a8b690f9STom Herbert 		rc = tcp_seek_last_pos(seq);
2286a8b690f9STom Herbert 		if (rc)
2287a8b690f9STom Herbert 			goto out;
2288a8b690f9STom Herbert 	}
2289a8b690f9STom Herbert 
	/* Slow path: restart from the beginning and walk to the index. */
22901da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
22911da177e4SLinus Torvalds 	st->num = 0;
2292a8b690f9STom Herbert 	st->bucket = 0;
2293a8b690f9STom Herbert 	st->offset = 0;
2294a8b690f9STom Herbert 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2295a8b690f9STom Herbert 
2296a8b690f9STom Herbert out:
2297a8b690f9STom Herbert 	st->last_pos = *pos;
2298a8b690f9STom Herbert 	return rc;
22991da177e4SLinus Torvalds }
23001da177e4SLinus Torvalds 
/*
 * seq_file ->next callback: return the entry following @v, or NULL at the
 * end.  After SEQ_START_TOKEN the first entry is returned.  When the
 * listening table is exhausted, iteration continues with the first entry of
 * the established table.  *pos is always incremented and recorded in
 * st->last_pos.
 */
23011da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
23021da177e4SLinus Torvalds {
2303a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
23041da177e4SLinus Torvalds 	void *rc = NULL;
23051da177e4SLinus Torvalds 
23061da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
23071da177e4SLinus Torvalds 		rc = tcp_get_idx(seq, 0);
23081da177e4SLinus Torvalds 		goto out;
23091da177e4SLinus Torvalds 	}
23101da177e4SLinus Torvalds 
23111da177e4SLinus Torvalds 	switch (st->state) {
23121da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
23131da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
23141da177e4SLinus Torvalds 		rc = listening_get_next(seq, v);
23151da177e4SLinus Torvalds 		if (!rc) {
			/* Listeners done: start over at established bucket 0. */
23161da177e4SLinus Torvalds 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2317a8b690f9STom Herbert 			st->bucket = 0;
2318a8b690f9STom Herbert 			st->offset = 0;
23191da177e4SLinus Torvalds 			rc	  = established_get_first(seq);
23201da177e4SLinus Torvalds 		}
23211da177e4SLinus Torvalds 		break;
23221da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
23231da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
23241da177e4SLinus Torvalds 		rc = established_get_next(seq, v);
23251da177e4SLinus Torvalds 		break;
23261da177e4SLinus Torvalds 	}
23271da177e4SLinus Torvalds out:
23281da177e4SLinus Torvalds 	++*pos;
2329a8b690f9STom Herbert 	st->last_pos = *pos;
23301da177e4SLinus Torvalds 	return rc;
23311da177e4SLinus Torvalds }
23321da177e4SLinus Torvalds 
/*
 * seq_file ->stop callback: release the locks that the iterator functions
 * left held for the current entry @v, chosen by st->state:
 *   OPENREQ               - listener's syn_wait_lock (read side), then the
 *                           listening bucket lock;
 *   LISTENING             - the listening bucket lock;
 *   ESTABLISHED/TIME_WAIT - the ehash bucket lock.
 */
23331da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v)
23341da177e4SLinus Torvalds {
23351da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
23361da177e4SLinus Torvalds 
23371da177e4SLinus Torvalds 	switch (st->state) {
23381da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
23391da177e4SLinus Torvalds 		if (v) {
2340463c84b9SArnaldo Carvalho de Melo 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2341463c84b9SArnaldo Carvalho de Melo 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
23421da177e4SLinus Torvalds 		}
		/* Fall through: the listening bucket lock is held as well. */
23431da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
		/* SEQ_START_TOKEN means no bucket has been locked yet. */
23441da177e4SLinus Torvalds 		if (v != SEQ_START_TOKEN)
23455caea4eaSEric Dumazet 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
23461da177e4SLinus Torvalds 		break;
23471da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
23481da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
		/* A NULL entry means the iterator already dropped the lock. */
23491da177e4SLinus Torvalds 		if (v)
23509db66bdcSEric Dumazet 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
23511da177e4SLinus Torvalds 		break;
23521da177e4SLinus Torvalds 	}
23531da177e4SLinus Torvalds }
23541da177e4SLinus Torvalds 
/*
 * Open handler for the TCP proc files.  Opens a per-namespace seq_file using
 * the seq_ops of the tcp_seq_afinfo stored as the proc entry's data, with a
 * struct tcp_iter_state as private iterator state, then initialises that
 * state's address family and last position.
 *
 * Returns 0 on success or the negative error from seq_open_net().
 */
235573cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file)
23561da177e4SLinus Torvalds {
23571da177e4SLinus Torvalds 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
23581da177e4SLinus Torvalds 	struct tcp_iter_state *s;
235952d6f3f1SDenis V. Lunev 	int err;
23601da177e4SLinus Torvalds 
236152d6f3f1SDenis V. Lunev 	err = seq_open_net(inode, file, &afinfo->seq_ops,
236252d6f3f1SDenis V. Lunev 			  sizeof(struct tcp_iter_state));
236352d6f3f1SDenis V. Lunev 	if (err < 0)
236452d6f3f1SDenis V. Lunev 		return err;
2365f40c8174SDaniel Lezcano 
	/* The seq_file hangs off file->private_data; its ->private is our state. */
236652d6f3f1SDenis V. Lunev 	s = ((struct seq_file *)file->private_data)->private;
23671da177e4SLinus Torvalds 	s->family		= afinfo->family;
2368a8b690f9STom Herbert 	s->last_pos 		= 0;
2369f40c8174SDaniel Lezcano 	return 0;
2370f40c8174SDaniel Lezcano }
237173cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open);
2372f40c8174SDaniel Lezcano 
23736f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
23741da177e4SLinus Torvalds {
23751da177e4SLinus Torvalds 	int rc = 0;
23761da177e4SLinus Torvalds 	struct proc_dir_entry *p;
23771da177e4SLinus Torvalds 
23789427c4b3SDenis V. Lunev 	afinfo->seq_ops.start		= tcp_seq_start;
23799427c4b3SDenis V. Lunev 	afinfo->seq_ops.next		= tcp_seq_next;
23809427c4b3SDenis V. Lunev 	afinfo->seq_ops.stop		= tcp_seq_stop;
23819427c4b3SDenis V. Lunev 
238284841c3cSDenis V. Lunev 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
238373cb88ecSArjan van de Ven 			     afinfo->seq_fops, afinfo);
238484841c3cSDenis V. Lunev 	if (!p)
23851da177e4SLinus Torvalds 		rc = -ENOMEM;
23861da177e4SLinus Torvalds 	return rc;
23871da177e4SLinus Torvalds }
23884bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register);
23891da177e4SLinus Torvalds 
23906f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
23911da177e4SLinus Torvalds {
23926f8b13bcSDaniel Lezcano 	proc_net_remove(net, afinfo->name);
23931da177e4SLinus Torvalds }
23944bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister);
23951da177e4SLinus Torvalds 
2396cf533ea5SEric Dumazet static void get_openreq4(const struct sock *sk, const struct request_sock *req,
23975e659e4cSPavel Emelyanov 			 struct seq_file *f, int i, int uid, int *len)
23981da177e4SLinus Torvalds {
23992e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
24001da177e4SLinus Torvalds 	int ttd = req->expires - jiffies;
24011da177e4SLinus Torvalds 
24025e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
240371338aa7SDan Rosenberg 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
24041da177e4SLinus Torvalds 		i,
24052e6599cbSArnaldo Carvalho de Melo 		ireq->loc_addr,
2406c720c7e8SEric Dumazet 		ntohs(inet_sk(sk)->inet_sport),
24072e6599cbSArnaldo Carvalho de Melo 		ireq->rmt_addr,
24082e6599cbSArnaldo Carvalho de Melo 		ntohs(ireq->rmt_port),
24091da177e4SLinus Torvalds 		TCP_SYN_RECV,
24101da177e4SLinus Torvalds 		0, 0, /* could print option size, but that is af dependent. */
24111da177e4SLinus Torvalds 		1,    /* timers active (only the expire timer) */
24121da177e4SLinus Torvalds 		jiffies_to_clock_t(ttd),
24131da177e4SLinus Torvalds 		req->retrans,
24141da177e4SLinus Torvalds 		uid,
24151da177e4SLinus Torvalds 		0,  /* non standard timer */
24161da177e4SLinus Torvalds 		0, /* open_requests have no inode */
24171da177e4SLinus Torvalds 		atomic_read(&sk->sk_refcnt),
24185e659e4cSPavel Emelyanov 		req,
24195e659e4cSPavel Emelyanov 		len);
24201da177e4SLinus Torvalds }
24211da177e4SLinus Torvalds 
24225e659e4cSPavel Emelyanov static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
24231da177e4SLinus Torvalds {
24241da177e4SLinus Torvalds 	int timer_active;
24251da177e4SLinus Torvalds 	unsigned long timer_expires;
2426cf533ea5SEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
2427cf4c6bf8SIlpo Järvinen 	const struct inet_connection_sock *icsk = inet_csk(sk);
2428cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
2429c720c7e8SEric Dumazet 	__be32 dest = inet->inet_daddr;
2430c720c7e8SEric Dumazet 	__be32 src = inet->inet_rcv_saddr;
2431c720c7e8SEric Dumazet 	__u16 destp = ntohs(inet->inet_dport);
2432c720c7e8SEric Dumazet 	__u16 srcp = ntohs(inet->inet_sport);
243349d09007SEric Dumazet 	int rx_queue;
24341da177e4SLinus Torvalds 
2435463c84b9SArnaldo Carvalho de Melo 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
24361da177e4SLinus Torvalds 		timer_active	= 1;
2437463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2438463c84b9SArnaldo Carvalho de Melo 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
24391da177e4SLinus Torvalds 		timer_active	= 4;
2440463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2441cf4c6bf8SIlpo Järvinen 	} else if (timer_pending(&sk->sk_timer)) {
24421da177e4SLinus Torvalds 		timer_active	= 2;
2443cf4c6bf8SIlpo Järvinen 		timer_expires	= sk->sk_timer.expires;
24441da177e4SLinus Torvalds 	} else {
24451da177e4SLinus Torvalds 		timer_active	= 0;
24461da177e4SLinus Torvalds 		timer_expires = jiffies;
24471da177e4SLinus Torvalds 	}
24481da177e4SLinus Torvalds 
244949d09007SEric Dumazet 	if (sk->sk_state == TCP_LISTEN)
245049d09007SEric Dumazet 		rx_queue = sk->sk_ack_backlog;
245149d09007SEric Dumazet 	else
245249d09007SEric Dumazet 		/*
245349d09007SEric Dumazet 		 * because we dont lock socket, we might find a transient negative value
245449d09007SEric Dumazet 		 */
245549d09007SEric Dumazet 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
245649d09007SEric Dumazet 
24575e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
245871338aa7SDan Rosenberg 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2459cf4c6bf8SIlpo Järvinen 		i, src, srcp, dest, destp, sk->sk_state,
246047da8ee6SSridhar Samudrala 		tp->write_seq - tp->snd_una,
246149d09007SEric Dumazet 		rx_queue,
24621da177e4SLinus Torvalds 		timer_active,
24631da177e4SLinus Torvalds 		jiffies_to_clock_t(timer_expires - jiffies),
2464463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_retransmits,
2465cf4c6bf8SIlpo Järvinen 		sock_i_uid(sk),
24666687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out,
2467cf4c6bf8SIlpo Järvinen 		sock_i_ino(sk),
2468cf4c6bf8SIlpo Järvinen 		atomic_read(&sk->sk_refcnt), sk,
24697be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_rto),
24707be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2471463c84b9SArnaldo Carvalho de Melo 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
24721da177e4SLinus Torvalds 		tp->snd_cwnd,
24730b6a05c1SIlpo Järvinen 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
24745e659e4cSPavel Emelyanov 		len);
24751da177e4SLinus Torvalds }
24761da177e4SLinus Torvalds 
2477cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw,
24785e659e4cSPavel Emelyanov 			       struct seq_file *f, int i, int *len)
24791da177e4SLinus Torvalds {
248023f33c2dSAl Viro 	__be32 dest, src;
24811da177e4SLinus Torvalds 	__u16 destp, srcp;
24821da177e4SLinus Torvalds 	int ttd = tw->tw_ttd - jiffies;
24831da177e4SLinus Torvalds 
24841da177e4SLinus Torvalds 	if (ttd < 0)
24851da177e4SLinus Torvalds 		ttd = 0;
24861da177e4SLinus Torvalds 
24871da177e4SLinus Torvalds 	dest  = tw->tw_daddr;
24881da177e4SLinus Torvalds 	src   = tw->tw_rcv_saddr;
24891da177e4SLinus Torvalds 	destp = ntohs(tw->tw_dport);
24901da177e4SLinus Torvalds 	srcp  = ntohs(tw->tw_sport);
24911da177e4SLinus Torvalds 
24925e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
249371338aa7SDan Rosenberg 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
24941da177e4SLinus Torvalds 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
24951da177e4SLinus Torvalds 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
24965e659e4cSPavel Emelyanov 		atomic_read(&tw->tw_refcnt), tw, len);
24971da177e4SLinus Torvalds }
24981da177e4SLinus Torvalds 
24991da177e4SLinus Torvalds #define TMPSZ 150
25001da177e4SLinus Torvalds 
25011da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v)
25021da177e4SLinus Torvalds {
25031da177e4SLinus Torvalds 	struct tcp_iter_state *st;
25045e659e4cSPavel Emelyanov 	int len;
25051da177e4SLinus Torvalds 
25061da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
25071da177e4SLinus Torvalds 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
25081da177e4SLinus Torvalds 			   "  sl  local_address rem_address   st tx_queue "
25091da177e4SLinus Torvalds 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
25101da177e4SLinus Torvalds 			   "inode");
25111da177e4SLinus Torvalds 		goto out;
25121da177e4SLinus Torvalds 	}
25131da177e4SLinus Torvalds 	st = seq->private;
25141da177e4SLinus Torvalds 
25151da177e4SLinus Torvalds 	switch (st->state) {
25161da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
25171da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
25185e659e4cSPavel Emelyanov 		get_tcp4_sock(v, seq, st->num, &len);
25191da177e4SLinus Torvalds 		break;
25201da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
25215e659e4cSPavel Emelyanov 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
25221da177e4SLinus Torvalds 		break;
25231da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
25245e659e4cSPavel Emelyanov 		get_timewait4_sock(v, seq, st->num, &len);
25251da177e4SLinus Torvalds 		break;
25261da177e4SLinus Torvalds 	}
25275e659e4cSPavel Emelyanov 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
25281da177e4SLinus Torvalds out:
25291da177e4SLinus Torvalds 	return 0;
25301da177e4SLinus Torvalds }
25311da177e4SLinus Torvalds 
253273cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = {
253373cb88ecSArjan van de Ven 	.owner   = THIS_MODULE,
253473cb88ecSArjan van de Ven 	.open    = tcp_seq_open,
253573cb88ecSArjan van de Ven 	.read    = seq_read,
253673cb88ecSArjan van de Ven 	.llseek  = seq_lseek,
253773cb88ecSArjan van de Ven 	.release = seq_release_net
253873cb88ecSArjan van de Ven };
253973cb88ecSArjan van de Ven 
25401da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = {
25411da177e4SLinus Torvalds 	.name		= "tcp",
25421da177e4SLinus Torvalds 	.family		= AF_INET,
254373cb88ecSArjan van de Ven 	.seq_fops	= &tcp_afinfo_seq_fops,
25449427c4b3SDenis V. Lunev 	.seq_ops	= {
25459427c4b3SDenis V. Lunev 		.show		= tcp4_seq_show,
25469427c4b3SDenis V. Lunev 	},
25471da177e4SLinus Torvalds };
25481da177e4SLinus Torvalds 
25492c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net)
2550757764f6SPavel Emelyanov {
2551757764f6SPavel Emelyanov 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2552757764f6SPavel Emelyanov }
2553757764f6SPavel Emelyanov 
25542c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net)
2555757764f6SPavel Emelyanov {
2556757764f6SPavel Emelyanov 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2557757764f6SPavel Emelyanov }
2558757764f6SPavel Emelyanov 
2559757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = {
2560757764f6SPavel Emelyanov 	.init = tcp4_proc_init_net,
2561757764f6SPavel Emelyanov 	.exit = tcp4_proc_exit_net,
2562757764f6SPavel Emelyanov };
2563757764f6SPavel Emelyanov 
25641da177e4SLinus Torvalds int __init tcp4_proc_init(void)
25651da177e4SLinus Torvalds {
2566757764f6SPavel Emelyanov 	return register_pernet_subsys(&tcp4_net_ops);
25671da177e4SLinus Torvalds }
25681da177e4SLinus Torvalds 
25691da177e4SLinus Torvalds void tcp4_proc_exit(void)
25701da177e4SLinus Torvalds {
2571757764f6SPavel Emelyanov 	unregister_pernet_subsys(&tcp4_net_ops);
25721da177e4SLinus Torvalds }
25731da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
25741da177e4SLinus Torvalds 
2575bf296b12SHerbert Xu struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2576bf296b12SHerbert Xu {
2577b71d1d42SEric Dumazet 	const struct iphdr *iph = skb_gro_network_header(skb);
2578bf296b12SHerbert Xu 
2579bf296b12SHerbert Xu 	switch (skb->ip_summed) {
2580bf296b12SHerbert Xu 	case CHECKSUM_COMPLETE:
258186911732SHerbert Xu 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2582bf296b12SHerbert Xu 				  skb->csum)) {
2583bf296b12SHerbert Xu 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2584bf296b12SHerbert Xu 			break;
2585bf296b12SHerbert Xu 		}
2586bf296b12SHerbert Xu 
2587bf296b12SHerbert Xu 		/* fall through */
2588bf296b12SHerbert Xu 	case CHECKSUM_NONE:
2589bf296b12SHerbert Xu 		NAPI_GRO_CB(skb)->flush = 1;
2590bf296b12SHerbert Xu 		return NULL;
2591bf296b12SHerbert Xu 	}
2592bf296b12SHerbert Xu 
2593bf296b12SHerbert Xu 	return tcp_gro_receive(head, skb);
2594bf296b12SHerbert Xu }
2595bf296b12SHerbert Xu 
2596bf296b12SHerbert Xu int tcp4_gro_complete(struct sk_buff *skb)
2597bf296b12SHerbert Xu {
2598b71d1d42SEric Dumazet 	const struct iphdr *iph = ip_hdr(skb);
2599bf296b12SHerbert Xu 	struct tcphdr *th = tcp_hdr(skb);
2600bf296b12SHerbert Xu 
2601bf296b12SHerbert Xu 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2602bf296b12SHerbert Xu 				  iph->saddr, iph->daddr, 0);
2603bf296b12SHerbert Xu 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2604bf296b12SHerbert Xu 
2605bf296b12SHerbert Xu 	return tcp_gro_complete(skb);
2606bf296b12SHerbert Xu }
2607bf296b12SHerbert Xu 
26081da177e4SLinus Torvalds struct proto tcp_prot = {
26091da177e4SLinus Torvalds 	.name			= "TCP",
26101da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
26111da177e4SLinus Torvalds 	.close			= tcp_close,
26121da177e4SLinus Torvalds 	.connect		= tcp_v4_connect,
26131da177e4SLinus Torvalds 	.disconnect		= tcp_disconnect,
2614463c84b9SArnaldo Carvalho de Melo 	.accept			= inet_csk_accept,
26151da177e4SLinus Torvalds 	.ioctl			= tcp_ioctl,
26161da177e4SLinus Torvalds 	.init			= tcp_v4_init_sock,
26171da177e4SLinus Torvalds 	.destroy		= tcp_v4_destroy_sock,
26181da177e4SLinus Torvalds 	.shutdown		= tcp_shutdown,
26191da177e4SLinus Torvalds 	.setsockopt		= tcp_setsockopt,
26201da177e4SLinus Torvalds 	.getsockopt		= tcp_getsockopt,
26211da177e4SLinus Torvalds 	.recvmsg		= tcp_recvmsg,
26227ba42910SChangli Gao 	.sendmsg		= tcp_sendmsg,
26237ba42910SChangli Gao 	.sendpage		= tcp_sendpage,
26241da177e4SLinus Torvalds 	.backlog_rcv		= tcp_v4_do_rcv,
2625ab1e0a13SArnaldo Carvalho de Melo 	.hash			= inet_hash,
2626ab1e0a13SArnaldo Carvalho de Melo 	.unhash			= inet_unhash,
2627ab1e0a13SArnaldo Carvalho de Melo 	.get_port		= inet_csk_get_port,
26281da177e4SLinus Torvalds 	.enter_memory_pressure	= tcp_enter_memory_pressure,
26291da177e4SLinus Torvalds 	.sockets_allocated	= &tcp_sockets_allocated,
26300a5578cfSArnaldo Carvalho de Melo 	.orphan_count		= &tcp_orphan_count,
26311da177e4SLinus Torvalds 	.memory_allocated	= &tcp_memory_allocated,
26321da177e4SLinus Torvalds 	.memory_pressure	= &tcp_memory_pressure,
26331da177e4SLinus Torvalds 	.sysctl_wmem		= sysctl_tcp_wmem,
26341da177e4SLinus Torvalds 	.sysctl_rmem		= sysctl_tcp_rmem,
26351da177e4SLinus Torvalds 	.max_header		= MAX_TCP_HEADER,
26361da177e4SLinus Torvalds 	.obj_size		= sizeof(struct tcp_sock),
26373ab5aee7SEric Dumazet 	.slab_flags		= SLAB_DESTROY_BY_RCU,
26386d6ee43eSArnaldo Carvalho de Melo 	.twsk_prot		= &tcp_timewait_sock_ops,
263960236fddSArnaldo Carvalho de Melo 	.rsk_prot		= &tcp_request_sock_ops,
264039d8cda7SPavel Emelyanov 	.h.hashinfo		= &tcp_hashinfo,
26417ba42910SChangli Gao 	.no_autobind		= true,
2642543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
2643543d9cfeSArnaldo Carvalho de Melo 	.compat_setsockopt	= compat_tcp_setsockopt,
2644543d9cfeSArnaldo Carvalho de Melo 	.compat_getsockopt	= compat_tcp_getsockopt,
2645543d9cfeSArnaldo Carvalho de Melo #endif
2646d1a4c0b3SGlauber Costa #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2647d1a4c0b3SGlauber Costa 	.init_cgroup		= tcp_init_cgroup,
2648d1a4c0b3SGlauber Costa 	.destroy_cgroup		= tcp_destroy_cgroup,
2649d1a4c0b3SGlauber Costa 	.proto_cgroup		= tcp_proto_cgroup,
2650d1a4c0b3SGlauber Costa #endif
26511da177e4SLinus Torvalds };
26524bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot);
26531da177e4SLinus Torvalds 
2654046ee902SDenis V. Lunev static int __net_init tcp_sk_init(struct net *net)
2655046ee902SDenis V. Lunev {
2656046ee902SDenis V. Lunev 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2657046ee902SDenis V. Lunev 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2658046ee902SDenis V. Lunev }
2659046ee902SDenis V. Lunev 
2660046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net)
2661046ee902SDenis V. Lunev {
2662046ee902SDenis V. Lunev 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2663b099ce26SEric W. Biederman }
2664b099ce26SEric W. Biederman 
2665b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2666b099ce26SEric W. Biederman {
2667b099ce26SEric W. Biederman 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2668046ee902SDenis V. Lunev }
2669046ee902SDenis V. Lunev 
2670046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = {
2671046ee902SDenis V. Lunev        .init	   = tcp_sk_init,
2672046ee902SDenis V. Lunev        .exit	   = tcp_sk_exit,
2673b099ce26SEric W. Biederman        .exit_batch = tcp_sk_exit_batch,
2674046ee902SDenis V. Lunev };
2675046ee902SDenis V. Lunev 
26769b0f976fSDenis V. Lunev void __init tcp_v4_init(void)
26771da177e4SLinus Torvalds {
26785caea4eaSEric Dumazet 	inet_hashinfo_init(&tcp_hashinfo);
26796a1b3054SEric W. Biederman 	if (register_pernet_subsys(&tcp_sk_ops))
26801da177e4SLinus Torvalds 		panic("Failed to create the TCP control socket.\n");
26811da177e4SLinus Torvalds }
2682