xref: /linux/net/ipv4/tcp_ipv4.c (revision 709e8697af1c86772c1a6fccda6d4b0e2e226547)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *		IPv4 specific functions
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *		code split from:
121da177e4SLinus Torvalds  *		linux/ipv4/tcp.c
131da177e4SLinus Torvalds  *		linux/ipv4/tcp_input.c
141da177e4SLinus Torvalds  *		linux/ipv4/tcp_output.c
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *		See tcp.c for author information
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
191da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
201da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
211da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
221da177e4SLinus Torvalds  */
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds /*
251da177e4SLinus Torvalds  * Changes:
261da177e4SLinus Torvalds  *		David S. Miller	:	New socket lookup architecture.
271da177e4SLinus Torvalds  *					This code is dedicated to John Dyson.
281da177e4SLinus Torvalds  *		David S. Miller :	Change semantics of established hash,
291da177e4SLinus Torvalds  *					half is devoted to TIME_WAIT sockets
301da177e4SLinus Torvalds  *					and the rest go in the other half.
311da177e4SLinus Torvalds  *		Andi Kleen :		Add support for syncookies and fixed
321da177e4SLinus Torvalds  *					some bugs: ip options weren't passed to
331da177e4SLinus Torvalds  *					the TCP layer, missed a check for an
341da177e4SLinus Torvalds  *					ACK bit.
351da177e4SLinus Torvalds  *		Andi Kleen :		Implemented fast path mtu discovery.
361da177e4SLinus Torvalds  *	     				Fixed many serious bugs in the
3760236fddSArnaldo Carvalho de Melo  *					request_sock handling and moved
381da177e4SLinus Torvalds  *					most of it into the af independent code.
391da177e4SLinus Torvalds  *					Added tail drop and some other bugfixes.
40caa20d9aSStephen Hemminger  *					Added new listen semantics.
411da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
421da177e4SLinus Torvalds  *	Juan Jose Ciarlante:		ip_dynaddr bits
431da177e4SLinus Torvalds  *		Andi Kleen:		various fixes.
441da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
451da177e4SLinus Torvalds  *					coma.
461da177e4SLinus Torvalds  *	Andi Kleen		:	Fix new listen.
471da177e4SLinus Torvalds  *	Andi Kleen		:	Fix accept error reporting.
481da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
491da177e4SLinus Torvalds  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
501da177e4SLinus Torvalds  *					a single port at the same time.
511da177e4SLinus Torvalds  */
521da177e4SLinus Torvalds 
531da177e4SLinus Torvalds 
54eb4dea58SHerbert Xu #include <linux/bottom_half.h>
551da177e4SLinus Torvalds #include <linux/types.h>
561da177e4SLinus Torvalds #include <linux/fcntl.h>
571da177e4SLinus Torvalds #include <linux/module.h>
581da177e4SLinus Torvalds #include <linux/random.h>
591da177e4SLinus Torvalds #include <linux/cache.h>
601da177e4SLinus Torvalds #include <linux/jhash.h>
611da177e4SLinus Torvalds #include <linux/init.h>
621da177e4SLinus Torvalds #include <linux/times.h>
635a0e3ad6STejun Heo #include <linux/slab.h>
641da177e4SLinus Torvalds 
65457c4cbcSEric W. Biederman #include <net/net_namespace.h>
661da177e4SLinus Torvalds #include <net/icmp.h>
67304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
681da177e4SLinus Torvalds #include <net/tcp.h>
6920380731SArnaldo Carvalho de Melo #include <net/transp_v6.h>
701da177e4SLinus Torvalds #include <net/ipv6.h>
711da177e4SLinus Torvalds #include <net/inet_common.h>
726d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h>
731da177e4SLinus Torvalds #include <net/xfrm.h>
741a2449a8SChris Leech #include <net/netdma.h>
756e5714eaSDavid S. Miller #include <net/secure_seq.h>
761da177e4SLinus Torvalds 
771da177e4SLinus Torvalds #include <linux/inet.h>
781da177e4SLinus Torvalds #include <linux/ipv6.h>
791da177e4SLinus Torvalds #include <linux/stddef.h>
801da177e4SLinus Torvalds #include <linux/proc_fs.h>
811da177e4SLinus Torvalds #include <linux/seq_file.h>
821da177e4SLinus Torvalds 
83cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h>
84cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h>
85cfb6eeb4SYOSHIFUJI Hideaki 
86ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly;
87ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly;
884bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency);
891da177e4SLinus Torvalds 
901da177e4SLinus Torvalds 
91cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
927174259eSArnaldo Carvalho de Melo static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
937174259eSArnaldo Carvalho de Melo 						   __be32 addr);
9449a72dfbSAdam Langley static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
95318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
969501f972SYOSHIFUJI Hideaki #else
979501f972SYOSHIFUJI Hideaki static inline
989501f972SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
999501f972SYOSHIFUJI Hideaki {
1009501f972SYOSHIFUJI Hideaki 	return NULL;
1019501f972SYOSHIFUJI Hideaki }
102cfb6eeb4SYOSHIFUJI Hideaki #endif
103cfb6eeb4SYOSHIFUJI Hideaki 
1045caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo;
1054bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo);
1061da177e4SLinus Torvalds 
107cf533ea5SEric Dumazet static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1081da177e4SLinus Torvalds {
109eddc9ec5SArnaldo Carvalho de Melo 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
110eddc9ec5SArnaldo Carvalho de Melo 					  ip_hdr(skb)->saddr,
111aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->dest,
112aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->source);
1131da177e4SLinus Torvalds }
1141da177e4SLinus Torvalds 
1156d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
1166d6ee43eSArnaldo Carvalho de Melo {
1176d6ee43eSArnaldo Carvalho de Melo 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
1186d6ee43eSArnaldo Carvalho de Melo 	struct tcp_sock *tp = tcp_sk(sk);
1196d6ee43eSArnaldo Carvalho de Melo 
1206d6ee43eSArnaldo Carvalho de Melo 	/* With PAWS, it is safe from the viewpoint
1216d6ee43eSArnaldo Carvalho de Melo 	   of data integrity. Even without PAWS it is safe provided sequence
1226d6ee43eSArnaldo Carvalho de Melo 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
1236d6ee43eSArnaldo Carvalho de Melo 
1246d6ee43eSArnaldo Carvalho de Melo 	   Actually, the idea is close to VJ's one, only timestamp cache is
1256d6ee43eSArnaldo Carvalho de Melo 	   held not per host, but per port pair and TW bucket is used as state
1266d6ee43eSArnaldo Carvalho de Melo 	   holder.
1276d6ee43eSArnaldo Carvalho de Melo 
1286d6ee43eSArnaldo Carvalho de Melo 	   If TW bucket has been already destroyed we fall back to VJ's scheme
1296d6ee43eSArnaldo Carvalho de Melo 	   and use initial timestamp retrieved from peer table.
1306d6ee43eSArnaldo Carvalho de Melo 	 */
1316d6ee43eSArnaldo Carvalho de Melo 	if (tcptw->tw_ts_recent_stamp &&
1326d6ee43eSArnaldo Carvalho de Melo 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
1339d729f72SJames Morris 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
1346d6ee43eSArnaldo Carvalho de Melo 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1356d6ee43eSArnaldo Carvalho de Melo 		if (tp->write_seq == 0)
1366d6ee43eSArnaldo Carvalho de Melo 			tp->write_seq = 1;
1376d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
1386d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1396d6ee43eSArnaldo Carvalho de Melo 		sock_hold(sktw);
1406d6ee43eSArnaldo Carvalho de Melo 		return 1;
1416d6ee43eSArnaldo Carvalho de Melo 	}
1426d6ee43eSArnaldo Carvalho de Melo 
1436d6ee43eSArnaldo Carvalho de Melo 	return 0;
1446d6ee43eSArnaldo Carvalho de Melo }
1456d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique);
1466d6ee43eSArnaldo Carvalho de Melo 
1471da177e4SLinus Torvalds /* This will initiate an outgoing connection. */
1481da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1491da177e4SLinus Torvalds {
1502d7192d6SDavid S. Miller 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1511da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
1521da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
153dca8b089SDavid S. Miller 	__be16 orig_sport, orig_dport;
154bada8adcSAl Viro 	__be32 daddr, nexthop;
155da905bd1SDavid S. Miller 	struct flowi4 *fl4;
1562d7192d6SDavid S. Miller 	struct rtable *rt;
1571da177e4SLinus Torvalds 	int err;
158f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
1591da177e4SLinus Torvalds 
1601da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_in))
1611da177e4SLinus Torvalds 		return -EINVAL;
1621da177e4SLinus Torvalds 
1631da177e4SLinus Torvalds 	if (usin->sin_family != AF_INET)
1641da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
1651da177e4SLinus Torvalds 
1661da177e4SLinus Torvalds 	nexthop = daddr = usin->sin_addr.s_addr;
167f6d8bd05SEric Dumazet 	inet_opt = rcu_dereference_protected(inet->inet_opt,
168f6d8bd05SEric Dumazet 					     sock_owned_by_user(sk));
169f6d8bd05SEric Dumazet 	if (inet_opt && inet_opt->opt.srr) {
1701da177e4SLinus Torvalds 		if (!daddr)
1711da177e4SLinus Torvalds 			return -EINVAL;
172f6d8bd05SEric Dumazet 		nexthop = inet_opt->opt.faddr;
1731da177e4SLinus Torvalds 	}
1741da177e4SLinus Torvalds 
175dca8b089SDavid S. Miller 	orig_sport = inet->inet_sport;
176dca8b089SDavid S. Miller 	orig_dport = usin->sin_port;
177da905bd1SDavid S. Miller 	fl4 = &inet->cork.fl.u.ip4;
178da905bd1SDavid S. Miller 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
1791da177e4SLinus Torvalds 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
1801da177e4SLinus Torvalds 			      IPPROTO_TCP,
181abdf7e72SDavid S. Miller 			      orig_sport, orig_dport, sk, true);
182b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
183b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
184b23dd4feSDavid S. Miller 		if (err == -ENETUNREACH)
1857c73a6faSPavel Emelyanov 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
186b23dd4feSDavid S. Miller 		return err;
187584bdf8cSWei Dong 	}
1881da177e4SLinus Torvalds 
1891da177e4SLinus Torvalds 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
1901da177e4SLinus Torvalds 		ip_rt_put(rt);
1911da177e4SLinus Torvalds 		return -ENETUNREACH;
1921da177e4SLinus Torvalds 	}
1931da177e4SLinus Torvalds 
194f6d8bd05SEric Dumazet 	if (!inet_opt || !inet_opt->opt.srr)
195da905bd1SDavid S. Miller 		daddr = fl4->daddr;
1961da177e4SLinus Torvalds 
197c720c7e8SEric Dumazet 	if (!inet->inet_saddr)
198da905bd1SDavid S. Miller 		inet->inet_saddr = fl4->saddr;
199c720c7e8SEric Dumazet 	inet->inet_rcv_saddr = inet->inet_saddr;
2001da177e4SLinus Torvalds 
201c720c7e8SEric Dumazet 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
2021da177e4SLinus Torvalds 		/* Reset inherited state */
2031da177e4SLinus Torvalds 		tp->rx_opt.ts_recent	   = 0;
2041da177e4SLinus Torvalds 		tp->rx_opt.ts_recent_stamp = 0;
2051da177e4SLinus Torvalds 		tp->write_seq		   = 0;
2061da177e4SLinus Torvalds 	}
2071da177e4SLinus Torvalds 
208295ff7edSArnaldo Carvalho de Melo 	if (tcp_death_row.sysctl_tw_recycle &&
209da905bd1SDavid S. Miller 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
210ed2361e6SDavid S. Miller 		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
2117174259eSArnaldo Carvalho de Melo 		/*
2127174259eSArnaldo Carvalho de Melo 		 * VJ's idea. We save last timestamp seen from
2137174259eSArnaldo Carvalho de Melo 		 * the destination in peer table, when entering state
2147174259eSArnaldo Carvalho de Melo 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
2157174259eSArnaldo Carvalho de Melo 		 * when trying new connection.
2161da177e4SLinus Torvalds 		 */
217317fe0e6SEric Dumazet 		if (peer) {
218317fe0e6SEric Dumazet 			inet_peer_refcheck(peer);
219317fe0e6SEric Dumazet 			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
2201da177e4SLinus Torvalds 				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
2211da177e4SLinus Torvalds 				tp->rx_opt.ts_recent = peer->tcp_ts;
2221da177e4SLinus Torvalds 			}
2231da177e4SLinus Torvalds 		}
224317fe0e6SEric Dumazet 	}
2251da177e4SLinus Torvalds 
226c720c7e8SEric Dumazet 	inet->inet_dport = usin->sin_port;
227c720c7e8SEric Dumazet 	inet->inet_daddr = daddr;
2281da177e4SLinus Torvalds 
229d83d8461SArnaldo Carvalho de Melo 	inet_csk(sk)->icsk_ext_hdr_len = 0;
230f6d8bd05SEric Dumazet 	if (inet_opt)
231f6d8bd05SEric Dumazet 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
2321da177e4SLinus Torvalds 
233bee7ca9eSWilliam Allen Simpson 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
2341da177e4SLinus Torvalds 
2351da177e4SLinus Torvalds 	/* Socket identity is still unknown (sport may be zero).
2361da177e4SLinus Torvalds 	 * However we set state to SYN-SENT and not releasing socket
2371da177e4SLinus Torvalds 	 * lock select source port, enter ourselves into the hash tables and
2381da177e4SLinus Torvalds 	 * complete initialization after this.
2391da177e4SLinus Torvalds 	 */
2401da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_SYN_SENT);
241a7f5e7f1SArnaldo Carvalho de Melo 	err = inet_hash_connect(&tcp_death_row, sk);
2421da177e4SLinus Torvalds 	if (err)
2431da177e4SLinus Torvalds 		goto failure;
2441da177e4SLinus Torvalds 
245da905bd1SDavid S. Miller 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
246c720c7e8SEric Dumazet 			       inet->inet_sport, inet->inet_dport, sk);
247b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
248b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
249b23dd4feSDavid S. Miller 		rt = NULL;
2501da177e4SLinus Torvalds 		goto failure;
251b23dd4feSDavid S. Miller 	}
2521da177e4SLinus Torvalds 	/* OK, now commit destination to socket.  */
253bcd76111SHerbert Xu 	sk->sk_gso_type = SKB_GSO_TCPV4;
254d8d1f30bSChangli Gao 	sk_setup_caps(sk, &rt->dst);
2551da177e4SLinus Torvalds 
2561da177e4SLinus Torvalds 	if (!tp->write_seq)
257c720c7e8SEric Dumazet 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
258c720c7e8SEric Dumazet 							   inet->inet_daddr,
259c720c7e8SEric Dumazet 							   inet->inet_sport,
2601da177e4SLinus Torvalds 							   usin->sin_port);
2611da177e4SLinus Torvalds 
262c720c7e8SEric Dumazet 	inet->inet_id = tp->write_seq ^ jiffies;
2631da177e4SLinus Torvalds 
2641da177e4SLinus Torvalds 	err = tcp_connect(sk);
2651da177e4SLinus Torvalds 	rt = NULL;
2661da177e4SLinus Torvalds 	if (err)
2671da177e4SLinus Torvalds 		goto failure;
2681da177e4SLinus Torvalds 
2691da177e4SLinus Torvalds 	return 0;
2701da177e4SLinus Torvalds 
2711da177e4SLinus Torvalds failure:
2727174259eSArnaldo Carvalho de Melo 	/*
2737174259eSArnaldo Carvalho de Melo 	 * This unhashes the socket and releases the local port,
2747174259eSArnaldo Carvalho de Melo 	 * if necessary.
2757174259eSArnaldo Carvalho de Melo 	 */
2761da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_CLOSE);
2771da177e4SLinus Torvalds 	ip_rt_put(rt);
2781da177e4SLinus Torvalds 	sk->sk_route_caps = 0;
279c720c7e8SEric Dumazet 	inet->inet_dport = 0;
2801da177e4SLinus Torvalds 	return err;
2811da177e4SLinus Torvalds }
2824bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect);
2831da177e4SLinus Torvalds 
2841da177e4SLinus Torvalds /*
2851da177e4SLinus Torvalds  * This routine does path mtu discovery as defined in RFC1191.
2861da177e4SLinus Torvalds  */
287b71d1d42SEric Dumazet static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
2881da177e4SLinus Torvalds {
2891da177e4SLinus Torvalds 	struct dst_entry *dst;
2901da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
2911da177e4SLinus Torvalds 
2921da177e4SLinus Torvalds 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
2931da177e4SLinus Torvalds 	 * send out by Linux are always <576bytes so they should go through
2941da177e4SLinus Torvalds 	 * unfragmented).
2951da177e4SLinus Torvalds 	 */
2961da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN)
2971da177e4SLinus Torvalds 		return;
2981da177e4SLinus Torvalds 
2991da177e4SLinus Torvalds 	/* We don't check in the destentry if pmtu discovery is forbidden
3001da177e4SLinus Torvalds 	 * on this route. We just assume that no packet_to_big packets
3011da177e4SLinus Torvalds 	 * are send back when pmtu discovery is not active.
3021da177e4SLinus Torvalds 	 * There is a small race when the user changes this flag in the
3031da177e4SLinus Torvalds 	 * route, but I think that's acceptable.
3041da177e4SLinus Torvalds 	 */
3051da177e4SLinus Torvalds 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
3061da177e4SLinus Torvalds 		return;
3071da177e4SLinus Torvalds 
3081da177e4SLinus Torvalds 	dst->ops->update_pmtu(dst, mtu);
3091da177e4SLinus Torvalds 
3101da177e4SLinus Torvalds 	/* Something is about to be wrong... Remember soft error
3111da177e4SLinus Torvalds 	 * for the case, if this connection will not able to recover.
3121da177e4SLinus Torvalds 	 */
3131da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
3141da177e4SLinus Torvalds 		sk->sk_err_soft = EMSGSIZE;
3151da177e4SLinus Torvalds 
3161da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
3171da177e4SLinus Torvalds 
3181da177e4SLinus Torvalds 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
319d83d8461SArnaldo Carvalho de Melo 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
3201da177e4SLinus Torvalds 		tcp_sync_mss(sk, mtu);
3211da177e4SLinus Torvalds 
3221da177e4SLinus Torvalds 		/* Resend the TCP packet because it's
3231da177e4SLinus Torvalds 		 * clear that the old packet has been
3241da177e4SLinus Torvalds 		 * dropped. This is the new "fast" path mtu
3251da177e4SLinus Torvalds 		 * discovery.
3261da177e4SLinus Torvalds 		 */
3271da177e4SLinus Torvalds 		tcp_simple_retransmit(sk);
3281da177e4SLinus Torvalds 	} /* else let the usual retransmit timer handle it */
3291da177e4SLinus Torvalds }
3301da177e4SLinus Torvalds 
3311da177e4SLinus Torvalds /*
3321da177e4SLinus Torvalds  * This routine is called by the ICMP module when it gets some
3331da177e4SLinus Torvalds  * sort of error condition.  If err < 0 then the socket should
3341da177e4SLinus Torvalds  * be closed and the error returned to the user.  If err > 0
3351da177e4SLinus Torvalds  * it's just the icmp type << 8 | icmp code.  After adjustment
3361da177e4SLinus Torvalds  * header points to the first 8 bytes of the tcp header.  We need
3371da177e4SLinus Torvalds  * to find the appropriate port.
3381da177e4SLinus Torvalds  *
3391da177e4SLinus Torvalds  * The locking strategy used here is very "optimistic". When
3401da177e4SLinus Torvalds  * someone else accesses the socket the ICMP is just dropped
3411da177e4SLinus Torvalds  * and for some paths there is no check at all.
3421da177e4SLinus Torvalds  * A more general error queue to queue errors for later handling
3431da177e4SLinus Torvalds  * is probably better.
3441da177e4SLinus Torvalds  *
3451da177e4SLinus Torvalds  */
3461da177e4SLinus Torvalds 
3474d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
3481da177e4SLinus Torvalds {
349b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
3504d1a2d9eSDamian Lukowski 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
351f1ecd5d9SDamian Lukowski 	struct inet_connection_sock *icsk;
3521da177e4SLinus Torvalds 	struct tcp_sock *tp;
3531da177e4SLinus Torvalds 	struct inet_sock *inet;
3544d1a2d9eSDamian Lukowski 	const int type = icmp_hdr(icmp_skb)->type;
3554d1a2d9eSDamian Lukowski 	const int code = icmp_hdr(icmp_skb)->code;
3561da177e4SLinus Torvalds 	struct sock *sk;
357f1ecd5d9SDamian Lukowski 	struct sk_buff *skb;
3581da177e4SLinus Torvalds 	__u32 seq;
359f1ecd5d9SDamian Lukowski 	__u32 remaining;
3601da177e4SLinus Torvalds 	int err;
3614d1a2d9eSDamian Lukowski 	struct net *net = dev_net(icmp_skb->dev);
3621da177e4SLinus Torvalds 
3634d1a2d9eSDamian Lukowski 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
364dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3651da177e4SLinus Torvalds 		return;
3661da177e4SLinus Torvalds 	}
3671da177e4SLinus Torvalds 
368fd54d716SPavel Emelyanov 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
3694d1a2d9eSDamian Lukowski 			iph->saddr, th->source, inet_iif(icmp_skb));
3701da177e4SLinus Torvalds 	if (!sk) {
371dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3721da177e4SLinus Torvalds 		return;
3731da177e4SLinus Torvalds 	}
3741da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT) {
3759469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
3761da177e4SLinus Torvalds 		return;
3771da177e4SLinus Torvalds 	}
3781da177e4SLinus Torvalds 
3791da177e4SLinus Torvalds 	bh_lock_sock(sk);
3801da177e4SLinus Torvalds 	/* If too many ICMPs get dropped on busy
3811da177e4SLinus Torvalds 	 * servers this needs to be solved differently.
3821da177e4SLinus Torvalds 	 */
3831da177e4SLinus Torvalds 	if (sock_owned_by_user(sk))
384de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
3851da177e4SLinus Torvalds 
3861da177e4SLinus Torvalds 	if (sk->sk_state == TCP_CLOSE)
3871da177e4SLinus Torvalds 		goto out;
3881da177e4SLinus Torvalds 
38997e3ecd1Sstephen hemminger 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
39097e3ecd1Sstephen hemminger 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
39197e3ecd1Sstephen hemminger 		goto out;
39297e3ecd1Sstephen hemminger 	}
39397e3ecd1Sstephen hemminger 
394f1ecd5d9SDamian Lukowski 	icsk = inet_csk(sk);
3951da177e4SLinus Torvalds 	tp = tcp_sk(sk);
3961da177e4SLinus Torvalds 	seq = ntohl(th->seq);
3971da177e4SLinus Torvalds 	if (sk->sk_state != TCP_LISTEN &&
3981da177e4SLinus Torvalds 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
399de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
4001da177e4SLinus Torvalds 		goto out;
4011da177e4SLinus Torvalds 	}
4021da177e4SLinus Torvalds 
4031da177e4SLinus Torvalds 	switch (type) {
4041da177e4SLinus Torvalds 	case ICMP_SOURCE_QUENCH:
4051da177e4SLinus Torvalds 		/* Just silently ignore these. */
4061da177e4SLinus Torvalds 		goto out;
4071da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
4081da177e4SLinus Torvalds 		err = EPROTO;
4091da177e4SLinus Torvalds 		break;
4101da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
4111da177e4SLinus Torvalds 		if (code > NR_ICMP_UNREACH)
4121da177e4SLinus Torvalds 			goto out;
4131da177e4SLinus Torvalds 
4141da177e4SLinus Torvalds 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
4151da177e4SLinus Torvalds 			if (!sock_owned_by_user(sk))
4161da177e4SLinus Torvalds 				do_pmtu_discovery(sk, iph, info);
4171da177e4SLinus Torvalds 			goto out;
4181da177e4SLinus Torvalds 		}
4191da177e4SLinus Torvalds 
4201da177e4SLinus Torvalds 		err = icmp_err_convert[code].errno;
421f1ecd5d9SDamian Lukowski 		/* check if icmp_skb allows revert of backoff
422f1ecd5d9SDamian Lukowski 		 * (see draft-zimmermann-tcp-lcd) */
423f1ecd5d9SDamian Lukowski 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
424f1ecd5d9SDamian Lukowski 			break;
425f1ecd5d9SDamian Lukowski 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
426f1ecd5d9SDamian Lukowski 		    !icsk->icsk_backoff)
427f1ecd5d9SDamian Lukowski 			break;
428f1ecd5d9SDamian Lukowski 
4298f49c270SDavid S. Miller 		if (sock_owned_by_user(sk))
4308f49c270SDavid S. Miller 			break;
4318f49c270SDavid S. Miller 
432f1ecd5d9SDamian Lukowski 		icsk->icsk_backoff--;
4339ad7c049SJerry Chu 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
4349ad7c049SJerry Chu 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
435f1ecd5d9SDamian Lukowski 		tcp_bound_rto(sk);
436f1ecd5d9SDamian Lukowski 
437f1ecd5d9SDamian Lukowski 		skb = tcp_write_queue_head(sk);
438f1ecd5d9SDamian Lukowski 		BUG_ON(!skb);
439f1ecd5d9SDamian Lukowski 
440f1ecd5d9SDamian Lukowski 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
441f1ecd5d9SDamian Lukowski 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
442f1ecd5d9SDamian Lukowski 
443f1ecd5d9SDamian Lukowski 		if (remaining) {
444f1ecd5d9SDamian Lukowski 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
445f1ecd5d9SDamian Lukowski 						  remaining, TCP_RTO_MAX);
446f1ecd5d9SDamian Lukowski 		} else {
447f1ecd5d9SDamian Lukowski 			/* RTO revert clocked out retransmission.
448f1ecd5d9SDamian Lukowski 			 * Will retransmit now */
449f1ecd5d9SDamian Lukowski 			tcp_retransmit_timer(sk);
450f1ecd5d9SDamian Lukowski 		}
451f1ecd5d9SDamian Lukowski 
4521da177e4SLinus Torvalds 		break;
4531da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4541da177e4SLinus Torvalds 		err = EHOSTUNREACH;
4551da177e4SLinus Torvalds 		break;
4561da177e4SLinus Torvalds 	default:
4571da177e4SLinus Torvalds 		goto out;
4581da177e4SLinus Torvalds 	}
4591da177e4SLinus Torvalds 
4601da177e4SLinus Torvalds 	switch (sk->sk_state) {
46160236fddSArnaldo Carvalho de Melo 		struct request_sock *req, **prev;
4621da177e4SLinus Torvalds 	case TCP_LISTEN:
4631da177e4SLinus Torvalds 		if (sock_owned_by_user(sk))
4641da177e4SLinus Torvalds 			goto out;
4651da177e4SLinus Torvalds 
466463c84b9SArnaldo Carvalho de Melo 		req = inet_csk_search_req(sk, &prev, th->dest,
4671da177e4SLinus Torvalds 					  iph->daddr, iph->saddr);
4681da177e4SLinus Torvalds 		if (!req)
4691da177e4SLinus Torvalds 			goto out;
4701da177e4SLinus Torvalds 
4711da177e4SLinus Torvalds 		/* ICMPs are not backlogged, hence we cannot get
4721da177e4SLinus Torvalds 		   an established socket here.
4731da177e4SLinus Torvalds 		 */
474547b792cSIlpo Järvinen 		WARN_ON(req->sk);
4751da177e4SLinus Torvalds 
4762e6599cbSArnaldo Carvalho de Melo 		if (seq != tcp_rsk(req)->snt_isn) {
477de0744afSPavel Emelyanov 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
4781da177e4SLinus Torvalds 			goto out;
4791da177e4SLinus Torvalds 		}
4801da177e4SLinus Torvalds 
4811da177e4SLinus Torvalds 		/*
4821da177e4SLinus Torvalds 		 * Still in SYN_RECV, just remove it silently.
4831da177e4SLinus Torvalds 		 * There is no good way to pass the error to the newly
4841da177e4SLinus Torvalds 		 * created socket, and POSIX does not want network
4851da177e4SLinus Torvalds 		 * errors returned from accept().
4861da177e4SLinus Torvalds 		 */
487463c84b9SArnaldo Carvalho de Melo 		inet_csk_reqsk_queue_drop(sk, req, prev);
4881da177e4SLinus Torvalds 		goto out;
4891da177e4SLinus Torvalds 
4901da177e4SLinus Torvalds 	case TCP_SYN_SENT:
4911da177e4SLinus Torvalds 	case TCP_SYN_RECV:  /* Cannot happen.
4921da177e4SLinus Torvalds 			       It can f.e. if SYNs crossed.
4931da177e4SLinus Torvalds 			     */
4941da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk)) {
4951da177e4SLinus Torvalds 			sk->sk_err = err;
4961da177e4SLinus Torvalds 
4971da177e4SLinus Torvalds 			sk->sk_error_report(sk);
4981da177e4SLinus Torvalds 
4991da177e4SLinus Torvalds 			tcp_done(sk);
5001da177e4SLinus Torvalds 		} else {
5011da177e4SLinus Torvalds 			sk->sk_err_soft = err;
5021da177e4SLinus Torvalds 		}
5031da177e4SLinus Torvalds 		goto out;
5041da177e4SLinus Torvalds 	}
5051da177e4SLinus Torvalds 
5061da177e4SLinus Torvalds 	/* If we've already connected we will keep trying
5071da177e4SLinus Torvalds 	 * until we time out, or the user gives up.
5081da177e4SLinus Torvalds 	 *
5091da177e4SLinus Torvalds 	 * rfc1122 4.2.3.9 allows to consider as hard errors
5101da177e4SLinus Torvalds 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
5111da177e4SLinus Torvalds 	 * but it is obsoleted by pmtu discovery).
5121da177e4SLinus Torvalds 	 *
5131da177e4SLinus Torvalds 	 * Note, that in modern internet, where routing is unreliable
5141da177e4SLinus Torvalds 	 * and in each dark corner broken firewalls sit, sending random
5151da177e4SLinus Torvalds 	 * errors ordered by their masters even this two messages finally lose
5161da177e4SLinus Torvalds 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
5171da177e4SLinus Torvalds 	 *
5181da177e4SLinus Torvalds 	 * Now we are in compliance with RFCs.
5191da177e4SLinus Torvalds 	 *							--ANK (980905)
5201da177e4SLinus Torvalds 	 */
5211da177e4SLinus Torvalds 
5221da177e4SLinus Torvalds 	inet = inet_sk(sk);
5231da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk) && inet->recverr) {
5241da177e4SLinus Torvalds 		sk->sk_err = err;
5251da177e4SLinus Torvalds 		sk->sk_error_report(sk);
5261da177e4SLinus Torvalds 	} else	{ /* Only an error on timeout */
5271da177e4SLinus Torvalds 		sk->sk_err_soft = err;
5281da177e4SLinus Torvalds 	}
5291da177e4SLinus Torvalds 
5301da177e4SLinus Torvalds out:
5311da177e4SLinus Torvalds 	bh_unlock_sock(sk);
5321da177e4SLinus Torvalds 	sock_put(sk);
5331da177e4SLinus Torvalds }
5341da177e4SLinus Torvalds 
535419f9f89SHerbert Xu static void __tcp_v4_send_check(struct sk_buff *skb,
536419f9f89SHerbert Xu 				__be32 saddr, __be32 daddr)
5371da177e4SLinus Torvalds {
538aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
5391da177e4SLinus Torvalds 
54084fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
541419f9f89SHerbert Xu 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
542663ead3bSHerbert Xu 		skb->csum_start = skb_transport_header(skb) - skb->head;
543ff1dcadbSAl Viro 		skb->csum_offset = offsetof(struct tcphdr, check);
5441da177e4SLinus Torvalds 	} else {
545419f9f89SHerbert Xu 		th->check = tcp_v4_check(skb->len, saddr, daddr,
54607f0757aSJoe Perches 					 csum_partial(th,
5471da177e4SLinus Torvalds 						      th->doff << 2,
5481da177e4SLinus Torvalds 						      skb->csum));
5491da177e4SLinus Torvalds 	}
5501da177e4SLinus Torvalds }
5511da177e4SLinus Torvalds 
552419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */
553bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
554419f9f89SHerbert Xu {
555cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
556419f9f89SHerbert Xu 
557419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
558419f9f89SHerbert Xu }
5594bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check);
560419f9f89SHerbert Xu 
561a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb)
562a430a43dSHerbert Xu {
563eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
564a430a43dSHerbert Xu 	struct tcphdr *th;
565a430a43dSHerbert Xu 
566a430a43dSHerbert Xu 	if (!pskb_may_pull(skb, sizeof(*th)))
567a430a43dSHerbert Xu 		return -EINVAL;
568a430a43dSHerbert Xu 
569eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
570aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
571a430a43dSHerbert Xu 
572a430a43dSHerbert Xu 	th->check = 0;
57384fa7933SPatrick McHardy 	skb->ip_summed = CHECKSUM_PARTIAL;
574419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
575a430a43dSHerbert Xu 	return 0;
576a430a43dSHerbert Xu }
577a430a43dSHerbert Xu 
5781da177e4SLinus Torvalds /*
5791da177e4SLinus Torvalds  *	This routine will send an RST to the other tcp.
5801da177e4SLinus Torvalds  *
5811da177e4SLinus Torvalds  *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
5821da177e4SLinus Torvalds  *		      for reset.
5831da177e4SLinus Torvalds  *	Answer: if a packet caused RST, it is not for a socket
5841da177e4SLinus Torvalds  *		existing in our system, if it is matched to a socket,
5851da177e4SLinus Torvalds  *		it is just duplicate segment or bug in other side's TCP.
5861da177e4SLinus Torvalds  *		So that we build reply only basing on parameters
5871da177e4SLinus Torvalds  *		arrived with segment.
5881da177e4SLinus Torvalds  *	Exception: precedence violation. We do not implement it in any case.
5891da177e4SLinus Torvalds  */
5901da177e4SLinus Torvalds 
/*
 * Build a bare RST segment on the stack in answer to @skb and hand it to
 * ip_send_reply().  @sk may be NULL: it is only consulted for the MD5 key
 * lookup and for the "transparent" flag.
 */
591cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
5921da177e4SLinus Torvalds {
593cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
	/* Reply image: TCP header, plus room for one MD5 option if configured. */
594cfb6eeb4SYOSHIFUJI Hideaki 	struct {
595cfb6eeb4SYOSHIFUJI Hideaki 		struct tcphdr th;
596cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
597714e85beSAl Viro 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
598cfb6eeb4SYOSHIFUJI Hideaki #endif
599cfb6eeb4SYOSHIFUJI Hideaki 	} rep;
6001da177e4SLinus Torvalds 	struct ip_reply_arg arg;
601cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
602cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
603cfb6eeb4SYOSHIFUJI Hideaki #endif
604a86b1e30SPavel Emelyanov 	struct net *net;
6051da177e4SLinus Torvalds 
6061da177e4SLinus Torvalds 	/* Never send a reset in response to a reset. */
6071da177e4SLinus Torvalds 	if (th->rst)
6081da177e4SLinus Torvalds 		return;
6091da177e4SLinus Torvalds 
	/* Only answer segments whose route type is RTN_LOCAL. */
610511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
6111da177e4SLinus Torvalds 		return;
6121da177e4SLinus Torvalds 
6131da177e4SLinus Torvalds 	/* Swap the send and the receive. */
614cfb6eeb4SYOSHIFUJI Hideaki 	memset(&rep, 0, sizeof(rep));
615cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.dest   = th->source;
616cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.source = th->dest;
	/* doff is expressed in 32-bit words. */
617cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.doff   = sizeof(struct tcphdr) / 4;
618cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.rst    = 1;
6191da177e4SLinus Torvalds 
6201da177e4SLinus Torvalds 	if (th->ack) {
		/* Incoming segment had ACK set: reuse its ack number as our seq. */
621cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.seq = th->ack_seq;
6221da177e4SLinus Torvalds 	} else {
		/*
		 * No ACK: acknowledge seq + SYN flag + FIN flag + the bytes
		 * after the TCP header (skb->len minus the header length).
		 */
623cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack = 1;
624cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
6251da177e4SLinus Torvalds 				       skb->len - (th->doff << 2));
6261da177e4SLinus Torvalds 	}
6271da177e4SLinus Torvalds 
6287174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
629cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_base = (unsigned char *)&rep;
630cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_len  = sizeof(rep.th);
631cfb6eeb4SYOSHIFUJI Hideaki 
632cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
	/* The key is looked up by the incoming packet's destination address. */
633eddc9ec5SArnaldo Carvalho de Melo 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
634cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
		/* opt[0] = NOP, NOP, MD5SIG kind, MD5SIG length; digest starts at opt[1]. */
635cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
636cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_NOP << 16) |
637cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_MD5SIG << 8) |
638cfb6eeb4SYOSHIFUJI Hideaki 				   TCPOLEN_MD5SIG);
639cfb6eeb4SYOSHIFUJI Hideaki 		/* Update length and the length the header thinks exists */
640cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
641cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len / 4;
642cfb6eeb4SYOSHIFUJI Hideaki 
		/*
		 * tcp_v4_md5_hash_hdr() takes (daddr, saddr): the incoming
		 * source address is passed as the reply's destination.
		 */
64349a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
64478e645cbSIlpo Järvinen 				     key, ip_hdr(skb)->saddr,
64578e645cbSIlpo Järvinen 				     ip_hdr(skb)->daddr, &rep.th);
646cfb6eeb4SYOSHIFUJI Hideaki 	}
647cfb6eeb4SYOSHIFUJI Hideaki #endif
	/* Pseudo-header partial sum covering the final reply length. */
648eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
649eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
65052cd5750SIlpo Järvinen 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	/* Byte offset of the check field divided by two. */
6511da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
65288ef4a5aSKOVACS Krisztian 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
6531da177e4SLinus Torvalds 
	/* The reply is sent through the per-netns socket net->ipv4.tcp_sock. */
654adf30907SEric Dumazet 	net = dev_net(skb_dst(skb)->dev);
65566b13d99SEric Dumazet 	arg.tos = ip_hdr(skb)->tos;
6560a5ebb80SDavid S. Miller 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
6577feb49c8SDenis V. Lunev 		      &arg, arg.iov[0].iov_len);
6581da177e4SLinus Torvalds 
65963231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
66063231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
6611da177e4SLinus Torvalds }
6621da177e4SLinus Torvalds 
6631da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
6641da177e4SLinus Torvalds    outside socket context is ugly, certainly. What can I do?
6651da177e4SLinus Torvalds  */
6661da177e4SLinus Torvalds 
/*
 * Build a pure ACK on the stack in answer to @skb and send it with
 * ip_send_reply().
 *
 * @seq, @ack and @win are converted to network order here.  A non-zero @ts
 * adds a timestamp option echoing it.  A non-NULL @key adds an MD5 signature
 * option when CONFIG_TCP_MD5SIG is enabled.  A non-zero @oif is copied to
 * arg.bound_dev_if.  @reply_flags and @tos are copied into the reply
 * argument unchanged.
 */
6679501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
6689501f972SYOSHIFUJI Hideaki 			    u32 win, u32 ts, int oif,
66988ef4a5aSKOVACS Krisztian 			    struct tcp_md5sig_key *key,
67066b13d99SEric Dumazet 			    int reply_flags, u8 tos)
6711da177e4SLinus Torvalds {
672cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
	/* Reply image: TCP header, timestamp option words, optional MD5 option words. */
6731da177e4SLinus Torvalds 	struct {
6741da177e4SLinus Torvalds 		struct tcphdr th;
675714e85beSAl Viro 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
676cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
677cfb6eeb4SYOSHIFUJI Hideaki 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
678cfb6eeb4SYOSHIFUJI Hideaki #endif
679cfb6eeb4SYOSHIFUJI Hideaki 			];
6801da177e4SLinus Torvalds 	} rep;
6811da177e4SLinus Torvalds 	struct ip_reply_arg arg;
682adf30907SEric Dumazet 	struct net *net = dev_net(skb_dst(skb)->dev);
6831da177e4SLinus Torvalds 
	/*
	 * Only the header is cleared; option words are written individually
	 * below before they are counted in iov_len.
	 */
6841da177e4SLinus Torvalds 	memset(&rep.th, 0, sizeof(struct tcphdr));
6857174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
6861da177e4SLinus Torvalds 
6871da177e4SLinus Torvalds 	arg.iov[0].iov_base = (unsigned char *)&rep;
6881da177e4SLinus Torvalds 	arg.iov[0].iov_len  = sizeof(rep.th);
6891da177e4SLinus Torvalds 	if (ts) {
		/* NOP, NOP, TIMESTAMP kind/length, then our clock and the echoed @ts. */
690cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
6911da177e4SLinus Torvalds 				   (TCPOPT_TIMESTAMP << 8) |
6921da177e4SLinus Torvalds 				   TCPOLEN_TIMESTAMP);
693cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[1] = htonl(tcp_time_stamp);
694cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[2] = htonl(ts);
695cb48cfe8SCraig Schlenter 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
6961da177e4SLinus Torvalds 	}
6971da177e4SLinus Torvalds 
6981da177e4SLinus Torvalds 	/* Swap the send and the receive. */
6991da177e4SLinus Torvalds 	rep.th.dest    = th->source;
7001da177e4SLinus Torvalds 	rep.th.source  = th->dest;
	/* Header length in 32-bit words, including the timestamp option if added. */
7011da177e4SLinus Torvalds 	rep.th.doff    = arg.iov[0].iov_len / 4;
7021da177e4SLinus Torvalds 	rep.th.seq     = htonl(seq);
7031da177e4SLinus Torvalds 	rep.th.ack_seq = htonl(ack);
7041da177e4SLinus Torvalds 	rep.th.ack     = 1;
7051da177e4SLinus Torvalds 	rep.th.window  = htons(win);
7061da177e4SLinus Torvalds 
707cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
708cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
		/* The MD5 option goes after the three timestamp words, if present. */
709cfb6eeb4SYOSHIFUJI Hideaki 		int offset = (ts) ? 3 : 0;
710cfb6eeb4SYOSHIFUJI Hideaki 
711cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
712cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_NOP << 16) |
713cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_MD5SIG << 8) |
714cfb6eeb4SYOSHIFUJI Hideaki 					  TCPOLEN_MD5SIG);
		/* doff must be final before the header is hashed below. */
715cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
716cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len/4;
717cfb6eeb4SYOSHIFUJI Hideaki 
		/*
		 * tcp_v4_md5_hash_hdr() takes (daddr, saddr): the incoming
		 * source address is passed as the reply's destination.
		 */
71849a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
71990b7e112SAdam Langley 				    key, ip_hdr(skb)->saddr,
72090b7e112SAdam Langley 				    ip_hdr(skb)->daddr, &rep.th);
721cfb6eeb4SYOSHIFUJI Hideaki 	}
722cfb6eeb4SYOSHIFUJI Hideaki #endif
72388ef4a5aSKOVACS Krisztian 	arg.flags = reply_flags;
	/* Pseudo-header partial sum covering the final reply length. */
724eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
725eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
7261da177e4SLinus Torvalds 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	/* Byte offset of the check field divided by two. */
7271da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
7289501f972SYOSHIFUJI Hideaki 	if (oif)
7299501f972SYOSHIFUJI Hideaki 		arg.bound_dev_if = oif;
73066b13d99SEric Dumazet 	arg.tos = tos;
7310a5ebb80SDavid S. Miller 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
7327feb49c8SDenis V. Lunev 		      &arg, arg.iov[0].iov_len);
7331da177e4SLinus Torvalds 
73463231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
7351da177e4SLinus Torvalds }
7361da177e4SLinus Torvalds 
7371da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
7381da177e4SLinus Torvalds {
7398feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw = inet_twsk(sk);
740cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
7411da177e4SLinus Torvalds 
7429501f972SYOSHIFUJI Hideaki 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7437174259eSArnaldo Carvalho de Melo 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
7449501f972SYOSHIFUJI Hideaki 			tcptw->tw_ts_recent,
7459501f972SYOSHIFUJI Hideaki 			tw->tw_bound_dev_if,
74688ef4a5aSKOVACS Krisztian 			tcp_twsk_md5_key(tcptw),
74766b13d99SEric Dumazet 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
74866b13d99SEric Dumazet 			tw->tw_tos
7499501f972SYOSHIFUJI Hideaki 			);
7501da177e4SLinus Torvalds 
7518feaf0c0SArnaldo Carvalho de Melo 	inet_twsk_put(tw);
7521da177e4SLinus Torvalds }
7531da177e4SLinus Torvalds 
7546edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
7557174259eSArnaldo Carvalho de Melo 				  struct request_sock *req)
7561da177e4SLinus Torvalds {
7579501f972SYOSHIFUJI Hideaki 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
758cfb6eeb4SYOSHIFUJI Hideaki 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
7599501f972SYOSHIFUJI Hideaki 			req->ts_recent,
7609501f972SYOSHIFUJI Hideaki 			0,
76188ef4a5aSKOVACS Krisztian 			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
76266b13d99SEric Dumazet 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
76366b13d99SEric Dumazet 			ip_hdr(skb)->tos);
7641da177e4SLinus Torvalds }
7651da177e4SLinus Torvalds 
7661da177e4SLinus Torvalds /*
7679bf1d83eSKris Katterjohn  *	Send a SYN-ACK after having received a SYN.
76860236fddSArnaldo Carvalho de Melo  *	This still operates on a request_sock only, not on a big
7691da177e4SLinus Torvalds  *	socket.
7701da177e4SLinus Torvalds  */
77172659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
772e6b4d113SWilliam Allen Simpson 			      struct request_sock *req,
773e6b4d113SWilliam Allen Simpson 			      struct request_values *rvp)
7741da177e4SLinus Torvalds {
7752e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
7766bd023f3SDavid S. Miller 	struct flowi4 fl4;
7771da177e4SLinus Torvalds 	int err = -1;
7781da177e4SLinus Torvalds 	struct sk_buff * skb;
7791da177e4SLinus Torvalds 
7801da177e4SLinus Torvalds 	/* First, grab a route. */
7816bd023f3SDavid S. Miller 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
782fd80eb94SDenis V. Lunev 		return -1;
7831da177e4SLinus Torvalds 
784e6b4d113SWilliam Allen Simpson 	skb = tcp_make_synack(sk, dst, req, rvp);
7851da177e4SLinus Torvalds 
7861da177e4SLinus Torvalds 	if (skb) {
787419f9f89SHerbert Xu 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
7881da177e4SLinus Torvalds 
7892e6599cbSArnaldo Carvalho de Melo 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
7902e6599cbSArnaldo Carvalho de Melo 					    ireq->rmt_addr,
7912e6599cbSArnaldo Carvalho de Melo 					    ireq->opt);
792b9df3cb8SGerrit Renker 		err = net_xmit_eval(err);
7931da177e4SLinus Torvalds 	}
7941da177e4SLinus Torvalds 
7951da177e4SLinus Torvalds 	dst_release(dst);
7961da177e4SLinus Torvalds 	return err;
7971da177e4SLinus Torvalds }
7981da177e4SLinus Torvalds 
79972659eccSOctavian Purdila static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
800e6b4d113SWilliam Allen Simpson 			      struct request_values *rvp)
801fd80eb94SDenis V. Lunev {
80272659eccSOctavian Purdila 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
80372659eccSOctavian Purdila 	return tcp_v4_send_synack(sk, NULL, req, rvp);
804fd80eb94SDenis V. Lunev }
805fd80eb94SDenis V. Lunev 
8061da177e4SLinus Torvalds /*
80760236fddSArnaldo Carvalho de Melo  *	IPv4 request_sock destructor.
8081da177e4SLinus Torvalds  */
80960236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req)
8101da177e4SLinus Torvalds {
8112e6599cbSArnaldo Carvalho de Melo 	kfree(inet_rsk(req)->opt);
8121da177e4SLinus Torvalds }
8131da177e4SLinus Torvalds 
814946cedccSEric Dumazet /*
815946cedccSEric Dumazet  * Return 1 if a syncookie should be sent
816946cedccSEric Dumazet  */
817946cedccSEric Dumazet int tcp_syn_flood_action(struct sock *sk,
818946cedccSEric Dumazet 			 const struct sk_buff *skb,
819946cedccSEric Dumazet 			 const char *proto)
8201da177e4SLinus Torvalds {
821946cedccSEric Dumazet 	const char *msg = "Dropping request";
822946cedccSEric Dumazet 	int want_cookie = 0;
823946cedccSEric Dumazet 	struct listen_sock *lopt;
824946cedccSEric Dumazet 
825946cedccSEric Dumazet 
8261da177e4SLinus Torvalds 
8272a1d4bd4SFlorian Westphal #ifdef CONFIG_SYN_COOKIES
828946cedccSEric Dumazet 	if (sysctl_tcp_syncookies) {
8292a1d4bd4SFlorian Westphal 		msg = "Sending cookies";
830946cedccSEric Dumazet 		want_cookie = 1;
831946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
832946cedccSEric Dumazet 	} else
83380e40daaSArnaldo Carvalho de Melo #endif
834946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
8352a1d4bd4SFlorian Westphal 
836946cedccSEric Dumazet 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
837946cedccSEric Dumazet 	if (!lopt->synflood_warned) {
838946cedccSEric Dumazet 		lopt->synflood_warned = 1;
839946cedccSEric Dumazet 		pr_info("%s: Possible SYN flooding on port %d. %s. "
840946cedccSEric Dumazet 			" Check SNMP counters.\n",
841946cedccSEric Dumazet 			proto, ntohs(tcp_hdr(skb)->dest), msg);
8422a1d4bd4SFlorian Westphal 	}
843946cedccSEric Dumazet 	return want_cookie;
844946cedccSEric Dumazet }
845946cedccSEric Dumazet EXPORT_SYMBOL(tcp_syn_flood_action);
8461da177e4SLinus Torvalds 
8471da177e4SLinus Torvalds /*
84860236fddSArnaldo Carvalho de Melo  * Save and compile IPv4 options into the request_sock if needed.
8491da177e4SLinus Torvalds  */
850f6d8bd05SEric Dumazet static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
8511da177e4SLinus Torvalds 						  struct sk_buff *skb)
8521da177e4SLinus Torvalds {
853f6d8bd05SEric Dumazet 	const struct ip_options *opt = &(IPCB(skb)->opt);
854f6d8bd05SEric Dumazet 	struct ip_options_rcu *dopt = NULL;
8551da177e4SLinus Torvalds 
8561da177e4SLinus Torvalds 	if (opt && opt->optlen) {
857f6d8bd05SEric Dumazet 		int opt_size = sizeof(*dopt) + opt->optlen;
858f6d8bd05SEric Dumazet 
8591da177e4SLinus Torvalds 		dopt = kmalloc(opt_size, GFP_ATOMIC);
8601da177e4SLinus Torvalds 		if (dopt) {
861f6d8bd05SEric Dumazet 			if (ip_options_echo(&dopt->opt, skb)) {
8621da177e4SLinus Torvalds 				kfree(dopt);
8631da177e4SLinus Torvalds 				dopt = NULL;
8641da177e4SLinus Torvalds 			}
8651da177e4SLinus Torvalds 		}
8661da177e4SLinus Torvalds 	}
8671da177e4SLinus Torvalds 	return dopt;
8681da177e4SLinus Torvalds }
8691da177e4SLinus Torvalds 
870cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
871cfb6eeb4SYOSHIFUJI Hideaki /*
872cfb6eeb4SYOSHIFUJI Hideaki  * RFC2385 MD5 checksumming requires a mapping of
873cfb6eeb4SYOSHIFUJI Hideaki  * IP address->MD5 Key.
874cfb6eeb4SYOSHIFUJI Hideaki  * We need to maintain these in the sk structure.
875cfb6eeb4SYOSHIFUJI Hideaki  */
876cfb6eeb4SYOSHIFUJI Hideaki 
877cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address.  */
8787174259eSArnaldo Carvalho de Melo static struct tcp_md5sig_key *
8797174259eSArnaldo Carvalho de Melo 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
880cfb6eeb4SYOSHIFUJI Hideaki {
881cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
882cfb6eeb4SYOSHIFUJI Hideaki 	int i;
883cfb6eeb4SYOSHIFUJI Hideaki 
884cfb6eeb4SYOSHIFUJI Hideaki 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
885cfb6eeb4SYOSHIFUJI Hideaki 		return NULL;
886cfb6eeb4SYOSHIFUJI Hideaki 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
887cfb6eeb4SYOSHIFUJI Hideaki 		if (tp->md5sig_info->keys4[i].addr == addr)
888f8ab18d2SDavid S. Miller 			return &tp->md5sig_info->keys4[i].base;
889cfb6eeb4SYOSHIFUJI Hideaki 	}
890cfb6eeb4SYOSHIFUJI Hideaki 	return NULL;
891cfb6eeb4SYOSHIFUJI Hideaki }
892cfb6eeb4SYOSHIFUJI Hideaki 
893cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
894cfb6eeb4SYOSHIFUJI Hideaki 					 struct sock *addr_sk)
895cfb6eeb4SYOSHIFUJI Hideaki {
896c720c7e8SEric Dumazet 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
897cfb6eeb4SYOSHIFUJI Hideaki }
898cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup);
899cfb6eeb4SYOSHIFUJI Hideaki 
900f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
901cfb6eeb4SYOSHIFUJI Hideaki 						      struct request_sock *req)
902cfb6eeb4SYOSHIFUJI Hideaki {
903cfb6eeb4SYOSHIFUJI Hideaki 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
904cfb6eeb4SYOSHIFUJI Hideaki }
905cfb6eeb4SYOSHIFUJI Hideaki 
/*
 * Install @newkey (length @newkeylen) as the MD5 key for peer @addr on @sk,
 * replacing any key already stored for that address.
 *
 * Ownership of @newkey passes to this function: it is stored in the key
 * table on success and kfree()d on every failure path.
 *
 * Returns 0 on success or -ENOMEM if an allocation fails.
 */
906cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */
907cfb6eeb4SYOSHIFUJI Hideaki int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
908cfb6eeb4SYOSHIFUJI Hideaki 		      u8 *newkey, u8 newkeylen)
909cfb6eeb4SYOSHIFUJI Hideaki {
910cfb6eeb4SYOSHIFUJI Hideaki 	/* Add Key to the list */
911b0a713e9SMatthias M. Dellweg 	struct tcp_md5sig_key *key;
912cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
913cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_md5sig_key *keys;
914cfb6eeb4SYOSHIFUJI Hideaki 
915b0a713e9SMatthias M. Dellweg 	key = tcp_v4_md5_do_lookup(sk, addr);
916cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
917cfb6eeb4SYOSHIFUJI Hideaki 		/* Pre-existing entry - just update that one. */
918b0a713e9SMatthias M. Dellweg 		kfree(key->key);
919b0a713e9SMatthias M. Dellweg 		key->key = newkey;
920b0a713e9SMatthias M. Dellweg 		key->keylen = newkeylen;
921cfb6eeb4SYOSHIFUJI Hideaki 	} else {
922f6685938SArnaldo Carvalho de Melo 		struct tcp_md5sig_info *md5sig;
923f6685938SArnaldo Carvalho de Melo 
		/* First MD5 use on this socket: create the zeroed bookkeeping block. */
924cfb6eeb4SYOSHIFUJI Hideaki 		if (!tp->md5sig_info) {
925f6685938SArnaldo Carvalho de Melo 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
926f6685938SArnaldo Carvalho de Melo 						  GFP_ATOMIC);
927cfb6eeb4SYOSHIFUJI Hideaki 			if (!tp->md5sig_info) {
928cfb6eeb4SYOSHIFUJI Hideaki 				kfree(newkey);
929cfb6eeb4SYOSHIFUJI Hideaki 				return -ENOMEM;
930cfb6eeb4SYOSHIFUJI Hideaki 			}
			/* NOTE(review): presumably turns GSO off for this socket - confirm against sk_nocaps_add(). */
931a465419bSEric Dumazet 			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
932cfb6eeb4SYOSHIFUJI Hideaki 		}
933260fcbebSYan, Zheng 
934260fcbebSYan, Zheng 		md5sig = tp->md5sig_info;
		/* The MD5 pool is acquired only when the first IPv4 key is added. */
935260fcbebSYan, Zheng 		if (md5sig->entries4 == 0 &&
936260fcbebSYan, Zheng 		    tcp_alloc_md5sig_pool(sk) == NULL) {
937cfb6eeb4SYOSHIFUJI Hideaki 			kfree(newkey);
938cfb6eeb4SYOSHIFUJI Hideaki 			return -ENOMEM;
939cfb6eeb4SYOSHIFUJI Hideaki 		}
940f6685938SArnaldo Carvalho de Melo 
		/* Table full: grow it by exactly one slot via allocate + copy + free. */
941f6685938SArnaldo Carvalho de Melo 		if (md5sig->alloced4 == md5sig->entries4) {
942f6685938SArnaldo Carvalho de Melo 			keys = kmalloc((sizeof(*keys) *
943f6685938SArnaldo Carvalho de Melo 					(md5sig->entries4 + 1)), GFP_ATOMIC);
944cfb6eeb4SYOSHIFUJI Hideaki 			if (!keys) {
945cfb6eeb4SYOSHIFUJI Hideaki 				kfree(newkey);
				/* Undo the pool acquisition made above for a would-be first key. */
946260fcbebSYan, Zheng 				if (md5sig->entries4 == 0)
947cfb6eeb4SYOSHIFUJI Hideaki 					tcp_free_md5sig_pool();
948cfb6eeb4SYOSHIFUJI Hideaki 				return -ENOMEM;
949cfb6eeb4SYOSHIFUJI Hideaki 			}
950cfb6eeb4SYOSHIFUJI Hideaki 
951f6685938SArnaldo Carvalho de Melo 			if (md5sig->entries4)
952f6685938SArnaldo Carvalho de Melo 				memcpy(keys, md5sig->keys4,
953f6685938SArnaldo Carvalho de Melo 				       sizeof(*keys) * md5sig->entries4);
954cfb6eeb4SYOSHIFUJI Hideaki 
955cfb6eeb4SYOSHIFUJI Hideaki 			/* Free old key list, and reference new one */
956f6685938SArnaldo Carvalho de Melo 			kfree(md5sig->keys4);
957f6685938SArnaldo Carvalho de Melo 			md5sig->keys4 = keys;
958f6685938SArnaldo Carvalho de Melo 			md5sig->alloced4++;
959cfb6eeb4SYOSHIFUJI Hideaki 		}
		/* Append the new entry in the last used slot. */
960f6685938SArnaldo Carvalho de Melo 		md5sig->entries4++;
961f6685938SArnaldo Carvalho de Melo 		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
962f8ab18d2SDavid S. Miller 		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
963f8ab18d2SDavid S. Miller 		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
964cfb6eeb4SYOSHIFUJI Hideaki 	}
965cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
966cfb6eeb4SYOSHIFUJI Hideaki }
967cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_do_add);
968cfb6eeb4SYOSHIFUJI Hideaki 
969cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
970cfb6eeb4SYOSHIFUJI Hideaki 			       u8 *newkey, u8 newkeylen)
971cfb6eeb4SYOSHIFUJI Hideaki {
972c720c7e8SEric Dumazet 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
973cfb6eeb4SYOSHIFUJI Hideaki 				 newkey, newkeylen);
974cfb6eeb4SYOSHIFUJI Hideaki }
975cfb6eeb4SYOSHIFUJI Hideaki 
976cfb6eeb4SYOSHIFUJI Hideaki int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
977cfb6eeb4SYOSHIFUJI Hideaki {
978cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
979cfb6eeb4SYOSHIFUJI Hideaki 	int i;
980cfb6eeb4SYOSHIFUJI Hideaki 
981cfb6eeb4SYOSHIFUJI Hideaki 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
982cfb6eeb4SYOSHIFUJI Hideaki 		if (tp->md5sig_info->keys4[i].addr == addr) {
983cfb6eeb4SYOSHIFUJI Hideaki 			/* Free the key */
984f8ab18d2SDavid S. Miller 			kfree(tp->md5sig_info->keys4[i].base.key);
985cfb6eeb4SYOSHIFUJI Hideaki 			tp->md5sig_info->entries4--;
986cfb6eeb4SYOSHIFUJI Hideaki 
987cfb6eeb4SYOSHIFUJI Hideaki 			if (tp->md5sig_info->entries4 == 0) {
988cfb6eeb4SYOSHIFUJI Hideaki 				kfree(tp->md5sig_info->keys4);
989cfb6eeb4SYOSHIFUJI Hideaki 				tp->md5sig_info->keys4 = NULL;
9908228a18dSLeigh Brown 				tp->md5sig_info->alloced4 = 0;
991260fcbebSYan, Zheng 				tcp_free_md5sig_pool();
9927174259eSArnaldo Carvalho de Melo 			} else if (tp->md5sig_info->entries4 != i) {
993cfb6eeb4SYOSHIFUJI Hideaki 				/* Need to do some manipulation */
994354faf09SYOSHIFUJI Hideaki 				memmove(&tp->md5sig_info->keys4[i],
995cfb6eeb4SYOSHIFUJI Hideaki 					&tp->md5sig_info->keys4[i+1],
9967174259eSArnaldo Carvalho de Melo 					(tp->md5sig_info->entries4 - i) *
9977174259eSArnaldo Carvalho de Melo 					 sizeof(struct tcp4_md5sig_key));
998cfb6eeb4SYOSHIFUJI Hideaki 			}
999cfb6eeb4SYOSHIFUJI Hideaki 			return 0;
1000cfb6eeb4SYOSHIFUJI Hideaki 		}
1001cfb6eeb4SYOSHIFUJI Hideaki 	}
1002cfb6eeb4SYOSHIFUJI Hideaki 	return -ENOENT;
1003cfb6eeb4SYOSHIFUJI Hideaki }
1004cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_do_del);
1005cfb6eeb4SYOSHIFUJI Hideaki 
1006cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_clear_md5_list(struct sock *sk)
1007cfb6eeb4SYOSHIFUJI Hideaki {
1008cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1009cfb6eeb4SYOSHIFUJI Hideaki 
1010cfb6eeb4SYOSHIFUJI Hideaki 	/* Free each key, then the set of key keys,
1011cfb6eeb4SYOSHIFUJI Hideaki 	 * the crypto element, and then decrement our
1012cfb6eeb4SYOSHIFUJI Hideaki 	 * hold on the last resort crypto.
1013cfb6eeb4SYOSHIFUJI Hideaki 	 */
1014cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info->entries4) {
1015cfb6eeb4SYOSHIFUJI Hideaki 		int i;
1016cfb6eeb4SYOSHIFUJI Hideaki 		for (i = 0; i < tp->md5sig_info->entries4; i++)
1017f8ab18d2SDavid S. Miller 			kfree(tp->md5sig_info->keys4[i].base.key);
1018cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info->entries4 = 0;
1019cfb6eeb4SYOSHIFUJI Hideaki 		tcp_free_md5sig_pool();
1020cfb6eeb4SYOSHIFUJI Hideaki 	}
1021cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info->keys4) {
1022cfb6eeb4SYOSHIFUJI Hideaki 		kfree(tp->md5sig_info->keys4);
1023cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info->keys4 = NULL;
1024cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info->alloced4  = 0;
1025cfb6eeb4SYOSHIFUJI Hideaki 	}
1026cfb6eeb4SYOSHIFUJI Hideaki }
1027cfb6eeb4SYOSHIFUJI Hideaki 
1028cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1029cfb6eeb4SYOSHIFUJI Hideaki 				 int optlen)
1030cfb6eeb4SYOSHIFUJI Hideaki {
1031cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig cmd;
1032cfb6eeb4SYOSHIFUJI Hideaki 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1033cfb6eeb4SYOSHIFUJI Hideaki 	u8 *newkey;
1034cfb6eeb4SYOSHIFUJI Hideaki 
1035cfb6eeb4SYOSHIFUJI Hideaki 	if (optlen < sizeof(cmd))
1036cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1037cfb6eeb4SYOSHIFUJI Hideaki 
1038cfb6eeb4SYOSHIFUJI Hideaki 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1039cfb6eeb4SYOSHIFUJI Hideaki 		return -EFAULT;
1040cfb6eeb4SYOSHIFUJI Hideaki 
1041cfb6eeb4SYOSHIFUJI Hideaki 	if (sin->sin_family != AF_INET)
1042cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1043cfb6eeb4SYOSHIFUJI Hideaki 
1044cfb6eeb4SYOSHIFUJI Hideaki 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1045cfb6eeb4SYOSHIFUJI Hideaki 		if (!tcp_sk(sk)->md5sig_info)
1046cfb6eeb4SYOSHIFUJI Hideaki 			return -ENOENT;
1047cfb6eeb4SYOSHIFUJI Hideaki 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1048cfb6eeb4SYOSHIFUJI Hideaki 	}
1049cfb6eeb4SYOSHIFUJI Hideaki 
1050cfb6eeb4SYOSHIFUJI Hideaki 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1051cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1052cfb6eeb4SYOSHIFUJI Hideaki 
1053cfb6eeb4SYOSHIFUJI Hideaki 	if (!tcp_sk(sk)->md5sig_info) {
1054cfb6eeb4SYOSHIFUJI Hideaki 		struct tcp_sock *tp = tcp_sk(sk);
1055aa133076SWu Fengguang 		struct tcp_md5sig_info *p;
1056cfb6eeb4SYOSHIFUJI Hideaki 
1057aa133076SWu Fengguang 		p = kzalloc(sizeof(*p), sk->sk_allocation);
1058cfb6eeb4SYOSHIFUJI Hideaki 		if (!p)
1059cfb6eeb4SYOSHIFUJI Hideaki 			return -EINVAL;
1060cfb6eeb4SYOSHIFUJI Hideaki 
1061cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = p;
1062a465419bSEric Dumazet 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1063cfb6eeb4SYOSHIFUJI Hideaki 	}
1064cfb6eeb4SYOSHIFUJI Hideaki 
1065aa133076SWu Fengguang 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1066cfb6eeb4SYOSHIFUJI Hideaki 	if (!newkey)
1067cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOMEM;
1068cfb6eeb4SYOSHIFUJI Hideaki 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1069cfb6eeb4SYOSHIFUJI Hideaki 				 newkey, cmd.tcpm_keylen);
1070cfb6eeb4SYOSHIFUJI Hideaki }
1071cfb6eeb4SYOSHIFUJI Hideaki 
107249a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
107349a72dfbSAdam Langley 					__be32 daddr, __be32 saddr, int nbytes)
1074cfb6eeb4SYOSHIFUJI Hideaki {
1075cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_pseudohdr *bp;
107649a72dfbSAdam Langley 	struct scatterlist sg;
1077cfb6eeb4SYOSHIFUJI Hideaki 
1078cfb6eeb4SYOSHIFUJI Hideaki 	bp = &hp->md5_blk.ip4;
1079cfb6eeb4SYOSHIFUJI Hideaki 
1080cfb6eeb4SYOSHIFUJI Hideaki 	/*
108149a72dfbSAdam Langley 	 * 1. the TCP pseudo-header (in the order: source IP address,
1082cfb6eeb4SYOSHIFUJI Hideaki 	 * destination IP address, zero-padded protocol number, and
1083cfb6eeb4SYOSHIFUJI Hideaki 	 * segment length)
1084cfb6eeb4SYOSHIFUJI Hideaki 	 */
1085cfb6eeb4SYOSHIFUJI Hideaki 	bp->saddr = saddr;
1086cfb6eeb4SYOSHIFUJI Hideaki 	bp->daddr = daddr;
1087cfb6eeb4SYOSHIFUJI Hideaki 	bp->pad = 0;
1088076fb722SYOSHIFUJI Hideaki 	bp->protocol = IPPROTO_TCP;
108949a72dfbSAdam Langley 	bp->len = cpu_to_be16(nbytes);
1090c7da57a1SDavid S. Miller 
109149a72dfbSAdam Langley 	sg_init_one(&sg, bp, sizeof(*bp));
109249a72dfbSAdam Langley 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
109349a72dfbSAdam Langley }
109449a72dfbSAdam Langley 
109549a72dfbSAdam Langley static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1096318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
109749a72dfbSAdam Langley {
109849a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
109949a72dfbSAdam Langley 	struct hash_desc *desc;
110049a72dfbSAdam Langley 
110149a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
110249a72dfbSAdam Langley 	if (!hp)
110349a72dfbSAdam Langley 		goto clear_hash_noput;
110449a72dfbSAdam Langley 	desc = &hp->md5_desc;
110549a72dfbSAdam Langley 
110649a72dfbSAdam Langley 	if (crypto_hash_init(desc))
110749a72dfbSAdam Langley 		goto clear_hash;
110849a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
110949a72dfbSAdam Langley 		goto clear_hash;
111049a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
111149a72dfbSAdam Langley 		goto clear_hash;
111249a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
111349a72dfbSAdam Langley 		goto clear_hash;
111449a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
1115cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1116cfb6eeb4SYOSHIFUJI Hideaki 
1117cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1118cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
111949a72dfbSAdam Langley 
1120cfb6eeb4SYOSHIFUJI Hideaki clear_hash:
1121cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1122cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput:
1123cfb6eeb4SYOSHIFUJI Hideaki 	memset(md5_hash, 0, 16);
112449a72dfbSAdam Langley 	return 1;
1125cfb6eeb4SYOSHIFUJI Hideaki }
1126cfb6eeb4SYOSHIFUJI Hideaki 
112749a72dfbSAdam Langley int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1128318cf7aaSEric Dumazet 			const struct sock *sk, const struct request_sock *req,
1129318cf7aaSEric Dumazet 			const struct sk_buff *skb)
1130cfb6eeb4SYOSHIFUJI Hideaki {
113149a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
113249a72dfbSAdam Langley 	struct hash_desc *desc;
1133318cf7aaSEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1134cfb6eeb4SYOSHIFUJI Hideaki 	__be32 saddr, daddr;
1135cfb6eeb4SYOSHIFUJI Hideaki 
1136cfb6eeb4SYOSHIFUJI Hideaki 	if (sk) {
1137c720c7e8SEric Dumazet 		saddr = inet_sk(sk)->inet_saddr;
1138c720c7e8SEric Dumazet 		daddr = inet_sk(sk)->inet_daddr;
113949a72dfbSAdam Langley 	} else if (req) {
114049a72dfbSAdam Langley 		saddr = inet_rsk(req)->loc_addr;
114149a72dfbSAdam Langley 		daddr = inet_rsk(req)->rmt_addr;
1142cfb6eeb4SYOSHIFUJI Hideaki 	} else {
114349a72dfbSAdam Langley 		const struct iphdr *iph = ip_hdr(skb);
114449a72dfbSAdam Langley 		saddr = iph->saddr;
114549a72dfbSAdam Langley 		daddr = iph->daddr;
1146cfb6eeb4SYOSHIFUJI Hideaki 	}
1147cfb6eeb4SYOSHIFUJI Hideaki 
114849a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
114949a72dfbSAdam Langley 	if (!hp)
115049a72dfbSAdam Langley 		goto clear_hash_noput;
115149a72dfbSAdam Langley 	desc = &hp->md5_desc;
115249a72dfbSAdam Langley 
115349a72dfbSAdam Langley 	if (crypto_hash_init(desc))
115449a72dfbSAdam Langley 		goto clear_hash;
115549a72dfbSAdam Langley 
115649a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
115749a72dfbSAdam Langley 		goto clear_hash;
115849a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
115949a72dfbSAdam Langley 		goto clear_hash;
116049a72dfbSAdam Langley 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
116149a72dfbSAdam Langley 		goto clear_hash;
116249a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
116349a72dfbSAdam Langley 		goto clear_hash;
116449a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
116549a72dfbSAdam Langley 		goto clear_hash;
116649a72dfbSAdam Langley 
116749a72dfbSAdam Langley 	tcp_put_md5sig_pool();
116849a72dfbSAdam Langley 	return 0;
116949a72dfbSAdam Langley 
117049a72dfbSAdam Langley clear_hash:
117149a72dfbSAdam Langley 	tcp_put_md5sig_pool();
117249a72dfbSAdam Langley clear_hash_noput:
117349a72dfbSAdam Langley 	memset(md5_hash, 0, 16);
117449a72dfbSAdam Langley 	return 1;
117549a72dfbSAdam Langley }
117649a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1177cfb6eeb4SYOSHIFUJI Hideaki 
1178318cf7aaSEric Dumazet static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1179cfb6eeb4SYOSHIFUJI Hideaki {
1180cfb6eeb4SYOSHIFUJI Hideaki 	/*
1181cfb6eeb4SYOSHIFUJI Hideaki 	 * This gets called for each TCP segment that arrives
1182cfb6eeb4SYOSHIFUJI Hideaki 	 * so we want to be efficient.
1183cfb6eeb4SYOSHIFUJI Hideaki 	 * We have 3 drop cases:
1184cfb6eeb4SYOSHIFUJI Hideaki 	 * o No MD5 hash and one expected.
1185cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and we're not expecting one.
1186cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and its wrong.
1187cfb6eeb4SYOSHIFUJI Hideaki 	 */
1188cf533ea5SEric Dumazet 	const __u8 *hash_location = NULL;
1189cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *hash_expected;
1190eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1191cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1192cfb6eeb4SYOSHIFUJI Hideaki 	int genhash;
1193cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char newhash[16];
1194cfb6eeb4SYOSHIFUJI Hideaki 
1195cfb6eeb4SYOSHIFUJI Hideaki 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
11967d5d5525SYOSHIFUJI Hideaki 	hash_location = tcp_parse_md5sig_option(th);
1197cfb6eeb4SYOSHIFUJI Hideaki 
1198cfb6eeb4SYOSHIFUJI Hideaki 	/* We've parsed the options - do we have a hash? */
1199cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && !hash_location)
1200cfb6eeb4SYOSHIFUJI Hideaki 		return 0;
1201cfb6eeb4SYOSHIFUJI Hideaki 
1202cfb6eeb4SYOSHIFUJI Hideaki 	if (hash_expected && !hash_location) {
1203785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1204cfb6eeb4SYOSHIFUJI Hideaki 		return 1;
1205cfb6eeb4SYOSHIFUJI Hideaki 	}
1206cfb6eeb4SYOSHIFUJI Hideaki 
1207cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && hash_location) {
1208785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1209cfb6eeb4SYOSHIFUJI Hideaki 		return 1;
1210cfb6eeb4SYOSHIFUJI Hideaki 	}
1211cfb6eeb4SYOSHIFUJI Hideaki 
1212cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, so this is hash_expected and hash_location -
1213cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to calculate the checksum.
1214cfb6eeb4SYOSHIFUJI Hideaki 	 */
121549a72dfbSAdam Langley 	genhash = tcp_v4_md5_hash_skb(newhash,
1216cfb6eeb4SYOSHIFUJI Hideaki 				      hash_expected,
121749a72dfbSAdam Langley 				      NULL, NULL, skb);
1218cfb6eeb4SYOSHIFUJI Hideaki 
1219cfb6eeb4SYOSHIFUJI Hideaki 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1220cfb6eeb4SYOSHIFUJI Hideaki 		if (net_ratelimit()) {
1221673d57e7SHarvey Harrison 			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1222673d57e7SHarvey Harrison 			       &iph->saddr, ntohs(th->source),
1223673d57e7SHarvey Harrison 			       &iph->daddr, ntohs(th->dest),
1224cfb6eeb4SYOSHIFUJI Hideaki 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1225cfb6eeb4SYOSHIFUJI Hideaki 		}
1226cfb6eeb4SYOSHIFUJI Hideaki 		return 1;
1227cfb6eeb4SYOSHIFUJI Hideaki 	}
1228cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
1229cfb6eeb4SYOSHIFUJI Hideaki }
1230cfb6eeb4SYOSHIFUJI Hideaki 
1231cfb6eeb4SYOSHIFUJI Hideaki #endif
1232cfb6eeb4SYOSHIFUJI Hideaki 
123372a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = {
12341da177e4SLinus Torvalds 	.family		=	PF_INET,
12352e6599cbSArnaldo Carvalho de Melo 	.obj_size	=	sizeof(struct tcp_request_sock),
123672659eccSOctavian Purdila 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
123760236fddSArnaldo Carvalho de Melo 	.send_ack	=	tcp_v4_reqsk_send_ack,
123860236fddSArnaldo Carvalho de Melo 	.destructor	=	tcp_v4_reqsk_destructor,
12391da177e4SLinus Torvalds 	.send_reset	=	tcp_v4_send_reset,
124072659eccSOctavian Purdila 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
12411da177e4SLinus Torvalds };
12421da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/*
 * MD5 signature hooks for IPv4 request socks.  Installed through
 * tcp_rsk(req)->af_specific in tcp_v4_conn_request().
 */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_lookup	= tcp_v4_reqsk_md5_lookup,
};
#endif
1249cfb6eeb4SYOSHIFUJI Hideaki 
/*
 * Handle a connection request (the SYN carried in @skb) for socket @sk.
 *
 * Allocates a request sock, parses the TCP options of the SYN, selects the
 * initial sequence number (a syncookie, the value already stored in
 * TCP_SKB_CB(skb)->when, or a freshly generated one) and sends the SYN-ACK.
 * Unless a syncookie was used, the request is then added to the request
 * queue of @sk with an initial timeout of TCP_TIMEOUT_INIT.
 *
 * Every exit path returns 0, including the ones that drop the request.
 */
12501da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
12511da177e4SLinus Torvalds {
12524957faadSWilliam Allen Simpson 	struct tcp_extend_values tmp_ext;
12531da177e4SLinus Torvalds 	struct tcp_options_received tmp_opt;
1254cf533ea5SEric Dumazet 	const u8 *hash_location;
125560236fddSArnaldo Carvalho de Melo 	struct request_sock *req;
1256e6b4d113SWilliam Allen Simpson 	struct inet_request_sock *ireq;
12574957faadSWilliam Allen Simpson 	struct tcp_sock *tp = tcp_sk(sk);
1258e6b4d113SWilliam Allen Simpson 	struct dst_entry *dst = NULL;
1259eddc9ec5SArnaldo Carvalho de Melo 	__be32 saddr = ip_hdr(skb)->saddr;
1260eddc9ec5SArnaldo Carvalho de Melo 	__be32 daddr = ip_hdr(skb)->daddr;
	/* Non-zero when the request hit a live timewait bucket (see below). */
12611da177e4SLinus Torvalds 	__u32 isn = TCP_SKB_CB(skb)->when;
12621da177e4SLinus Torvalds 	int want_cookie = 0;
12631da177e4SLinus Torvalds 
12641da177e4SLinus Torvalds 	/* Never answer to SYNs sent to broadcast or multicast */
1265511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
12661da177e4SLinus Torvalds 		goto drop;
12671da177e4SLinus Torvalds 
12681da177e4SLinus Torvalds 	/* TW buckets are converted to open requests without
12691da177e4SLinus Torvalds 	 * limitations, they conserve resources and peer is
12701da177e4SLinus Torvalds 	 * evidently real one.
12711da177e4SLinus Torvalds 	 */
1272463c84b9SArnaldo Carvalho de Melo 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1273946cedccSEric Dumazet 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1274946cedccSEric Dumazet 		if (!want_cookie)
12751da177e4SLinus Torvalds 			goto drop;
12761da177e4SLinus Torvalds 	}
12771da177e4SLinus Torvalds 
12781da177e4SLinus Torvalds 	/* Accept backlog is full. If we have already queued enough
12791da177e4SLinus Torvalds 	 * of warm entries in syn queue, drop request. It is better than
12801da177e4SLinus Torvalds 	 * clogging syn queue with openreqs with exponentially increasing
12811da177e4SLinus Torvalds 	 * timeout.
12821da177e4SLinus Torvalds 	 */
1283463c84b9SArnaldo Carvalho de Melo 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
12841da177e4SLinus Torvalds 		goto drop;
12851da177e4SLinus Torvalds 
1286ce4a7d0dSArnaldo Carvalho de Melo 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
12871da177e4SLinus Torvalds 	if (!req)
12881da177e4SLinus Torvalds 		goto drop;
12891da177e4SLinus Torvalds 
1290cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1291cfb6eeb4SYOSHIFUJI Hideaki 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1292cfb6eeb4SYOSHIFUJI Hideaki #endif
1293cfb6eeb4SYOSHIFUJI Hideaki 
12941da177e4SLinus Torvalds 	tcp_clear_options(&tmp_opt);
1295bee7ca9eSWilliam Allen Simpson 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
12964957faadSWilliam Allen Simpson 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1297bb5b7c11SDavid S. Miller 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
12981da177e4SLinus Torvalds 
12994957faadSWilliam Allen Simpson 	if (tmp_opt.cookie_plus > 0 &&
13004957faadSWilliam Allen Simpson 	    tmp_opt.saw_tstamp &&
13014957faadSWilliam Allen Simpson 	    !tp->rx_opt.cookie_out_never &&
13024957faadSWilliam Allen Simpson 	    (sysctl_tcp_cookie_size > 0 ||
13034957faadSWilliam Allen Simpson 	     (tp->cookie_values != NULL &&
13044957faadSWilliam Allen Simpson 	      tp->cookie_values->cookie_desired > 0))) {
13054957faadSWilliam Allen Simpson 		u8 *c;
13064957faadSWilliam Allen Simpson 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
13074957faadSWilliam Allen Simpson 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
13084957faadSWilliam Allen Simpson 
13094957faadSWilliam Allen Simpson 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
13104957faadSWilliam Allen Simpson 			goto drop_and_release;
13114957faadSWilliam Allen Simpson 
13124957faadSWilliam Allen Simpson 		/* Secret recipe starts with IP addresses */
13130eae88f3SEric Dumazet 		*mess++ ^= (__force u32)daddr;
13140eae88f3SEric Dumazet 		*mess++ ^= (__force u32)saddr;
13154957faadSWilliam Allen Simpson 
13164957faadSWilliam Allen Simpson 		/* plus variable length Initiator Cookie */
13174957faadSWilliam Allen Simpson 		c = (u8 *)mess;
13184957faadSWilliam Allen Simpson 		while (l-- > 0)
13194957faadSWilliam Allen Simpson 			*c++ ^= *hash_location++;
13204957faadSWilliam Allen Simpson 
13214957faadSWilliam Allen Simpson 		want_cookie = 0;	/* not our kind of cookie */
13224957faadSWilliam Allen Simpson 		tmp_ext.cookie_out_never = 0; /* false */
13234957faadSWilliam Allen Simpson 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
13244957faadSWilliam Allen Simpson 	} else if (!tp->rx_opt.cookie_in_always) {
13254957faadSWilliam Allen Simpson 		/* redundant indications, but ensure initialization. */
13264957faadSWilliam Allen Simpson 		tmp_ext.cookie_out_never = 1; /* true */
13274957faadSWilliam Allen Simpson 		tmp_ext.cookie_plus = 0;
13284957faadSWilliam Allen Simpson 	} else {
13294957faadSWilliam Allen Simpson 		goto drop_and_release;
13304957faadSWilliam Allen Simpson 	}
13314957faadSWilliam Allen Simpson 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
13321da177e4SLinus Torvalds 
13334dfc2817SFlorian Westphal 	if (want_cookie && !tmp_opt.saw_tstamp)
13341da177e4SLinus Torvalds 		tcp_clear_options(&tmp_opt);
13351da177e4SLinus Torvalds 
13361da177e4SLinus Torvalds 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
13371da177e4SLinus Torvalds 	tcp_openreq_init(req, &tmp_opt, skb);
13381da177e4SLinus Torvalds 
1339bb5b7c11SDavid S. Miller 	ireq = inet_rsk(req);
1340bb5b7c11SDavid S. Miller 	ireq->loc_addr = daddr;
1341bb5b7c11SDavid S. Miller 	ireq->rmt_addr = saddr;
1342bb5b7c11SDavid S. Miller 	ireq->no_srccheck = inet_sk(sk)->transparent;
1343bb5b7c11SDavid S. Miller 	ireq->opt = tcp_v4_save_options(sk, skb);
1344bb5b7c11SDavid S. Miller 
1345284904aaSPaul Moore 	if (security_inet_conn_request(sk, skb, req))
1346bb5b7c11SDavid S. Miller 		goto drop_and_free;
1347284904aaSPaul Moore 
1348172d69e6SFlorian Westphal 	if (!want_cookie || tmp_opt.tstamp_ok)
1349aa8223c7SArnaldo Carvalho de Melo 		TCP_ECN_create_request(req, tcp_hdr(skb));
13501da177e4SLinus Torvalds 
13511da177e4SLinus Torvalds 	if (want_cookie) {
13521da177e4SLinus Torvalds 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1353172d69e6SFlorian Westphal 		req->cookie_ts = tmp_opt.tstamp_ok;
13541da177e4SLinus Torvalds 	} else if (!isn) {
13551da177e4SLinus Torvalds 		struct inet_peer *peer = NULL;
13566bd023f3SDavid S. Miller 		struct flowi4 fl4;
13571da177e4SLinus Torvalds 
13581da177e4SLinus Torvalds 		/* VJ's idea. We save last timestamp seen
13591da177e4SLinus Torvalds 		 * from the destination in peer table, when entering
13601da177e4SLinus Torvalds 		 * state TIME-WAIT, and check against it before
13611da177e4SLinus Torvalds 		 * accepting new connection request.
13621da177e4SLinus Torvalds 		 *
13631da177e4SLinus Torvalds 		 * If "isn" is not zero, this request hit alive
13641da177e4SLinus Torvalds 		 * timewait bucket, so that all the necessary checks
13651da177e4SLinus Torvalds 		 * are made in the function processing timewait state.
13661da177e4SLinus Torvalds 		 */
		/*
		 * The condition below short-circuits: dst and peer are only
		 * assigned when every earlier term held, and they keep their
		 * values for the else-branch and for the cleanup labels.
		 */
13671da177e4SLinus Torvalds 		if (tmp_opt.saw_tstamp &&
1368295ff7edSArnaldo Carvalho de Melo 		    tcp_death_row.sysctl_tw_recycle &&
13696bd023f3SDavid S. Miller 		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1370ed2361e6SDavid S. Miller 		    fl4.daddr == saddr &&
1371ed2361e6SDavid S. Miller 		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1372317fe0e6SEric Dumazet 			inet_peer_refcheck(peer);
13732c1409a0SEric Dumazet 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
13741da177e4SLinus Torvalds 			    (s32)(peer->tcp_ts - req->ts_recent) >
13751da177e4SLinus Torvalds 							TCP_PAWS_WINDOW) {
1376de0744afSPavel Emelyanov 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
13777cd04fa7SDenis V. Lunev 				goto drop_and_release;
13781da177e4SLinus Torvalds 			}
13791da177e4SLinus Torvalds 		}
13801da177e4SLinus Torvalds 		/* Kill the following clause, if you dislike this way. */
13811da177e4SLinus Torvalds 		else if (!sysctl_tcp_syncookies &&
1382463c84b9SArnaldo Carvalho de Melo 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
13831da177e4SLinus Torvalds 			  (sysctl_max_syn_backlog >> 2)) &&
13841da177e4SLinus Torvalds 			 (!peer || !peer->tcp_ts_stamp) &&
13851da177e4SLinus Torvalds 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
13861da177e4SLinus Torvalds 			/* Without syncookies last quarter of
13871da177e4SLinus Torvalds 			 * backlog is filled with destinations,
13881da177e4SLinus Torvalds 			 * proven to be alive.
13891da177e4SLinus Torvalds 			 * It means that we continue to communicate
13901da177e4SLinus Torvalds 			 * to destinations, already remembered
13911da177e4SLinus Torvalds 			 * to the moment of synflood.
13921da177e4SLinus Torvalds 			 */
1393673d57e7SHarvey Harrison 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1394673d57e7SHarvey Harrison 				       &saddr, ntohs(tcp_hdr(skb)->source));
13957cd04fa7SDenis V. Lunev 			goto drop_and_release;
13961da177e4SLinus Torvalds 		}
13971da177e4SLinus Torvalds 
1398a94f723dSGerrit Renker 		isn = tcp_v4_init_sequence(skb);
13991da177e4SLinus Torvalds 	}
14002e6599cbSArnaldo Carvalho de Melo 	tcp_rsk(req)->snt_isn = isn;
14019ad7c049SJerry Chu 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
14021da177e4SLinus Torvalds 
	/*
	 * With a syncookie the request is not queued: it is freed here even
	 * when the SYN-ACK was sent successfully.
	 */
140372659eccSOctavian Purdila 	if (tcp_v4_send_synack(sk, dst, req,
14044957faadSWilliam Allen Simpson 			       (struct request_values *)&tmp_ext) ||
14054957faadSWilliam Allen Simpson 	    want_cookie)
14061da177e4SLinus Torvalds 		goto drop_and_free;
14071da177e4SLinus Torvalds 
14083f421baaSArnaldo Carvalho de Melo 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
14091da177e4SLinus Torvalds 	return 0;
14101da177e4SLinus Torvalds 
	/*
	 * dst is still NULL on several paths to this label (for example the
	 * cookie generator failure above).
	 * NOTE(review): this relies on dst_release() accepting NULL - confirm.
	 */
14117cd04fa7SDenis V. Lunev drop_and_release:
14127cd04fa7SDenis V. Lunev 	dst_release(dst);
14131da177e4SLinus Torvalds drop_and_free:
141460236fddSArnaldo Carvalho de Melo 	reqsk_free(req);
14151da177e4SLinus Torvalds drop:
14161da177e4SLinus Torvalds 	return 0;
14171da177e4SLinus Torvalds }
14184bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request);
14191da177e4SLinus Torvalds 
14201da177e4SLinus Torvalds 
14211da177e4SLinus Torvalds /*
14221da177e4SLinus Torvalds  * The three way handshake has completed - we got a valid synack -
14231da177e4SLinus Torvalds  * now create the new socket.
14241da177e4SLinus Torvalds  */
14251da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
142660236fddSArnaldo Carvalho de Melo 				  struct request_sock *req,
14271da177e4SLinus Torvalds 				  struct dst_entry *dst)
14281da177e4SLinus Torvalds {
14292e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
14301da177e4SLinus Torvalds 	struct inet_sock *newinet;
14311da177e4SLinus Torvalds 	struct tcp_sock *newtp;
14321da177e4SLinus Torvalds 	struct sock *newsk;
1433cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1434cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
1435cfb6eeb4SYOSHIFUJI Hideaki #endif
1436f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
14371da177e4SLinus Torvalds 
14381da177e4SLinus Torvalds 	if (sk_acceptq_is_full(sk))
14391da177e4SLinus Torvalds 		goto exit_overflow;
14401da177e4SLinus Torvalds 
14411da177e4SLinus Torvalds 	newsk = tcp_create_openreq_child(sk, req, skb);
14421da177e4SLinus Torvalds 	if (!newsk)
1443093d2823SBalazs Scheidler 		goto exit_nonewsk;
14441da177e4SLinus Torvalds 
1445bcd76111SHerbert Xu 	newsk->sk_gso_type = SKB_GSO_TCPV4;
14461da177e4SLinus Torvalds 
14471da177e4SLinus Torvalds 	newtp		      = tcp_sk(newsk);
14481da177e4SLinus Torvalds 	newinet		      = inet_sk(newsk);
14492e6599cbSArnaldo Carvalho de Melo 	ireq		      = inet_rsk(req);
1450c720c7e8SEric Dumazet 	newinet->inet_daddr   = ireq->rmt_addr;
1451c720c7e8SEric Dumazet 	newinet->inet_rcv_saddr = ireq->loc_addr;
1452c720c7e8SEric Dumazet 	newinet->inet_saddr	      = ireq->loc_addr;
1453f6d8bd05SEric Dumazet 	inet_opt	      = ireq->opt;
1454f6d8bd05SEric Dumazet 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
14552e6599cbSArnaldo Carvalho de Melo 	ireq->opt	      = NULL;
1456463c84b9SArnaldo Carvalho de Melo 	newinet->mc_index     = inet_iif(skb);
1457eddc9ec5SArnaldo Carvalho de Melo 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1458d83d8461SArnaldo Carvalho de Melo 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1459f6d8bd05SEric Dumazet 	if (inet_opt)
1460f6d8bd05SEric Dumazet 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1461c720c7e8SEric Dumazet 	newinet->inet_id = newtp->write_seq ^ jiffies;
14621da177e4SLinus Torvalds 
14630e734419SDavid S. Miller 	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
14640e734419SDavid S. Miller 		goto put_and_exit;
14650e734419SDavid S. Miller 
14660e734419SDavid S. Miller 	sk_setup_caps(newsk, dst);
14670e734419SDavid S. Miller 
14685d424d5aSJohn Heffner 	tcp_mtup_init(newsk);
14691da177e4SLinus Torvalds 	tcp_sync_mss(newsk, dst_mtu(dst));
14700dbaee3bSDavid S. Miller 	newtp->advmss = dst_metric_advmss(dst);
1471f5fff5dcSTom Quetchenbach 	if (tcp_sk(sk)->rx_opt.user_mss &&
1472f5fff5dcSTom Quetchenbach 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1473f5fff5dcSTom Quetchenbach 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1474f5fff5dcSTom Quetchenbach 
14751da177e4SLinus Torvalds 	tcp_initialize_rcv_mss(newsk);
14769ad7c049SJerry Chu 	if (tcp_rsk(req)->snt_synack)
14779ad7c049SJerry Chu 		tcp_valid_rtt_meas(newsk,
14789ad7c049SJerry Chu 		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
14799ad7c049SJerry Chu 	newtp->total_retrans = req->retrans;
14801da177e4SLinus Torvalds 
1481cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1482cfb6eeb4SYOSHIFUJI Hideaki 	/* Copy over the MD5 key from the original socket */
1483c720c7e8SEric Dumazet 	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1484c720c7e8SEric Dumazet 	if (key != NULL) {
1485cfb6eeb4SYOSHIFUJI Hideaki 		/*
1486cfb6eeb4SYOSHIFUJI Hideaki 		 * We're using one, so create a matching key
1487cfb6eeb4SYOSHIFUJI Hideaki 		 * on the newsk structure. If we fail to get
1488cfb6eeb4SYOSHIFUJI Hideaki 		 * memory, then we end up not copying the key
1489cfb6eeb4SYOSHIFUJI Hideaki 		 * across. Shucks.
1490cfb6eeb4SYOSHIFUJI Hideaki 		 */
1491f6685938SArnaldo Carvalho de Melo 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1492f6685938SArnaldo Carvalho de Melo 		if (newkey != NULL)
1493c720c7e8SEric Dumazet 			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1494cfb6eeb4SYOSHIFUJI Hideaki 					  newkey, key->keylen);
1495a465419bSEric Dumazet 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1496cfb6eeb4SYOSHIFUJI Hideaki 	}
1497cfb6eeb4SYOSHIFUJI Hideaki #endif
1498cfb6eeb4SYOSHIFUJI Hideaki 
14990e734419SDavid S. Miller 	if (__inet_inherit_port(sk, newsk) < 0)
15000e734419SDavid S. Miller 		goto put_and_exit;
15019327f705SEric Dumazet 	__inet_hash_nolisten(newsk, NULL);
15021da177e4SLinus Torvalds 
15031da177e4SLinus Torvalds 	return newsk;
15041da177e4SLinus Torvalds 
15051da177e4SLinus Torvalds exit_overflow:
1506de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1507093d2823SBalazs Scheidler exit_nonewsk:
1508093d2823SBalazs Scheidler 	dst_release(dst);
15091da177e4SLinus Torvalds exit:
1510de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
15111da177e4SLinus Torvalds 	return NULL;
15120e734419SDavid S. Miller put_and_exit:
1513*709e8697SEric Dumazet 	tcp_clear_xmit_timers(newsk);
1514918eb399SEric Dumazet 	bh_unlock_sock(newsk);
15150e734419SDavid S. Miller 	sock_put(newsk);
15160e734419SDavid S. Miller 	goto exit;
15171da177e4SLinus Torvalds }
15184bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
15191da177e4SLinus Torvalds 
15201da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
15211da177e4SLinus Torvalds {
1522aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
1523eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
15241da177e4SLinus Torvalds 	struct sock *nsk;
152560236fddSArnaldo Carvalho de Melo 	struct request_sock **prev;
15261da177e4SLinus Torvalds 	/* Find possible connection requests. */
1527463c84b9SArnaldo Carvalho de Melo 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
15281da177e4SLinus Torvalds 						       iph->saddr, iph->daddr);
15291da177e4SLinus Torvalds 	if (req)
15301da177e4SLinus Torvalds 		return tcp_check_req(sk, skb, req, prev);
15311da177e4SLinus Torvalds 
15323b1e0a65SYOSHIFUJI Hideaki 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1533c67499c0SPavel Emelyanov 			th->source, iph->daddr, th->dest, inet_iif(skb));
15341da177e4SLinus Torvalds 
15351da177e4SLinus Torvalds 	if (nsk) {
15361da177e4SLinus Torvalds 		if (nsk->sk_state != TCP_TIME_WAIT) {
15371da177e4SLinus Torvalds 			bh_lock_sock(nsk);
15381da177e4SLinus Torvalds 			return nsk;
15391da177e4SLinus Torvalds 		}
15409469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(nsk));
15411da177e4SLinus Torvalds 		return NULL;
15421da177e4SLinus Torvalds 	}
15431da177e4SLinus Torvalds 
15441da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
1545af9b4738SFlorian Westphal 	if (!th->syn)
15461da177e4SLinus Torvalds 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
15471da177e4SLinus Torvalds #endif
15481da177e4SLinus Torvalds 	return sk;
15491da177e4SLinus Torvalds }
15501da177e4SLinus Torvalds 
1551b51655b9SAl Viro static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
15521da177e4SLinus Torvalds {
1553eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1554eddc9ec5SArnaldo Carvalho de Melo 
155584fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1556eddc9ec5SArnaldo Carvalho de Melo 		if (!tcp_v4_check(skb->len, iph->saddr,
1557eddc9ec5SArnaldo Carvalho de Melo 				  iph->daddr, skb->csum)) {
15581da177e4SLinus Torvalds 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1559fb286bb2SHerbert Xu 			return 0;
1560fb286bb2SHerbert Xu 		}
1561fb286bb2SHerbert Xu 	}
1562fb286bb2SHerbert Xu 
1563eddc9ec5SArnaldo Carvalho de Melo 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1564fb286bb2SHerbert Xu 				       skb->len, IPPROTO_TCP, 0);
1565fb286bb2SHerbert Xu 
1566fb286bb2SHerbert Xu 	if (skb->len <= 76) {
1567fb286bb2SHerbert Xu 		return __skb_checksum_complete(skb);
15681da177e4SLinus Torvalds 	}
15691da177e4SLinus Torvalds 	return 0;
15701da177e4SLinus Torvalds }
15711da177e4SLinus Torvalds 
15721da177e4SLinus Torvalds 
15731da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get
15741da177e4SLinus Torvalds  * here.
15751da177e4SLinus Torvalds  *
15761da177e4SLinus Torvalds  * We have a potential double-lock case here, so even when
15771da177e4SLinus Torvalds  * doing backlog processing we use the BH locking scheme.
15781da177e4SLinus Torvalds  * This is because we cannot sleep with the original spinlock
15791da177e4SLinus Torvalds  * held.
15801da177e4SLinus Torvalds  */
15811da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
15821da177e4SLinus Torvalds {
1583cfb6eeb4SYOSHIFUJI Hideaki 	struct sock *rsk;
1584cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1585cfb6eeb4SYOSHIFUJI Hideaki 	/*
1586cfb6eeb4SYOSHIFUJI Hideaki 	 * We really want to reject the packet as early as possible
1587cfb6eeb4SYOSHIFUJI Hideaki 	 * if:
1588cfb6eeb4SYOSHIFUJI Hideaki 	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1589cfb6eeb4SYOSHIFUJI Hideaki 	 *  o There is an MD5 option and we're not expecting one
1590cfb6eeb4SYOSHIFUJI Hideaki 	 */
1591cfb6eeb4SYOSHIFUJI Hideaki 	if (tcp_v4_inbound_md5_hash(sk, skb))
1592cfb6eeb4SYOSHIFUJI Hideaki 		goto discard;
1593cfb6eeb4SYOSHIFUJI Hideaki #endif
1594cfb6eeb4SYOSHIFUJI Hideaki 
15951da177e4SLinus Torvalds 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1596bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1597aa8223c7SArnaldo Carvalho de Melo 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1598cfb6eeb4SYOSHIFUJI Hideaki 			rsk = sk;
15991da177e4SLinus Torvalds 			goto reset;
1600cfb6eeb4SYOSHIFUJI Hideaki 		}
16011da177e4SLinus Torvalds 		return 0;
16021da177e4SLinus Torvalds 	}
16031da177e4SLinus Torvalds 
1604ab6a5bb6SArnaldo Carvalho de Melo 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
16051da177e4SLinus Torvalds 		goto csum_err;
16061da177e4SLinus Torvalds 
16071da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
16081da177e4SLinus Torvalds 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
16091da177e4SLinus Torvalds 		if (!nsk)
16101da177e4SLinus Torvalds 			goto discard;
16111da177e4SLinus Torvalds 
16121da177e4SLinus Torvalds 		if (nsk != sk) {
1613bdeab991STom Herbert 			sock_rps_save_rxhash(nsk, skb);
1614cfb6eeb4SYOSHIFUJI Hideaki 			if (tcp_child_process(sk, nsk, skb)) {
1615cfb6eeb4SYOSHIFUJI Hideaki 				rsk = nsk;
16161da177e4SLinus Torvalds 				goto reset;
1617cfb6eeb4SYOSHIFUJI Hideaki 			}
16181da177e4SLinus Torvalds 			return 0;
16191da177e4SLinus Torvalds 		}
1620ca55158cSEric Dumazet 	} else
1621bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1622ca55158cSEric Dumazet 
1623aa8223c7SArnaldo Carvalho de Melo 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1624cfb6eeb4SYOSHIFUJI Hideaki 		rsk = sk;
16251da177e4SLinus Torvalds 		goto reset;
1626cfb6eeb4SYOSHIFUJI Hideaki 	}
16271da177e4SLinus Torvalds 	return 0;
16281da177e4SLinus Torvalds 
16291da177e4SLinus Torvalds reset:
1630cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_reset(rsk, skb);
16311da177e4SLinus Torvalds discard:
16321da177e4SLinus Torvalds 	kfree_skb(skb);
16331da177e4SLinus Torvalds 	/* Be careful here. If this function gets more complicated and
16341da177e4SLinus Torvalds 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
16351da177e4SLinus Torvalds 	 * might be destroyed here. This current version compiles correctly,
16361da177e4SLinus Torvalds 	 * but you have been warned.
16371da177e4SLinus Torvalds 	 */
16381da177e4SLinus Torvalds 	return 0;
16391da177e4SLinus Torvalds 
16401da177e4SLinus Torvalds csum_err:
164163231bddSPavel Emelyanov 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
16421da177e4SLinus Torvalds 	goto discard;
16431da177e4SLinus Torvalds }
16444bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv);
16451da177e4SLinus Torvalds 
16461da177e4SLinus Torvalds /*
16471da177e4SLinus Torvalds  *	From tcp_input.c
16481da177e4SLinus Torvalds  */
16491da177e4SLinus Torvalds 
16501da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb)
16511da177e4SLinus Torvalds {
1652eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
1653cf533ea5SEric Dumazet 	const struct tcphdr *th;
16541da177e4SLinus Torvalds 	struct sock *sk;
16551da177e4SLinus Torvalds 	int ret;
1656a86b1e30SPavel Emelyanov 	struct net *net = dev_net(skb->dev);
16571da177e4SLinus Torvalds 
16581da177e4SLinus Torvalds 	if (skb->pkt_type != PACKET_HOST)
16591da177e4SLinus Torvalds 		goto discard_it;
16601da177e4SLinus Torvalds 
16611da177e4SLinus Torvalds 	/* Count it even if it's bad */
166263231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
16631da177e4SLinus Torvalds 
16641da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
16651da177e4SLinus Torvalds 		goto discard_it;
16661da177e4SLinus Torvalds 
1667aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
16681da177e4SLinus Torvalds 
16691da177e4SLinus Torvalds 	if (th->doff < sizeof(struct tcphdr) / 4)
16701da177e4SLinus Torvalds 		goto bad_packet;
16711da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, th->doff * 4))
16721da177e4SLinus Torvalds 		goto discard_it;
16731da177e4SLinus Torvalds 
16741da177e4SLinus Torvalds 	/* An explanation is required here, I think.
16751da177e4SLinus Torvalds 	 * Packet length and doff are validated by header prediction,
1676caa20d9aSStephen Hemminger 	 * provided case of th->doff==0 is eliminated.
16771da177e4SLinus Torvalds 	 * So, we defer the checks. */
167860476372SHerbert Xu 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
16791da177e4SLinus Torvalds 		goto bad_packet;
16801da177e4SLinus Torvalds 
1681aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
1682eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
16831da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
16841da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
16851da177e4SLinus Torvalds 				    skb->len - th->doff * 4);
16861da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
16871da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->when	 = 0;
1688b82d1bb4SEric Dumazet 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
16891da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->sacked	 = 0;
16901da177e4SLinus Torvalds 
16919a1f27c4SArnaldo Carvalho de Melo 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
16921da177e4SLinus Torvalds 	if (!sk)
16931da177e4SLinus Torvalds 		goto no_tcp_socket;
16941da177e4SLinus Torvalds 
1695bb134d5dSEric Dumazet process:
1696bb134d5dSEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
1697bb134d5dSEric Dumazet 		goto do_time_wait;
1698bb134d5dSEric Dumazet 
16996cce09f8SEric Dumazet 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
17006cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1701d218d111SStephen Hemminger 		goto discard_and_relse;
17026cce09f8SEric Dumazet 	}
1703d218d111SStephen Hemminger 
17041da177e4SLinus Torvalds 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
17051da177e4SLinus Torvalds 		goto discard_and_relse;
1706b59c2701SPatrick McHardy 	nf_reset(skb);
17071da177e4SLinus Torvalds 
1708fda9ef5dSDmitry Mishin 	if (sk_filter(sk, skb))
17091da177e4SLinus Torvalds 		goto discard_and_relse;
17101da177e4SLinus Torvalds 
17111da177e4SLinus Torvalds 	skb->dev = NULL;
17121da177e4SLinus Torvalds 
1713c6366184SIngo Molnar 	bh_lock_sock_nested(sk);
17141da177e4SLinus Torvalds 	ret = 0;
17151da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk)) {
17161a2449a8SChris Leech #ifdef CONFIG_NET_DMA
17171a2449a8SChris Leech 		struct tcp_sock *tp = tcp_sk(sk);
17181a2449a8SChris Leech 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1719f67b4599SDan Williams 			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
17201a2449a8SChris Leech 		if (tp->ucopy.dma_chan)
17211a2449a8SChris Leech 			ret = tcp_v4_do_rcv(sk, skb);
17221a2449a8SChris Leech 		else
17231a2449a8SChris Leech #endif
17241a2449a8SChris Leech 		{
17251da177e4SLinus Torvalds 			if (!tcp_prequeue(sk, skb))
17261da177e4SLinus Torvalds 				ret = tcp_v4_do_rcv(sk, skb);
17271a2449a8SChris Leech 		}
17286cce09f8SEric Dumazet 	} else if (unlikely(sk_add_backlog(sk, skb))) {
17296b03a53aSZhu Yi 		bh_unlock_sock(sk);
17306cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
17316b03a53aSZhu Yi 		goto discard_and_relse;
17326b03a53aSZhu Yi 	}
17331da177e4SLinus Torvalds 	bh_unlock_sock(sk);
17341da177e4SLinus Torvalds 
17351da177e4SLinus Torvalds 	sock_put(sk);
17361da177e4SLinus Torvalds 
17371da177e4SLinus Torvalds 	return ret;
17381da177e4SLinus Torvalds 
17391da177e4SLinus Torvalds no_tcp_socket:
17401da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
17411da177e4SLinus Torvalds 		goto discard_it;
17421da177e4SLinus Torvalds 
17431da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
17441da177e4SLinus Torvalds bad_packet:
174563231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
17461da177e4SLinus Torvalds 	} else {
1747cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_send_reset(NULL, skb);
17481da177e4SLinus Torvalds 	}
17491da177e4SLinus Torvalds 
17501da177e4SLinus Torvalds discard_it:
17511da177e4SLinus Torvalds 	/* Discard frame. */
17521da177e4SLinus Torvalds 	kfree_skb(skb);
17531da177e4SLinus Torvalds 	return 0;
17541da177e4SLinus Torvalds 
17551da177e4SLinus Torvalds discard_and_relse:
17561da177e4SLinus Torvalds 	sock_put(sk);
17571da177e4SLinus Torvalds 	goto discard_it;
17581da177e4SLinus Torvalds 
17591da177e4SLinus Torvalds do_time_wait:
17601da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
17619469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
17621da177e4SLinus Torvalds 		goto discard_it;
17631da177e4SLinus Torvalds 	}
17641da177e4SLinus Torvalds 
17651da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
176663231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
17679469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
17681da177e4SLinus Torvalds 		goto discard_it;
17691da177e4SLinus Torvalds 	}
17709469c7b4SYOSHIFUJI Hideaki 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
17711da177e4SLinus Torvalds 	case TCP_TW_SYN: {
1772c346dca1SYOSHIFUJI Hideaki 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1773c67499c0SPavel Emelyanov 							&tcp_hashinfo,
1774eddc9ec5SArnaldo Carvalho de Melo 							iph->daddr, th->dest,
1775463c84b9SArnaldo Carvalho de Melo 							inet_iif(skb));
17761da177e4SLinus Torvalds 		if (sk2) {
17779469c7b4SYOSHIFUJI Hideaki 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
17789469c7b4SYOSHIFUJI Hideaki 			inet_twsk_put(inet_twsk(sk));
17791da177e4SLinus Torvalds 			sk = sk2;
17801da177e4SLinus Torvalds 			goto process;
17811da177e4SLinus Torvalds 		}
17821da177e4SLinus Torvalds 		/* Fall through to ACK */
17831da177e4SLinus Torvalds 	}
17841da177e4SLinus Torvalds 	case TCP_TW_ACK:
17851da177e4SLinus Torvalds 		tcp_v4_timewait_ack(sk, skb);
17861da177e4SLinus Torvalds 		break;
17871da177e4SLinus Torvalds 	case TCP_TW_RST:
17881da177e4SLinus Torvalds 		goto no_tcp_socket;
17891da177e4SLinus Torvalds 	case TCP_TW_SUCCESS:;
17901da177e4SLinus Torvalds 	}
17911da177e4SLinus Torvalds 	goto discard_it;
17921da177e4SLinus Torvalds }
17931da177e4SLinus Torvalds 
17943f419d2dSDavid S. Miller struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
17951da177e4SLinus Torvalds {
17961da177e4SLinus Torvalds 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
17973f419d2dSDavid S. Miller 	struct inet_sock *inet = inet_sk(sk);
17983f419d2dSDavid S. Miller 	struct inet_peer *peer;
17991da177e4SLinus Torvalds 
1800c5216cc7SDavid S. Miller 	if (!rt ||
1801c5216cc7SDavid S. Miller 	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1802b534ecf1SDavid S. Miller 		peer = inet_getpeer_v4(inet->inet_daddr, 1);
18033f419d2dSDavid S. Miller 		*release_it = true;
18041da177e4SLinus Torvalds 	} else {
18051da177e4SLinus Torvalds 		if (!rt->peer)
1806a48eff12SDavid S. Miller 			rt_bind_peer(rt, inet->inet_daddr, 1);
18071da177e4SLinus Torvalds 		peer = rt->peer;
18083f419d2dSDavid S. Miller 		*release_it = false;
18091da177e4SLinus Torvalds 	}
18101da177e4SLinus Torvalds 
18113f419d2dSDavid S. Miller 	return peer;
18121da177e4SLinus Torvalds }
18133f419d2dSDavid S. Miller EXPORT_SYMBOL(tcp_v4_get_peer);
18141da177e4SLinus Torvalds 
1815ccb7c410SDavid S. Miller void *tcp_v4_tw_get_peer(struct sock *sk)
18161da177e4SLinus Torvalds {
1817cf533ea5SEric Dumazet 	const struct inet_timewait_sock *tw = inet_twsk(sk);
18181da177e4SLinus Torvalds 
1819ccb7c410SDavid S. Miller 	return inet_getpeer_v4(tw->tw_daddr, 1);
1820ccb7c410SDavid S. Miller }
1821ccb7c410SDavid S. Miller EXPORT_SYMBOL(tcp_v4_tw_get_peer);
18228feaf0c0SArnaldo Carvalho de Melo 
1823ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = {
1824ccb7c410SDavid S. Miller 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1825ccb7c410SDavid S. Miller 	.twsk_unique	= tcp_twsk_unique,
1826ccb7c410SDavid S. Miller 	.twsk_destructor= tcp_twsk_destructor,
1827ccb7c410SDavid S. Miller 	.twsk_getpeer	= tcp_v4_tw_get_peer,
1828ccb7c410SDavid S. Miller };
18291da177e4SLinus Torvalds 
18303b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = {
18311da177e4SLinus Torvalds 	.queue_xmit	   = ip_queue_xmit,
18321da177e4SLinus Torvalds 	.send_check	   = tcp_v4_send_check,
183332519f11SArnaldo Carvalho de Melo 	.rebuild_header	   = inet_sk_rebuild_header,
18341da177e4SLinus Torvalds 	.conn_request	   = tcp_v4_conn_request,
18351da177e4SLinus Torvalds 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
18363f419d2dSDavid S. Miller 	.get_peer	   = tcp_v4_get_peer,
18371da177e4SLinus Torvalds 	.net_header_len	   = sizeof(struct iphdr),
18381da177e4SLinus Torvalds 	.setsockopt	   = ip_setsockopt,
18391da177e4SLinus Torvalds 	.getsockopt	   = ip_getsockopt,
1840543d9cfeSArnaldo Carvalho de Melo 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1841543d9cfeSArnaldo Carvalho de Melo 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1842ab1e0a13SArnaldo Carvalho de Melo 	.bind_conflict	   = inet_csk_bind_conflict,
18433fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT
18443fdadf7dSDmitry Mishin 	.compat_setsockopt = compat_ip_setsockopt,
18453fdadf7dSDmitry Mishin 	.compat_getsockopt = compat_ip_getsockopt,
18463fdadf7dSDmitry Mishin #endif
18471da177e4SLinus Torvalds };
18484bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific);
18491da177e4SLinus Torvalds 
1850cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1851b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1852cfb6eeb4SYOSHIFUJI Hideaki 	.md5_lookup		= tcp_v4_md5_lookup,
185349a72dfbSAdam Langley 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1854cfb6eeb4SYOSHIFUJI Hideaki 	.md5_add		= tcp_v4_md5_add_func,
1855cfb6eeb4SYOSHIFUJI Hideaki 	.md5_parse		= tcp_v4_parse_md5_keys,
1856cfb6eeb4SYOSHIFUJI Hideaki };
1857b6332e6cSAndrew Morton #endif
1858cfb6eeb4SYOSHIFUJI Hideaki 
18591da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to
18601da177e4SLinus Torvalds  *       sk_alloc() so need not be done here.
18611da177e4SLinus Torvalds  */
18621da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk)
18631da177e4SLinus Torvalds {
18646687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
18651da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
18661da177e4SLinus Torvalds 
18671da177e4SLinus Torvalds 	skb_queue_head_init(&tp->out_of_order_queue);
18681da177e4SLinus Torvalds 	tcp_init_xmit_timers(sk);
18691da177e4SLinus Torvalds 	tcp_prequeue_init(tp);
18701da177e4SLinus Torvalds 
18716687e988SArnaldo Carvalho de Melo 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
18721da177e4SLinus Torvalds 	tp->mdev = TCP_TIMEOUT_INIT;
18731da177e4SLinus Torvalds 
18741da177e4SLinus Torvalds 	/* So many TCP implementations out there (incorrectly) count the
18751da177e4SLinus Torvalds 	 * initial SYN frame in their delayed-ACK and congestion control
18761da177e4SLinus Torvalds 	 * algorithms that we must have the following bandaid to talk
18771da177e4SLinus Torvalds 	 * efficiently to them.  -DaveM
18781da177e4SLinus Torvalds 	 */
18799ad7c049SJerry Chu 	tp->snd_cwnd = TCP_INIT_CWND;
18801da177e4SLinus Torvalds 
18811da177e4SLinus Torvalds 	/* See draft-stevens-tcpca-spec-01 for discussion of the
18821da177e4SLinus Torvalds 	 * initialization of these values.
18831da177e4SLinus Torvalds 	 */
18840b6a05c1SIlpo Järvinen 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
18851da177e4SLinus Torvalds 	tp->snd_cwnd_clamp = ~0;
1886bee7ca9eSWilliam Allen Simpson 	tp->mss_cache = TCP_MSS_DEFAULT;
18871da177e4SLinus Torvalds 
18881da177e4SLinus Torvalds 	tp->reordering = sysctl_tcp_reordering;
18896687e988SArnaldo Carvalho de Melo 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
18901da177e4SLinus Torvalds 
18911da177e4SLinus Torvalds 	sk->sk_state = TCP_CLOSE;
18921da177e4SLinus Torvalds 
18931da177e4SLinus Torvalds 	sk->sk_write_space = sk_stream_write_space;
18941da177e4SLinus Torvalds 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
18951da177e4SLinus Torvalds 
18968292a17aSArnaldo Carvalho de Melo 	icsk->icsk_af_ops = &ipv4_specific;
1897d83d8461SArnaldo Carvalho de Melo 	icsk->icsk_sync_mss = tcp_sync_mss;
1898cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1899cfb6eeb4SYOSHIFUJI Hideaki 	tp->af_specific = &tcp_sock_ipv4_specific;
1900cfb6eeb4SYOSHIFUJI Hideaki #endif
19011da177e4SLinus Torvalds 
1902435cf559SWilliam Allen Simpson 	/* TCP Cookie Transactions */
1903435cf559SWilliam Allen Simpson 	if (sysctl_tcp_cookie_size > 0) {
1904435cf559SWilliam Allen Simpson 		/* Default, cookies without s_data_payload. */
1905435cf559SWilliam Allen Simpson 		tp->cookie_values =
1906435cf559SWilliam Allen Simpson 			kzalloc(sizeof(*tp->cookie_values),
1907435cf559SWilliam Allen Simpson 				sk->sk_allocation);
1908435cf559SWilliam Allen Simpson 		if (tp->cookie_values != NULL)
1909435cf559SWilliam Allen Simpson 			kref_init(&tp->cookie_values->kref);
1910435cf559SWilliam Allen Simpson 	}
1911435cf559SWilliam Allen Simpson 	/* Presumed zeroed, in order of appearance:
1912435cf559SWilliam Allen Simpson 	 *	cookie_in_always, cookie_out_never,
1913435cf559SWilliam Allen Simpson 	 *	s_data_constant, s_data_in, s_data_out
1914435cf559SWilliam Allen Simpson 	 */
19151da177e4SLinus Torvalds 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
19161da177e4SLinus Torvalds 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
19171da177e4SLinus Torvalds 
1918eb4dea58SHerbert Xu 	local_bh_disable();
19191748376bSEric Dumazet 	percpu_counter_inc(&tcp_sockets_allocated);
1920eb4dea58SHerbert Xu 	local_bh_enable();
19211da177e4SLinus Torvalds 
19221da177e4SLinus Torvalds 	return 0;
19231da177e4SLinus Torvalds }
19241da177e4SLinus Torvalds 
19257d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk)
19261da177e4SLinus Torvalds {
19271da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
19281da177e4SLinus Torvalds 
19291da177e4SLinus Torvalds 	tcp_clear_xmit_timers(sk);
19301da177e4SLinus Torvalds 
19316687e988SArnaldo Carvalho de Melo 	tcp_cleanup_congestion_control(sk);
1932317a76f9SStephen Hemminger 
19331da177e4SLinus Torvalds 	/* Cleanup up the write buffer. */
1934fe067e8aSDavid S. Miller 	tcp_write_queue_purge(sk);
19351da177e4SLinus Torvalds 
19361da177e4SLinus Torvalds 	/* Cleans up our, hopefully empty, out_of_order_queue. */
19371da177e4SLinus Torvalds 	__skb_queue_purge(&tp->out_of_order_queue);
19381da177e4SLinus Torvalds 
1939cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1940cfb6eeb4SYOSHIFUJI Hideaki 	/* Clean up the MD5 key list, if any */
1941cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info) {
1942cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_clear_md5_list(sk);
1943cfb6eeb4SYOSHIFUJI Hideaki 		kfree(tp->md5sig_info);
1944cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = NULL;
1945cfb6eeb4SYOSHIFUJI Hideaki 	}
1946cfb6eeb4SYOSHIFUJI Hideaki #endif
1947cfb6eeb4SYOSHIFUJI Hideaki 
19481a2449a8SChris Leech #ifdef CONFIG_NET_DMA
19491a2449a8SChris Leech 	/* Cleans up our sk_async_wait_queue */
19501a2449a8SChris Leech 	__skb_queue_purge(&sk->sk_async_wait_queue);
19511a2449a8SChris Leech #endif
19521a2449a8SChris Leech 
19531da177e4SLinus Torvalds 	/* Clean prequeue, it must be empty really */
19541da177e4SLinus Torvalds 	__skb_queue_purge(&tp->ucopy.prequeue);
19551da177e4SLinus Torvalds 
19561da177e4SLinus Torvalds 	/* Clean up a referenced TCP bind bucket. */
1957463c84b9SArnaldo Carvalho de Melo 	if (inet_csk(sk)->icsk_bind_hash)
1958ab1e0a13SArnaldo Carvalho de Melo 		inet_put_port(sk);
19591da177e4SLinus Torvalds 
19601da177e4SLinus Torvalds 	/*
19611da177e4SLinus Torvalds 	 * If sendmsg cached page exists, toss it.
19621da177e4SLinus Torvalds 	 */
19631da177e4SLinus Torvalds 	if (sk->sk_sndmsg_page) {
19641da177e4SLinus Torvalds 		__free_page(sk->sk_sndmsg_page);
19651da177e4SLinus Torvalds 		sk->sk_sndmsg_page = NULL;
19661da177e4SLinus Torvalds 	}
19671da177e4SLinus Torvalds 
1968435cf559SWilliam Allen Simpson 	/* TCP Cookie Transactions */
1969435cf559SWilliam Allen Simpson 	if (tp->cookie_values != NULL) {
1970435cf559SWilliam Allen Simpson 		kref_put(&tp->cookie_values->kref,
1971435cf559SWilliam Allen Simpson 			 tcp_cookie_values_release);
1972435cf559SWilliam Allen Simpson 		tp->cookie_values = NULL;
1973435cf559SWilliam Allen Simpson 	}
1974435cf559SWilliam Allen Simpson 
19751748376bSEric Dumazet 	percpu_counter_dec(&tcp_sockets_allocated);
19761da177e4SLinus Torvalds }
19771da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock);
19781da177e4SLinus Torvalds 
19791da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
19801da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */
19811da177e4SLinus Torvalds 
19823ab5aee7SEric Dumazet static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
19831da177e4SLinus Torvalds {
19843ab5aee7SEric Dumazet 	return hlist_nulls_empty(head) ? NULL :
19858feaf0c0SArnaldo Carvalho de Melo 		list_entry(head->first, struct inet_timewait_sock, tw_node);
19861da177e4SLinus Torvalds }
19871da177e4SLinus Torvalds 
19888feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
19891da177e4SLinus Torvalds {
19903ab5aee7SEric Dumazet 	return !is_a_nulls(tw->tw_node.next) ?
19913ab5aee7SEric Dumazet 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
19921da177e4SLinus Torvalds }
19931da177e4SLinus Torvalds 
1994a8b690f9STom Herbert /*
1995a8b690f9STom Herbert  * Get next listener socket follow cur.  If cur is NULL, get first socket
1996a8b690f9STom Herbert  * starting from bucket given in st->bucket; when st->bucket is zero the
1997a8b690f9STom Herbert  * very first socket in the hash table is returned.
1998a8b690f9STom Herbert  */
19991da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur)
20001da177e4SLinus Torvalds {
2001463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk;
2002c25eb3bfSEric Dumazet 	struct hlist_nulls_node *node;
20031da177e4SLinus Torvalds 	struct sock *sk = cur;
20045caea4eaSEric Dumazet 	struct inet_listen_hashbucket *ilb;
20051da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2006a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
20071da177e4SLinus Torvalds 
20081da177e4SLinus Torvalds 	if (!sk) {
2009a8b690f9STom Herbert 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
20105caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
2011c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
2012a8b690f9STom Herbert 		st->offset = 0;
20131da177e4SLinus Torvalds 		goto get_sk;
20141da177e4SLinus Torvalds 	}
20155caea4eaSEric Dumazet 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
20161da177e4SLinus Torvalds 	++st->num;
2017a8b690f9STom Herbert 	++st->offset;
20181da177e4SLinus Torvalds 
20191da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
202060236fddSArnaldo Carvalho de Melo 		struct request_sock *req = cur;
20211da177e4SLinus Torvalds 
2022463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(st->syn_wait_sk);
20231da177e4SLinus Torvalds 		req = req->dl_next;
20241da177e4SLinus Torvalds 		while (1) {
20251da177e4SLinus Torvalds 			while (req) {
2026bdccc4caSDaniel Lezcano 				if (req->rsk_ops->family == st->family) {
20271da177e4SLinus Torvalds 					cur = req;
20281da177e4SLinus Torvalds 					goto out;
20291da177e4SLinus Torvalds 				}
20301da177e4SLinus Torvalds 				req = req->dl_next;
20311da177e4SLinus Torvalds 			}
203272a3effaSEric Dumazet 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
20331da177e4SLinus Torvalds 				break;
20341da177e4SLinus Torvalds get_req:
2035463c84b9SArnaldo Carvalho de Melo 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
20361da177e4SLinus Torvalds 		}
20371bde5ac4SEric Dumazet 		sk	  = sk_nulls_next(st->syn_wait_sk);
20381da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_LISTENING;
2039463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20401da177e4SLinus Torvalds 	} else {
2041463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
2042463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2043463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
20441da177e4SLinus Torvalds 			goto start_req;
2045463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20461bde5ac4SEric Dumazet 		sk = sk_nulls_next(sk);
20471da177e4SLinus Torvalds 	}
20481da177e4SLinus Torvalds get_sk:
2049c25eb3bfSEric Dumazet 	sk_nulls_for_each_from(sk, node) {
20508475ef9fSPavel Emelyanov 		if (!net_eq(sock_net(sk), net))
20518475ef9fSPavel Emelyanov 			continue;
20528475ef9fSPavel Emelyanov 		if (sk->sk_family == st->family) {
20531da177e4SLinus Torvalds 			cur = sk;
20541da177e4SLinus Torvalds 			goto out;
20551da177e4SLinus Torvalds 		}
2056463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
2057463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2058463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
20591da177e4SLinus Torvalds start_req:
20601da177e4SLinus Torvalds 			st->uid		= sock_i_uid(sk);
20611da177e4SLinus Torvalds 			st->syn_wait_sk = sk;
20621da177e4SLinus Torvalds 			st->state	= TCP_SEQ_STATE_OPENREQ;
20631da177e4SLinus Torvalds 			st->sbucket	= 0;
20641da177e4SLinus Torvalds 			goto get_req;
20651da177e4SLinus Torvalds 		}
2066463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20671da177e4SLinus Torvalds 	}
20685caea4eaSEric Dumazet 	spin_unlock_bh(&ilb->lock);
2069a8b690f9STom Herbert 	st->offset = 0;
20700f7ff927SArnaldo Carvalho de Melo 	if (++st->bucket < INET_LHTABLE_SIZE) {
20715caea4eaSEric Dumazet 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
20725caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
2073c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
20741da177e4SLinus Torvalds 		goto get_sk;
20751da177e4SLinus Torvalds 	}
20761da177e4SLinus Torvalds 	cur = NULL;
20771da177e4SLinus Torvalds out:
20781da177e4SLinus Torvalds 	return cur;
20791da177e4SLinus Torvalds }
20801da177e4SLinus Torvalds 
20811da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
20821da177e4SLinus Torvalds {
2083a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2084a8b690f9STom Herbert 	void *rc;
2085a8b690f9STom Herbert 
2086a8b690f9STom Herbert 	st->bucket = 0;
2087a8b690f9STom Herbert 	st->offset = 0;
2088a8b690f9STom Herbert 	rc = listening_get_next(seq, NULL);
20891da177e4SLinus Torvalds 
20901da177e4SLinus Torvalds 	while (rc && *pos) {
20911da177e4SLinus Torvalds 		rc = listening_get_next(seq, rc);
20921da177e4SLinus Torvalds 		--*pos;
20931da177e4SLinus Torvalds 	}
20941da177e4SLinus Torvalds 	return rc;
20951da177e4SLinus Torvalds }
20961da177e4SLinus Torvalds 
20976eac5604SAndi Kleen static inline int empty_bucket(struct tcp_iter_state *st)
20986eac5604SAndi Kleen {
20993ab5aee7SEric Dumazet 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
21003ab5aee7SEric Dumazet 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
21016eac5604SAndi Kleen }
21026eac5604SAndi Kleen 
2103a8b690f9STom Herbert /*
2104a8b690f9STom Herbert  * Get first established socket starting from bucket given in st->bucket.
2105a8b690f9STom Herbert  * If st->bucket is zero, the very first socket in the hash is returned.
2106a8b690f9STom Herbert  */
21071da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq)
21081da177e4SLinus Torvalds {
21091da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2110a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
21111da177e4SLinus Torvalds 	void *rc = NULL;
21121da177e4SLinus Torvalds 
2113a8b690f9STom Herbert 	st->offset = 0;
2114a8b690f9STom Herbert 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
21151da177e4SLinus Torvalds 		struct sock *sk;
21163ab5aee7SEric Dumazet 		struct hlist_nulls_node *node;
21178feaf0c0SArnaldo Carvalho de Melo 		struct inet_timewait_sock *tw;
21189db66bdcSEric Dumazet 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
21191da177e4SLinus Torvalds 
21206eac5604SAndi Kleen 		/* Lockless fast path for the common case of empty buckets */
21216eac5604SAndi Kleen 		if (empty_bucket(st))
21226eac5604SAndi Kleen 			continue;
21236eac5604SAndi Kleen 
21249db66bdcSEric Dumazet 		spin_lock_bh(lock);
21253ab5aee7SEric Dumazet 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2126f40c8174SDaniel Lezcano 			if (sk->sk_family != st->family ||
2127878628fbSYOSHIFUJI Hideaki 			    !net_eq(sock_net(sk), net)) {
21281da177e4SLinus Torvalds 				continue;
21291da177e4SLinus Torvalds 			}
21301da177e4SLinus Torvalds 			rc = sk;
21311da177e4SLinus Torvalds 			goto out;
21321da177e4SLinus Torvalds 		}
21331da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_TIME_WAIT;
21348feaf0c0SArnaldo Carvalho de Melo 		inet_twsk_for_each(tw, node,
2135dbca9b27SEric Dumazet 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
213628518fc1SPavel Emelyanov 			if (tw->tw_family != st->family ||
2137878628fbSYOSHIFUJI Hideaki 			    !net_eq(twsk_net(tw), net)) {
21381da177e4SLinus Torvalds 				continue;
21391da177e4SLinus Torvalds 			}
21401da177e4SLinus Torvalds 			rc = tw;
21411da177e4SLinus Torvalds 			goto out;
21421da177e4SLinus Torvalds 		}
21439db66bdcSEric Dumazet 		spin_unlock_bh(lock);
21441da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
21451da177e4SLinus Torvalds 	}
21461da177e4SLinus Torvalds out:
21471da177e4SLinus Torvalds 	return rc;
21481da177e4SLinus Torvalds }
21491da177e4SLinus Torvalds 
21501da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur)
21511da177e4SLinus Torvalds {
21521da177e4SLinus Torvalds 	struct sock *sk = cur;
21538feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw;
21543ab5aee7SEric Dumazet 	struct hlist_nulls_node *node;
21551da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2156a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
21571da177e4SLinus Torvalds 
21581da177e4SLinus Torvalds 	++st->num;
2159a8b690f9STom Herbert 	++st->offset;
21601da177e4SLinus Torvalds 
21611da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
21621da177e4SLinus Torvalds 		tw = cur;
21631da177e4SLinus Torvalds 		tw = tw_next(tw);
21641da177e4SLinus Torvalds get_tw:
2165878628fbSYOSHIFUJI Hideaki 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
21661da177e4SLinus Torvalds 			tw = tw_next(tw);
21671da177e4SLinus Torvalds 		}
21681da177e4SLinus Torvalds 		if (tw) {
21691da177e4SLinus Torvalds 			cur = tw;
21701da177e4SLinus Torvalds 			goto out;
21711da177e4SLinus Torvalds 		}
21729db66bdcSEric Dumazet 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
21731da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
21741da177e4SLinus Torvalds 
21756eac5604SAndi Kleen 		/* Look for next non empty bucket */
2176a8b690f9STom Herbert 		st->offset = 0;
2177f373b53bSEric Dumazet 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
21786eac5604SAndi Kleen 				empty_bucket(st))
21796eac5604SAndi Kleen 			;
2180f373b53bSEric Dumazet 		if (st->bucket > tcp_hashinfo.ehash_mask)
21816eac5604SAndi Kleen 			return NULL;
21826eac5604SAndi Kleen 
21839db66bdcSEric Dumazet 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
21843ab5aee7SEric Dumazet 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
21851da177e4SLinus Torvalds 	} else
21863ab5aee7SEric Dumazet 		sk = sk_nulls_next(sk);
21871da177e4SLinus Torvalds 
21883ab5aee7SEric Dumazet 	sk_nulls_for_each_from(sk, node) {
2189878628fbSYOSHIFUJI Hideaki 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
21901da177e4SLinus Torvalds 			goto found;
21911da177e4SLinus Torvalds 	}
21921da177e4SLinus Torvalds 
21931da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2194dbca9b27SEric Dumazet 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
21951da177e4SLinus Torvalds 	goto get_tw;
21961da177e4SLinus Torvalds found:
21971da177e4SLinus Torvalds 	cur = sk;
21981da177e4SLinus Torvalds out:
21991da177e4SLinus Torvalds 	return cur;
22001da177e4SLinus Torvalds }
22011da177e4SLinus Torvalds 
22021da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos)
22031da177e4SLinus Torvalds {
2204a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2205a8b690f9STom Herbert 	void *rc;
2206a8b690f9STom Herbert 
2207a8b690f9STom Herbert 	st->bucket = 0;
2208a8b690f9STom Herbert 	rc = established_get_first(seq);
22091da177e4SLinus Torvalds 
22101da177e4SLinus Torvalds 	while (rc && pos) {
22111da177e4SLinus Torvalds 		rc = established_get_next(seq, rc);
22121da177e4SLinus Torvalds 		--pos;
22131da177e4SLinus Torvalds 	}
22141da177e4SLinus Torvalds 	return rc;
22151da177e4SLinus Torvalds }
22161da177e4SLinus Torvalds 
22171da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
22181da177e4SLinus Torvalds {
22191da177e4SLinus Torvalds 	void *rc;
22201da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
22211da177e4SLinus Torvalds 
22221da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
22231da177e4SLinus Torvalds 	rc	  = listening_get_idx(seq, &pos);
22241da177e4SLinus Torvalds 
22251da177e4SLinus Torvalds 	if (!rc) {
22261da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
22271da177e4SLinus Torvalds 		rc	  = established_get_idx(seq, pos);
22281da177e4SLinus Torvalds 	}
22291da177e4SLinus Torvalds 
22301da177e4SLinus Torvalds 	return rc;
22311da177e4SLinus Torvalds }
22321da177e4SLinus Torvalds 
2233a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq)
2234a8b690f9STom Herbert {
2235a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2236a8b690f9STom Herbert 	int offset = st->offset;
2237a8b690f9STom Herbert 	int orig_num = st->num;
2238a8b690f9STom Herbert 	void *rc = NULL;
2239a8b690f9STom Herbert 
2240a8b690f9STom Herbert 	switch (st->state) {
2241a8b690f9STom Herbert 	case TCP_SEQ_STATE_OPENREQ:
2242a8b690f9STom Herbert 	case TCP_SEQ_STATE_LISTENING:
2243a8b690f9STom Herbert 		if (st->bucket >= INET_LHTABLE_SIZE)
2244a8b690f9STom Herbert 			break;
2245a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_LISTENING;
2246a8b690f9STom Herbert 		rc = listening_get_next(seq, NULL);
2247a8b690f9STom Herbert 		while (offset-- && rc)
2248a8b690f9STom Herbert 			rc = listening_get_next(seq, rc);
2249a8b690f9STom Herbert 		if (rc)
2250a8b690f9STom Herbert 			break;
2251a8b690f9STom Herbert 		st->bucket = 0;
2252a8b690f9STom Herbert 		/* Fallthrough */
2253a8b690f9STom Herbert 	case TCP_SEQ_STATE_ESTABLISHED:
2254a8b690f9STom Herbert 	case TCP_SEQ_STATE_TIME_WAIT:
2255a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2256a8b690f9STom Herbert 		if (st->bucket > tcp_hashinfo.ehash_mask)
2257a8b690f9STom Herbert 			break;
2258a8b690f9STom Herbert 		rc = established_get_first(seq);
2259a8b690f9STom Herbert 		while (offset-- && rc)
2260a8b690f9STom Herbert 			rc = established_get_next(seq, rc);
2261a8b690f9STom Herbert 	}
2262a8b690f9STom Herbert 
2263a8b690f9STom Herbert 	st->num = orig_num;
2264a8b690f9STom Herbert 
2265a8b690f9STom Herbert 	return rc;
2266a8b690f9STom Herbert }
2267a8b690f9STom Herbert 
22681da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
22691da177e4SLinus Torvalds {
22701da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2271a8b690f9STom Herbert 	void *rc;
2272a8b690f9STom Herbert 
2273a8b690f9STom Herbert 	if (*pos && *pos == st->last_pos) {
2274a8b690f9STom Herbert 		rc = tcp_seek_last_pos(seq);
2275a8b690f9STom Herbert 		if (rc)
2276a8b690f9STom Herbert 			goto out;
2277a8b690f9STom Herbert 	}
2278a8b690f9STom Herbert 
22791da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
22801da177e4SLinus Torvalds 	st->num = 0;
2281a8b690f9STom Herbert 	st->bucket = 0;
2282a8b690f9STom Herbert 	st->offset = 0;
2283a8b690f9STom Herbert 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2284a8b690f9STom Herbert 
2285a8b690f9STom Herbert out:
2286a8b690f9STom Herbert 	st->last_pos = *pos;
2287a8b690f9STom Herbert 	return rc;
22881da177e4SLinus Torvalds }
22891da177e4SLinus Torvalds 
22901da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
22911da177e4SLinus Torvalds {
2292a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
22931da177e4SLinus Torvalds 	void *rc = NULL;
22941da177e4SLinus Torvalds 
22951da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
22961da177e4SLinus Torvalds 		rc = tcp_get_idx(seq, 0);
22971da177e4SLinus Torvalds 		goto out;
22981da177e4SLinus Torvalds 	}
22991da177e4SLinus Torvalds 
23001da177e4SLinus Torvalds 	switch (st->state) {
23011da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
23021da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
23031da177e4SLinus Torvalds 		rc = listening_get_next(seq, v);
23041da177e4SLinus Torvalds 		if (!rc) {
23051da177e4SLinus Torvalds 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2306a8b690f9STom Herbert 			st->bucket = 0;
2307a8b690f9STom Herbert 			st->offset = 0;
23081da177e4SLinus Torvalds 			rc	  = established_get_first(seq);
23091da177e4SLinus Torvalds 		}
23101da177e4SLinus Torvalds 		break;
23111da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
23121da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
23131da177e4SLinus Torvalds 		rc = established_get_next(seq, v);
23141da177e4SLinus Torvalds 		break;
23151da177e4SLinus Torvalds 	}
23161da177e4SLinus Torvalds out:
23171da177e4SLinus Torvalds 	++*pos;
2318a8b690f9STom Herbert 	st->last_pos = *pos;
23191da177e4SLinus Torvalds 	return rc;
23201da177e4SLinus Torvalds }
23211da177e4SLinus Torvalds 
23221da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v)
23231da177e4SLinus Torvalds {
23241da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
23251da177e4SLinus Torvalds 
23261da177e4SLinus Torvalds 	switch (st->state) {
23271da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
23281da177e4SLinus Torvalds 		if (v) {
2329463c84b9SArnaldo Carvalho de Melo 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2330463c84b9SArnaldo Carvalho de Melo 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
23311da177e4SLinus Torvalds 		}
23321da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
23331da177e4SLinus Torvalds 		if (v != SEQ_START_TOKEN)
23345caea4eaSEric Dumazet 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
23351da177e4SLinus Torvalds 		break;
23361da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
23371da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
23381da177e4SLinus Torvalds 		if (v)
23399db66bdcSEric Dumazet 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
23401da177e4SLinus Torvalds 		break;
23411da177e4SLinus Torvalds 	}
23421da177e4SLinus Torvalds }
23431da177e4SLinus Torvalds 
234473cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file)
23451da177e4SLinus Torvalds {
23461da177e4SLinus Torvalds 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
23471da177e4SLinus Torvalds 	struct tcp_iter_state *s;
234852d6f3f1SDenis V. Lunev 	int err;
23491da177e4SLinus Torvalds 
235052d6f3f1SDenis V. Lunev 	err = seq_open_net(inode, file, &afinfo->seq_ops,
235152d6f3f1SDenis V. Lunev 			  sizeof(struct tcp_iter_state));
235252d6f3f1SDenis V. Lunev 	if (err < 0)
235352d6f3f1SDenis V. Lunev 		return err;
2354f40c8174SDaniel Lezcano 
235552d6f3f1SDenis V. Lunev 	s = ((struct seq_file *)file->private_data)->private;
23561da177e4SLinus Torvalds 	s->family		= afinfo->family;
2357a8b690f9STom Herbert 	s->last_pos 		= 0;
2358f40c8174SDaniel Lezcano 	return 0;
2359f40c8174SDaniel Lezcano }
236073cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open);
2361f40c8174SDaniel Lezcano 
23626f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
23631da177e4SLinus Torvalds {
23641da177e4SLinus Torvalds 	int rc = 0;
23651da177e4SLinus Torvalds 	struct proc_dir_entry *p;
23661da177e4SLinus Torvalds 
23679427c4b3SDenis V. Lunev 	afinfo->seq_ops.start		= tcp_seq_start;
23689427c4b3SDenis V. Lunev 	afinfo->seq_ops.next		= tcp_seq_next;
23699427c4b3SDenis V. Lunev 	afinfo->seq_ops.stop		= tcp_seq_stop;
23709427c4b3SDenis V. Lunev 
237184841c3cSDenis V. Lunev 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
237273cb88ecSArjan van de Ven 			     afinfo->seq_fops, afinfo);
237384841c3cSDenis V. Lunev 	if (!p)
23741da177e4SLinus Torvalds 		rc = -ENOMEM;
23751da177e4SLinus Torvalds 	return rc;
23761da177e4SLinus Torvalds }
23774bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register);
23781da177e4SLinus Torvalds 
23796f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
23801da177e4SLinus Torvalds {
23816f8b13bcSDaniel Lezcano 	proc_net_remove(net, afinfo->name);
23821da177e4SLinus Torvalds }
23834bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister);
23841da177e4SLinus Torvalds 
2385cf533ea5SEric Dumazet static void get_openreq4(const struct sock *sk, const struct request_sock *req,
23865e659e4cSPavel Emelyanov 			 struct seq_file *f, int i, int uid, int *len)
23871da177e4SLinus Torvalds {
23882e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
23891da177e4SLinus Torvalds 	int ttd = req->expires - jiffies;
23901da177e4SLinus Torvalds 
23915e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
239271338aa7SDan Rosenberg 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
23931da177e4SLinus Torvalds 		i,
23942e6599cbSArnaldo Carvalho de Melo 		ireq->loc_addr,
2395c720c7e8SEric Dumazet 		ntohs(inet_sk(sk)->inet_sport),
23962e6599cbSArnaldo Carvalho de Melo 		ireq->rmt_addr,
23972e6599cbSArnaldo Carvalho de Melo 		ntohs(ireq->rmt_port),
23981da177e4SLinus Torvalds 		TCP_SYN_RECV,
23991da177e4SLinus Torvalds 		0, 0, /* could print option size, but that is af dependent. */
24001da177e4SLinus Torvalds 		1,    /* timers active (only the expire timer) */
24011da177e4SLinus Torvalds 		jiffies_to_clock_t(ttd),
24021da177e4SLinus Torvalds 		req->retrans,
24031da177e4SLinus Torvalds 		uid,
24041da177e4SLinus Torvalds 		0,  /* non standard timer */
24051da177e4SLinus Torvalds 		0, /* open_requests have no inode */
24061da177e4SLinus Torvalds 		atomic_read(&sk->sk_refcnt),
24075e659e4cSPavel Emelyanov 		req,
24085e659e4cSPavel Emelyanov 		len);
24091da177e4SLinus Torvalds }
24101da177e4SLinus Torvalds 
24115e659e4cSPavel Emelyanov static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
24121da177e4SLinus Torvalds {
24131da177e4SLinus Torvalds 	int timer_active;
24141da177e4SLinus Torvalds 	unsigned long timer_expires;
2415cf533ea5SEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
2416cf4c6bf8SIlpo Järvinen 	const struct inet_connection_sock *icsk = inet_csk(sk);
2417cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
2418c720c7e8SEric Dumazet 	__be32 dest = inet->inet_daddr;
2419c720c7e8SEric Dumazet 	__be32 src = inet->inet_rcv_saddr;
2420c720c7e8SEric Dumazet 	__u16 destp = ntohs(inet->inet_dport);
2421c720c7e8SEric Dumazet 	__u16 srcp = ntohs(inet->inet_sport);
242249d09007SEric Dumazet 	int rx_queue;
24231da177e4SLinus Torvalds 
2424463c84b9SArnaldo Carvalho de Melo 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
24251da177e4SLinus Torvalds 		timer_active	= 1;
2426463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2427463c84b9SArnaldo Carvalho de Melo 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
24281da177e4SLinus Torvalds 		timer_active	= 4;
2429463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2430cf4c6bf8SIlpo Järvinen 	} else if (timer_pending(&sk->sk_timer)) {
24311da177e4SLinus Torvalds 		timer_active	= 2;
2432cf4c6bf8SIlpo Järvinen 		timer_expires	= sk->sk_timer.expires;
24331da177e4SLinus Torvalds 	} else {
24341da177e4SLinus Torvalds 		timer_active	= 0;
24351da177e4SLinus Torvalds 		timer_expires = jiffies;
24361da177e4SLinus Torvalds 	}
24371da177e4SLinus Torvalds 
243849d09007SEric Dumazet 	if (sk->sk_state == TCP_LISTEN)
243949d09007SEric Dumazet 		rx_queue = sk->sk_ack_backlog;
244049d09007SEric Dumazet 	else
244149d09007SEric Dumazet 		/*
244249d09007SEric Dumazet 		 * because we dont lock socket, we might find a transient negative value
244349d09007SEric Dumazet 		 */
244449d09007SEric Dumazet 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
244549d09007SEric Dumazet 
24465e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
244771338aa7SDan Rosenberg 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2448cf4c6bf8SIlpo Järvinen 		i, src, srcp, dest, destp, sk->sk_state,
244947da8ee6SSridhar Samudrala 		tp->write_seq - tp->snd_una,
245049d09007SEric Dumazet 		rx_queue,
24511da177e4SLinus Torvalds 		timer_active,
24521da177e4SLinus Torvalds 		jiffies_to_clock_t(timer_expires - jiffies),
2453463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_retransmits,
2454cf4c6bf8SIlpo Järvinen 		sock_i_uid(sk),
24556687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out,
2456cf4c6bf8SIlpo Järvinen 		sock_i_ino(sk),
2457cf4c6bf8SIlpo Järvinen 		atomic_read(&sk->sk_refcnt), sk,
24587be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_rto),
24597be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2460463c84b9SArnaldo Carvalho de Melo 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
24611da177e4SLinus Torvalds 		tp->snd_cwnd,
24620b6a05c1SIlpo Järvinen 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
24635e659e4cSPavel Emelyanov 		len);
24641da177e4SLinus Torvalds }
24651da177e4SLinus Torvalds 
2466cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw,
24675e659e4cSPavel Emelyanov 			       struct seq_file *f, int i, int *len)
24681da177e4SLinus Torvalds {
246923f33c2dSAl Viro 	__be32 dest, src;
24701da177e4SLinus Torvalds 	__u16 destp, srcp;
24711da177e4SLinus Torvalds 	int ttd = tw->tw_ttd - jiffies;
24721da177e4SLinus Torvalds 
24731da177e4SLinus Torvalds 	if (ttd < 0)
24741da177e4SLinus Torvalds 		ttd = 0;
24751da177e4SLinus Torvalds 
24761da177e4SLinus Torvalds 	dest  = tw->tw_daddr;
24771da177e4SLinus Torvalds 	src   = tw->tw_rcv_saddr;
24781da177e4SLinus Torvalds 	destp = ntohs(tw->tw_dport);
24791da177e4SLinus Torvalds 	srcp  = ntohs(tw->tw_sport);
24801da177e4SLinus Torvalds 
24815e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
248271338aa7SDan Rosenberg 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
24831da177e4SLinus Torvalds 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
24841da177e4SLinus Torvalds 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
24855e659e4cSPavel Emelyanov 		atomic_read(&tw->tw_refcnt), tw, len);
24861da177e4SLinus Torvalds }
24871da177e4SLinus Torvalds 
24881da177e4SLinus Torvalds #define TMPSZ 150
24891da177e4SLinus Torvalds 
24901da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v)
24911da177e4SLinus Torvalds {
24921da177e4SLinus Torvalds 	struct tcp_iter_state *st;
24935e659e4cSPavel Emelyanov 	int len;
24941da177e4SLinus Torvalds 
24951da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
24961da177e4SLinus Torvalds 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
24971da177e4SLinus Torvalds 			   "  sl  local_address rem_address   st tx_queue "
24981da177e4SLinus Torvalds 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
24991da177e4SLinus Torvalds 			   "inode");
25001da177e4SLinus Torvalds 		goto out;
25011da177e4SLinus Torvalds 	}
25021da177e4SLinus Torvalds 	st = seq->private;
25031da177e4SLinus Torvalds 
25041da177e4SLinus Torvalds 	switch (st->state) {
25051da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
25061da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
25075e659e4cSPavel Emelyanov 		get_tcp4_sock(v, seq, st->num, &len);
25081da177e4SLinus Torvalds 		break;
25091da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
25105e659e4cSPavel Emelyanov 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
25111da177e4SLinus Torvalds 		break;
25121da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
25135e659e4cSPavel Emelyanov 		get_timewait4_sock(v, seq, st->num, &len);
25141da177e4SLinus Torvalds 		break;
25151da177e4SLinus Torvalds 	}
25165e659e4cSPavel Emelyanov 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
25171da177e4SLinus Torvalds out:
25181da177e4SLinus Torvalds 	return 0;
25191da177e4SLinus Torvalds }
25201da177e4SLinus Torvalds 
252173cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = {
252273cb88ecSArjan van de Ven 	.owner   = THIS_MODULE,
252373cb88ecSArjan van de Ven 	.open    = tcp_seq_open,
252473cb88ecSArjan van de Ven 	.read    = seq_read,
252573cb88ecSArjan van de Ven 	.llseek  = seq_lseek,
252673cb88ecSArjan van de Ven 	.release = seq_release_net
252773cb88ecSArjan van de Ven };
252873cb88ecSArjan van de Ven 
25291da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = {
25301da177e4SLinus Torvalds 	.name		= "tcp",
25311da177e4SLinus Torvalds 	.family		= AF_INET,
253273cb88ecSArjan van de Ven 	.seq_fops	= &tcp_afinfo_seq_fops,
25339427c4b3SDenis V. Lunev 	.seq_ops	= {
25349427c4b3SDenis V. Lunev 		.show		= tcp4_seq_show,
25359427c4b3SDenis V. Lunev 	},
25361da177e4SLinus Torvalds };
25371da177e4SLinus Torvalds 
25382c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net)
2539757764f6SPavel Emelyanov {
2540757764f6SPavel Emelyanov 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2541757764f6SPavel Emelyanov }
2542757764f6SPavel Emelyanov 
25432c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net)
2544757764f6SPavel Emelyanov {
2545757764f6SPavel Emelyanov 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2546757764f6SPavel Emelyanov }
2547757764f6SPavel Emelyanov 
2548757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = {
2549757764f6SPavel Emelyanov 	.init = tcp4_proc_init_net,
2550757764f6SPavel Emelyanov 	.exit = tcp4_proc_exit_net,
2551757764f6SPavel Emelyanov };
2552757764f6SPavel Emelyanov 
25531da177e4SLinus Torvalds int __init tcp4_proc_init(void)
25541da177e4SLinus Torvalds {
2555757764f6SPavel Emelyanov 	return register_pernet_subsys(&tcp4_net_ops);
25561da177e4SLinus Torvalds }
25571da177e4SLinus Torvalds 
25581da177e4SLinus Torvalds void tcp4_proc_exit(void)
25591da177e4SLinus Torvalds {
2560757764f6SPavel Emelyanov 	unregister_pernet_subsys(&tcp4_net_ops);
25611da177e4SLinus Torvalds }
25621da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
25631da177e4SLinus Torvalds 
2564bf296b12SHerbert Xu struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2565bf296b12SHerbert Xu {
2566b71d1d42SEric Dumazet 	const struct iphdr *iph = skb_gro_network_header(skb);
2567bf296b12SHerbert Xu 
2568bf296b12SHerbert Xu 	switch (skb->ip_summed) {
2569bf296b12SHerbert Xu 	case CHECKSUM_COMPLETE:
257086911732SHerbert Xu 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2571bf296b12SHerbert Xu 				  skb->csum)) {
2572bf296b12SHerbert Xu 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2573bf296b12SHerbert Xu 			break;
2574bf296b12SHerbert Xu 		}
2575bf296b12SHerbert Xu 
2576bf296b12SHerbert Xu 		/* fall through */
2577bf296b12SHerbert Xu 	case CHECKSUM_NONE:
2578bf296b12SHerbert Xu 		NAPI_GRO_CB(skb)->flush = 1;
2579bf296b12SHerbert Xu 		return NULL;
2580bf296b12SHerbert Xu 	}
2581bf296b12SHerbert Xu 
2582bf296b12SHerbert Xu 	return tcp_gro_receive(head, skb);
2583bf296b12SHerbert Xu }
2584bf296b12SHerbert Xu 
2585bf296b12SHerbert Xu int tcp4_gro_complete(struct sk_buff *skb)
2586bf296b12SHerbert Xu {
2587b71d1d42SEric Dumazet 	const struct iphdr *iph = ip_hdr(skb);
2588bf296b12SHerbert Xu 	struct tcphdr *th = tcp_hdr(skb);
2589bf296b12SHerbert Xu 
2590bf296b12SHerbert Xu 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2591bf296b12SHerbert Xu 				  iph->saddr, iph->daddr, 0);
2592bf296b12SHerbert Xu 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2593bf296b12SHerbert Xu 
2594bf296b12SHerbert Xu 	return tcp_gro_complete(skb);
2595bf296b12SHerbert Xu }
2596bf296b12SHerbert Xu 
25971da177e4SLinus Torvalds struct proto tcp_prot = {
25981da177e4SLinus Torvalds 	.name			= "TCP",
25991da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
26001da177e4SLinus Torvalds 	.close			= tcp_close,
26011da177e4SLinus Torvalds 	.connect		= tcp_v4_connect,
26021da177e4SLinus Torvalds 	.disconnect		= tcp_disconnect,
2603463c84b9SArnaldo Carvalho de Melo 	.accept			= inet_csk_accept,
26041da177e4SLinus Torvalds 	.ioctl			= tcp_ioctl,
26051da177e4SLinus Torvalds 	.init			= tcp_v4_init_sock,
26061da177e4SLinus Torvalds 	.destroy		= tcp_v4_destroy_sock,
26071da177e4SLinus Torvalds 	.shutdown		= tcp_shutdown,
26081da177e4SLinus Torvalds 	.setsockopt		= tcp_setsockopt,
26091da177e4SLinus Torvalds 	.getsockopt		= tcp_getsockopt,
26101da177e4SLinus Torvalds 	.recvmsg		= tcp_recvmsg,
26117ba42910SChangli Gao 	.sendmsg		= tcp_sendmsg,
26127ba42910SChangli Gao 	.sendpage		= tcp_sendpage,
26131da177e4SLinus Torvalds 	.backlog_rcv		= tcp_v4_do_rcv,
2614ab1e0a13SArnaldo Carvalho de Melo 	.hash			= inet_hash,
2615ab1e0a13SArnaldo Carvalho de Melo 	.unhash			= inet_unhash,
2616ab1e0a13SArnaldo Carvalho de Melo 	.get_port		= inet_csk_get_port,
26171da177e4SLinus Torvalds 	.enter_memory_pressure	= tcp_enter_memory_pressure,
26181da177e4SLinus Torvalds 	.sockets_allocated	= &tcp_sockets_allocated,
26190a5578cfSArnaldo Carvalho de Melo 	.orphan_count		= &tcp_orphan_count,
26201da177e4SLinus Torvalds 	.memory_allocated	= &tcp_memory_allocated,
26211da177e4SLinus Torvalds 	.memory_pressure	= &tcp_memory_pressure,
26221da177e4SLinus Torvalds 	.sysctl_mem		= sysctl_tcp_mem,
26231da177e4SLinus Torvalds 	.sysctl_wmem		= sysctl_tcp_wmem,
26241da177e4SLinus Torvalds 	.sysctl_rmem		= sysctl_tcp_rmem,
26251da177e4SLinus Torvalds 	.max_header		= MAX_TCP_HEADER,
26261da177e4SLinus Torvalds 	.obj_size		= sizeof(struct tcp_sock),
26273ab5aee7SEric Dumazet 	.slab_flags		= SLAB_DESTROY_BY_RCU,
26286d6ee43eSArnaldo Carvalho de Melo 	.twsk_prot		= &tcp_timewait_sock_ops,
262960236fddSArnaldo Carvalho de Melo 	.rsk_prot		= &tcp_request_sock_ops,
263039d8cda7SPavel Emelyanov 	.h.hashinfo		= &tcp_hashinfo,
26317ba42910SChangli Gao 	.no_autobind		= true,
2632543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
2633543d9cfeSArnaldo Carvalho de Melo 	.compat_setsockopt	= compat_tcp_setsockopt,
2634543d9cfeSArnaldo Carvalho de Melo 	.compat_getsockopt	= compat_tcp_getsockopt,
2635543d9cfeSArnaldo Carvalho de Melo #endif
26361da177e4SLinus Torvalds };
26374bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot);
26381da177e4SLinus Torvalds 
2639046ee902SDenis V. Lunev 
2640046ee902SDenis V. Lunev static int __net_init tcp_sk_init(struct net *net)
2641046ee902SDenis V. Lunev {
2642046ee902SDenis V. Lunev 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2643046ee902SDenis V. Lunev 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2644046ee902SDenis V. Lunev }
2645046ee902SDenis V. Lunev 
2646046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net)
2647046ee902SDenis V. Lunev {
2648046ee902SDenis V. Lunev 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2649b099ce26SEric W. Biederman }
2650b099ce26SEric W. Biederman 
2651b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2652b099ce26SEric W. Biederman {
2653b099ce26SEric W. Biederman 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2654046ee902SDenis V. Lunev }
2655046ee902SDenis V. Lunev 
2656046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = {
2657046ee902SDenis V. Lunev        .init	   = tcp_sk_init,
2658046ee902SDenis V. Lunev        .exit	   = tcp_sk_exit,
2659b099ce26SEric W. Biederman        .exit_batch = tcp_sk_exit_batch,
2660046ee902SDenis V. Lunev };
2661046ee902SDenis V. Lunev 
26629b0f976fSDenis V. Lunev void __init tcp_v4_init(void)
26631da177e4SLinus Torvalds {
26645caea4eaSEric Dumazet 	inet_hashinfo_init(&tcp_hashinfo);
26656a1b3054SEric W. Biederman 	if (register_pernet_subsys(&tcp_sk_ops))
26661da177e4SLinus Torvalds 		panic("Failed to create the TCP control socket.\n");
26671da177e4SLinus Torvalds }
2668