xref: /linux/net/ipv4/tcp_ipv4.c (revision ab6a5bb6b28a970104a34f0f6959b73cf61bdc72)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *		IPv4 specific functions
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  *
131da177e4SLinus Torvalds  *		code split from:
141da177e4SLinus Torvalds  *		linux/ipv4/tcp.c
151da177e4SLinus Torvalds  *		linux/ipv4/tcp_input.c
161da177e4SLinus Torvalds  *		linux/ipv4/tcp_output.c
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *		See tcp.c for author information
191da177e4SLinus Torvalds  *
201da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
211da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
221da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
231da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
241da177e4SLinus Torvalds  */
251da177e4SLinus Torvalds 
261da177e4SLinus Torvalds /*
271da177e4SLinus Torvalds  * Changes:
281da177e4SLinus Torvalds  *		David S. Miller	:	New socket lookup architecture.
291da177e4SLinus Torvalds  *					This code is dedicated to John Dyson.
301da177e4SLinus Torvalds  *		David S. Miller :	Change semantics of established hash,
311da177e4SLinus Torvalds  *					half is devoted to TIME_WAIT sockets
321da177e4SLinus Torvalds  *					and the rest go in the other half.
331da177e4SLinus Torvalds  *		Andi Kleen :		Add support for syncookies and fixed
341da177e4SLinus Torvalds  *					some bugs: ip options weren't passed to
351da177e4SLinus Torvalds  *					the TCP layer, missed a check for an
361da177e4SLinus Torvalds  *					ACK bit.
371da177e4SLinus Torvalds  *		Andi Kleen :		Implemented fast path mtu discovery.
381da177e4SLinus Torvalds  *	     				Fixed many serious bugs in the
3960236fddSArnaldo Carvalho de Melo  *					request_sock handling and moved
401da177e4SLinus Torvalds  *					most of it into the af independent code.
411da177e4SLinus Torvalds  *					Added tail drop and some other bugfixes.
42caa20d9aSStephen Hemminger  *					Added new listen semantics.
431da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
441da177e4SLinus Torvalds  *	Juan Jose Ciarlante:		ip_dynaddr bits
451da177e4SLinus Torvalds  *		Andi Kleen:		various fixes.
461da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
471da177e4SLinus Torvalds  *					coma.
481da177e4SLinus Torvalds  *	Andi Kleen		:	Fix new listen.
491da177e4SLinus Torvalds  *	Andi Kleen		:	Fix accept error reporting.
501da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
511da177e4SLinus Torvalds  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
521da177e4SLinus Torvalds  *					a single port at the same time.
531da177e4SLinus Torvalds  */
541da177e4SLinus Torvalds 
551da177e4SLinus Torvalds 
561da177e4SLinus Torvalds #include <linux/types.h>
571da177e4SLinus Torvalds #include <linux/fcntl.h>
581da177e4SLinus Torvalds #include <linux/module.h>
591da177e4SLinus Torvalds #include <linux/random.h>
601da177e4SLinus Torvalds #include <linux/cache.h>
611da177e4SLinus Torvalds #include <linux/jhash.h>
621da177e4SLinus Torvalds #include <linux/init.h>
631da177e4SLinus Torvalds #include <linux/times.h>
641da177e4SLinus Torvalds 
651da177e4SLinus Torvalds #include <net/icmp.h>
66304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
671da177e4SLinus Torvalds #include <net/tcp.h>
6820380731SArnaldo Carvalho de Melo #include <net/transp_v6.h>
691da177e4SLinus Torvalds #include <net/ipv6.h>
701da177e4SLinus Torvalds #include <net/inet_common.h>
716d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h>
721da177e4SLinus Torvalds #include <net/xfrm.h>
731a2449a8SChris Leech #include <net/netdma.h>
741da177e4SLinus Torvalds 
751da177e4SLinus Torvalds #include <linux/inet.h>
761da177e4SLinus Torvalds #include <linux/ipv6.h>
771da177e4SLinus Torvalds #include <linux/stddef.h>
781da177e4SLinus Torvalds #include <linux/proc_fs.h>
791da177e4SLinus Torvalds #include <linux/seq_file.h>
801da177e4SLinus Torvalds 
81cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h>
82cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h>
83cfb6eeb4SYOSHIFUJI Hideaki 
/*
 * File-scope tunables.  sysctl_tcp_tw_reuse is consulted by
 * tcp_twsk_unique() when deciding whether a TIME-WAIT bucket may be taken
 * over by a new connection.  sysctl_tcp_low_latency is only defined here;
 * it is not read in this part of the file.
 */
84ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly;
85ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly;
861da177e4SLinus Torvalds 
871da177e4SLinus Torvalds /* Check TCP sequence numbers in ICMP packets. */
/* NOTE(review): ICMP_MIN_LENGTH is not referenced in this part of the file. */
881da177e4SLinus Torvalds #define ICMP_MIN_LENGTH 8
891da177e4SLinus Torvalds 
901da177e4SLinus Torvalds /* Socket used for sending RSTs */
/* tcp_v4_send_reset() passes tcp_socket->sk to ip_send_reply(). */
911da177e4SLinus Torvalds static struct socket *tcp_socket;
921da177e4SLinus Torvalds 
/* Forward declaration; the definition appears further down in this file. */
938292a17aSArnaldo Carvalho de Melo void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
941da177e4SLinus Torvalds 
/*
 * Forward declarations of the MD5 signature helpers, compiled only when
 * CONFIG_TCP_MD5SIG is set.  tcp_v4_send_reset() uses both before their
 * definitions, which are not visible in this part of the file.
 */
95cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
967174259eSArnaldo Carvalho de Melo static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
977174259eSArnaldo Carvalho de Melo 						   __be32 addr);
98cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
997174259eSArnaldo Carvalho de Melo 				   __be32 saddr, __be32 daddr,
1007174259eSArnaldo Carvalho de Melo 				   struct tcphdr *th, int protocol,
1017174259eSArnaldo Carvalho de Melo 				   int tcplen);
102cfb6eeb4SYOSHIFUJI Hideaki #endif
103cfb6eeb4SYOSHIFUJI Hideaki 
/*
 * Hash-table state handed to the inet_* helpers used throughout this file
 * (port binding, hash/unhash, connect and ICMP socket lookup).  Only the
 * lhash_* lock, user counter and wait queue are statically initialised
 * here; the remaining members are left zero by this initialiser.
 */
1040f7ff927SArnaldo Carvalho de Melo struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
105e4d91918SIngo Molnar 	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
1060f7ff927SArnaldo Carvalho de Melo 	.lhash_users = ATOMIC_INIT(0),
1070f7ff927SArnaldo Carvalho de Melo 	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
1081da177e4SLinus Torvalds };
1091da177e4SLinus Torvalds 
110463c84b9SArnaldo Carvalho de Melo static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
111463c84b9SArnaldo Carvalho de Melo {
112971af18bSArnaldo Carvalho de Melo 	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
113971af18bSArnaldo Carvalho de Melo 				 inet_csk_bind_conflict);
114463c84b9SArnaldo Carvalho de Melo }
115463c84b9SArnaldo Carvalho de Melo 
1161da177e4SLinus Torvalds static void tcp_v4_hash(struct sock *sk)
1171da177e4SLinus Torvalds {
11881849d10SArnaldo Carvalho de Melo 	inet_hash(&tcp_hashinfo, sk);
1191da177e4SLinus Torvalds }
1201da177e4SLinus Torvalds 
1211da177e4SLinus Torvalds void tcp_unhash(struct sock *sk)
1221da177e4SLinus Torvalds {
12381849d10SArnaldo Carvalho de Melo 	inet_unhash(&tcp_hashinfo, sk);
1241da177e4SLinus Torvalds }
1251da177e4SLinus Torvalds 
126a94f723dSGerrit Renker static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
1271da177e4SLinus Torvalds {
128eddc9ec5SArnaldo Carvalho de Melo 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
129eddc9ec5SArnaldo Carvalho de Melo 					  ip_hdr(skb)->saddr,
1301da177e4SLinus Torvalds 					  skb->h.th->dest,
1311da177e4SLinus Torvalds 					  skb->h.th->source);
1321da177e4SLinus Torvalds }
1331da177e4SLinus Torvalds 
1346d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
1356d6ee43eSArnaldo Carvalho de Melo {
1366d6ee43eSArnaldo Carvalho de Melo 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
1376d6ee43eSArnaldo Carvalho de Melo 	struct tcp_sock *tp = tcp_sk(sk);
1386d6ee43eSArnaldo Carvalho de Melo 
1396d6ee43eSArnaldo Carvalho de Melo 	/* With PAWS, it is safe from the viewpoint
1406d6ee43eSArnaldo Carvalho de Melo 	   of data integrity. Even without PAWS it is safe provided sequence
1416d6ee43eSArnaldo Carvalho de Melo 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
1426d6ee43eSArnaldo Carvalho de Melo 
1436d6ee43eSArnaldo Carvalho de Melo 	   Actually, the idea is close to VJ's one, only timestamp cache is
1446d6ee43eSArnaldo Carvalho de Melo 	   held not per host, but per port pair and TW bucket is used as state
1456d6ee43eSArnaldo Carvalho de Melo 	   holder.
1466d6ee43eSArnaldo Carvalho de Melo 
1476d6ee43eSArnaldo Carvalho de Melo 	   If TW bucket has been already destroyed we fall back to VJ's scheme
1486d6ee43eSArnaldo Carvalho de Melo 	   and use initial timestamp retrieved from peer table.
1496d6ee43eSArnaldo Carvalho de Melo 	 */
1506d6ee43eSArnaldo Carvalho de Melo 	if (tcptw->tw_ts_recent_stamp &&
1516d6ee43eSArnaldo Carvalho de Melo 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
1529d729f72SJames Morris 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
1536d6ee43eSArnaldo Carvalho de Melo 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1546d6ee43eSArnaldo Carvalho de Melo 		if (tp->write_seq == 0)
1556d6ee43eSArnaldo Carvalho de Melo 			tp->write_seq = 1;
1566d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
1576d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1586d6ee43eSArnaldo Carvalho de Melo 		sock_hold(sktw);
1596d6ee43eSArnaldo Carvalho de Melo 		return 1;
1606d6ee43eSArnaldo Carvalho de Melo 	}
1616d6ee43eSArnaldo Carvalho de Melo 
1626d6ee43eSArnaldo Carvalho de Melo 	return 0;
1636d6ee43eSArnaldo Carvalho de Melo }
1646d6ee43eSArnaldo Carvalho de Melo 
1656d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique);
1666d6ee43eSArnaldo Carvalho de Melo 
1671da177e4SLinus Torvalds /* This will initiate an outgoing connection. */
1681da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1691da177e4SLinus Torvalds {
1701da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
1711da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
1721da177e4SLinus Torvalds 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1731da177e4SLinus Torvalds 	struct rtable *rt;
174bada8adcSAl Viro 	__be32 daddr, nexthop;
1751da177e4SLinus Torvalds 	int tmp;
1761da177e4SLinus Torvalds 	int err;
1771da177e4SLinus Torvalds 
1781da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_in))
1791da177e4SLinus Torvalds 		return -EINVAL;
1801da177e4SLinus Torvalds 
1811da177e4SLinus Torvalds 	if (usin->sin_family != AF_INET)
1821da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
1831da177e4SLinus Torvalds 
1841da177e4SLinus Torvalds 	nexthop = daddr = usin->sin_addr.s_addr;
1851da177e4SLinus Torvalds 	if (inet->opt && inet->opt->srr) {
1861da177e4SLinus Torvalds 		if (!daddr)
1871da177e4SLinus Torvalds 			return -EINVAL;
1881da177e4SLinus Torvalds 		nexthop = inet->opt->faddr;
1891da177e4SLinus Torvalds 	}
1901da177e4SLinus Torvalds 
1911da177e4SLinus Torvalds 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
1921da177e4SLinus Torvalds 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
1931da177e4SLinus Torvalds 			       IPPROTO_TCP,
1948eb9086fSDavid S. Miller 			       inet->sport, usin->sin_port, sk, 1);
1951da177e4SLinus Torvalds 	if (tmp < 0)
1961da177e4SLinus Torvalds 		return tmp;
1971da177e4SLinus Torvalds 
1981da177e4SLinus Torvalds 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
1991da177e4SLinus Torvalds 		ip_rt_put(rt);
2001da177e4SLinus Torvalds 		return -ENETUNREACH;
2011da177e4SLinus Torvalds 	}
2021da177e4SLinus Torvalds 
2031da177e4SLinus Torvalds 	if (!inet->opt || !inet->opt->srr)
2041da177e4SLinus Torvalds 		daddr = rt->rt_dst;
2051da177e4SLinus Torvalds 
2061da177e4SLinus Torvalds 	if (!inet->saddr)
2071da177e4SLinus Torvalds 		inet->saddr = rt->rt_src;
2081da177e4SLinus Torvalds 	inet->rcv_saddr = inet->saddr;
2091da177e4SLinus Torvalds 
2101da177e4SLinus Torvalds 	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
2111da177e4SLinus Torvalds 		/* Reset inherited state */
2121da177e4SLinus Torvalds 		tp->rx_opt.ts_recent	   = 0;
2131da177e4SLinus Torvalds 		tp->rx_opt.ts_recent_stamp = 0;
2141da177e4SLinus Torvalds 		tp->write_seq		   = 0;
2151da177e4SLinus Torvalds 	}
2161da177e4SLinus Torvalds 
217295ff7edSArnaldo Carvalho de Melo 	if (tcp_death_row.sysctl_tw_recycle &&
2181da177e4SLinus Torvalds 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
2191da177e4SLinus Torvalds 		struct inet_peer *peer = rt_get_peer(rt);
2207174259eSArnaldo Carvalho de Melo 		/*
2217174259eSArnaldo Carvalho de Melo 		 * VJ's idea. We save last timestamp seen from
2227174259eSArnaldo Carvalho de Melo 		 * the destination in peer table, when entering state
2237174259eSArnaldo Carvalho de Melo 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
2247174259eSArnaldo Carvalho de Melo 		 * when trying new connection.
2251da177e4SLinus Torvalds 		 */
2267174259eSArnaldo Carvalho de Melo 		if (peer != NULL &&
2279d729f72SJames Morris 		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
2281da177e4SLinus Torvalds 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
2291da177e4SLinus Torvalds 			tp->rx_opt.ts_recent = peer->tcp_ts;
2301da177e4SLinus Torvalds 		}
2311da177e4SLinus Torvalds 	}
2321da177e4SLinus Torvalds 
2331da177e4SLinus Torvalds 	inet->dport = usin->sin_port;
2341da177e4SLinus Torvalds 	inet->daddr = daddr;
2351da177e4SLinus Torvalds 
236d83d8461SArnaldo Carvalho de Melo 	inet_csk(sk)->icsk_ext_hdr_len = 0;
2371da177e4SLinus Torvalds 	if (inet->opt)
238d83d8461SArnaldo Carvalho de Melo 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
2391da177e4SLinus Torvalds 
2401da177e4SLinus Torvalds 	tp->rx_opt.mss_clamp = 536;
2411da177e4SLinus Torvalds 
2421da177e4SLinus Torvalds 	/* Socket identity is still unknown (sport may be zero).
2431da177e4SLinus Torvalds 	 * However we set state to SYN-SENT and not releasing socket
2441da177e4SLinus Torvalds 	 * lock select source port, enter ourselves into the hash tables and
2451da177e4SLinus Torvalds 	 * complete initialization after this.
2461da177e4SLinus Torvalds 	 */
2471da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_SYN_SENT);
248a7f5e7f1SArnaldo Carvalho de Melo 	err = inet_hash_connect(&tcp_death_row, sk);
2491da177e4SLinus Torvalds 	if (err)
2501da177e4SLinus Torvalds 		goto failure;
2511da177e4SLinus Torvalds 
2527174259eSArnaldo Carvalho de Melo 	err = ip_route_newports(&rt, IPPROTO_TCP,
2537174259eSArnaldo Carvalho de Melo 				inet->sport, inet->dport, sk);
2541da177e4SLinus Torvalds 	if (err)
2551da177e4SLinus Torvalds 		goto failure;
2561da177e4SLinus Torvalds 
2571da177e4SLinus Torvalds 	/* OK, now commit destination to socket.  */
258bcd76111SHerbert Xu 	sk->sk_gso_type = SKB_GSO_TCPV4;
2596cbb0df7SArnaldo Carvalho de Melo 	sk_setup_caps(sk, &rt->u.dst);
2601da177e4SLinus Torvalds 
2611da177e4SLinus Torvalds 	if (!tp->write_seq)
2621da177e4SLinus Torvalds 		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
2631da177e4SLinus Torvalds 							   inet->daddr,
2641da177e4SLinus Torvalds 							   inet->sport,
2651da177e4SLinus Torvalds 							   usin->sin_port);
2661da177e4SLinus Torvalds 
2671da177e4SLinus Torvalds 	inet->id = tp->write_seq ^ jiffies;
2681da177e4SLinus Torvalds 
2691da177e4SLinus Torvalds 	err = tcp_connect(sk);
2701da177e4SLinus Torvalds 	rt = NULL;
2711da177e4SLinus Torvalds 	if (err)
2721da177e4SLinus Torvalds 		goto failure;
2731da177e4SLinus Torvalds 
2741da177e4SLinus Torvalds 	return 0;
2751da177e4SLinus Torvalds 
2761da177e4SLinus Torvalds failure:
2777174259eSArnaldo Carvalho de Melo 	/*
2787174259eSArnaldo Carvalho de Melo 	 * This unhashes the socket and releases the local port,
2797174259eSArnaldo Carvalho de Melo 	 * if necessary.
2807174259eSArnaldo Carvalho de Melo 	 */
2811da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_CLOSE);
2821da177e4SLinus Torvalds 	ip_rt_put(rt);
2831da177e4SLinus Torvalds 	sk->sk_route_caps = 0;
2841da177e4SLinus Torvalds 	inet->dport = 0;
2851da177e4SLinus Torvalds 	return err;
2861da177e4SLinus Torvalds }
2871da177e4SLinus Torvalds 
2881da177e4SLinus Torvalds /*
2891da177e4SLinus Torvalds  * This routine does path mtu discovery as defined in RFC1191.
2901da177e4SLinus Torvalds  */
29140efc6faSStephen Hemminger static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
2921da177e4SLinus Torvalds {
2931da177e4SLinus Torvalds 	struct dst_entry *dst;
2941da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
2951da177e4SLinus Torvalds 
2961da177e4SLinus Torvalds 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
2971da177e4SLinus Torvalds 	 * send out by Linux are always <576bytes so they should go through
2981da177e4SLinus Torvalds 	 * unfragmented).
2991da177e4SLinus Torvalds 	 */
3001da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN)
3011da177e4SLinus Torvalds 		return;
3021da177e4SLinus Torvalds 
3031da177e4SLinus Torvalds 	/* We don't check in the destentry if pmtu discovery is forbidden
3041da177e4SLinus Torvalds 	 * on this route. We just assume that no packet_to_big packets
3051da177e4SLinus Torvalds 	 * are send back when pmtu discovery is not active.
3061da177e4SLinus Torvalds 	 * There is a small race when the user changes this flag in the
3071da177e4SLinus Torvalds 	 * route, but I think that's acceptable.
3081da177e4SLinus Torvalds 	 */
3091da177e4SLinus Torvalds 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
3101da177e4SLinus Torvalds 		return;
3111da177e4SLinus Torvalds 
3121da177e4SLinus Torvalds 	dst->ops->update_pmtu(dst, mtu);
3131da177e4SLinus Torvalds 
3141da177e4SLinus Torvalds 	/* Something is about to be wrong... Remember soft error
3151da177e4SLinus Torvalds 	 * for the case, if this connection will not able to recover.
3161da177e4SLinus Torvalds 	 */
3171da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
3181da177e4SLinus Torvalds 		sk->sk_err_soft = EMSGSIZE;
3191da177e4SLinus Torvalds 
3201da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
3211da177e4SLinus Torvalds 
3221da177e4SLinus Torvalds 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
323d83d8461SArnaldo Carvalho de Melo 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
3241da177e4SLinus Torvalds 		tcp_sync_mss(sk, mtu);
3251da177e4SLinus Torvalds 
3261da177e4SLinus Torvalds 		/* Resend the TCP packet because it's
3271da177e4SLinus Torvalds 		 * clear that the old packet has been
3281da177e4SLinus Torvalds 		 * dropped. This is the new "fast" path mtu
3291da177e4SLinus Torvalds 		 * discovery.
3301da177e4SLinus Torvalds 		 */
3311da177e4SLinus Torvalds 		tcp_simple_retransmit(sk);
3321da177e4SLinus Torvalds 	} /* else let the usual retransmit timer handle it */
3331da177e4SLinus Torvalds }
3341da177e4SLinus Torvalds 
3351da177e4SLinus Torvalds /*
3361da177e4SLinus Torvalds  * This routine is called by the ICMP module when it gets some
3371da177e4SLinus Torvalds  * sort of error condition.  If err < 0 then the socket should
3381da177e4SLinus Torvalds  * be closed and the error returned to the user.  If err > 0
3391da177e4SLinus Torvalds  * it's just the icmp type << 8 | icmp code.  After adjustment
3401da177e4SLinus Torvalds  * header points to the first 8 bytes of the tcp header.  We need
3411da177e4SLinus Torvalds  * to find the appropriate port.
3421da177e4SLinus Torvalds  *
3431da177e4SLinus Torvalds  * The locking strategy used here is very "optimistic". When
3441da177e4SLinus Torvalds  * someone else accesses the socket the ICMP is just dropped
3451da177e4SLinus Torvalds  * and for some paths there is no check at all.
3461da177e4SLinus Torvalds  * A more general error queue to queue errors for later handling
3471da177e4SLinus Torvalds  * is probably better.
3481da177e4SLinus Torvalds  *
3491da177e4SLinus Torvalds  */
3501da177e4SLinus Torvalds 
3511da177e4SLinus Torvalds void tcp_v4_err(struct sk_buff *skb, u32 info)
3521da177e4SLinus Torvalds {
3531da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr *)skb->data;
3541da177e4SLinus Torvalds 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
3551da177e4SLinus Torvalds 	struct tcp_sock *tp;
3561da177e4SLinus Torvalds 	struct inet_sock *inet;
35788c7664fSArnaldo Carvalho de Melo 	const int type = icmp_hdr(skb)->type;
35888c7664fSArnaldo Carvalho de Melo 	const int code = icmp_hdr(skb)->code;
3591da177e4SLinus Torvalds 	struct sock *sk;
3601da177e4SLinus Torvalds 	__u32 seq;
3611da177e4SLinus Torvalds 	int err;
3621da177e4SLinus Torvalds 
3631da177e4SLinus Torvalds 	if (skb->len < (iph->ihl << 2) + 8) {
3641da177e4SLinus Torvalds 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
3651da177e4SLinus Torvalds 		return;
3661da177e4SLinus Torvalds 	}
3671da177e4SLinus Torvalds 
368e48c414eSArnaldo Carvalho de Melo 	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
369463c84b9SArnaldo Carvalho de Melo 			 th->source, inet_iif(skb));
3701da177e4SLinus Torvalds 	if (!sk) {
3711da177e4SLinus Torvalds 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
3721da177e4SLinus Torvalds 		return;
3731da177e4SLinus Torvalds 	}
3741da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT) {
3759469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
3761da177e4SLinus Torvalds 		return;
3771da177e4SLinus Torvalds 	}
3781da177e4SLinus Torvalds 
3791da177e4SLinus Torvalds 	bh_lock_sock(sk);
3801da177e4SLinus Torvalds 	/* If too many ICMPs get dropped on busy
3811da177e4SLinus Torvalds 	 * servers this needs to be solved differently.
3821da177e4SLinus Torvalds 	 */
3831da177e4SLinus Torvalds 	if (sock_owned_by_user(sk))
3841da177e4SLinus Torvalds 		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
3851da177e4SLinus Torvalds 
3861da177e4SLinus Torvalds 	if (sk->sk_state == TCP_CLOSE)
3871da177e4SLinus Torvalds 		goto out;
3881da177e4SLinus Torvalds 
3891da177e4SLinus Torvalds 	tp = tcp_sk(sk);
3901da177e4SLinus Torvalds 	seq = ntohl(th->seq);
3911da177e4SLinus Torvalds 	if (sk->sk_state != TCP_LISTEN &&
3921da177e4SLinus Torvalds 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
39306ca719fSEric Dumazet 		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
3941da177e4SLinus Torvalds 		goto out;
3951da177e4SLinus Torvalds 	}
3961da177e4SLinus Torvalds 
3971da177e4SLinus Torvalds 	switch (type) {
3981da177e4SLinus Torvalds 	case ICMP_SOURCE_QUENCH:
3991da177e4SLinus Torvalds 		/* Just silently ignore these. */
4001da177e4SLinus Torvalds 		goto out;
4011da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
4021da177e4SLinus Torvalds 		err = EPROTO;
4031da177e4SLinus Torvalds 		break;
4041da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
4051da177e4SLinus Torvalds 		if (code > NR_ICMP_UNREACH)
4061da177e4SLinus Torvalds 			goto out;
4071da177e4SLinus Torvalds 
4081da177e4SLinus Torvalds 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
4091da177e4SLinus Torvalds 			if (!sock_owned_by_user(sk))
4101da177e4SLinus Torvalds 				do_pmtu_discovery(sk, iph, info);
4111da177e4SLinus Torvalds 			goto out;
4121da177e4SLinus Torvalds 		}
4131da177e4SLinus Torvalds 
4141da177e4SLinus Torvalds 		err = icmp_err_convert[code].errno;
4151da177e4SLinus Torvalds 		break;
4161da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4171da177e4SLinus Torvalds 		err = EHOSTUNREACH;
4181da177e4SLinus Torvalds 		break;
4191da177e4SLinus Torvalds 	default:
4201da177e4SLinus Torvalds 		goto out;
4211da177e4SLinus Torvalds 	}
4221da177e4SLinus Torvalds 
4231da177e4SLinus Torvalds 	switch (sk->sk_state) {
42460236fddSArnaldo Carvalho de Melo 		struct request_sock *req, **prev;
4251da177e4SLinus Torvalds 	case TCP_LISTEN:
4261da177e4SLinus Torvalds 		if (sock_owned_by_user(sk))
4271da177e4SLinus Torvalds 			goto out;
4281da177e4SLinus Torvalds 
429463c84b9SArnaldo Carvalho de Melo 		req = inet_csk_search_req(sk, &prev, th->dest,
4301da177e4SLinus Torvalds 					  iph->daddr, iph->saddr);
4311da177e4SLinus Torvalds 		if (!req)
4321da177e4SLinus Torvalds 			goto out;
4331da177e4SLinus Torvalds 
4341da177e4SLinus Torvalds 		/* ICMPs are not backlogged, hence we cannot get
4351da177e4SLinus Torvalds 		   an established socket here.
4361da177e4SLinus Torvalds 		 */
4371da177e4SLinus Torvalds 		BUG_TRAP(!req->sk);
4381da177e4SLinus Torvalds 
4392e6599cbSArnaldo Carvalho de Melo 		if (seq != tcp_rsk(req)->snt_isn) {
4401da177e4SLinus Torvalds 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
4411da177e4SLinus Torvalds 			goto out;
4421da177e4SLinus Torvalds 		}
4431da177e4SLinus Torvalds 
4441da177e4SLinus Torvalds 		/*
4451da177e4SLinus Torvalds 		 * Still in SYN_RECV, just remove it silently.
4461da177e4SLinus Torvalds 		 * There is no good way to pass the error to the newly
4471da177e4SLinus Torvalds 		 * created socket, and POSIX does not want network
4481da177e4SLinus Torvalds 		 * errors returned from accept().
4491da177e4SLinus Torvalds 		 */
450463c84b9SArnaldo Carvalho de Melo 		inet_csk_reqsk_queue_drop(sk, req, prev);
4511da177e4SLinus Torvalds 		goto out;
4521da177e4SLinus Torvalds 
4531da177e4SLinus Torvalds 	case TCP_SYN_SENT:
4541da177e4SLinus Torvalds 	case TCP_SYN_RECV:  /* Cannot happen.
4551da177e4SLinus Torvalds 			       It can f.e. if SYNs crossed.
4561da177e4SLinus Torvalds 			     */
4571da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk)) {
4581da177e4SLinus Torvalds 			sk->sk_err = err;
4591da177e4SLinus Torvalds 
4601da177e4SLinus Torvalds 			sk->sk_error_report(sk);
4611da177e4SLinus Torvalds 
4621da177e4SLinus Torvalds 			tcp_done(sk);
4631da177e4SLinus Torvalds 		} else {
4641da177e4SLinus Torvalds 			sk->sk_err_soft = err;
4651da177e4SLinus Torvalds 		}
4661da177e4SLinus Torvalds 		goto out;
4671da177e4SLinus Torvalds 	}
4681da177e4SLinus Torvalds 
4691da177e4SLinus Torvalds 	/* If we've already connected we will keep trying
4701da177e4SLinus Torvalds 	 * until we time out, or the user gives up.
4711da177e4SLinus Torvalds 	 *
4721da177e4SLinus Torvalds 	 * rfc1122 4.2.3.9 allows to consider as hard errors
4731da177e4SLinus Torvalds 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
4741da177e4SLinus Torvalds 	 * but it is obsoleted by pmtu discovery).
4751da177e4SLinus Torvalds 	 *
4761da177e4SLinus Torvalds 	 * Note, that in modern internet, where routing is unreliable
4771da177e4SLinus Torvalds 	 * and in each dark corner broken firewalls sit, sending random
4781da177e4SLinus Torvalds 	 * errors ordered by their masters even this two messages finally lose
4791da177e4SLinus Torvalds 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
4801da177e4SLinus Torvalds 	 *
4811da177e4SLinus Torvalds 	 * Now we are in compliance with RFCs.
4821da177e4SLinus Torvalds 	 *							--ANK (980905)
4831da177e4SLinus Torvalds 	 */
4841da177e4SLinus Torvalds 
4851da177e4SLinus Torvalds 	inet = inet_sk(sk);
4861da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk) && inet->recverr) {
4871da177e4SLinus Torvalds 		sk->sk_err = err;
4881da177e4SLinus Torvalds 		sk->sk_error_report(sk);
4891da177e4SLinus Torvalds 	} else	{ /* Only an error on timeout */
4901da177e4SLinus Torvalds 		sk->sk_err_soft = err;
4911da177e4SLinus Torvalds 	}
4921da177e4SLinus Torvalds 
4931da177e4SLinus Torvalds out:
4941da177e4SLinus Torvalds 	bh_unlock_sock(sk);
4951da177e4SLinus Torvalds 	sock_put(sk);
4961da177e4SLinus Torvalds }
4971da177e4SLinus Torvalds 
4981da177e4SLinus Torvalds /* This routine computes an IPv4 TCP checksum. */
4998292a17aSArnaldo Carvalho de Melo void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
5001da177e4SLinus Torvalds {
5011da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
5028292a17aSArnaldo Carvalho de Melo 	struct tcphdr *th = skb->h.th;
5031da177e4SLinus Torvalds 
50484fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
505ba7808eaSFrederik Deweerdt 		th->check = ~tcp_v4_check(len, inet->saddr,
506ba7808eaSFrederik Deweerdt 					  inet->daddr, 0);
507ff1dcadbSAl Viro 		skb->csum_offset = offsetof(struct tcphdr, check);
5081da177e4SLinus Torvalds 	} else {
509ba7808eaSFrederik Deweerdt 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
5101da177e4SLinus Torvalds 					 csum_partial((char *)th,
5111da177e4SLinus Torvalds 						      th->doff << 2,
5121da177e4SLinus Torvalds 						      skb->csum));
5131da177e4SLinus Torvalds 	}
5141da177e4SLinus Torvalds }
5151da177e4SLinus Torvalds 
516a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb)
517a430a43dSHerbert Xu {
518eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
519a430a43dSHerbert Xu 	struct tcphdr *th;
520a430a43dSHerbert Xu 
521a430a43dSHerbert Xu 	if (!pskb_may_pull(skb, sizeof(*th)))
522a430a43dSHerbert Xu 		return -EINVAL;
523a430a43dSHerbert Xu 
524eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
525a430a43dSHerbert Xu 	th = skb->h.th;
526a430a43dSHerbert Xu 
527a430a43dSHerbert Xu 	th->check = 0;
528ba7808eaSFrederik Deweerdt 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
529ff1dcadbSAl Viro 	skb->csum_offset = offsetof(struct tcphdr, check);
53084fa7933SPatrick McHardy 	skb->ip_summed = CHECKSUM_PARTIAL;
531a430a43dSHerbert Xu 	return 0;
532a430a43dSHerbert Xu }
533a430a43dSHerbert Xu 
5341da177e4SLinus Torvalds /*
5351da177e4SLinus Torvalds  *	This routine will send an RST to the other tcp.
5361da177e4SLinus Torvalds  *
5371da177e4SLinus Torvalds  *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
5381da177e4SLinus Torvalds  *		      for reset.
5391da177e4SLinus Torvalds  *	Answer: if a packet caused RST, it is not for a socket
5401da177e4SLinus Torvalds  *		existing in our system, if it is matched to a socket,
5411da177e4SLinus Torvalds  *		it is just duplicate segment or bug in other side's TCP.
5421da177e4SLinus Torvalds  *		So that we build reply only basing on parameters
5431da177e4SLinus Torvalds  *		arrived with segment.
5441da177e4SLinus Torvalds  *	Exception: precedence violation. We do not implement it in any case.
5451da177e4SLinus Torvalds  */
5461da177e4SLinus Torvalds 
547cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
5481da177e4SLinus Torvalds {
5491da177e4SLinus Torvalds 	struct tcphdr *th = skb->h.th;
550cfb6eeb4SYOSHIFUJI Hideaki 	struct {
551cfb6eeb4SYOSHIFUJI Hideaki 		struct tcphdr th;
552cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
553714e85beSAl Viro 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
554cfb6eeb4SYOSHIFUJI Hideaki #endif
555cfb6eeb4SYOSHIFUJI Hideaki 	} rep;
5561da177e4SLinus Torvalds 	struct ip_reply_arg arg;
557cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
558cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
559cfb6eeb4SYOSHIFUJI Hideaki #endif
5601da177e4SLinus Torvalds 
5611da177e4SLinus Torvalds 	/* Never send a reset in response to a reset. */
5621da177e4SLinus Torvalds 	if (th->rst)
5631da177e4SLinus Torvalds 		return;
5641da177e4SLinus Torvalds 
5651da177e4SLinus Torvalds 	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
5661da177e4SLinus Torvalds 		return;
5671da177e4SLinus Torvalds 
5681da177e4SLinus Torvalds 	/* Swap the send and the receive. */
569cfb6eeb4SYOSHIFUJI Hideaki 	memset(&rep, 0, sizeof(rep));
570cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.dest   = th->source;
571cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.source = th->dest;
572cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.doff   = sizeof(struct tcphdr) / 4;
573cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.rst    = 1;
5741da177e4SLinus Torvalds 
5751da177e4SLinus Torvalds 	if (th->ack) {
576cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.seq = th->ack_seq;
5771da177e4SLinus Torvalds 	} else {
578cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack = 1;
579cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
5801da177e4SLinus Torvalds 				       skb->len - (th->doff << 2));
5811da177e4SLinus Torvalds 	}
5821da177e4SLinus Torvalds 
5837174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
584cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_base = (unsigned char *)&rep;
585cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_len  = sizeof(rep.th);
586cfb6eeb4SYOSHIFUJI Hideaki 
587cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
588eddc9ec5SArnaldo Carvalho de Melo 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
589cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
590cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
591cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_NOP << 16) |
592cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_MD5SIG << 8) |
593cfb6eeb4SYOSHIFUJI Hideaki 				   TCPOLEN_MD5SIG);
594cfb6eeb4SYOSHIFUJI Hideaki 		/* Update length and the length the header thinks exists */
595cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
596cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len / 4;
597cfb6eeb4SYOSHIFUJI Hideaki 
598cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
599cfb6eeb4SYOSHIFUJI Hideaki 					key,
600eddc9ec5SArnaldo Carvalho de Melo 					ip_hdr(skb)->daddr,
601eddc9ec5SArnaldo Carvalho de Melo 					ip_hdr(skb)->saddr,
602cfb6eeb4SYOSHIFUJI Hideaki 					&rep.th, IPPROTO_TCP,
603cfb6eeb4SYOSHIFUJI Hideaki 					arg.iov[0].iov_len);
604cfb6eeb4SYOSHIFUJI Hideaki 	}
605cfb6eeb4SYOSHIFUJI Hideaki #endif
606eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
607eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
6081da177e4SLinus Torvalds 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
6091da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
6101da177e4SLinus Torvalds 
611cfb6eeb4SYOSHIFUJI Hideaki 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
6121da177e4SLinus Torvalds 
6131da177e4SLinus Torvalds 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
6141da177e4SLinus Torvalds 	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
6151da177e4SLinus Torvalds }
6161da177e4SLinus Torvalds 
6171da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
6181da177e4SLinus Torvalds    outside socket context is ugly, certainly. What can I do?
6191da177e4SLinus Torvalds  */
6201da177e4SLinus Torvalds 
621cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
622cfb6eeb4SYOSHIFUJI Hideaki 			    struct sk_buff *skb, u32 seq, u32 ack,
6231da177e4SLinus Torvalds 			    u32 win, u32 ts)
6241da177e4SLinus Torvalds {
6251da177e4SLinus Torvalds 	struct tcphdr *th = skb->h.th;
6261da177e4SLinus Torvalds 	struct {
6271da177e4SLinus Torvalds 		struct tcphdr th;
628714e85beSAl Viro 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
629cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
630cfb6eeb4SYOSHIFUJI Hideaki 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
631cfb6eeb4SYOSHIFUJI Hideaki #endif
632cfb6eeb4SYOSHIFUJI Hideaki 			];
6331da177e4SLinus Torvalds 	} rep;
6341da177e4SLinus Torvalds 	struct ip_reply_arg arg;
635cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
636cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
637cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key tw_key;
638cfb6eeb4SYOSHIFUJI Hideaki #endif
6391da177e4SLinus Torvalds 
6401da177e4SLinus Torvalds 	memset(&rep.th, 0, sizeof(struct tcphdr));
6417174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
6421da177e4SLinus Torvalds 
6431da177e4SLinus Torvalds 	arg.iov[0].iov_base = (unsigned char *)&rep;
6441da177e4SLinus Torvalds 	arg.iov[0].iov_len  = sizeof(rep.th);
6451da177e4SLinus Torvalds 	if (ts) {
646cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
6471da177e4SLinus Torvalds 				   (TCPOPT_TIMESTAMP << 8) |
6481da177e4SLinus Torvalds 				   TCPOLEN_TIMESTAMP);
649cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[1] = htonl(tcp_time_stamp);
650cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[2] = htonl(ts);
651cb48cfe8SCraig Schlenter 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
6521da177e4SLinus Torvalds 	}
6531da177e4SLinus Torvalds 
6541da177e4SLinus Torvalds 	/* Swap the send and the receive. */
6551da177e4SLinus Torvalds 	rep.th.dest    = th->source;
6561da177e4SLinus Torvalds 	rep.th.source  = th->dest;
6571da177e4SLinus Torvalds 	rep.th.doff    = arg.iov[0].iov_len / 4;
6581da177e4SLinus Torvalds 	rep.th.seq     = htonl(seq);
6591da177e4SLinus Torvalds 	rep.th.ack_seq = htonl(ack);
6601da177e4SLinus Torvalds 	rep.th.ack     = 1;
6611da177e4SLinus Torvalds 	rep.th.window  = htons(win);
6621da177e4SLinus Torvalds 
663cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
664cfb6eeb4SYOSHIFUJI Hideaki 	/*
665cfb6eeb4SYOSHIFUJI Hideaki 	 * The SKB holds an imcoming packet, but may not have a valid ->sk
666cfb6eeb4SYOSHIFUJI Hideaki 	 * pointer. This is especially the case when we're dealing with a
667cfb6eeb4SYOSHIFUJI Hideaki 	 * TIME_WAIT ack, because the sk structure is long gone, and only
668cfb6eeb4SYOSHIFUJI Hideaki 	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
669cfb6eeb4SYOSHIFUJI Hideaki 	 * structure, and we use it in preference.  I believe that (twsk ||
670cfb6eeb4SYOSHIFUJI Hideaki 	 * skb->sk) holds true, but we program defensively.
671cfb6eeb4SYOSHIFUJI Hideaki 	 */
672cfb6eeb4SYOSHIFUJI Hideaki 	if (!twsk && skb->sk) {
673eddc9ec5SArnaldo Carvalho de Melo 		key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
674cfb6eeb4SYOSHIFUJI Hideaki 	} else if (twsk && twsk->tw_md5_keylen) {
675cfb6eeb4SYOSHIFUJI Hideaki 		tw_key.key = twsk->tw_md5_key;
676cfb6eeb4SYOSHIFUJI Hideaki 		tw_key.keylen = twsk->tw_md5_keylen;
677cfb6eeb4SYOSHIFUJI Hideaki 		key = &tw_key;
6787174259eSArnaldo Carvalho de Melo 	} else
679cfb6eeb4SYOSHIFUJI Hideaki 		key = NULL;
680cfb6eeb4SYOSHIFUJI Hideaki 
681cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
682cfb6eeb4SYOSHIFUJI Hideaki 		int offset = (ts) ? 3 : 0;
683cfb6eeb4SYOSHIFUJI Hideaki 
684cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
685cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_NOP << 16) |
686cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_MD5SIG << 8) |
687cfb6eeb4SYOSHIFUJI Hideaki 					  TCPOLEN_MD5SIG);
688cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
689cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len/4;
690cfb6eeb4SYOSHIFUJI Hideaki 
691cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
692cfb6eeb4SYOSHIFUJI Hideaki 					key,
693eddc9ec5SArnaldo Carvalho de Melo 					ip_hdr(skb)->daddr,
694eddc9ec5SArnaldo Carvalho de Melo 					ip_hdr(skb)->saddr,
695cfb6eeb4SYOSHIFUJI Hideaki 					&rep.th, IPPROTO_TCP,
696cfb6eeb4SYOSHIFUJI Hideaki 					arg.iov[0].iov_len);
697cfb6eeb4SYOSHIFUJI Hideaki 	}
698cfb6eeb4SYOSHIFUJI Hideaki #endif
699eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
700eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
7011da177e4SLinus Torvalds 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
7021da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
7031da177e4SLinus Torvalds 
7041da177e4SLinus Torvalds 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
7051da177e4SLinus Torvalds 
7061da177e4SLinus Torvalds 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
7071da177e4SLinus Torvalds }
7081da177e4SLinus Torvalds 
7091da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
7101da177e4SLinus Torvalds {
7118feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw = inet_twsk(sk);
712cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
7131da177e4SLinus Torvalds 
714cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7157174259eSArnaldo Carvalho de Melo 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
7167174259eSArnaldo Carvalho de Melo 			tcptw->tw_ts_recent);
7171da177e4SLinus Torvalds 
7188feaf0c0SArnaldo Carvalho de Melo 	inet_twsk_put(tw);
7191da177e4SLinus Torvalds }
7201da177e4SLinus Torvalds 
7217174259eSArnaldo Carvalho de Melo static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
7227174259eSArnaldo Carvalho de Melo 				  struct request_sock *req)
7231da177e4SLinus Torvalds {
724cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
725cfb6eeb4SYOSHIFUJI Hideaki 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
7261da177e4SLinus Torvalds 			req->ts_recent);
7271da177e4SLinus Torvalds }
7281da177e4SLinus Torvalds 
7291da177e4SLinus Torvalds /*
7301da177e4SLinus Torvalds  *	Send a SYN-ACK after having received an ACK.
73160236fddSArnaldo Carvalho de Melo  *	This still operates on a request_sock only, not on a big
7321da177e4SLinus Torvalds  *	socket.
7331da177e4SLinus Torvalds  */
73460236fddSArnaldo Carvalho de Melo static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
7351da177e4SLinus Torvalds 			      struct dst_entry *dst)
7361da177e4SLinus Torvalds {
7372e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
7381da177e4SLinus Torvalds 	int err = -1;
7391da177e4SLinus Torvalds 	struct sk_buff * skb;
7401da177e4SLinus Torvalds 
7411da177e4SLinus Torvalds 	/* First, grab a route. */
742463c84b9SArnaldo Carvalho de Melo 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
7431da177e4SLinus Torvalds 		goto out;
7441da177e4SLinus Torvalds 
7451da177e4SLinus Torvalds 	skb = tcp_make_synack(sk, dst, req);
7461da177e4SLinus Torvalds 
7471da177e4SLinus Torvalds 	if (skb) {
7481da177e4SLinus Torvalds 		struct tcphdr *th = skb->h.th;
7491da177e4SLinus Torvalds 
750ba7808eaSFrederik Deweerdt 		th->check = tcp_v4_check(skb->len,
7512e6599cbSArnaldo Carvalho de Melo 					 ireq->loc_addr,
7522e6599cbSArnaldo Carvalho de Melo 					 ireq->rmt_addr,
7531da177e4SLinus Torvalds 					 csum_partial((char *)th, skb->len,
7541da177e4SLinus Torvalds 						      skb->csum));
7551da177e4SLinus Torvalds 
7562e6599cbSArnaldo Carvalho de Melo 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
7572e6599cbSArnaldo Carvalho de Melo 					    ireq->rmt_addr,
7582e6599cbSArnaldo Carvalho de Melo 					    ireq->opt);
759b9df3cb8SGerrit Renker 		err = net_xmit_eval(err);
7601da177e4SLinus Torvalds 	}
7611da177e4SLinus Torvalds 
7621da177e4SLinus Torvalds out:
7631da177e4SLinus Torvalds 	dst_release(dst);
7641da177e4SLinus Torvalds 	return err;
7651da177e4SLinus Torvalds }
7661da177e4SLinus Torvalds 
7671da177e4SLinus Torvalds /*
76860236fddSArnaldo Carvalho de Melo  *	IPv4 request_sock destructor.
7691da177e4SLinus Torvalds  */
77060236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req)
7711da177e4SLinus Torvalds {
7722e6599cbSArnaldo Carvalho de Melo 	kfree(inet_rsk(req)->opt);
7731da177e4SLinus Torvalds }
7741da177e4SLinus Torvalds 
#ifdef CONFIG_SYN_COOKIES
/*
 * Log a SYN-flood notice naming the destination port of @skb.  The
 * message is emitted only when more than 60 seconds (in jiffies) have
 * passed since the last one.
 */
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (!time_after(jiffies, (warntime + HZ * 60)))
		return;

	warntime = jiffies;
	printk(KERN_INFO
	       "possible SYN flooding on port %d. Sending cookies.\n",
	       ntohs(skb->h.th->dest));
}
#endif
7881da177e4SLinus Torvalds 
7891da177e4SLinus Torvalds /*
79060236fddSArnaldo Carvalho de Melo  * Save and compile IPv4 options into the request_sock if needed.
7911da177e4SLinus Torvalds  */
79240efc6faSStephen Hemminger static struct ip_options *tcp_v4_save_options(struct sock *sk,
7931da177e4SLinus Torvalds 					      struct sk_buff *skb)
7941da177e4SLinus Torvalds {
7951da177e4SLinus Torvalds 	struct ip_options *opt = &(IPCB(skb)->opt);
7961da177e4SLinus Torvalds 	struct ip_options *dopt = NULL;
7971da177e4SLinus Torvalds 
7981da177e4SLinus Torvalds 	if (opt && opt->optlen) {
7991da177e4SLinus Torvalds 		int opt_size = optlength(opt);
8001da177e4SLinus Torvalds 		dopt = kmalloc(opt_size, GFP_ATOMIC);
8011da177e4SLinus Torvalds 		if (dopt) {
8021da177e4SLinus Torvalds 			if (ip_options_echo(dopt, skb)) {
8031da177e4SLinus Torvalds 				kfree(dopt);
8041da177e4SLinus Torvalds 				dopt = NULL;
8051da177e4SLinus Torvalds 			}
8061da177e4SLinus Torvalds 		}
8071da177e4SLinus Torvalds 	}
8081da177e4SLinus Torvalds 	return dopt;
8091da177e4SLinus Torvalds }
8101da177e4SLinus Torvalds 
811cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
812cfb6eeb4SYOSHIFUJI Hideaki /*
813cfb6eeb4SYOSHIFUJI Hideaki  * RFC2385 MD5 checksumming requires a mapping of
814cfb6eeb4SYOSHIFUJI Hideaki  * IP address->MD5 Key.
815cfb6eeb4SYOSHIFUJI Hideaki  * We need to maintain these in the sk structure.
816cfb6eeb4SYOSHIFUJI Hideaki  */
817cfb6eeb4SYOSHIFUJI Hideaki 
818cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address.  */
8197174259eSArnaldo Carvalho de Melo static struct tcp_md5sig_key *
8207174259eSArnaldo Carvalho de Melo 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
821cfb6eeb4SYOSHIFUJI Hideaki {
822cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
823cfb6eeb4SYOSHIFUJI Hideaki 	int i;
824cfb6eeb4SYOSHIFUJI Hideaki 
825cfb6eeb4SYOSHIFUJI Hideaki 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
826cfb6eeb4SYOSHIFUJI Hideaki 		return NULL;
827cfb6eeb4SYOSHIFUJI Hideaki 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
828cfb6eeb4SYOSHIFUJI Hideaki 		if (tp->md5sig_info->keys4[i].addr == addr)
8297174259eSArnaldo Carvalho de Melo 			return (struct tcp_md5sig_key *)
8307174259eSArnaldo Carvalho de Melo 						&tp->md5sig_info->keys4[i];
831cfb6eeb4SYOSHIFUJI Hideaki 	}
832cfb6eeb4SYOSHIFUJI Hideaki 	return NULL;
833cfb6eeb4SYOSHIFUJI Hideaki }
834cfb6eeb4SYOSHIFUJI Hideaki 
835cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
836cfb6eeb4SYOSHIFUJI Hideaki 					 struct sock *addr_sk)
837cfb6eeb4SYOSHIFUJI Hideaki {
838cfb6eeb4SYOSHIFUJI Hideaki 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
839cfb6eeb4SYOSHIFUJI Hideaki }
840cfb6eeb4SYOSHIFUJI Hideaki 
841cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup);
842cfb6eeb4SYOSHIFUJI Hideaki 
843f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
844cfb6eeb4SYOSHIFUJI Hideaki 						      struct request_sock *req)
845cfb6eeb4SYOSHIFUJI Hideaki {
846cfb6eeb4SYOSHIFUJI Hideaki 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
847cfb6eeb4SYOSHIFUJI Hideaki }
848cfb6eeb4SYOSHIFUJI Hideaki 
849cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */
850cfb6eeb4SYOSHIFUJI Hideaki int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
851cfb6eeb4SYOSHIFUJI Hideaki 		      u8 *newkey, u8 newkeylen)
852cfb6eeb4SYOSHIFUJI Hideaki {
853cfb6eeb4SYOSHIFUJI Hideaki 	/* Add Key to the list */
854cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_md5sig_key *key;
855cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
856cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_md5sig_key *keys;
857cfb6eeb4SYOSHIFUJI Hideaki 
858cfb6eeb4SYOSHIFUJI Hideaki 	key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
859cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
860cfb6eeb4SYOSHIFUJI Hideaki 		/* Pre-existing entry - just update that one. */
861cfb6eeb4SYOSHIFUJI Hideaki 		kfree(key->key);
862cfb6eeb4SYOSHIFUJI Hideaki 		key->key = newkey;
863cfb6eeb4SYOSHIFUJI Hideaki 		key->keylen = newkeylen;
864cfb6eeb4SYOSHIFUJI Hideaki 	} else {
865f6685938SArnaldo Carvalho de Melo 		struct tcp_md5sig_info *md5sig;
866f6685938SArnaldo Carvalho de Melo 
867cfb6eeb4SYOSHIFUJI Hideaki 		if (!tp->md5sig_info) {
868f6685938SArnaldo Carvalho de Melo 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
869f6685938SArnaldo Carvalho de Melo 						  GFP_ATOMIC);
870cfb6eeb4SYOSHIFUJI Hideaki 			if (!tp->md5sig_info) {
871cfb6eeb4SYOSHIFUJI Hideaki 				kfree(newkey);
872cfb6eeb4SYOSHIFUJI Hideaki 				return -ENOMEM;
873cfb6eeb4SYOSHIFUJI Hideaki 			}
874cfb6eeb4SYOSHIFUJI Hideaki 		}
875cfb6eeb4SYOSHIFUJI Hideaki 		if (tcp_alloc_md5sig_pool() == NULL) {
876cfb6eeb4SYOSHIFUJI Hideaki 			kfree(newkey);
877cfb6eeb4SYOSHIFUJI Hideaki 			return -ENOMEM;
878cfb6eeb4SYOSHIFUJI Hideaki 		}
879f6685938SArnaldo Carvalho de Melo 		md5sig = tp->md5sig_info;
880f6685938SArnaldo Carvalho de Melo 
881f6685938SArnaldo Carvalho de Melo 		if (md5sig->alloced4 == md5sig->entries4) {
882f6685938SArnaldo Carvalho de Melo 			keys = kmalloc((sizeof(*keys) *
883f6685938SArnaldo Carvalho de Melo 					(md5sig->entries4 + 1)), GFP_ATOMIC);
884cfb6eeb4SYOSHIFUJI Hideaki 			if (!keys) {
885cfb6eeb4SYOSHIFUJI Hideaki 				kfree(newkey);
886cfb6eeb4SYOSHIFUJI Hideaki 				tcp_free_md5sig_pool();
887cfb6eeb4SYOSHIFUJI Hideaki 				return -ENOMEM;
888cfb6eeb4SYOSHIFUJI Hideaki 			}
889cfb6eeb4SYOSHIFUJI Hideaki 
890f6685938SArnaldo Carvalho de Melo 			if (md5sig->entries4)
891f6685938SArnaldo Carvalho de Melo 				memcpy(keys, md5sig->keys4,
892f6685938SArnaldo Carvalho de Melo 				       sizeof(*keys) * md5sig->entries4);
893cfb6eeb4SYOSHIFUJI Hideaki 
894cfb6eeb4SYOSHIFUJI Hideaki 			/* Free old key list, and reference new one */
895f6685938SArnaldo Carvalho de Melo 			if (md5sig->keys4)
896f6685938SArnaldo Carvalho de Melo 				kfree(md5sig->keys4);
897f6685938SArnaldo Carvalho de Melo 			md5sig->keys4 = keys;
898f6685938SArnaldo Carvalho de Melo 			md5sig->alloced4++;
899cfb6eeb4SYOSHIFUJI Hideaki 		}
900f6685938SArnaldo Carvalho de Melo 		md5sig->entries4++;
901f6685938SArnaldo Carvalho de Melo 		md5sig->keys4[md5sig->entries4 - 1].addr   = addr;
902f6685938SArnaldo Carvalho de Melo 		md5sig->keys4[md5sig->entries4 - 1].key    = newkey;
903f6685938SArnaldo Carvalho de Melo 		md5sig->keys4[md5sig->entries4 - 1].keylen = newkeylen;
904cfb6eeb4SYOSHIFUJI Hideaki 	}
905cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
906cfb6eeb4SYOSHIFUJI Hideaki }
907cfb6eeb4SYOSHIFUJI Hideaki 
908cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_do_add);
909cfb6eeb4SYOSHIFUJI Hideaki 
910cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
911cfb6eeb4SYOSHIFUJI Hideaki 			       u8 *newkey, u8 newkeylen)
912cfb6eeb4SYOSHIFUJI Hideaki {
913cfb6eeb4SYOSHIFUJI Hideaki 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
914cfb6eeb4SYOSHIFUJI Hideaki 				 newkey, newkeylen);
915cfb6eeb4SYOSHIFUJI Hideaki }
916cfb6eeb4SYOSHIFUJI Hideaki 
917cfb6eeb4SYOSHIFUJI Hideaki int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
918cfb6eeb4SYOSHIFUJI Hideaki {
919cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
920cfb6eeb4SYOSHIFUJI Hideaki 	int i;
921cfb6eeb4SYOSHIFUJI Hideaki 
922cfb6eeb4SYOSHIFUJI Hideaki 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
923cfb6eeb4SYOSHIFUJI Hideaki 		if (tp->md5sig_info->keys4[i].addr == addr) {
924cfb6eeb4SYOSHIFUJI Hideaki 			/* Free the key */
925cfb6eeb4SYOSHIFUJI Hideaki 			kfree(tp->md5sig_info->keys4[i].key);
926cfb6eeb4SYOSHIFUJI Hideaki 			tp->md5sig_info->entries4--;
927cfb6eeb4SYOSHIFUJI Hideaki 
928cfb6eeb4SYOSHIFUJI Hideaki 			if (tp->md5sig_info->entries4 == 0) {
929cfb6eeb4SYOSHIFUJI Hideaki 				kfree(tp->md5sig_info->keys4);
930cfb6eeb4SYOSHIFUJI Hideaki 				tp->md5sig_info->keys4 = NULL;
9318228a18dSLeigh Brown 				tp->md5sig_info->alloced4 = 0;
9327174259eSArnaldo Carvalho de Melo 			} else if (tp->md5sig_info->entries4 != i) {
933cfb6eeb4SYOSHIFUJI Hideaki 				/* Need to do some manipulation */
934cfb6eeb4SYOSHIFUJI Hideaki 				memcpy(&tp->md5sig_info->keys4[i],
935cfb6eeb4SYOSHIFUJI Hideaki 				       &tp->md5sig_info->keys4[i+1],
9367174259eSArnaldo Carvalho de Melo 				       (tp->md5sig_info->entries4 - i) *
9377174259eSArnaldo Carvalho de Melo 					sizeof(struct tcp4_md5sig_key));
938cfb6eeb4SYOSHIFUJI Hideaki 			}
939cfb6eeb4SYOSHIFUJI Hideaki 			tcp_free_md5sig_pool();
940cfb6eeb4SYOSHIFUJI Hideaki 			return 0;
941cfb6eeb4SYOSHIFUJI Hideaki 		}
942cfb6eeb4SYOSHIFUJI Hideaki 	}
943cfb6eeb4SYOSHIFUJI Hideaki 	return -ENOENT;
944cfb6eeb4SYOSHIFUJI Hideaki }
945cfb6eeb4SYOSHIFUJI Hideaki 
946cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_do_del);
947cfb6eeb4SYOSHIFUJI Hideaki 
948cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_clear_md5_list(struct sock *sk)
949cfb6eeb4SYOSHIFUJI Hideaki {
950cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
951cfb6eeb4SYOSHIFUJI Hideaki 
952cfb6eeb4SYOSHIFUJI Hideaki 	/* Free each key, then the set of key keys,
953cfb6eeb4SYOSHIFUJI Hideaki 	 * the crypto element, and then decrement our
954cfb6eeb4SYOSHIFUJI Hideaki 	 * hold on the last resort crypto.
955cfb6eeb4SYOSHIFUJI Hideaki 	 */
956cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info->entries4) {
957cfb6eeb4SYOSHIFUJI Hideaki 		int i;
958cfb6eeb4SYOSHIFUJI Hideaki 		for (i = 0; i < tp->md5sig_info->entries4; i++)
959cfb6eeb4SYOSHIFUJI Hideaki 			kfree(tp->md5sig_info->keys4[i].key);
960cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info->entries4 = 0;
961cfb6eeb4SYOSHIFUJI Hideaki 		tcp_free_md5sig_pool();
962cfb6eeb4SYOSHIFUJI Hideaki 	}
963cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info->keys4) {
964cfb6eeb4SYOSHIFUJI Hideaki 		kfree(tp->md5sig_info->keys4);
965cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info->keys4 = NULL;
966cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info->alloced4  = 0;
967cfb6eeb4SYOSHIFUJI Hideaki 	}
968cfb6eeb4SYOSHIFUJI Hideaki }
969cfb6eeb4SYOSHIFUJI Hideaki 
970cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
971cfb6eeb4SYOSHIFUJI Hideaki 				 int optlen)
972cfb6eeb4SYOSHIFUJI Hideaki {
973cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig cmd;
974cfb6eeb4SYOSHIFUJI Hideaki 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
975cfb6eeb4SYOSHIFUJI Hideaki 	u8 *newkey;
976cfb6eeb4SYOSHIFUJI Hideaki 
977cfb6eeb4SYOSHIFUJI Hideaki 	if (optlen < sizeof(cmd))
978cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
979cfb6eeb4SYOSHIFUJI Hideaki 
980cfb6eeb4SYOSHIFUJI Hideaki 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
981cfb6eeb4SYOSHIFUJI Hideaki 		return -EFAULT;
982cfb6eeb4SYOSHIFUJI Hideaki 
983cfb6eeb4SYOSHIFUJI Hideaki 	if (sin->sin_family != AF_INET)
984cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
985cfb6eeb4SYOSHIFUJI Hideaki 
986cfb6eeb4SYOSHIFUJI Hideaki 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
987cfb6eeb4SYOSHIFUJI Hideaki 		if (!tcp_sk(sk)->md5sig_info)
988cfb6eeb4SYOSHIFUJI Hideaki 			return -ENOENT;
989cfb6eeb4SYOSHIFUJI Hideaki 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
990cfb6eeb4SYOSHIFUJI Hideaki 	}
991cfb6eeb4SYOSHIFUJI Hideaki 
992cfb6eeb4SYOSHIFUJI Hideaki 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
993cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
994cfb6eeb4SYOSHIFUJI Hideaki 
995cfb6eeb4SYOSHIFUJI Hideaki 	if (!tcp_sk(sk)->md5sig_info) {
996cfb6eeb4SYOSHIFUJI Hideaki 		struct tcp_sock *tp = tcp_sk(sk);
9977174259eSArnaldo Carvalho de Melo 		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
998cfb6eeb4SYOSHIFUJI Hideaki 
999cfb6eeb4SYOSHIFUJI Hideaki 		if (!p)
1000cfb6eeb4SYOSHIFUJI Hideaki 			return -EINVAL;
1001cfb6eeb4SYOSHIFUJI Hideaki 
1002cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = p;
1003cfb6eeb4SYOSHIFUJI Hideaki 
1004cfb6eeb4SYOSHIFUJI Hideaki 	}
1005cfb6eeb4SYOSHIFUJI Hideaki 
1006f6685938SArnaldo Carvalho de Melo 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
1007cfb6eeb4SYOSHIFUJI Hideaki 	if (!newkey)
1008cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOMEM;
1009cfb6eeb4SYOSHIFUJI Hideaki 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1010cfb6eeb4SYOSHIFUJI Hideaki 				 newkey, cmd.tcpm_keylen);
1011cfb6eeb4SYOSHIFUJI Hideaki }
1012cfb6eeb4SYOSHIFUJI Hideaki 
1013cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1014cfb6eeb4SYOSHIFUJI Hideaki 				   __be32 saddr, __be32 daddr,
1015cfb6eeb4SYOSHIFUJI Hideaki 				   struct tcphdr *th, int protocol,
1016cfb6eeb4SYOSHIFUJI Hideaki 				   int tcplen)
1017cfb6eeb4SYOSHIFUJI Hideaki {
1018cfb6eeb4SYOSHIFUJI Hideaki 	struct scatterlist sg[4];
1019cfb6eeb4SYOSHIFUJI Hideaki 	__u16 data_len;
1020cfb6eeb4SYOSHIFUJI Hideaki 	int block = 0;
10218e5200f5SAl Viro 	__sum16 old_checksum;
1022cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_pool *hp;
1023cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_pseudohdr *bp;
1024cfb6eeb4SYOSHIFUJI Hideaki 	struct hash_desc *desc;
1025cfb6eeb4SYOSHIFUJI Hideaki 	int err;
1026cfb6eeb4SYOSHIFUJI Hideaki 	unsigned int nbytes = 0;
1027cfb6eeb4SYOSHIFUJI Hideaki 
1028cfb6eeb4SYOSHIFUJI Hideaki 	/*
1029cfb6eeb4SYOSHIFUJI Hideaki 	 * Okay, so RFC2385 is turned on for this connection,
1030cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to generate the MD5 hash for the packet now.
1031cfb6eeb4SYOSHIFUJI Hideaki 	 */
1032cfb6eeb4SYOSHIFUJI Hideaki 
1033cfb6eeb4SYOSHIFUJI Hideaki 	hp = tcp_get_md5sig_pool();
1034cfb6eeb4SYOSHIFUJI Hideaki 	if (!hp)
1035cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash_noput;
1036cfb6eeb4SYOSHIFUJI Hideaki 
1037cfb6eeb4SYOSHIFUJI Hideaki 	bp = &hp->md5_blk.ip4;
1038cfb6eeb4SYOSHIFUJI Hideaki 	desc = &hp->md5_desc;
1039cfb6eeb4SYOSHIFUJI Hideaki 
1040cfb6eeb4SYOSHIFUJI Hideaki 	/*
1041cfb6eeb4SYOSHIFUJI Hideaki 	 * 1. the TCP pseudo-header (in the order: source IP address,
1042cfb6eeb4SYOSHIFUJI Hideaki 	 * destination IP address, zero-padded protocol number, and
1043cfb6eeb4SYOSHIFUJI Hideaki 	 * segment length)
1044cfb6eeb4SYOSHIFUJI Hideaki 	 */
1045cfb6eeb4SYOSHIFUJI Hideaki 	bp->saddr = saddr;
1046cfb6eeb4SYOSHIFUJI Hideaki 	bp->daddr = daddr;
1047cfb6eeb4SYOSHIFUJI Hideaki 	bp->pad = 0;
1048cfb6eeb4SYOSHIFUJI Hideaki 	bp->protocol = protocol;
1049cfb6eeb4SYOSHIFUJI Hideaki 	bp->len = htons(tcplen);
1050cfb6eeb4SYOSHIFUJI Hideaki 	sg_set_buf(&sg[block++], bp, sizeof(*bp));
1051cfb6eeb4SYOSHIFUJI Hideaki 	nbytes += sizeof(*bp);
1052cfb6eeb4SYOSHIFUJI Hideaki 
1053cfb6eeb4SYOSHIFUJI Hideaki 	/* 2. the TCP header, excluding options, and assuming a
1054cfb6eeb4SYOSHIFUJI Hideaki 	 * checksum of zero/
1055cfb6eeb4SYOSHIFUJI Hideaki 	 */
1056cfb6eeb4SYOSHIFUJI Hideaki 	old_checksum = th->check;
1057cfb6eeb4SYOSHIFUJI Hideaki 	th->check = 0;
1058cfb6eeb4SYOSHIFUJI Hideaki 	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
1059cfb6eeb4SYOSHIFUJI Hideaki 	nbytes += sizeof(struct tcphdr);
106008dd1a50SDavid S. Miller 
1061cfb6eeb4SYOSHIFUJI Hideaki 	/* 3. the TCP segment data (if any) */
1062cfb6eeb4SYOSHIFUJI Hideaki 	data_len = tcplen - (th->doff << 2);
1063cfb6eeb4SYOSHIFUJI Hideaki 	if (data_len > 0) {
1064cfb6eeb4SYOSHIFUJI Hideaki 		unsigned char *data = (unsigned char *)th + (th->doff << 2);
1065cfb6eeb4SYOSHIFUJI Hideaki 		sg_set_buf(&sg[block++], data, data_len);
1066cfb6eeb4SYOSHIFUJI Hideaki 		nbytes += data_len;
1067cfb6eeb4SYOSHIFUJI Hideaki 	}
1068cfb6eeb4SYOSHIFUJI Hideaki 
1069cfb6eeb4SYOSHIFUJI Hideaki 	/* 4. an independently-specified key or password, known to both
1070cfb6eeb4SYOSHIFUJI Hideaki 	 * TCPs and presumably connection-specific
1071cfb6eeb4SYOSHIFUJI Hideaki 	 */
1072cfb6eeb4SYOSHIFUJI Hideaki 	sg_set_buf(&sg[block++], key->key, key->keylen);
1073cfb6eeb4SYOSHIFUJI Hideaki 	nbytes += key->keylen;
1074cfb6eeb4SYOSHIFUJI Hideaki 
1075cfb6eeb4SYOSHIFUJI Hideaki 	/* Now store the Hash into the packet */
1076cfb6eeb4SYOSHIFUJI Hideaki 	err = crypto_hash_init(desc);
1077cfb6eeb4SYOSHIFUJI Hideaki 	if (err)
1078cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1079cfb6eeb4SYOSHIFUJI Hideaki 	err = crypto_hash_update(desc, sg, nbytes);
1080cfb6eeb4SYOSHIFUJI Hideaki 	if (err)
1081cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1082cfb6eeb4SYOSHIFUJI Hideaki 	err = crypto_hash_final(desc, md5_hash);
1083cfb6eeb4SYOSHIFUJI Hideaki 	if (err)
1084cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1085cfb6eeb4SYOSHIFUJI Hideaki 
1086cfb6eeb4SYOSHIFUJI Hideaki 	/* Reset header, and free up the crypto */
1087cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1088cfb6eeb4SYOSHIFUJI Hideaki 	th->check = old_checksum;
1089cfb6eeb4SYOSHIFUJI Hideaki 
1090cfb6eeb4SYOSHIFUJI Hideaki out:
1091cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
1092cfb6eeb4SYOSHIFUJI Hideaki clear_hash:
1093cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1094cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput:
1095cfb6eeb4SYOSHIFUJI Hideaki 	memset(md5_hash, 0, 16);
1096cfb6eeb4SYOSHIFUJI Hideaki 	goto out;
1097cfb6eeb4SYOSHIFUJI Hideaki }
1098cfb6eeb4SYOSHIFUJI Hideaki 
1099cfb6eeb4SYOSHIFUJI Hideaki int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1100cfb6eeb4SYOSHIFUJI Hideaki 			 struct sock *sk,
1101cfb6eeb4SYOSHIFUJI Hideaki 			 struct dst_entry *dst,
1102cfb6eeb4SYOSHIFUJI Hideaki 			 struct request_sock *req,
1103cfb6eeb4SYOSHIFUJI Hideaki 			 struct tcphdr *th, int protocol,
1104cfb6eeb4SYOSHIFUJI Hideaki 			 int tcplen)
1105cfb6eeb4SYOSHIFUJI Hideaki {
1106cfb6eeb4SYOSHIFUJI Hideaki 	__be32 saddr, daddr;
1107cfb6eeb4SYOSHIFUJI Hideaki 
1108cfb6eeb4SYOSHIFUJI Hideaki 	if (sk) {
1109cfb6eeb4SYOSHIFUJI Hideaki 		saddr = inet_sk(sk)->saddr;
1110cfb6eeb4SYOSHIFUJI Hideaki 		daddr = inet_sk(sk)->daddr;
1111cfb6eeb4SYOSHIFUJI Hideaki 	} else {
1112cfb6eeb4SYOSHIFUJI Hideaki 		struct rtable *rt = (struct rtable *)dst;
1113cfb6eeb4SYOSHIFUJI Hideaki 		BUG_ON(!rt);
1114cfb6eeb4SYOSHIFUJI Hideaki 		saddr = rt->rt_src;
1115cfb6eeb4SYOSHIFUJI Hideaki 		daddr = rt->rt_dst;
1116cfb6eeb4SYOSHIFUJI Hideaki 	}
1117cfb6eeb4SYOSHIFUJI Hideaki 	return tcp_v4_do_calc_md5_hash(md5_hash, key,
1118cfb6eeb4SYOSHIFUJI Hideaki 				       saddr, daddr,
1119cfb6eeb4SYOSHIFUJI Hideaki 				       th, protocol, tcplen);
1120cfb6eeb4SYOSHIFUJI Hideaki }
1121cfb6eeb4SYOSHIFUJI Hideaki 
1122cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
1123cfb6eeb4SYOSHIFUJI Hideaki 
1124cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1125cfb6eeb4SYOSHIFUJI Hideaki {
1126cfb6eeb4SYOSHIFUJI Hideaki 	/*
1127cfb6eeb4SYOSHIFUJI Hideaki 	 * This gets called for each TCP segment that arrives
1128cfb6eeb4SYOSHIFUJI Hideaki 	 * so we want to be efficient.
1129cfb6eeb4SYOSHIFUJI Hideaki 	 * We have 3 drop cases:
1130cfb6eeb4SYOSHIFUJI Hideaki 	 * o No MD5 hash and one expected.
1131cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and we're not expecting one.
1132cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and its wrong.
1133cfb6eeb4SYOSHIFUJI Hideaki 	 */
1134cfb6eeb4SYOSHIFUJI Hideaki 	__u8 *hash_location = NULL;
1135cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *hash_expected;
1136eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1137cfb6eeb4SYOSHIFUJI Hideaki 	struct tcphdr *th = skb->h.th;
1138cfb6eeb4SYOSHIFUJI Hideaki 	int length = (th->doff << 2) - sizeof(struct tcphdr);
1139cfb6eeb4SYOSHIFUJI Hideaki 	int genhash;
1140cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char *ptr;
1141cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char newhash[16];
1142cfb6eeb4SYOSHIFUJI Hideaki 
1143cfb6eeb4SYOSHIFUJI Hideaki 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1144cfb6eeb4SYOSHIFUJI Hideaki 
1145cfb6eeb4SYOSHIFUJI Hideaki 	/*
1146cfb6eeb4SYOSHIFUJI Hideaki 	 * If the TCP option length is less than the TCP_MD5SIG
1147cfb6eeb4SYOSHIFUJI Hideaki 	 * option length, then we can shortcut
1148cfb6eeb4SYOSHIFUJI Hideaki 	 */
1149cfb6eeb4SYOSHIFUJI Hideaki 	if (length < TCPOLEN_MD5SIG) {
1150cfb6eeb4SYOSHIFUJI Hideaki 		if (hash_expected)
1151cfb6eeb4SYOSHIFUJI Hideaki 			return 1;
1152cfb6eeb4SYOSHIFUJI Hideaki 		else
1153cfb6eeb4SYOSHIFUJI Hideaki 			return 0;
1154cfb6eeb4SYOSHIFUJI Hideaki 	}
1155cfb6eeb4SYOSHIFUJI Hideaki 
1156cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, we can't shortcut - we have to grub through the options */
1157cfb6eeb4SYOSHIFUJI Hideaki 	ptr = (unsigned char *)(th + 1);
1158cfb6eeb4SYOSHIFUJI Hideaki 	while (length > 0) {
1159cfb6eeb4SYOSHIFUJI Hideaki 		int opcode = *ptr++;
1160cfb6eeb4SYOSHIFUJI Hideaki 		int opsize;
1161cfb6eeb4SYOSHIFUJI Hideaki 
1162cfb6eeb4SYOSHIFUJI Hideaki 		switch (opcode) {
1163cfb6eeb4SYOSHIFUJI Hideaki 		case TCPOPT_EOL:
1164cfb6eeb4SYOSHIFUJI Hideaki 			goto done_opts;
1165cfb6eeb4SYOSHIFUJI Hideaki 		case TCPOPT_NOP:
1166cfb6eeb4SYOSHIFUJI Hideaki 			length--;
1167cfb6eeb4SYOSHIFUJI Hideaki 			continue;
1168cfb6eeb4SYOSHIFUJI Hideaki 		default:
1169cfb6eeb4SYOSHIFUJI Hideaki 			opsize = *ptr++;
1170cfb6eeb4SYOSHIFUJI Hideaki 			if (opsize < 2)
1171cfb6eeb4SYOSHIFUJI Hideaki 				goto done_opts;
1172cfb6eeb4SYOSHIFUJI Hideaki 			if (opsize > length)
1173cfb6eeb4SYOSHIFUJI Hideaki 				goto done_opts;
1174cfb6eeb4SYOSHIFUJI Hideaki 
1175cfb6eeb4SYOSHIFUJI Hideaki 			if (opcode == TCPOPT_MD5SIG) {
1176cfb6eeb4SYOSHIFUJI Hideaki 				hash_location = ptr;
1177cfb6eeb4SYOSHIFUJI Hideaki 				goto done_opts;
1178cfb6eeb4SYOSHIFUJI Hideaki 			}
1179cfb6eeb4SYOSHIFUJI Hideaki 		}
1180cfb6eeb4SYOSHIFUJI Hideaki 		ptr += opsize-2;
1181cfb6eeb4SYOSHIFUJI Hideaki 		length -= opsize;
1182cfb6eeb4SYOSHIFUJI Hideaki 	}
1183cfb6eeb4SYOSHIFUJI Hideaki done_opts:
1184cfb6eeb4SYOSHIFUJI Hideaki 	/* We've parsed the options - do we have a hash? */
1185cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && !hash_location)
1186cfb6eeb4SYOSHIFUJI Hideaki 		return 0;
1187cfb6eeb4SYOSHIFUJI Hideaki 
1188cfb6eeb4SYOSHIFUJI Hideaki 	if (hash_expected && !hash_location) {
1189a9fc00ccSLeigh Brown 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
1190cfb6eeb4SYOSHIFUJI Hideaki 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1191cfb6eeb4SYOSHIFUJI Hideaki 			       NIPQUAD(iph->saddr), ntohs(th->source),
1192cfb6eeb4SYOSHIFUJI Hideaki 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1193cfb6eeb4SYOSHIFUJI Hideaki 		return 1;
1194cfb6eeb4SYOSHIFUJI Hideaki 	}
1195cfb6eeb4SYOSHIFUJI Hideaki 
1196cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && hash_location) {
11977174259eSArnaldo Carvalho de Melo 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
1198cfb6eeb4SYOSHIFUJI Hideaki 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1199cfb6eeb4SYOSHIFUJI Hideaki 			       NIPQUAD(iph->saddr), ntohs(th->source),
1200cfb6eeb4SYOSHIFUJI Hideaki 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1201cfb6eeb4SYOSHIFUJI Hideaki 		return 1;
1202cfb6eeb4SYOSHIFUJI Hideaki 	}
1203cfb6eeb4SYOSHIFUJI Hideaki 
1204cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, so this is hash_expected and hash_location -
1205cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to calculate the checksum.
1206cfb6eeb4SYOSHIFUJI Hideaki 	 */
1207cfb6eeb4SYOSHIFUJI Hideaki 	genhash = tcp_v4_do_calc_md5_hash(newhash,
1208cfb6eeb4SYOSHIFUJI Hideaki 					  hash_expected,
1209cfb6eeb4SYOSHIFUJI Hideaki 					  iph->saddr, iph->daddr,
1210cfb6eeb4SYOSHIFUJI Hideaki 					  th, sk->sk_protocol,
1211cfb6eeb4SYOSHIFUJI Hideaki 					  skb->len);
1212cfb6eeb4SYOSHIFUJI Hideaki 
1213cfb6eeb4SYOSHIFUJI Hideaki 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1214cfb6eeb4SYOSHIFUJI Hideaki 		if (net_ratelimit()) {
1215cfb6eeb4SYOSHIFUJI Hideaki 			printk(KERN_INFO "MD5 Hash failed for "
1216cfb6eeb4SYOSHIFUJI Hideaki 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
1217cfb6eeb4SYOSHIFUJI Hideaki 			       NIPQUAD(iph->saddr), ntohs(th->source),
1218cfb6eeb4SYOSHIFUJI Hideaki 			       NIPQUAD(iph->daddr), ntohs(th->dest),
1219cfb6eeb4SYOSHIFUJI Hideaki 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1220cfb6eeb4SYOSHIFUJI Hideaki 		}
1221cfb6eeb4SYOSHIFUJI Hideaki 		return 1;
1222cfb6eeb4SYOSHIFUJI Hideaki 	}
1223cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
1224cfb6eeb4SYOSHIFUJI Hideaki }
1225cfb6eeb4SYOSHIFUJI Hideaki 
1226cfb6eeb4SYOSHIFUJI Hideaki #endif
1227cfb6eeb4SYOSHIFUJI Hideaki 
122872a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = {
12291da177e4SLinus Torvalds 	.family		=	PF_INET,
12302e6599cbSArnaldo Carvalho de Melo 	.obj_size	=	sizeof(struct tcp_request_sock),
12311da177e4SLinus Torvalds 	.rtx_syn_ack	=	tcp_v4_send_synack,
123260236fddSArnaldo Carvalho de Melo 	.send_ack	=	tcp_v4_reqsk_send_ack,
123360236fddSArnaldo Carvalho de Melo 	.destructor	=	tcp_v4_reqsk_destructor,
12341da177e4SLinus Torvalds 	.send_reset	=	tcp_v4_send_reset,
12351da177e4SLinus Torvalds };
12361da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4-specific request sock hooks; only the MD5 key lookup is provided.
 * Only built when TCP MD5 signature support is configured.
 */
static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup = tcp_v4_reqsk_md5_lookup,
};
#endif /* CONFIG_TCP_MD5SIG */
1242cfb6eeb4SYOSHIFUJI Hideaki 
12436d6ee43eSArnaldo Carvalho de Melo static struct timewait_sock_ops tcp_timewait_sock_ops = {
12446d6ee43eSArnaldo Carvalho de Melo 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
12456d6ee43eSArnaldo Carvalho de Melo 	.twsk_unique	= tcp_twsk_unique,
1246cfb6eeb4SYOSHIFUJI Hideaki 	.twsk_destructor= tcp_twsk_destructor,
12476d6ee43eSArnaldo Carvalho de Melo };
12486d6ee43eSArnaldo Carvalho de Melo 
12491da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
12501da177e4SLinus Torvalds {
12512e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
12521da177e4SLinus Torvalds 	struct tcp_options_received tmp_opt;
125360236fddSArnaldo Carvalho de Melo 	struct request_sock *req;
1254eddc9ec5SArnaldo Carvalho de Melo 	__be32 saddr = ip_hdr(skb)->saddr;
1255eddc9ec5SArnaldo Carvalho de Melo 	__be32 daddr = ip_hdr(skb)->daddr;
12561da177e4SLinus Torvalds 	__u32 isn = TCP_SKB_CB(skb)->when;
12571da177e4SLinus Torvalds 	struct dst_entry *dst = NULL;
12581da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
12591da177e4SLinus Torvalds 	int want_cookie = 0;
12601da177e4SLinus Torvalds #else
12611da177e4SLinus Torvalds #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
12621da177e4SLinus Torvalds #endif
12631da177e4SLinus Torvalds 
12641da177e4SLinus Torvalds 	/* Never answer to SYNs send to broadcast or multicast */
12651da177e4SLinus Torvalds 	if (((struct rtable *)skb->dst)->rt_flags &
12661da177e4SLinus Torvalds 	    (RTCF_BROADCAST | RTCF_MULTICAST))
12671da177e4SLinus Torvalds 		goto drop;
12681da177e4SLinus Torvalds 
12691da177e4SLinus Torvalds 	/* TW buckets are converted to open requests without
12701da177e4SLinus Torvalds 	 * limitations, they conserve resources and peer is
12711da177e4SLinus Torvalds 	 * evidently real one.
12721da177e4SLinus Torvalds 	 */
1273463c84b9SArnaldo Carvalho de Melo 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
12741da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
12751da177e4SLinus Torvalds 		if (sysctl_tcp_syncookies) {
12761da177e4SLinus Torvalds 			want_cookie = 1;
12771da177e4SLinus Torvalds 		} else
12781da177e4SLinus Torvalds #endif
12791da177e4SLinus Torvalds 		goto drop;
12801da177e4SLinus Torvalds 	}
12811da177e4SLinus Torvalds 
12821da177e4SLinus Torvalds 	/* Accept backlog is full. If we have already queued enough
12831da177e4SLinus Torvalds 	 * of warm entries in syn queue, drop request. It is better than
12841da177e4SLinus Torvalds 	 * clogging syn queue with openreqs with exponentially increasing
12851da177e4SLinus Torvalds 	 * timeout.
12861da177e4SLinus Torvalds 	 */
1287463c84b9SArnaldo Carvalho de Melo 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
12881da177e4SLinus Torvalds 		goto drop;
12891da177e4SLinus Torvalds 
129060236fddSArnaldo Carvalho de Melo 	req = reqsk_alloc(&tcp_request_sock_ops);
12911da177e4SLinus Torvalds 	if (!req)
12921da177e4SLinus Torvalds 		goto drop;
12931da177e4SLinus Torvalds 
1294cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1295cfb6eeb4SYOSHIFUJI Hideaki 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1296cfb6eeb4SYOSHIFUJI Hideaki #endif
1297cfb6eeb4SYOSHIFUJI Hideaki 
12981da177e4SLinus Torvalds 	tcp_clear_options(&tmp_opt);
12991da177e4SLinus Torvalds 	tmp_opt.mss_clamp = 536;
13001da177e4SLinus Torvalds 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
13011da177e4SLinus Torvalds 
13021da177e4SLinus Torvalds 	tcp_parse_options(skb, &tmp_opt, 0);
13031da177e4SLinus Torvalds 
13041da177e4SLinus Torvalds 	if (want_cookie) {
13051da177e4SLinus Torvalds 		tcp_clear_options(&tmp_opt);
13061da177e4SLinus Torvalds 		tmp_opt.saw_tstamp = 0;
13071da177e4SLinus Torvalds 	}
13081da177e4SLinus Torvalds 
13091da177e4SLinus Torvalds 	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
13101da177e4SLinus Torvalds 		/* Some OSes (unknown ones, but I see them on web server, which
13111da177e4SLinus Torvalds 		 * contains information interesting only for windows'
13121da177e4SLinus Torvalds 		 * users) do not send their stamp in SYN. It is easy case.
13131da177e4SLinus Torvalds 		 * We simply do not advertise TS support.
13141da177e4SLinus Torvalds 		 */
13151da177e4SLinus Torvalds 		tmp_opt.saw_tstamp = 0;
13161da177e4SLinus Torvalds 		tmp_opt.tstamp_ok  = 0;
13171da177e4SLinus Torvalds 	}
13181da177e4SLinus Torvalds 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
13191da177e4SLinus Torvalds 
13201da177e4SLinus Torvalds 	tcp_openreq_init(req, &tmp_opt, skb);
13211da177e4SLinus Torvalds 
13224237c75cSVenkat Yekkirala 	if (security_inet_conn_request(sk, skb, req))
13234237c75cSVenkat Yekkirala 		goto drop_and_free;
13244237c75cSVenkat Yekkirala 
13252e6599cbSArnaldo Carvalho de Melo 	ireq = inet_rsk(req);
13262e6599cbSArnaldo Carvalho de Melo 	ireq->loc_addr = daddr;
13272e6599cbSArnaldo Carvalho de Melo 	ireq->rmt_addr = saddr;
13282e6599cbSArnaldo Carvalho de Melo 	ireq->opt = tcp_v4_save_options(sk, skb);
13291da177e4SLinus Torvalds 	if (!want_cookie)
13301da177e4SLinus Torvalds 		TCP_ECN_create_request(req, skb->h.th);
13311da177e4SLinus Torvalds 
13321da177e4SLinus Torvalds 	if (want_cookie) {
13331da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
13341da177e4SLinus Torvalds 		syn_flood_warning(skb);
13351da177e4SLinus Torvalds #endif
13361da177e4SLinus Torvalds 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
13371da177e4SLinus Torvalds 	} else if (!isn) {
13381da177e4SLinus Torvalds 		struct inet_peer *peer = NULL;
13391da177e4SLinus Torvalds 
13401da177e4SLinus Torvalds 		/* VJ's idea. We save last timestamp seen
13411da177e4SLinus Torvalds 		 * from the destination in peer table, when entering
13421da177e4SLinus Torvalds 		 * state TIME-WAIT, and check against it before
13431da177e4SLinus Torvalds 		 * accepting new connection request.
13441da177e4SLinus Torvalds 		 *
13451da177e4SLinus Torvalds 		 * If "isn" is not zero, this request hit alive
13461da177e4SLinus Torvalds 		 * timewait bucket, so that all the necessary checks
13471da177e4SLinus Torvalds 		 * are made in the function processing timewait state.
13481da177e4SLinus Torvalds 		 */
13491da177e4SLinus Torvalds 		if (tmp_opt.saw_tstamp &&
1350295ff7edSArnaldo Carvalho de Melo 		    tcp_death_row.sysctl_tw_recycle &&
1351463c84b9SArnaldo Carvalho de Melo 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
13521da177e4SLinus Torvalds 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
13531da177e4SLinus Torvalds 		    peer->v4daddr == saddr) {
13549d729f72SJames Morris 			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
13551da177e4SLinus Torvalds 			    (s32)(peer->tcp_ts - req->ts_recent) >
13561da177e4SLinus Torvalds 							TCP_PAWS_WINDOW) {
13571da177e4SLinus Torvalds 				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
13581da177e4SLinus Torvalds 				dst_release(dst);
13591da177e4SLinus Torvalds 				goto drop_and_free;
13601da177e4SLinus Torvalds 			}
13611da177e4SLinus Torvalds 		}
13621da177e4SLinus Torvalds 		/* Kill the following clause, if you dislike this way. */
13631da177e4SLinus Torvalds 		else if (!sysctl_tcp_syncookies &&
1364463c84b9SArnaldo Carvalho de Melo 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
13651da177e4SLinus Torvalds 			  (sysctl_max_syn_backlog >> 2)) &&
13661da177e4SLinus Torvalds 			 (!peer || !peer->tcp_ts_stamp) &&
13671da177e4SLinus Torvalds 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
13681da177e4SLinus Torvalds 			/* Without syncookies last quarter of
13691da177e4SLinus Torvalds 			 * backlog is filled with destinations,
13701da177e4SLinus Torvalds 			 * proven to be alive.
13711da177e4SLinus Torvalds 			 * It means that we continue to communicate
13721da177e4SLinus Torvalds 			 * to destinations, already remembered
13731da177e4SLinus Torvalds 			 * to the moment of synflood.
13741da177e4SLinus Torvalds 			 */
137564ce2073SPatrick McHardy 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
137664ce2073SPatrick McHardy 				       "request from %u.%u.%u.%u/%u\n",
13771da177e4SLinus Torvalds 				       NIPQUAD(saddr),
137864ce2073SPatrick McHardy 				       ntohs(skb->h.th->source));
13791da177e4SLinus Torvalds 			dst_release(dst);
13801da177e4SLinus Torvalds 			goto drop_and_free;
13811da177e4SLinus Torvalds 		}
13821da177e4SLinus Torvalds 
1383a94f723dSGerrit Renker 		isn = tcp_v4_init_sequence(skb);
13841da177e4SLinus Torvalds 	}
13852e6599cbSArnaldo Carvalho de Melo 	tcp_rsk(req)->snt_isn = isn;
13861da177e4SLinus Torvalds 
13871da177e4SLinus Torvalds 	if (tcp_v4_send_synack(sk, req, dst))
13881da177e4SLinus Torvalds 		goto drop_and_free;
13891da177e4SLinus Torvalds 
13901da177e4SLinus Torvalds 	if (want_cookie) {
139160236fddSArnaldo Carvalho de Melo 		reqsk_free(req);
13921da177e4SLinus Torvalds 	} else {
13933f421baaSArnaldo Carvalho de Melo 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
13941da177e4SLinus Torvalds 	}
13951da177e4SLinus Torvalds 	return 0;
13961da177e4SLinus Torvalds 
13971da177e4SLinus Torvalds drop_and_free:
139860236fddSArnaldo Carvalho de Melo 	reqsk_free(req);
13991da177e4SLinus Torvalds drop:
14001da177e4SLinus Torvalds 	return 0;
14011da177e4SLinus Torvalds }
14021da177e4SLinus Torvalds 
14031da177e4SLinus Torvalds 
14041da177e4SLinus Torvalds /*
14051da177e4SLinus Torvalds  * The three way handshake has completed - we got a valid synack -
14061da177e4SLinus Torvalds  * now create the new socket.
14071da177e4SLinus Torvalds  */
14081da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
140960236fddSArnaldo Carvalho de Melo 				  struct request_sock *req,
14101da177e4SLinus Torvalds 				  struct dst_entry *dst)
14111da177e4SLinus Torvalds {
14122e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
14131da177e4SLinus Torvalds 	struct inet_sock *newinet;
14141da177e4SLinus Torvalds 	struct tcp_sock *newtp;
14151da177e4SLinus Torvalds 	struct sock *newsk;
1416cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1417cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
1418cfb6eeb4SYOSHIFUJI Hideaki #endif
14191da177e4SLinus Torvalds 
14201da177e4SLinus Torvalds 	if (sk_acceptq_is_full(sk))
14211da177e4SLinus Torvalds 		goto exit_overflow;
14221da177e4SLinus Torvalds 
1423463c84b9SArnaldo Carvalho de Melo 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
14241da177e4SLinus Torvalds 		goto exit;
14251da177e4SLinus Torvalds 
14261da177e4SLinus Torvalds 	newsk = tcp_create_openreq_child(sk, req, skb);
14271da177e4SLinus Torvalds 	if (!newsk)
14281da177e4SLinus Torvalds 		goto exit;
14291da177e4SLinus Torvalds 
1430bcd76111SHerbert Xu 	newsk->sk_gso_type = SKB_GSO_TCPV4;
14316cbb0df7SArnaldo Carvalho de Melo 	sk_setup_caps(newsk, dst);
14321da177e4SLinus Torvalds 
14331da177e4SLinus Torvalds 	newtp		      = tcp_sk(newsk);
14341da177e4SLinus Torvalds 	newinet		      = inet_sk(newsk);
14352e6599cbSArnaldo Carvalho de Melo 	ireq		      = inet_rsk(req);
14362e6599cbSArnaldo Carvalho de Melo 	newinet->daddr	      = ireq->rmt_addr;
14372e6599cbSArnaldo Carvalho de Melo 	newinet->rcv_saddr    = ireq->loc_addr;
14382e6599cbSArnaldo Carvalho de Melo 	newinet->saddr	      = ireq->loc_addr;
14392e6599cbSArnaldo Carvalho de Melo 	newinet->opt	      = ireq->opt;
14402e6599cbSArnaldo Carvalho de Melo 	ireq->opt	      = NULL;
1441463c84b9SArnaldo Carvalho de Melo 	newinet->mc_index     = inet_iif(skb);
1442eddc9ec5SArnaldo Carvalho de Melo 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1443d83d8461SArnaldo Carvalho de Melo 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
14441da177e4SLinus Torvalds 	if (newinet->opt)
1445d83d8461SArnaldo Carvalho de Melo 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
14461da177e4SLinus Torvalds 	newinet->id = newtp->write_seq ^ jiffies;
14471da177e4SLinus Torvalds 
14485d424d5aSJohn Heffner 	tcp_mtup_init(newsk);
14491da177e4SLinus Torvalds 	tcp_sync_mss(newsk, dst_mtu(dst));
14501da177e4SLinus Torvalds 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
14511da177e4SLinus Torvalds 	tcp_initialize_rcv_mss(newsk);
14521da177e4SLinus Torvalds 
1453cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1454cfb6eeb4SYOSHIFUJI Hideaki 	/* Copy over the MD5 key from the original socket */
1455cfb6eeb4SYOSHIFUJI Hideaki 	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1456cfb6eeb4SYOSHIFUJI Hideaki 		/*
1457cfb6eeb4SYOSHIFUJI Hideaki 		 * We're using one, so create a matching key
1458cfb6eeb4SYOSHIFUJI Hideaki 		 * on the newsk structure. If we fail to get
1459cfb6eeb4SYOSHIFUJI Hideaki 		 * memory, then we end up not copying the key
1460cfb6eeb4SYOSHIFUJI Hideaki 		 * across. Shucks.
1461cfb6eeb4SYOSHIFUJI Hideaki 		 */
1462f6685938SArnaldo Carvalho de Melo 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1463f6685938SArnaldo Carvalho de Melo 		if (newkey != NULL)
1464cfb6eeb4SYOSHIFUJI Hideaki 			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
1465cfb6eeb4SYOSHIFUJI Hideaki 					  newkey, key->keylen);
1466cfb6eeb4SYOSHIFUJI Hideaki 	}
1467cfb6eeb4SYOSHIFUJI Hideaki #endif
1468cfb6eeb4SYOSHIFUJI Hideaki 
1469f3f05f70SArnaldo Carvalho de Melo 	__inet_hash(&tcp_hashinfo, newsk, 0);
14702d8c4ce5SArnaldo Carvalho de Melo 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
14711da177e4SLinus Torvalds 
14721da177e4SLinus Torvalds 	return newsk;
14731da177e4SLinus Torvalds 
14741da177e4SLinus Torvalds exit_overflow:
14751da177e4SLinus Torvalds 	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
14761da177e4SLinus Torvalds exit:
14771da177e4SLinus Torvalds 	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
14781da177e4SLinus Torvalds 	dst_release(dst);
14791da177e4SLinus Torvalds 	return NULL;
14801da177e4SLinus Torvalds }
14811da177e4SLinus Torvalds 
14821da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
14831da177e4SLinus Torvalds {
14841da177e4SLinus Torvalds 	struct tcphdr *th = skb->h.th;
1485eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
14861da177e4SLinus Torvalds 	struct sock *nsk;
148760236fddSArnaldo Carvalho de Melo 	struct request_sock **prev;
14881da177e4SLinus Torvalds 	/* Find possible connection requests. */
1489463c84b9SArnaldo Carvalho de Melo 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
14901da177e4SLinus Torvalds 						       iph->saddr, iph->daddr);
14911da177e4SLinus Torvalds 	if (req)
14921da177e4SLinus Torvalds 		return tcp_check_req(sk, skb, req, prev);
14931da177e4SLinus Torvalds 
1494eddc9ec5SArnaldo Carvalho de Melo 	nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
1495eddc9ec5SArnaldo Carvalho de Melo 				      iph->daddr, th->dest, inet_iif(skb));
14961da177e4SLinus Torvalds 
14971da177e4SLinus Torvalds 	if (nsk) {
14981da177e4SLinus Torvalds 		if (nsk->sk_state != TCP_TIME_WAIT) {
14991da177e4SLinus Torvalds 			bh_lock_sock(nsk);
15001da177e4SLinus Torvalds 			return nsk;
15011da177e4SLinus Torvalds 		}
15029469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(nsk));
15031da177e4SLinus Torvalds 		return NULL;
15041da177e4SLinus Torvalds 	}
15051da177e4SLinus Torvalds 
15061da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
15071da177e4SLinus Torvalds 	if (!th->rst && !th->syn && th->ack)
15081da177e4SLinus Torvalds 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
15091da177e4SLinus Torvalds #endif
15101da177e4SLinus Torvalds 	return sk;
15111da177e4SLinus Torvalds }
15121da177e4SLinus Torvalds 
1513b51655b9SAl Viro static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
15141da177e4SLinus Torvalds {
1515eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1516eddc9ec5SArnaldo Carvalho de Melo 
151784fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1518eddc9ec5SArnaldo Carvalho de Melo 		if (!tcp_v4_check(skb->len, iph->saddr,
1519eddc9ec5SArnaldo Carvalho de Melo 				  iph->daddr, skb->csum)) {
15201da177e4SLinus Torvalds 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1521fb286bb2SHerbert Xu 			return 0;
1522fb286bb2SHerbert Xu 		}
1523fb286bb2SHerbert Xu 	}
1524fb286bb2SHerbert Xu 
1525eddc9ec5SArnaldo Carvalho de Melo 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1526fb286bb2SHerbert Xu 				       skb->len, IPPROTO_TCP, 0);
1527fb286bb2SHerbert Xu 
1528fb286bb2SHerbert Xu 	if (skb->len <= 76) {
1529fb286bb2SHerbert Xu 		return __skb_checksum_complete(skb);
15301da177e4SLinus Torvalds 	}
15311da177e4SLinus Torvalds 	return 0;
15321da177e4SLinus Torvalds }
15331da177e4SLinus Torvalds 
15341da177e4SLinus Torvalds 
15351da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get
15361da177e4SLinus Torvalds  * here.
15371da177e4SLinus Torvalds  *
15381da177e4SLinus Torvalds  * We have a potential double-lock case here, so even when
15391da177e4SLinus Torvalds  * doing backlog processing we use the BH locking scheme.
15401da177e4SLinus Torvalds  * This is because we cannot sleep with the original spinlock
15411da177e4SLinus Torvalds  * held.
15421da177e4SLinus Torvalds  */
15431da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
15441da177e4SLinus Torvalds {
1545cfb6eeb4SYOSHIFUJI Hideaki 	struct sock *rsk;
1546cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1547cfb6eeb4SYOSHIFUJI Hideaki 	/*
1548cfb6eeb4SYOSHIFUJI Hideaki 	 * We really want to reject the packet as early as possible
1549cfb6eeb4SYOSHIFUJI Hideaki 	 * if:
1550cfb6eeb4SYOSHIFUJI Hideaki 	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1551cfb6eeb4SYOSHIFUJI Hideaki 	 *  o There is an MD5 option and we're not expecting one
1552cfb6eeb4SYOSHIFUJI Hideaki 	 */
1553cfb6eeb4SYOSHIFUJI Hideaki 	if (tcp_v4_inbound_md5_hash(sk, skb))
1554cfb6eeb4SYOSHIFUJI Hideaki 		goto discard;
1555cfb6eeb4SYOSHIFUJI Hideaki #endif
1556cfb6eeb4SYOSHIFUJI Hideaki 
15571da177e4SLinus Torvalds 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
15581da177e4SLinus Torvalds 		TCP_CHECK_TIMER(sk);
1559cfb6eeb4SYOSHIFUJI Hideaki 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
1560cfb6eeb4SYOSHIFUJI Hideaki 			rsk = sk;
15611da177e4SLinus Torvalds 			goto reset;
1562cfb6eeb4SYOSHIFUJI Hideaki 		}
15631da177e4SLinus Torvalds 		TCP_CHECK_TIMER(sk);
15641da177e4SLinus Torvalds 		return 0;
15651da177e4SLinus Torvalds 	}
15661da177e4SLinus Torvalds 
1567*ab6a5bb6SArnaldo Carvalho de Melo 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
15681da177e4SLinus Torvalds 		goto csum_err;
15691da177e4SLinus Torvalds 
15701da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
15711da177e4SLinus Torvalds 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
15721da177e4SLinus Torvalds 		if (!nsk)
15731da177e4SLinus Torvalds 			goto discard;
15741da177e4SLinus Torvalds 
15751da177e4SLinus Torvalds 		if (nsk != sk) {
1576cfb6eeb4SYOSHIFUJI Hideaki 			if (tcp_child_process(sk, nsk, skb)) {
1577cfb6eeb4SYOSHIFUJI Hideaki 				rsk = nsk;
15781da177e4SLinus Torvalds 				goto reset;
1579cfb6eeb4SYOSHIFUJI Hideaki 			}
15801da177e4SLinus Torvalds 			return 0;
15811da177e4SLinus Torvalds 		}
15821da177e4SLinus Torvalds 	}
15831da177e4SLinus Torvalds 
15841da177e4SLinus Torvalds 	TCP_CHECK_TIMER(sk);
1585cfb6eeb4SYOSHIFUJI Hideaki 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
1586cfb6eeb4SYOSHIFUJI Hideaki 		rsk = sk;
15871da177e4SLinus Torvalds 		goto reset;
1588cfb6eeb4SYOSHIFUJI Hideaki 	}
15891da177e4SLinus Torvalds 	TCP_CHECK_TIMER(sk);
15901da177e4SLinus Torvalds 	return 0;
15911da177e4SLinus Torvalds 
15921da177e4SLinus Torvalds reset:
1593cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_reset(rsk, skb);
15941da177e4SLinus Torvalds discard:
15951da177e4SLinus Torvalds 	kfree_skb(skb);
15961da177e4SLinus Torvalds 	/* Be careful here. If this function gets more complicated and
15971da177e4SLinus Torvalds 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
15981da177e4SLinus Torvalds 	 * might be destroyed here. This current version compiles correctly,
15991da177e4SLinus Torvalds 	 * but you have been warned.
16001da177e4SLinus Torvalds 	 */
16011da177e4SLinus Torvalds 	return 0;
16021da177e4SLinus Torvalds 
16031da177e4SLinus Torvalds csum_err:
16041da177e4SLinus Torvalds 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
16051da177e4SLinus Torvalds 	goto discard;
16061da177e4SLinus Torvalds }
16071da177e4SLinus Torvalds 
16081da177e4SLinus Torvalds /*
16091da177e4SLinus Torvalds  *	From tcp_input.c
16101da177e4SLinus Torvalds  */
16111da177e4SLinus Torvalds 
16121da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb)
16131da177e4SLinus Torvalds {
1614eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
16151da177e4SLinus Torvalds 	struct tcphdr *th;
16161da177e4SLinus Torvalds 	struct sock *sk;
16171da177e4SLinus Torvalds 	int ret;
16181da177e4SLinus Torvalds 
16191da177e4SLinus Torvalds 	if (skb->pkt_type != PACKET_HOST)
16201da177e4SLinus Torvalds 		goto discard_it;
16211da177e4SLinus Torvalds 
16221da177e4SLinus Torvalds 	/* Count it even if it's bad */
16231da177e4SLinus Torvalds 	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
16241da177e4SLinus Torvalds 
16251da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
16261da177e4SLinus Torvalds 		goto discard_it;
16271da177e4SLinus Torvalds 
16281da177e4SLinus Torvalds 	th = skb->h.th;
16291da177e4SLinus Torvalds 
16301da177e4SLinus Torvalds 	if (th->doff < sizeof(struct tcphdr) / 4)
16311da177e4SLinus Torvalds 		goto bad_packet;
16321da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, th->doff * 4))
16331da177e4SLinus Torvalds 		goto discard_it;
16341da177e4SLinus Torvalds 
16351da177e4SLinus Torvalds 	/* An explanation is required here, I think.
16361da177e4SLinus Torvalds 	 * Packet length and doff are validated by header prediction,
1637caa20d9aSStephen Hemminger 	 * provided case of th->doff==0 is eliminated.
16381da177e4SLinus Torvalds 	 * So, we defer the checks. */
16391da177e4SLinus Torvalds 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1640fb286bb2SHerbert Xu 	     tcp_v4_checksum_init(skb)))
16411da177e4SLinus Torvalds 		goto bad_packet;
16421da177e4SLinus Torvalds 
16431da177e4SLinus Torvalds 	th = skb->h.th;
1644eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
16451da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
16461da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
16471da177e4SLinus Torvalds 				    skb->len - th->doff * 4);
16481da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
16491da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->when	 = 0;
1650eddc9ec5SArnaldo Carvalho de Melo 	TCP_SKB_CB(skb)->flags	 = iph->tos;
16511da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->sacked	 = 0;
16521da177e4SLinus Torvalds 
1653eddc9ec5SArnaldo Carvalho de Melo 	sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
1654eddc9ec5SArnaldo Carvalho de Melo 			   iph->daddr, th->dest, inet_iif(skb));
16551da177e4SLinus Torvalds 	if (!sk)
16561da177e4SLinus Torvalds 		goto no_tcp_socket;
16571da177e4SLinus Torvalds 
16581da177e4SLinus Torvalds process:
16591da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT)
16601da177e4SLinus Torvalds 		goto do_time_wait;
16611da177e4SLinus Torvalds 
16621da177e4SLinus Torvalds 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
16631da177e4SLinus Torvalds 		goto discard_and_relse;
1664b59c2701SPatrick McHardy 	nf_reset(skb);
16651da177e4SLinus Torvalds 
1666fda9ef5dSDmitry Mishin 	if (sk_filter(sk, skb))
16671da177e4SLinus Torvalds 		goto discard_and_relse;
16681da177e4SLinus Torvalds 
16691da177e4SLinus Torvalds 	skb->dev = NULL;
16701da177e4SLinus Torvalds 
1671c6366184SIngo Molnar 	bh_lock_sock_nested(sk);
16721da177e4SLinus Torvalds 	ret = 0;
16731da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk)) {
16741a2449a8SChris Leech #ifdef CONFIG_NET_DMA
16751a2449a8SChris Leech 		struct tcp_sock *tp = tcp_sk(sk);
16761a2449a8SChris Leech 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
16771a2449a8SChris Leech 			tp->ucopy.dma_chan = get_softnet_dma();
16781a2449a8SChris Leech 		if (tp->ucopy.dma_chan)
16791a2449a8SChris Leech 			ret = tcp_v4_do_rcv(sk, skb);
16801a2449a8SChris Leech 		else
16811a2449a8SChris Leech #endif
16821a2449a8SChris Leech 		{
16831da177e4SLinus Torvalds 			if (!tcp_prequeue(sk, skb))
16841da177e4SLinus Torvalds 			ret = tcp_v4_do_rcv(sk, skb);
16851a2449a8SChris Leech 		}
16861da177e4SLinus Torvalds 	} else
16871da177e4SLinus Torvalds 		sk_add_backlog(sk, skb);
16881da177e4SLinus Torvalds 	bh_unlock_sock(sk);
16891da177e4SLinus Torvalds 
16901da177e4SLinus Torvalds 	sock_put(sk);
16911da177e4SLinus Torvalds 
16921da177e4SLinus Torvalds 	return ret;
16931da177e4SLinus Torvalds 
16941da177e4SLinus Torvalds no_tcp_socket:
16951da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
16961da177e4SLinus Torvalds 		goto discard_it;
16971da177e4SLinus Torvalds 
16981da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
16991da177e4SLinus Torvalds bad_packet:
17001da177e4SLinus Torvalds 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
17011da177e4SLinus Torvalds 	} else {
1702cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_send_reset(NULL, skb);
17031da177e4SLinus Torvalds 	}
17041da177e4SLinus Torvalds 
17051da177e4SLinus Torvalds discard_it:
17061da177e4SLinus Torvalds 	/* Discard frame. */
17071da177e4SLinus Torvalds 	kfree_skb(skb);
17081da177e4SLinus Torvalds 	return 0;
17091da177e4SLinus Torvalds 
17101da177e4SLinus Torvalds discard_and_relse:
17111da177e4SLinus Torvalds 	sock_put(sk);
17121da177e4SLinus Torvalds 	goto discard_it;
17131da177e4SLinus Torvalds 
17141da177e4SLinus Torvalds do_time_wait:
17151da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
17169469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
17171da177e4SLinus Torvalds 		goto discard_it;
17181da177e4SLinus Torvalds 	}
17191da177e4SLinus Torvalds 
17201da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
17211da177e4SLinus Torvalds 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
17229469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
17231da177e4SLinus Torvalds 		goto discard_it;
17241da177e4SLinus Torvalds 	}
17259469c7b4SYOSHIFUJI Hideaki 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
17261da177e4SLinus Torvalds 	case TCP_TW_SYN: {
172733b62231SArnaldo Carvalho de Melo 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1728eddc9ec5SArnaldo Carvalho de Melo 							iph->daddr, th->dest,
1729463c84b9SArnaldo Carvalho de Melo 							inet_iif(skb));
17301da177e4SLinus Torvalds 		if (sk2) {
17319469c7b4SYOSHIFUJI Hideaki 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
17329469c7b4SYOSHIFUJI Hideaki 			inet_twsk_put(inet_twsk(sk));
17331da177e4SLinus Torvalds 			sk = sk2;
17341da177e4SLinus Torvalds 			goto process;
17351da177e4SLinus Torvalds 		}
17361da177e4SLinus Torvalds 		/* Fall through to ACK */
17371da177e4SLinus Torvalds 	}
17381da177e4SLinus Torvalds 	case TCP_TW_ACK:
17391da177e4SLinus Torvalds 		tcp_v4_timewait_ack(sk, skb);
17401da177e4SLinus Torvalds 		break;
17411da177e4SLinus Torvalds 	case TCP_TW_RST:
17421da177e4SLinus Torvalds 		goto no_tcp_socket;
17431da177e4SLinus Torvalds 	case TCP_TW_SUCCESS:;
17441da177e4SLinus Torvalds 	}
17451da177e4SLinus Torvalds 	goto discard_it;
17461da177e4SLinus Torvalds }
17471da177e4SLinus Torvalds 
17481da177e4SLinus Torvalds /* VJ's idea. Save last timestamp seen from this destination
17491da177e4SLinus Torvalds  * and hold it at least for normal timewait interval to use for duplicate
17501da177e4SLinus Torvalds  * segment detection in subsequent connections, before they enter synchronized
17511da177e4SLinus Torvalds  * state.
17521da177e4SLinus Torvalds  */
17531da177e4SLinus Torvalds 
17541da177e4SLinus Torvalds int tcp_v4_remember_stamp(struct sock *sk)
17551da177e4SLinus Torvalds {
17561da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
17571da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
17581da177e4SLinus Torvalds 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
17591da177e4SLinus Torvalds 	struct inet_peer *peer = NULL;
17601da177e4SLinus Torvalds 	int release_it = 0;
17611da177e4SLinus Torvalds 
17621da177e4SLinus Torvalds 	if (!rt || rt->rt_dst != inet->daddr) {
17631da177e4SLinus Torvalds 		peer = inet_getpeer(inet->daddr, 1);
17641da177e4SLinus Torvalds 		release_it = 1;
17651da177e4SLinus Torvalds 	} else {
17661da177e4SLinus Torvalds 		if (!rt->peer)
17671da177e4SLinus Torvalds 			rt_bind_peer(rt, 1);
17681da177e4SLinus Torvalds 		peer = rt->peer;
17691da177e4SLinus Torvalds 	}
17701da177e4SLinus Torvalds 
17711da177e4SLinus Torvalds 	if (peer) {
17721da177e4SLinus Torvalds 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
17739d729f72SJames Morris 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
17741da177e4SLinus Torvalds 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
17751da177e4SLinus Torvalds 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
17761da177e4SLinus Torvalds 			peer->tcp_ts = tp->rx_opt.ts_recent;
17771da177e4SLinus Torvalds 		}
17781da177e4SLinus Torvalds 		if (release_it)
17791da177e4SLinus Torvalds 			inet_putpeer(peer);
17801da177e4SLinus Torvalds 		return 1;
17811da177e4SLinus Torvalds 	}
17821da177e4SLinus Torvalds 
17831da177e4SLinus Torvalds 	return 0;
17841da177e4SLinus Torvalds }
17851da177e4SLinus Torvalds 
17868feaf0c0SArnaldo Carvalho de Melo int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
17871da177e4SLinus Torvalds {
17888feaf0c0SArnaldo Carvalho de Melo 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
17891da177e4SLinus Torvalds 
17901da177e4SLinus Torvalds 	if (peer) {
17918feaf0c0SArnaldo Carvalho de Melo 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
17928feaf0c0SArnaldo Carvalho de Melo 
17938feaf0c0SArnaldo Carvalho de Melo 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
17949d729f72SJames Morris 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
17958feaf0c0SArnaldo Carvalho de Melo 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
17968feaf0c0SArnaldo Carvalho de Melo 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
17978feaf0c0SArnaldo Carvalho de Melo 			peer->tcp_ts	   = tcptw->tw_ts_recent;
17981da177e4SLinus Torvalds 		}
17991da177e4SLinus Torvalds 		inet_putpeer(peer);
18001da177e4SLinus Torvalds 		return 1;
18011da177e4SLinus Torvalds 	}
18021da177e4SLinus Torvalds 
18031da177e4SLinus Torvalds 	return 0;
18041da177e4SLinus Torvalds }
18051da177e4SLinus Torvalds 
18068292a17aSArnaldo Carvalho de Melo struct inet_connection_sock_af_ops ipv4_specific = {
18071da177e4SLinus Torvalds 	.queue_xmit	   = ip_queue_xmit,
18081da177e4SLinus Torvalds 	.send_check	   = tcp_v4_send_check,
180932519f11SArnaldo Carvalho de Melo 	.rebuild_header	   = inet_sk_rebuild_header,
18101da177e4SLinus Torvalds 	.conn_request	   = tcp_v4_conn_request,
18111da177e4SLinus Torvalds 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
18121da177e4SLinus Torvalds 	.remember_stamp	   = tcp_v4_remember_stamp,
18131da177e4SLinus Torvalds 	.net_header_len	   = sizeof(struct iphdr),
18141da177e4SLinus Torvalds 	.setsockopt	   = ip_setsockopt,
18151da177e4SLinus Torvalds 	.getsockopt	   = ip_getsockopt,
1816543d9cfeSArnaldo Carvalho de Melo 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1817543d9cfeSArnaldo Carvalho de Melo 	.sockaddr_len	   = sizeof(struct sockaddr_in),
18183fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT
18193fdadf7dSDmitry Mishin 	.compat_setsockopt = compat_ip_setsockopt,
18203fdadf7dSDmitry Mishin 	.compat_getsockopt = compat_ip_getsockopt,
18213fdadf7dSDmitry Mishin #endif
18221da177e4SLinus Torvalds };
18231da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 TCP MD5 signature operations; tcp_v4_init_sock() installs this
 * table as tp->af_specific.
 */
static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	/* Key management. */
	.md5_lookup		= tcp_v4_md5_lookup,
	.md5_add		= tcp_v4_md5_add_func,
	.md5_parse		= tcp_v4_parse_md5_keys,

	/* Signature computation. */
	.calc_md5_hash		= tcp_v4_calc_md5_hash,
};
#endif
1832cfb6eeb4SYOSHIFUJI Hideaki 
18331da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to
18341da177e4SLinus Torvalds  *       sk_alloc() so need not be done here.
18351da177e4SLinus Torvalds  */
18361da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk)
18371da177e4SLinus Torvalds {
18386687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
18391da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
18401da177e4SLinus Torvalds 
18411da177e4SLinus Torvalds 	skb_queue_head_init(&tp->out_of_order_queue);
18421da177e4SLinus Torvalds 	tcp_init_xmit_timers(sk);
18431da177e4SLinus Torvalds 	tcp_prequeue_init(tp);
18441da177e4SLinus Torvalds 
18456687e988SArnaldo Carvalho de Melo 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
18461da177e4SLinus Torvalds 	tp->mdev = TCP_TIMEOUT_INIT;
18471da177e4SLinus Torvalds 
18481da177e4SLinus Torvalds 	/* So many TCP implementations out there (incorrectly) count the
18491da177e4SLinus Torvalds 	 * initial SYN frame in their delayed-ACK and congestion control
18501da177e4SLinus Torvalds 	 * algorithms that we must have the following bandaid to talk
18511da177e4SLinus Torvalds 	 * efficiently to them.  -DaveM
18521da177e4SLinus Torvalds 	 */
18531da177e4SLinus Torvalds 	tp->snd_cwnd = 2;
18541da177e4SLinus Torvalds 
18551da177e4SLinus Torvalds 	/* See draft-stevens-tcpca-spec-01 for discussion of the
18561da177e4SLinus Torvalds 	 * initialization of these values.
18571da177e4SLinus Torvalds 	 */
18581da177e4SLinus Torvalds 	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
18591da177e4SLinus Torvalds 	tp->snd_cwnd_clamp = ~0;
1860c1b4a7e6SDavid S. Miller 	tp->mss_cache = 536;
18611da177e4SLinus Torvalds 
18621da177e4SLinus Torvalds 	tp->reordering = sysctl_tcp_reordering;
18636687e988SArnaldo Carvalho de Melo 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
18641da177e4SLinus Torvalds 
18651da177e4SLinus Torvalds 	sk->sk_state = TCP_CLOSE;
18661da177e4SLinus Torvalds 
18671da177e4SLinus Torvalds 	sk->sk_write_space = sk_stream_write_space;
18681da177e4SLinus Torvalds 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
18691da177e4SLinus Torvalds 
18708292a17aSArnaldo Carvalho de Melo 	icsk->icsk_af_ops = &ipv4_specific;
1871d83d8461SArnaldo Carvalho de Melo 	icsk->icsk_sync_mss = tcp_sync_mss;
1872cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1873cfb6eeb4SYOSHIFUJI Hideaki 	tp->af_specific = &tcp_sock_ipv4_specific;
1874cfb6eeb4SYOSHIFUJI Hideaki #endif
18751da177e4SLinus Torvalds 
18761da177e4SLinus Torvalds 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
18771da177e4SLinus Torvalds 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
18781da177e4SLinus Torvalds 
18791da177e4SLinus Torvalds 	atomic_inc(&tcp_sockets_allocated);
18801da177e4SLinus Torvalds 
18811da177e4SLinus Torvalds 	return 0;
18821da177e4SLinus Torvalds }
18831da177e4SLinus Torvalds 
18841da177e4SLinus Torvalds int tcp_v4_destroy_sock(struct sock *sk)
18851da177e4SLinus Torvalds {
18861da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
18871da177e4SLinus Torvalds 
18881da177e4SLinus Torvalds 	tcp_clear_xmit_timers(sk);
18891da177e4SLinus Torvalds 
18906687e988SArnaldo Carvalho de Melo 	tcp_cleanup_congestion_control(sk);
1891317a76f9SStephen Hemminger 
18921da177e4SLinus Torvalds 	/* Cleanup up the write buffer. */
1893fe067e8aSDavid S. Miller 	tcp_write_queue_purge(sk);
18941da177e4SLinus Torvalds 
18951da177e4SLinus Torvalds 	/* Cleans up our, hopefully empty, out_of_order_queue. */
18961da177e4SLinus Torvalds 	__skb_queue_purge(&tp->out_of_order_queue);
18971da177e4SLinus Torvalds 
1898cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1899cfb6eeb4SYOSHIFUJI Hideaki 	/* Clean up the MD5 key list, if any */
1900cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info) {
1901cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_clear_md5_list(sk);
1902cfb6eeb4SYOSHIFUJI Hideaki 		kfree(tp->md5sig_info);
1903cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = NULL;
1904cfb6eeb4SYOSHIFUJI Hideaki 	}
1905cfb6eeb4SYOSHIFUJI Hideaki #endif
1906cfb6eeb4SYOSHIFUJI Hideaki 
19071a2449a8SChris Leech #ifdef CONFIG_NET_DMA
19081a2449a8SChris Leech 	/* Cleans up our sk_async_wait_queue */
19091a2449a8SChris Leech 	__skb_queue_purge(&sk->sk_async_wait_queue);
19101a2449a8SChris Leech #endif
19111a2449a8SChris Leech 
19121da177e4SLinus Torvalds 	/* Clean prequeue, it must be empty really */
19131da177e4SLinus Torvalds 	__skb_queue_purge(&tp->ucopy.prequeue);
19141da177e4SLinus Torvalds 
19151da177e4SLinus Torvalds 	/* Clean up a referenced TCP bind bucket. */
1916463c84b9SArnaldo Carvalho de Melo 	if (inet_csk(sk)->icsk_bind_hash)
19172d8c4ce5SArnaldo Carvalho de Melo 		inet_put_port(&tcp_hashinfo, sk);
19181da177e4SLinus Torvalds 
19191da177e4SLinus Torvalds 	/*
19201da177e4SLinus Torvalds 	 * If sendmsg cached page exists, toss it.
19211da177e4SLinus Torvalds 	 */
19221da177e4SLinus Torvalds 	if (sk->sk_sndmsg_page) {
19231da177e4SLinus Torvalds 		__free_page(sk->sk_sndmsg_page);
19241da177e4SLinus Torvalds 		sk->sk_sndmsg_page = NULL;
19251da177e4SLinus Torvalds 	}
19261da177e4SLinus Torvalds 
19271da177e4SLinus Torvalds 	atomic_dec(&tcp_sockets_allocated);
19281da177e4SLinus Torvalds 
19291da177e4SLinus Torvalds 	return 0;
19301da177e4SLinus Torvalds }
19311da177e4SLinus Torvalds 
19321da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock);
19331da177e4SLinus Torvalds 
19341da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
19351da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */
19361da177e4SLinus Torvalds 
19378feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
19381da177e4SLinus Torvalds {
19391da177e4SLinus Torvalds 	return hlist_empty(head) ? NULL :
19408feaf0c0SArnaldo Carvalho de Melo 		list_entry(head->first, struct inet_timewait_sock, tw_node);
19411da177e4SLinus Torvalds }
19421da177e4SLinus Torvalds 
19438feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
19441da177e4SLinus Torvalds {
19451da177e4SLinus Torvalds 	return tw->tw_node.next ?
19461da177e4SLinus Torvalds 		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
19471da177e4SLinus Torvalds }
19481da177e4SLinus Torvalds 
19491da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur)
19501da177e4SLinus Torvalds {
1951463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk;
19521da177e4SLinus Torvalds 	struct hlist_node *node;
19531da177e4SLinus Torvalds 	struct sock *sk = cur;
19541da177e4SLinus Torvalds 	struct tcp_iter_state* st = seq->private;
19551da177e4SLinus Torvalds 
19561da177e4SLinus Torvalds 	if (!sk) {
19571da177e4SLinus Torvalds 		st->bucket = 0;
19586e04e021SArnaldo Carvalho de Melo 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
19591da177e4SLinus Torvalds 		goto get_sk;
19601da177e4SLinus Torvalds 	}
19611da177e4SLinus Torvalds 
19621da177e4SLinus Torvalds 	++st->num;
19631da177e4SLinus Torvalds 
19641da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
196560236fddSArnaldo Carvalho de Melo 		struct request_sock *req = cur;
19661da177e4SLinus Torvalds 
1967463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(st->syn_wait_sk);
19681da177e4SLinus Torvalds 		req = req->dl_next;
19691da177e4SLinus Torvalds 		while (1) {
19701da177e4SLinus Torvalds 			while (req) {
197160236fddSArnaldo Carvalho de Melo 				if (req->rsk_ops->family == st->family) {
19721da177e4SLinus Torvalds 					cur = req;
19731da177e4SLinus Torvalds 					goto out;
19741da177e4SLinus Torvalds 				}
19751da177e4SLinus Torvalds 				req = req->dl_next;
19761da177e4SLinus Torvalds 			}
197772a3effaSEric Dumazet 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
19781da177e4SLinus Torvalds 				break;
19791da177e4SLinus Torvalds get_req:
1980463c84b9SArnaldo Carvalho de Melo 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
19811da177e4SLinus Torvalds 		}
19821da177e4SLinus Torvalds 		sk	  = sk_next(st->syn_wait_sk);
19831da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_LISTENING;
1984463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
19851da177e4SLinus Torvalds 	} else {
1986463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
1987463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1988463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
19891da177e4SLinus Torvalds 			goto start_req;
1990463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
19911da177e4SLinus Torvalds 		sk = sk_next(sk);
19921da177e4SLinus Torvalds 	}
19931da177e4SLinus Torvalds get_sk:
19941da177e4SLinus Torvalds 	sk_for_each_from(sk, node) {
19951da177e4SLinus Torvalds 		if (sk->sk_family == st->family) {
19961da177e4SLinus Torvalds 			cur = sk;
19971da177e4SLinus Torvalds 			goto out;
19981da177e4SLinus Torvalds 		}
1999463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
2000463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2001463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
20021da177e4SLinus Torvalds start_req:
20031da177e4SLinus Torvalds 			st->uid		= sock_i_uid(sk);
20041da177e4SLinus Torvalds 			st->syn_wait_sk = sk;
20051da177e4SLinus Torvalds 			st->state	= TCP_SEQ_STATE_OPENREQ;
20061da177e4SLinus Torvalds 			st->sbucket	= 0;
20071da177e4SLinus Torvalds 			goto get_req;
20081da177e4SLinus Torvalds 		}
2009463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
20101da177e4SLinus Torvalds 	}
20110f7ff927SArnaldo Carvalho de Melo 	if (++st->bucket < INET_LHTABLE_SIZE) {
20126e04e021SArnaldo Carvalho de Melo 		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
20131da177e4SLinus Torvalds 		goto get_sk;
20141da177e4SLinus Torvalds 	}
20151da177e4SLinus Torvalds 	cur = NULL;
20161da177e4SLinus Torvalds out:
20171da177e4SLinus Torvalds 	return cur;
20181da177e4SLinus Torvalds }
20191da177e4SLinus Torvalds 
20201da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
20211da177e4SLinus Torvalds {
20221da177e4SLinus Torvalds 	void *rc = listening_get_next(seq, NULL);
20231da177e4SLinus Torvalds 
20241da177e4SLinus Torvalds 	while (rc && *pos) {
20251da177e4SLinus Torvalds 		rc = listening_get_next(seq, rc);
20261da177e4SLinus Torvalds 		--*pos;
20271da177e4SLinus Torvalds 	}
20281da177e4SLinus Torvalds 	return rc;
20291da177e4SLinus Torvalds }
20301da177e4SLinus Torvalds 
20311da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq)
20321da177e4SLinus Torvalds {
20331da177e4SLinus Torvalds 	struct tcp_iter_state* st = seq->private;
20341da177e4SLinus Torvalds 	void *rc = NULL;
20351da177e4SLinus Torvalds 
20366e04e021SArnaldo Carvalho de Melo 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
20371da177e4SLinus Torvalds 		struct sock *sk;
20381da177e4SLinus Torvalds 		struct hlist_node *node;
20398feaf0c0SArnaldo Carvalho de Melo 		struct inet_timewait_sock *tw;
20401da177e4SLinus Torvalds 
20411da177e4SLinus Torvalds 		/* We can reschedule _before_ having picked the target: */
20421da177e4SLinus Torvalds 		cond_resched_softirq();
20431da177e4SLinus Torvalds 
20446e04e021SArnaldo Carvalho de Melo 		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
20456e04e021SArnaldo Carvalho de Melo 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
20461da177e4SLinus Torvalds 			if (sk->sk_family != st->family) {
20471da177e4SLinus Torvalds 				continue;
20481da177e4SLinus Torvalds 			}
20491da177e4SLinus Torvalds 			rc = sk;
20501da177e4SLinus Torvalds 			goto out;
20511da177e4SLinus Torvalds 		}
20521da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_TIME_WAIT;
20538feaf0c0SArnaldo Carvalho de Melo 		inet_twsk_for_each(tw, node,
2054dbca9b27SEric Dumazet 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
20551da177e4SLinus Torvalds 			if (tw->tw_family != st->family) {
20561da177e4SLinus Torvalds 				continue;
20571da177e4SLinus Torvalds 			}
20581da177e4SLinus Torvalds 			rc = tw;
20591da177e4SLinus Torvalds 			goto out;
20601da177e4SLinus Torvalds 		}
20616e04e021SArnaldo Carvalho de Melo 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
20621da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
20631da177e4SLinus Torvalds 	}
20641da177e4SLinus Torvalds out:
20651da177e4SLinus Torvalds 	return rc;
20661da177e4SLinus Torvalds }
20671da177e4SLinus Torvalds 
20681da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur)
20691da177e4SLinus Torvalds {
20701da177e4SLinus Torvalds 	struct sock *sk = cur;
20718feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw;
20721da177e4SLinus Torvalds 	struct hlist_node *node;
20731da177e4SLinus Torvalds 	struct tcp_iter_state* st = seq->private;
20741da177e4SLinus Torvalds 
20751da177e4SLinus Torvalds 	++st->num;
20761da177e4SLinus Torvalds 
20771da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
20781da177e4SLinus Torvalds 		tw = cur;
20791da177e4SLinus Torvalds 		tw = tw_next(tw);
20801da177e4SLinus Torvalds get_tw:
20811da177e4SLinus Torvalds 		while (tw && tw->tw_family != st->family) {
20821da177e4SLinus Torvalds 			tw = tw_next(tw);
20831da177e4SLinus Torvalds 		}
20841da177e4SLinus Torvalds 		if (tw) {
20851da177e4SLinus Torvalds 			cur = tw;
20861da177e4SLinus Torvalds 			goto out;
20871da177e4SLinus Torvalds 		}
20886e04e021SArnaldo Carvalho de Melo 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
20891da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
20901da177e4SLinus Torvalds 
20911da177e4SLinus Torvalds 		/* We can reschedule between buckets: */
20921da177e4SLinus Torvalds 		cond_resched_softirq();
20931da177e4SLinus Torvalds 
20946e04e021SArnaldo Carvalho de Melo 		if (++st->bucket < tcp_hashinfo.ehash_size) {
20956e04e021SArnaldo Carvalho de Melo 			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
20966e04e021SArnaldo Carvalho de Melo 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
20971da177e4SLinus Torvalds 		} else {
20981da177e4SLinus Torvalds 			cur = NULL;
20991da177e4SLinus Torvalds 			goto out;
21001da177e4SLinus Torvalds 		}
21011da177e4SLinus Torvalds 	} else
21021da177e4SLinus Torvalds 		sk = sk_next(sk);
21031da177e4SLinus Torvalds 
21041da177e4SLinus Torvalds 	sk_for_each_from(sk, node) {
21051da177e4SLinus Torvalds 		if (sk->sk_family == st->family)
21061da177e4SLinus Torvalds 			goto found;
21071da177e4SLinus Torvalds 	}
21081da177e4SLinus Torvalds 
21091da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2110dbca9b27SEric Dumazet 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
21111da177e4SLinus Torvalds 	goto get_tw;
21121da177e4SLinus Torvalds found:
21131da177e4SLinus Torvalds 	cur = sk;
21141da177e4SLinus Torvalds out:
21151da177e4SLinus Torvalds 	return cur;
21161da177e4SLinus Torvalds }
21171da177e4SLinus Torvalds 
21181da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos)
21191da177e4SLinus Torvalds {
21201da177e4SLinus Torvalds 	void *rc = established_get_first(seq);
21211da177e4SLinus Torvalds 
21221da177e4SLinus Torvalds 	while (rc && pos) {
21231da177e4SLinus Torvalds 		rc = established_get_next(seq, rc);
21241da177e4SLinus Torvalds 		--pos;
21251da177e4SLinus Torvalds 	}
21261da177e4SLinus Torvalds 	return rc;
21271da177e4SLinus Torvalds }
21281da177e4SLinus Torvalds 
21291da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
21301da177e4SLinus Torvalds {
21311da177e4SLinus Torvalds 	void *rc;
21321da177e4SLinus Torvalds 	struct tcp_iter_state* st = seq->private;
21331da177e4SLinus Torvalds 
2134f3f05f70SArnaldo Carvalho de Melo 	inet_listen_lock(&tcp_hashinfo);
21351da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
21361da177e4SLinus Torvalds 	rc	  = listening_get_idx(seq, &pos);
21371da177e4SLinus Torvalds 
21381da177e4SLinus Torvalds 	if (!rc) {
2139f3f05f70SArnaldo Carvalho de Melo 		inet_listen_unlock(&tcp_hashinfo);
21401da177e4SLinus Torvalds 		local_bh_disable();
21411da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
21421da177e4SLinus Torvalds 		rc	  = established_get_idx(seq, pos);
21431da177e4SLinus Torvalds 	}
21441da177e4SLinus Torvalds 
21451da177e4SLinus Torvalds 	return rc;
21461da177e4SLinus Torvalds }
21471da177e4SLinus Torvalds 
21481da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
21491da177e4SLinus Torvalds {
21501da177e4SLinus Torvalds 	struct tcp_iter_state* st = seq->private;
21511da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
21521da177e4SLinus Torvalds 	st->num = 0;
21531da177e4SLinus Torvalds 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
21541da177e4SLinus Torvalds }
21551da177e4SLinus Torvalds 
21561da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
21571da177e4SLinus Torvalds {
21581da177e4SLinus Torvalds 	void *rc = NULL;
21591da177e4SLinus Torvalds 	struct tcp_iter_state* st;
21601da177e4SLinus Torvalds 
21611da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
21621da177e4SLinus Torvalds 		rc = tcp_get_idx(seq, 0);
21631da177e4SLinus Torvalds 		goto out;
21641da177e4SLinus Torvalds 	}
21651da177e4SLinus Torvalds 	st = seq->private;
21661da177e4SLinus Torvalds 
21671da177e4SLinus Torvalds 	switch (st->state) {
21681da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
21691da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
21701da177e4SLinus Torvalds 		rc = listening_get_next(seq, v);
21711da177e4SLinus Torvalds 		if (!rc) {
2172f3f05f70SArnaldo Carvalho de Melo 			inet_listen_unlock(&tcp_hashinfo);
21731da177e4SLinus Torvalds 			local_bh_disable();
21741da177e4SLinus Torvalds 			st->state = TCP_SEQ_STATE_ESTABLISHED;
21751da177e4SLinus Torvalds 			rc	  = established_get_first(seq);
21761da177e4SLinus Torvalds 		}
21771da177e4SLinus Torvalds 		break;
21781da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
21791da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
21801da177e4SLinus Torvalds 		rc = established_get_next(seq, v);
21811da177e4SLinus Torvalds 		break;
21821da177e4SLinus Torvalds 	}
21831da177e4SLinus Torvalds out:
21841da177e4SLinus Torvalds 	++*pos;
21851da177e4SLinus Torvalds 	return rc;
21861da177e4SLinus Torvalds }
21871da177e4SLinus Torvalds 
21881da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v)
21891da177e4SLinus Torvalds {
21901da177e4SLinus Torvalds 	struct tcp_iter_state* st = seq->private;
21911da177e4SLinus Torvalds 
21921da177e4SLinus Torvalds 	switch (st->state) {
21931da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
21941da177e4SLinus Torvalds 		if (v) {
2195463c84b9SArnaldo Carvalho de Melo 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2196463c84b9SArnaldo Carvalho de Melo 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
21971da177e4SLinus Torvalds 		}
21981da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
21991da177e4SLinus Torvalds 		if (v != SEQ_START_TOKEN)
2200f3f05f70SArnaldo Carvalho de Melo 			inet_listen_unlock(&tcp_hashinfo);
22011da177e4SLinus Torvalds 		break;
22021da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
22031da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
22041da177e4SLinus Torvalds 		if (v)
22056e04e021SArnaldo Carvalho de Melo 			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
22061da177e4SLinus Torvalds 		local_bh_enable();
22071da177e4SLinus Torvalds 		break;
22081da177e4SLinus Torvalds 	}
22091da177e4SLinus Torvalds }
22101da177e4SLinus Torvalds 
22111da177e4SLinus Torvalds static int tcp_seq_open(struct inode *inode, struct file *file)
22121da177e4SLinus Torvalds {
22131da177e4SLinus Torvalds 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
22141da177e4SLinus Torvalds 	struct seq_file *seq;
22151da177e4SLinus Torvalds 	struct tcp_iter_state *s;
22161da177e4SLinus Torvalds 	int rc;
22171da177e4SLinus Torvalds 
22181da177e4SLinus Torvalds 	if (unlikely(afinfo == NULL))
22191da177e4SLinus Torvalds 		return -EINVAL;
22201da177e4SLinus Torvalds 
22210da974f4SPanagiotis Issaris 	s = kzalloc(sizeof(*s), GFP_KERNEL);
22221da177e4SLinus Torvalds 	if (!s)
22231da177e4SLinus Torvalds 		return -ENOMEM;
22241da177e4SLinus Torvalds 	s->family		= afinfo->family;
22251da177e4SLinus Torvalds 	s->seq_ops.start	= tcp_seq_start;
22261da177e4SLinus Torvalds 	s->seq_ops.next		= tcp_seq_next;
22271da177e4SLinus Torvalds 	s->seq_ops.show		= afinfo->seq_show;
22281da177e4SLinus Torvalds 	s->seq_ops.stop		= tcp_seq_stop;
22291da177e4SLinus Torvalds 
22301da177e4SLinus Torvalds 	rc = seq_open(file, &s->seq_ops);
22311da177e4SLinus Torvalds 	if (rc)
22321da177e4SLinus Torvalds 		goto out_kfree;
22331da177e4SLinus Torvalds 	seq	     = file->private_data;
22341da177e4SLinus Torvalds 	seq->private = s;
22351da177e4SLinus Torvalds out:
22361da177e4SLinus Torvalds 	return rc;
22371da177e4SLinus Torvalds out_kfree:
22381da177e4SLinus Torvalds 	kfree(s);
22391da177e4SLinus Torvalds 	goto out;
22401da177e4SLinus Torvalds }
22411da177e4SLinus Torvalds 
22421da177e4SLinus Torvalds int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
22431da177e4SLinus Torvalds {
22441da177e4SLinus Torvalds 	int rc = 0;
22451da177e4SLinus Torvalds 	struct proc_dir_entry *p;
22461da177e4SLinus Torvalds 
22471da177e4SLinus Torvalds 	if (!afinfo)
22481da177e4SLinus Torvalds 		return -EINVAL;
22491da177e4SLinus Torvalds 	afinfo->seq_fops->owner		= afinfo->owner;
22501da177e4SLinus Torvalds 	afinfo->seq_fops->open		= tcp_seq_open;
22511da177e4SLinus Torvalds 	afinfo->seq_fops->read		= seq_read;
22521da177e4SLinus Torvalds 	afinfo->seq_fops->llseek	= seq_lseek;
22531da177e4SLinus Torvalds 	afinfo->seq_fops->release	= seq_release_private;
22541da177e4SLinus Torvalds 
22551da177e4SLinus Torvalds 	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
22561da177e4SLinus Torvalds 	if (p)
22571da177e4SLinus Torvalds 		p->data = afinfo;
22581da177e4SLinus Torvalds 	else
22591da177e4SLinus Torvalds 		rc = -ENOMEM;
22601da177e4SLinus Torvalds 	return rc;
22611da177e4SLinus Torvalds }
22621da177e4SLinus Torvalds 
22631da177e4SLinus Torvalds void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
22641da177e4SLinus Torvalds {
22651da177e4SLinus Torvalds 	if (!afinfo)
22661da177e4SLinus Torvalds 		return;
22671da177e4SLinus Torvalds 	proc_net_remove(afinfo->name);
22681da177e4SLinus Torvalds 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
22691da177e4SLinus Torvalds }
22701da177e4SLinus Torvalds 
227160236fddSArnaldo Carvalho de Melo static void get_openreq4(struct sock *sk, struct request_sock *req,
22721da177e4SLinus Torvalds 			 char *tmpbuf, int i, int uid)
22731da177e4SLinus Torvalds {
22742e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
22751da177e4SLinus Torvalds 	int ttd = req->expires - jiffies;
22761da177e4SLinus Torvalds 
22771da177e4SLinus Torvalds 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
22781da177e4SLinus Torvalds 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
22791da177e4SLinus Torvalds 		i,
22802e6599cbSArnaldo Carvalho de Melo 		ireq->loc_addr,
22811da177e4SLinus Torvalds 		ntohs(inet_sk(sk)->sport),
22822e6599cbSArnaldo Carvalho de Melo 		ireq->rmt_addr,
22832e6599cbSArnaldo Carvalho de Melo 		ntohs(ireq->rmt_port),
22841da177e4SLinus Torvalds 		TCP_SYN_RECV,
22851da177e4SLinus Torvalds 		0, 0, /* could print option size, but that is af dependent. */
22861da177e4SLinus Torvalds 		1,    /* timers active (only the expire timer) */
22871da177e4SLinus Torvalds 		jiffies_to_clock_t(ttd),
22881da177e4SLinus Torvalds 		req->retrans,
22891da177e4SLinus Torvalds 		uid,
22901da177e4SLinus Torvalds 		0,  /* non standard timer */
22911da177e4SLinus Torvalds 		0, /* open_requests have no inode */
22921da177e4SLinus Torvalds 		atomic_read(&sk->sk_refcnt),
22931da177e4SLinus Torvalds 		req);
22941da177e4SLinus Torvalds }
22951da177e4SLinus Torvalds 
2296cf4c6bf8SIlpo Järvinen static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
22971da177e4SLinus Torvalds {
22981da177e4SLinus Torvalds 	int timer_active;
22991da177e4SLinus Torvalds 	unsigned long timer_expires;
2300cf4c6bf8SIlpo Järvinen 	struct tcp_sock *tp = tcp_sk(sk);
2301cf4c6bf8SIlpo Järvinen 	const struct inet_connection_sock *icsk = inet_csk(sk);
2302cf4c6bf8SIlpo Järvinen 	struct inet_sock *inet = inet_sk(sk);
2303714e85beSAl Viro 	__be32 dest = inet->daddr;
2304714e85beSAl Viro 	__be32 src = inet->rcv_saddr;
23051da177e4SLinus Torvalds 	__u16 destp = ntohs(inet->dport);
23061da177e4SLinus Torvalds 	__u16 srcp = ntohs(inet->sport);
23071da177e4SLinus Torvalds 
2308463c84b9SArnaldo Carvalho de Melo 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
23091da177e4SLinus Torvalds 		timer_active	= 1;
2310463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2311463c84b9SArnaldo Carvalho de Melo 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
23121da177e4SLinus Torvalds 		timer_active	= 4;
2313463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2314cf4c6bf8SIlpo Järvinen 	} else if (timer_pending(&sk->sk_timer)) {
23151da177e4SLinus Torvalds 		timer_active	= 2;
2316cf4c6bf8SIlpo Järvinen 		timer_expires	= sk->sk_timer.expires;
23171da177e4SLinus Torvalds 	} else {
23181da177e4SLinus Torvalds 		timer_active	= 0;
23191da177e4SLinus Torvalds 		timer_expires = jiffies;
23201da177e4SLinus Torvalds 	}
23211da177e4SLinus Torvalds 
23221da177e4SLinus Torvalds 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
23231da177e4SLinus Torvalds 			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
2324cf4c6bf8SIlpo Järvinen 		i, src, srcp, dest, destp, sk->sk_state,
232547da8ee6SSridhar Samudrala 		tp->write_seq - tp->snd_una,
2326cf4c6bf8SIlpo Järvinen 		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
23277174259eSArnaldo Carvalho de Melo 					     (tp->rcv_nxt - tp->copied_seq),
23281da177e4SLinus Torvalds 		timer_active,
23291da177e4SLinus Torvalds 		jiffies_to_clock_t(timer_expires - jiffies),
2330463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_retransmits,
2331cf4c6bf8SIlpo Järvinen 		sock_i_uid(sk),
23326687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out,
2333cf4c6bf8SIlpo Järvinen 		sock_i_ino(sk),
2334cf4c6bf8SIlpo Järvinen 		atomic_read(&sk->sk_refcnt), sk,
2335463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_rto,
2336463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_ack.ato,
2337463c84b9SArnaldo Carvalho de Melo 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
23381da177e4SLinus Torvalds 		tp->snd_cwnd,
23391da177e4SLinus Torvalds 		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
23401da177e4SLinus Torvalds }
23411da177e4SLinus Torvalds 
23427174259eSArnaldo Carvalho de Melo static void get_timewait4_sock(struct inet_timewait_sock *tw,
23437174259eSArnaldo Carvalho de Melo 			       char *tmpbuf, int i)
23441da177e4SLinus Torvalds {
234523f33c2dSAl Viro 	__be32 dest, src;
23461da177e4SLinus Torvalds 	__u16 destp, srcp;
23471da177e4SLinus Torvalds 	int ttd = tw->tw_ttd - jiffies;
23481da177e4SLinus Torvalds 
23491da177e4SLinus Torvalds 	if (ttd < 0)
23501da177e4SLinus Torvalds 		ttd = 0;
23511da177e4SLinus Torvalds 
23521da177e4SLinus Torvalds 	dest  = tw->tw_daddr;
23531da177e4SLinus Torvalds 	src   = tw->tw_rcv_saddr;
23541da177e4SLinus Torvalds 	destp = ntohs(tw->tw_dport);
23551da177e4SLinus Torvalds 	srcp  = ntohs(tw->tw_sport);
23561da177e4SLinus Torvalds 
23571da177e4SLinus Torvalds 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
23581da177e4SLinus Torvalds 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
23591da177e4SLinus Torvalds 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
23601da177e4SLinus Torvalds 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
23611da177e4SLinus Torvalds 		atomic_read(&tw->tw_refcnt), tw);
23621da177e4SLinus Torvalds }
23631da177e4SLinus Torvalds 
23641da177e4SLinus Torvalds #define TMPSZ 150
23651da177e4SLinus Torvalds 
23661da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v)
23671da177e4SLinus Torvalds {
23681da177e4SLinus Torvalds 	struct tcp_iter_state* st;
23691da177e4SLinus Torvalds 	char tmpbuf[TMPSZ + 1];
23701da177e4SLinus Torvalds 
23711da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
23721da177e4SLinus Torvalds 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
23731da177e4SLinus Torvalds 			   "  sl  local_address rem_address   st tx_queue "
23741da177e4SLinus Torvalds 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
23751da177e4SLinus Torvalds 			   "inode");
23761da177e4SLinus Torvalds 		goto out;
23771da177e4SLinus Torvalds 	}
23781da177e4SLinus Torvalds 	st = seq->private;
23791da177e4SLinus Torvalds 
23801da177e4SLinus Torvalds 	switch (st->state) {
23811da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
23821da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
23831da177e4SLinus Torvalds 		get_tcp4_sock(v, tmpbuf, st->num);
23841da177e4SLinus Torvalds 		break;
23851da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
23861da177e4SLinus Torvalds 		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
23871da177e4SLinus Torvalds 		break;
23881da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
23891da177e4SLinus Torvalds 		get_timewait4_sock(v, tmpbuf, st->num);
23901da177e4SLinus Torvalds 		break;
23911da177e4SLinus Torvalds 	}
23921da177e4SLinus Torvalds 	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
23931da177e4SLinus Torvalds out:
23941da177e4SLinus Torvalds 	return 0;
23951da177e4SLinus Torvalds }
23961da177e4SLinus Torvalds 
23971da177e4SLinus Torvalds static struct file_operations tcp4_seq_fops;
23981da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = {
23991da177e4SLinus Torvalds 	.owner		= THIS_MODULE,
24001da177e4SLinus Torvalds 	.name		= "tcp",
24011da177e4SLinus Torvalds 	.family		= AF_INET,
24021da177e4SLinus Torvalds 	.seq_show	= tcp4_seq_show,
24031da177e4SLinus Torvalds 	.seq_fops	= &tcp4_seq_fops,
24041da177e4SLinus Torvalds };
24051da177e4SLinus Torvalds 
24061da177e4SLinus Torvalds int __init tcp4_proc_init(void)
24071da177e4SLinus Torvalds {
24081da177e4SLinus Torvalds 	return tcp_proc_register(&tcp4_seq_afinfo);
24091da177e4SLinus Torvalds }
24101da177e4SLinus Torvalds 
24111da177e4SLinus Torvalds void tcp4_proc_exit(void)
24121da177e4SLinus Torvalds {
24131da177e4SLinus Torvalds 	tcp_proc_unregister(&tcp4_seq_afinfo);
24141da177e4SLinus Torvalds }
24151da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
24161da177e4SLinus Torvalds 
24171da177e4SLinus Torvalds struct proto tcp_prot = {
24181da177e4SLinus Torvalds 	.name			= "TCP",
24191da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
24201da177e4SLinus Torvalds 	.close			= tcp_close,
24211da177e4SLinus Torvalds 	.connect		= tcp_v4_connect,
24221da177e4SLinus Torvalds 	.disconnect		= tcp_disconnect,
2423463c84b9SArnaldo Carvalho de Melo 	.accept			= inet_csk_accept,
24241da177e4SLinus Torvalds 	.ioctl			= tcp_ioctl,
24251da177e4SLinus Torvalds 	.init			= tcp_v4_init_sock,
24261da177e4SLinus Torvalds 	.destroy		= tcp_v4_destroy_sock,
24271da177e4SLinus Torvalds 	.shutdown		= tcp_shutdown,
24281da177e4SLinus Torvalds 	.setsockopt		= tcp_setsockopt,
24291da177e4SLinus Torvalds 	.getsockopt		= tcp_getsockopt,
24301da177e4SLinus Torvalds 	.sendmsg		= tcp_sendmsg,
24311da177e4SLinus Torvalds 	.recvmsg		= tcp_recvmsg,
24321da177e4SLinus Torvalds 	.backlog_rcv		= tcp_v4_do_rcv,
24331da177e4SLinus Torvalds 	.hash			= tcp_v4_hash,
24341da177e4SLinus Torvalds 	.unhash			= tcp_unhash,
24351da177e4SLinus Torvalds 	.get_port		= tcp_v4_get_port,
24361da177e4SLinus Torvalds 	.enter_memory_pressure	= tcp_enter_memory_pressure,
24371da177e4SLinus Torvalds 	.sockets_allocated	= &tcp_sockets_allocated,
24380a5578cfSArnaldo Carvalho de Melo 	.orphan_count		= &tcp_orphan_count,
24391da177e4SLinus Torvalds 	.memory_allocated	= &tcp_memory_allocated,
24401da177e4SLinus Torvalds 	.memory_pressure	= &tcp_memory_pressure,
24411da177e4SLinus Torvalds 	.sysctl_mem		= sysctl_tcp_mem,
24421da177e4SLinus Torvalds 	.sysctl_wmem		= sysctl_tcp_wmem,
24431da177e4SLinus Torvalds 	.sysctl_rmem		= sysctl_tcp_rmem,
24441da177e4SLinus Torvalds 	.max_header		= MAX_TCP_HEADER,
24451da177e4SLinus Torvalds 	.obj_size		= sizeof(struct tcp_sock),
24466d6ee43eSArnaldo Carvalho de Melo 	.twsk_prot		= &tcp_timewait_sock_ops,
244760236fddSArnaldo Carvalho de Melo 	.rsk_prot		= &tcp_request_sock_ops,
2448543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
2449543d9cfeSArnaldo Carvalho de Melo 	.compat_setsockopt	= compat_tcp_setsockopt,
2450543d9cfeSArnaldo Carvalho de Melo 	.compat_getsockopt	= compat_tcp_getsockopt,
2451543d9cfeSArnaldo Carvalho de Melo #endif
24521da177e4SLinus Torvalds };
24531da177e4SLinus Torvalds 
24541da177e4SLinus Torvalds void __init tcp_v4_init(struct net_proto_family *ops)
24551da177e4SLinus Torvalds {
24567174259eSArnaldo Carvalho de Melo 	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
24577174259eSArnaldo Carvalho de Melo 				     IPPROTO_TCP) < 0)
24581da177e4SLinus Torvalds 		panic("Failed to create the TCP control socket.\n");
24591da177e4SLinus Torvalds }
24601da177e4SLinus Torvalds 
/* Symbols exported unconditionally. */
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_tcp_low_latency);

/* Registration helpers are exported only when CONFIG_PROC_FS is set. */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
24781da177e4SLinus Torvalds 
2479