xref: /linux/net/ipv4/inet_connection_sock.c (revision 3e12939a2a67fbb4cbd962c3b9bc398c73319766)
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Support for INET connection oriented protocols.
 *
 * Authors:	See the TCP sources
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
153f421baaSArnaldo Carvalho de Melo 
163f421baaSArnaldo Carvalho de Melo #include <linux/module.h>
173f421baaSArnaldo Carvalho de Melo #include <linux/jhash.h>
183f421baaSArnaldo Carvalho de Melo 
193f421baaSArnaldo Carvalho de Melo #include <net/inet_connection_sock.h>
203f421baaSArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
213f421baaSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h>
223f421baaSArnaldo Carvalho de Melo #include <net/ip.h>
233f421baaSArnaldo Carvalho de Melo #include <net/route.h>
243f421baaSArnaldo Carvalho de Melo #include <net/tcp_states.h>
25a019d6feSArnaldo Carvalho de Melo #include <net/xfrm.h>
263f421baaSArnaldo Carvalho de Melo 
#ifdef INET_CSK_DEBUG
/* Diagnostic text for an unrecognised timer value; only built for debugging. */
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
#endif
313f421baaSArnaldo Carvalho de Melo 
323f421baaSArnaldo Carvalho de Melo /*
333c689b73SEric Dumazet  * This struct holds the first and last local port number.
343f421baaSArnaldo Carvalho de Melo  */
353c689b73SEric Dumazet struct local_ports sysctl_local_ports __read_mostly = {
36c4dbe54eSEric Dumazet 	.lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock),
373c689b73SEric Dumazet 	.range = { 32768, 61000 },
383c689b73SEric Dumazet };
39227b60f5SStephen Hemminger 
/*
 * Storage for the set of reserved local ports.  It is only defined and
 * exported here; nothing in this part of the file allocates or fills it.
 * NOTE(review): presumably the bitmap that inet_is_reserved_local_port()
 * consults - confirm against its definition.
 */
unsigned long *sysctl_local_reserved_ports;
EXPORT_SYMBOL(sysctl_local_reserved_ports);
42e3826f1eSAmerigo Wang 
43227b60f5SStephen Hemminger void inet_get_local_port_range(int *low, int *high)
44227b60f5SStephen Hemminger {
4595c96174SEric Dumazet 	unsigned int seq;
4695c96174SEric Dumazet 
47227b60f5SStephen Hemminger 	do {
483c689b73SEric Dumazet 		seq = read_seqbegin(&sysctl_local_ports.lock);
49227b60f5SStephen Hemminger 
503c689b73SEric Dumazet 		*low = sysctl_local_ports.range[0];
513c689b73SEric Dumazet 		*high = sysctl_local_ports.range[1];
523c689b73SEric Dumazet 	} while (read_seqretry(&sysctl_local_ports.lock, seq));
53227b60f5SStephen Hemminger }
54227b60f5SStephen Hemminger EXPORT_SYMBOL(inet_get_local_port_range);
553f421baaSArnaldo Carvalho de Melo 
56971af18bSArnaldo Carvalho de Melo int inet_csk_bind_conflict(const struct sock *sk,
57aacd9289SAlex Copot 			   const struct inet_bind_bucket *tb, bool relax)
583f421baaSArnaldo Carvalho de Melo {
593f421baaSArnaldo Carvalho de Melo 	struct sock *sk2;
603f421baaSArnaldo Carvalho de Melo 	struct hlist_node *node;
613f421baaSArnaldo Carvalho de Melo 	int reuse = sk->sk_reuse;
623f421baaSArnaldo Carvalho de Melo 
637477fd2eSPavel Emelyanov 	/*
647477fd2eSPavel Emelyanov 	 * Unlike other sk lookup places we do not check
657477fd2eSPavel Emelyanov 	 * for sk_net here, since _all_ the socks listed
667477fd2eSPavel Emelyanov 	 * in tb->owners list belong to the same net - the
677477fd2eSPavel Emelyanov 	 * one this bucket belongs to.
687477fd2eSPavel Emelyanov 	 */
697477fd2eSPavel Emelyanov 
703f421baaSArnaldo Carvalho de Melo 	sk_for_each_bound(sk2, node, &tb->owners) {
713f421baaSArnaldo Carvalho de Melo 		if (sk != sk2 &&
723f421baaSArnaldo Carvalho de Melo 		    !inet_v6_ipv6only(sk2) &&
733f421baaSArnaldo Carvalho de Melo 		    (!sk->sk_bound_dev_if ||
743f421baaSArnaldo Carvalho de Melo 		     !sk2->sk_bound_dev_if ||
753f421baaSArnaldo Carvalho de Melo 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
763f421baaSArnaldo Carvalho de Melo 			if (!reuse || !sk2->sk_reuse ||
773e8c806aSDavid S. Miller 			    sk2->sk_state == TCP_LISTEN) {
7868835abaSEric Dumazet 				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
7968835abaSEric Dumazet 				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
8068835abaSEric Dumazet 				    sk2_rcv_saddr == sk_rcv_saddr(sk))
813f421baaSArnaldo Carvalho de Melo 					break;
828d238b25SDavid S. Miller 			}
83aacd9289SAlex Copot 			if (!relax && reuse && sk2->sk_reuse &&
84aacd9289SAlex Copot 			    sk2->sk_state != TCP_LISTEN) {
85aacd9289SAlex Copot 				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
86aacd9289SAlex Copot 
87aacd9289SAlex Copot 				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
88aacd9289SAlex Copot 				    sk2_rcv_saddr == sk_rcv_saddr(sk))
89aacd9289SAlex Copot 					break;
90aacd9289SAlex Copot 			}
913f421baaSArnaldo Carvalho de Melo 		}
923f421baaSArnaldo Carvalho de Melo 	}
933f421baaSArnaldo Carvalho de Melo 	return node != NULL;
943f421baaSArnaldo Carvalho de Melo }
95971af18bSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
96971af18bSArnaldo Carvalho de Melo 
973f421baaSArnaldo Carvalho de Melo /* Obtain a reference to a local port for the given sock,
983f421baaSArnaldo Carvalho de Melo  * if snum is zero it means select any available local port.
993f421baaSArnaldo Carvalho de Melo  */
100ab1e0a13SArnaldo Carvalho de Melo int inet_csk_get_port(struct sock *sk, unsigned short snum)
1013f421baaSArnaldo Carvalho de Melo {
10239d8cda7SPavel Emelyanov 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
1033f421baaSArnaldo Carvalho de Melo 	struct inet_bind_hashbucket *head;
1043f421baaSArnaldo Carvalho de Melo 	struct hlist_node *node;
1053f421baaSArnaldo Carvalho de Melo 	struct inet_bind_bucket *tb;
106a9d8f911SEvgeniy Polyakov 	int ret, attempts = 5;
1073b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(sk);
108a9d8f911SEvgeniy Polyakov 	int smallest_size = -1, smallest_rover;
1093f421baaSArnaldo Carvalho de Melo 
1103f421baaSArnaldo Carvalho de Melo 	local_bh_disable();
1113f421baaSArnaldo Carvalho de Melo 	if (!snum) {
112227b60f5SStephen Hemminger 		int remaining, rover, low, high;
113227b60f5SStephen Hemminger 
114a9d8f911SEvgeniy Polyakov again:
115227b60f5SStephen Hemminger 		inet_get_local_port_range(&low, &high);
116a25de534SAnton Arapov 		remaining = (high - low) + 1;
117a9d8f911SEvgeniy Polyakov 		smallest_rover = rover = net_random() % remaining + low;
1183f421baaSArnaldo Carvalho de Melo 
119a9d8f911SEvgeniy Polyakov 		smallest_size = -1;
1203f421baaSArnaldo Carvalho de Melo 		do {
121e3826f1eSAmerigo Wang 			if (inet_is_reserved_local_port(rover))
122e3826f1eSAmerigo Wang 				goto next_nolock;
1237f635ab7SPavel Emelyanov 			head = &hashinfo->bhash[inet_bhashfn(net, rover,
1247f635ab7SPavel Emelyanov 					hashinfo->bhash_size)];
1253f421baaSArnaldo Carvalho de Melo 			spin_lock(&head->lock);
1263f421baaSArnaldo Carvalho de Melo 			inet_bind_bucket_for_each(tb, node, &head->chain)
12709ad9bc7SOctavian Purdila 				if (net_eq(ib_net(tb), net) && tb->port == rover) {
128a9d8f911SEvgeniy Polyakov 					if (tb->fastreuse > 0 &&
129a9d8f911SEvgeniy Polyakov 					    sk->sk_reuse &&
130a9d8f911SEvgeniy Polyakov 					    sk->sk_state != TCP_LISTEN &&
131a9d8f911SEvgeniy Polyakov 					    (tb->num_owners < smallest_size || smallest_size == -1)) {
132a9d8f911SEvgeniy Polyakov 						smallest_size = tb->num_owners;
133a9d8f911SEvgeniy Polyakov 						smallest_rover = rover;
134aacd9289SAlex Copot 						if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
135aacd9289SAlex Copot 						    !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
136a9d8f911SEvgeniy Polyakov 							snum = smallest_rover;
137fddb7b57SFlavio Leitner 							goto tb_found;
138a9d8f911SEvgeniy Polyakov 						}
139a9d8f911SEvgeniy Polyakov 					}
140aacd9289SAlex Copot 					if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
1412b05ad33SFlavio Leitner 						snum = rover;
142fddb7b57SFlavio Leitner 						goto tb_found;
1432b05ad33SFlavio Leitner 					}
1443f421baaSArnaldo Carvalho de Melo 					goto next;
145a9d8f911SEvgeniy Polyakov 				}
1463f421baaSArnaldo Carvalho de Melo 			break;
1473f421baaSArnaldo Carvalho de Melo 		next:
1483f421baaSArnaldo Carvalho de Melo 			spin_unlock(&head->lock);
149e3826f1eSAmerigo Wang 		next_nolock:
1506df71634SStephen Hemminger 			if (++rover > high)
1516df71634SStephen Hemminger 				rover = low;
1523f421baaSArnaldo Carvalho de Melo 		} while (--remaining > 0);
1533f421baaSArnaldo Carvalho de Melo 
1543f421baaSArnaldo Carvalho de Melo 		/* Exhausted local port range during search?  It is not
1553f421baaSArnaldo Carvalho de Melo 		 * possible for us to be holding one of the bind hash
1563f421baaSArnaldo Carvalho de Melo 		 * locks if this test triggers, because if 'remaining'
1573f421baaSArnaldo Carvalho de Melo 		 * drops to zero, we broke out of the do/while loop at
1583f421baaSArnaldo Carvalho de Melo 		 * the top level, not from the 'break;' statement.
1593f421baaSArnaldo Carvalho de Melo 		 */
1603f421baaSArnaldo Carvalho de Melo 		ret = 1;
161a9d8f911SEvgeniy Polyakov 		if (remaining <= 0) {
162a9d8f911SEvgeniy Polyakov 			if (smallest_size != -1) {
163a9d8f911SEvgeniy Polyakov 				snum = smallest_rover;
164a9d8f911SEvgeniy Polyakov 				goto have_snum;
165a9d8f911SEvgeniy Polyakov 			}
1663f421baaSArnaldo Carvalho de Melo 			goto fail;
167a9d8f911SEvgeniy Polyakov 		}
1683f421baaSArnaldo Carvalho de Melo 		/* OK, here is the one we will use.  HEAD is
1693f421baaSArnaldo Carvalho de Melo 		 * non-NULL and we hold it's mutex.
1703f421baaSArnaldo Carvalho de Melo 		 */
1713f421baaSArnaldo Carvalho de Melo 		snum = rover;
1723f421baaSArnaldo Carvalho de Melo 	} else {
173a9d8f911SEvgeniy Polyakov have_snum:
1747f635ab7SPavel Emelyanov 		head = &hashinfo->bhash[inet_bhashfn(net, snum,
1757f635ab7SPavel Emelyanov 				hashinfo->bhash_size)];
1763f421baaSArnaldo Carvalho de Melo 		spin_lock(&head->lock);
1773f421baaSArnaldo Carvalho de Melo 		inet_bind_bucket_for_each(tb, node, &head->chain)
17809ad9bc7SOctavian Purdila 			if (net_eq(ib_net(tb), net) && tb->port == snum)
1793f421baaSArnaldo Carvalho de Melo 				goto tb_found;
1803f421baaSArnaldo Carvalho de Melo 	}
1813f421baaSArnaldo Carvalho de Melo 	tb = NULL;
1823f421baaSArnaldo Carvalho de Melo 	goto tb_not_found;
1833f421baaSArnaldo Carvalho de Melo tb_found:
1843f421baaSArnaldo Carvalho de Melo 	if (!hlist_empty(&tb->owners)) {
1854a17fd52SPavel Emelyanov 		if (sk->sk_reuse == SK_FORCE_REUSE)
1864a17fd52SPavel Emelyanov 			goto success;
1874a17fd52SPavel Emelyanov 
1883f421baaSArnaldo Carvalho de Melo 		if (tb->fastreuse > 0 &&
189a9d8f911SEvgeniy Polyakov 		    sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
190a9d8f911SEvgeniy Polyakov 		    smallest_size == -1) {
1913f421baaSArnaldo Carvalho de Melo 			goto success;
1923f421baaSArnaldo Carvalho de Melo 		} else {
1933f421baaSArnaldo Carvalho de Melo 			ret = 1;
194aacd9289SAlex Copot 			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
1955add3009SStephen Hemminger 				if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
1965add3009SStephen Hemminger 				    smallest_size != -1 && --attempts >= 0) {
197a9d8f911SEvgeniy Polyakov 					spin_unlock(&head->lock);
198a9d8f911SEvgeniy Polyakov 					goto again;
199a9d8f911SEvgeniy Polyakov 				}
200aacd9289SAlex Copot 
2013f421baaSArnaldo Carvalho de Melo 				goto fail_unlock;
2023f421baaSArnaldo Carvalho de Melo 			}
2033f421baaSArnaldo Carvalho de Melo 		}
204a9d8f911SEvgeniy Polyakov 	}
2053f421baaSArnaldo Carvalho de Melo tb_not_found:
2063f421baaSArnaldo Carvalho de Melo 	ret = 1;
207941b1d22SPavel Emelyanov 	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
208941b1d22SPavel Emelyanov 					net, head, snum)) == NULL)
2093f421baaSArnaldo Carvalho de Melo 		goto fail_unlock;
2103f421baaSArnaldo Carvalho de Melo 	if (hlist_empty(&tb->owners)) {
2113f421baaSArnaldo Carvalho de Melo 		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
2123f421baaSArnaldo Carvalho de Melo 			tb->fastreuse = 1;
2133f421baaSArnaldo Carvalho de Melo 		else
2143f421baaSArnaldo Carvalho de Melo 			tb->fastreuse = 0;
2153f421baaSArnaldo Carvalho de Melo 	} else if (tb->fastreuse &&
2163f421baaSArnaldo Carvalho de Melo 		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
2173f421baaSArnaldo Carvalho de Melo 		tb->fastreuse = 0;
2183f421baaSArnaldo Carvalho de Melo success:
2193f421baaSArnaldo Carvalho de Melo 	if (!inet_csk(sk)->icsk_bind_hash)
2203f421baaSArnaldo Carvalho de Melo 		inet_bind_hash(sk, tb, snum);
221547b792cSIlpo Järvinen 	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
2223f421baaSArnaldo Carvalho de Melo 	ret = 0;
2233f421baaSArnaldo Carvalho de Melo 
2243f421baaSArnaldo Carvalho de Melo fail_unlock:
2253f421baaSArnaldo Carvalho de Melo 	spin_unlock(&head->lock);
2263f421baaSArnaldo Carvalho de Melo fail:
2273f421baaSArnaldo Carvalho de Melo 	local_bh_enable();
2283f421baaSArnaldo Carvalho de Melo 	return ret;
2293f421baaSArnaldo Carvalho de Melo }
2303f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_get_port);
2313f421baaSArnaldo Carvalho de Melo 
2323f421baaSArnaldo Carvalho de Melo /*
2333f421baaSArnaldo Carvalho de Melo  * Wait for an incoming connection, avoid race conditions. This must be called
2343f421baaSArnaldo Carvalho de Melo  * with the socket locked.
2353f421baaSArnaldo Carvalho de Melo  */
2363f421baaSArnaldo Carvalho de Melo static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
2373f421baaSArnaldo Carvalho de Melo {
2383f421baaSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
2393f421baaSArnaldo Carvalho de Melo 	DEFINE_WAIT(wait);
2403f421baaSArnaldo Carvalho de Melo 	int err;
2413f421baaSArnaldo Carvalho de Melo 
2423f421baaSArnaldo Carvalho de Melo 	/*
2433f421baaSArnaldo Carvalho de Melo 	 * True wake-one mechanism for incoming connections: only
2443f421baaSArnaldo Carvalho de Melo 	 * one process gets woken up, not the 'whole herd'.
2453f421baaSArnaldo Carvalho de Melo 	 * Since we do not 'race & poll' for established sockets
2463f421baaSArnaldo Carvalho de Melo 	 * anymore, the common case will execute the loop only once.
2473f421baaSArnaldo Carvalho de Melo 	 *
2483f421baaSArnaldo Carvalho de Melo 	 * Subtle issue: "add_wait_queue_exclusive()" will be added
2493f421baaSArnaldo Carvalho de Melo 	 * after any current non-exclusive waiters, and we know that
2503f421baaSArnaldo Carvalho de Melo 	 * it will always _stay_ after any new non-exclusive waiters
2513f421baaSArnaldo Carvalho de Melo 	 * because all non-exclusive waiters are added at the
2523f421baaSArnaldo Carvalho de Melo 	 * beginning of the wait-queue. As such, it's ok to "drop"
2533f421baaSArnaldo Carvalho de Melo 	 * our exclusiveness temporarily when we get woken up without
2543f421baaSArnaldo Carvalho de Melo 	 * having to remove and re-insert us on the wait queue.
2553f421baaSArnaldo Carvalho de Melo 	 */
2563f421baaSArnaldo Carvalho de Melo 	for (;;) {
257aa395145SEric Dumazet 		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
2583f421baaSArnaldo Carvalho de Melo 					  TASK_INTERRUPTIBLE);
2593f421baaSArnaldo Carvalho de Melo 		release_sock(sk);
2603f421baaSArnaldo Carvalho de Melo 		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
2613f421baaSArnaldo Carvalho de Melo 			timeo = schedule_timeout(timeo);
2623f421baaSArnaldo Carvalho de Melo 		lock_sock(sk);
2633f421baaSArnaldo Carvalho de Melo 		err = 0;
2643f421baaSArnaldo Carvalho de Melo 		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
2653f421baaSArnaldo Carvalho de Melo 			break;
2663f421baaSArnaldo Carvalho de Melo 		err = -EINVAL;
2673f421baaSArnaldo Carvalho de Melo 		if (sk->sk_state != TCP_LISTEN)
2683f421baaSArnaldo Carvalho de Melo 			break;
2693f421baaSArnaldo Carvalho de Melo 		err = sock_intr_errno(timeo);
2703f421baaSArnaldo Carvalho de Melo 		if (signal_pending(current))
2713f421baaSArnaldo Carvalho de Melo 			break;
2723f421baaSArnaldo Carvalho de Melo 		err = -EAGAIN;
2733f421baaSArnaldo Carvalho de Melo 		if (!timeo)
2743f421baaSArnaldo Carvalho de Melo 			break;
2753f421baaSArnaldo Carvalho de Melo 	}
276aa395145SEric Dumazet 	finish_wait(sk_sleep(sk), &wait);
2773f421baaSArnaldo Carvalho de Melo 	return err;
2783f421baaSArnaldo Carvalho de Melo }
2793f421baaSArnaldo Carvalho de Melo 
2803f421baaSArnaldo Carvalho de Melo /*
2813f421baaSArnaldo Carvalho de Melo  * This will accept the next outstanding connection.
2823f421baaSArnaldo Carvalho de Melo  */
2833f421baaSArnaldo Carvalho de Melo struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
2843f421baaSArnaldo Carvalho de Melo {
2853f421baaSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
2863f421baaSArnaldo Carvalho de Melo 	struct sock *newsk;
2873f421baaSArnaldo Carvalho de Melo 	int error;
2883f421baaSArnaldo Carvalho de Melo 
2893f421baaSArnaldo Carvalho de Melo 	lock_sock(sk);
2903f421baaSArnaldo Carvalho de Melo 
2913f421baaSArnaldo Carvalho de Melo 	/* We need to make sure that this socket is listening,
2923f421baaSArnaldo Carvalho de Melo 	 * and that it has something pending.
2933f421baaSArnaldo Carvalho de Melo 	 */
2943f421baaSArnaldo Carvalho de Melo 	error = -EINVAL;
2953f421baaSArnaldo Carvalho de Melo 	if (sk->sk_state != TCP_LISTEN)
2963f421baaSArnaldo Carvalho de Melo 		goto out_err;
2973f421baaSArnaldo Carvalho de Melo 
2983f421baaSArnaldo Carvalho de Melo 	/* Find already established connection */
2993f421baaSArnaldo Carvalho de Melo 	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
3003f421baaSArnaldo Carvalho de Melo 		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
3013f421baaSArnaldo Carvalho de Melo 
3023f421baaSArnaldo Carvalho de Melo 		/* If this is a non blocking socket don't sleep */
3033f421baaSArnaldo Carvalho de Melo 		error = -EAGAIN;
3043f421baaSArnaldo Carvalho de Melo 		if (!timeo)
3053f421baaSArnaldo Carvalho de Melo 			goto out_err;
3063f421baaSArnaldo Carvalho de Melo 
3073f421baaSArnaldo Carvalho de Melo 		error = inet_csk_wait_for_connect(sk, timeo);
3083f421baaSArnaldo Carvalho de Melo 		if (error)
3093f421baaSArnaldo Carvalho de Melo 			goto out_err;
3103f421baaSArnaldo Carvalho de Melo 	}
3113f421baaSArnaldo Carvalho de Melo 
3123f421baaSArnaldo Carvalho de Melo 	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
313547b792cSIlpo Järvinen 	WARN_ON(newsk->sk_state == TCP_SYN_RECV);
3143f421baaSArnaldo Carvalho de Melo out:
3153f421baaSArnaldo Carvalho de Melo 	release_sock(sk);
3163f421baaSArnaldo Carvalho de Melo 	return newsk;
3173f421baaSArnaldo Carvalho de Melo out_err:
3183f421baaSArnaldo Carvalho de Melo 	newsk = NULL;
3193f421baaSArnaldo Carvalho de Melo 	*err = error;
3203f421baaSArnaldo Carvalho de Melo 	goto out;
3213f421baaSArnaldo Carvalho de Melo }
3223f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_accept);
3233f421baaSArnaldo Carvalho de Melo 
3243f421baaSArnaldo Carvalho de Melo /*
3253f421baaSArnaldo Carvalho de Melo  * Using different timers for retransmit, delayed acks and probes
3263f421baaSArnaldo Carvalho de Melo  * We may wish use just one timer maintaining a list of expire jiffies
3273f421baaSArnaldo Carvalho de Melo  * to optimize.
3283f421baaSArnaldo Carvalho de Melo  */
3293f421baaSArnaldo Carvalho de Melo void inet_csk_init_xmit_timers(struct sock *sk,
3303f421baaSArnaldo Carvalho de Melo 			       void (*retransmit_handler)(unsigned long),
3313f421baaSArnaldo Carvalho de Melo 			       void (*delack_handler)(unsigned long),
3323f421baaSArnaldo Carvalho de Melo 			       void (*keepalive_handler)(unsigned long))
3333f421baaSArnaldo Carvalho de Melo {
3343f421baaSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
3353f421baaSArnaldo Carvalho de Melo 
336b24b8a24SPavel Emelyanov 	setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
337b24b8a24SPavel Emelyanov 			(unsigned long)sk);
338b24b8a24SPavel Emelyanov 	setup_timer(&icsk->icsk_delack_timer, delack_handler,
339b24b8a24SPavel Emelyanov 			(unsigned long)sk);
340b24b8a24SPavel Emelyanov 	setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
3413f421baaSArnaldo Carvalho de Melo 	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
3423f421baaSArnaldo Carvalho de Melo }
3433f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_init_xmit_timers);
3443f421baaSArnaldo Carvalho de Melo 
3453f421baaSArnaldo Carvalho de Melo void inet_csk_clear_xmit_timers(struct sock *sk)
3463f421baaSArnaldo Carvalho de Melo {
3473f421baaSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
3483f421baaSArnaldo Carvalho de Melo 
3493f421baaSArnaldo Carvalho de Melo 	icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;
3503f421baaSArnaldo Carvalho de Melo 
3513f421baaSArnaldo Carvalho de Melo 	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
3523f421baaSArnaldo Carvalho de Melo 	sk_stop_timer(sk, &icsk->icsk_delack_timer);
3533f421baaSArnaldo Carvalho de Melo 	sk_stop_timer(sk, &sk->sk_timer);
3543f421baaSArnaldo Carvalho de Melo }
3553f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
3563f421baaSArnaldo Carvalho de Melo 
3573f421baaSArnaldo Carvalho de Melo void inet_csk_delete_keepalive_timer(struct sock *sk)
3583f421baaSArnaldo Carvalho de Melo {
3593f421baaSArnaldo Carvalho de Melo 	sk_stop_timer(sk, &sk->sk_timer);
3603f421baaSArnaldo Carvalho de Melo }
3613f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
3623f421baaSArnaldo Carvalho de Melo 
3633f421baaSArnaldo Carvalho de Melo void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
3643f421baaSArnaldo Carvalho de Melo {
3653f421baaSArnaldo Carvalho de Melo 	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
3663f421baaSArnaldo Carvalho de Melo }
3673f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
3683f421baaSArnaldo Carvalho de Melo 
3693f421baaSArnaldo Carvalho de Melo struct dst_entry *inet_csk_route_req(struct sock *sk,
3706bd023f3SDavid S. Miller 				     struct flowi4 *fl4,
3717586ecebSEric Dumazet 				     const struct request_sock *req,
3727586ecebSEric Dumazet 				     bool nocache)
3733f421baaSArnaldo Carvalho de Melo {
3743f421baaSArnaldo Carvalho de Melo 	struct rtable *rt;
3753f421baaSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
376f6d8bd05SEric Dumazet 	struct ip_options_rcu *opt = inet_rsk(req)->opt;
37784a3aa00SPavel Emelyanov 	struct net *net = sock_net(sk);
378*3e12939aSDavid S. Miller 	int flags = inet_sk_flowi_flags(sk);
3793f421baaSArnaldo Carvalho de Melo 
3807586ecebSEric Dumazet 	if (nocache)
3817586ecebSEric Dumazet 		flags |= FLOWI_FLAG_RT_NOCACHE;
3826bd023f3SDavid S. Miller 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
383e79d9bc7SDavid S. Miller 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
3847433819aSEric Dumazet 			   sk->sk_protocol,
3857586ecebSEric Dumazet 			   flags,
386f6d8bd05SEric Dumazet 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
387e79d9bc7SDavid S. Miller 			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
3886bd023f3SDavid S. Miller 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
3896bd023f3SDavid S. Miller 	rt = ip_route_output_flow(net, fl4, sk);
390b23dd4feSDavid S. Miller 	if (IS_ERR(rt))
391857a6e0aSIlpo Järvinen 		goto no_route;
3926bd023f3SDavid S. Miller 	if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
393857a6e0aSIlpo Järvinen 		goto route_err;
394d8d1f30bSChangli Gao 	return &rt->dst;
395857a6e0aSIlpo Järvinen 
396857a6e0aSIlpo Järvinen route_err:
397857a6e0aSIlpo Järvinen 	ip_rt_put(rt);
398857a6e0aSIlpo Järvinen no_route:
399857a6e0aSIlpo Järvinen 	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
400857a6e0aSIlpo Järvinen 	return NULL;
4013f421baaSArnaldo Carvalho de Melo }
4023f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_route_req);
4033f421baaSArnaldo Carvalho de Melo 
40477357a95SDavid S. Miller struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
40577357a95SDavid S. Miller 					    struct sock *newsk,
40677357a95SDavid S. Miller 					    const struct request_sock *req)
40777357a95SDavid S. Miller {
40877357a95SDavid S. Miller 	const struct inet_request_sock *ireq = inet_rsk(req);
40977357a95SDavid S. Miller 	struct inet_sock *newinet = inet_sk(newsk);
41077357a95SDavid S. Miller 	struct ip_options_rcu *opt = ireq->opt;
41177357a95SDavid S. Miller 	struct net *net = sock_net(sk);
41277357a95SDavid S. Miller 	struct flowi4 *fl4;
41377357a95SDavid S. Miller 	struct rtable *rt;
41477357a95SDavid S. Miller 
41577357a95SDavid S. Miller 	fl4 = &newinet->cork.fl.u.ip4;
41677357a95SDavid S. Miller 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
41777357a95SDavid S. Miller 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
41877357a95SDavid S. Miller 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
41977357a95SDavid S. Miller 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
42077357a95SDavid S. Miller 			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
42177357a95SDavid S. Miller 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
42277357a95SDavid S. Miller 	rt = ip_route_output_flow(net, fl4, sk);
42377357a95SDavid S. Miller 	if (IS_ERR(rt))
42477357a95SDavid S. Miller 		goto no_route;
42577357a95SDavid S. Miller 	if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
42677357a95SDavid S. Miller 		goto route_err;
42777357a95SDavid S. Miller 	return &rt->dst;
42877357a95SDavid S. Miller 
42977357a95SDavid S. Miller route_err:
43077357a95SDavid S. Miller 	ip_rt_put(rt);
43177357a95SDavid S. Miller no_route:
43277357a95SDavid S. Miller 	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
43377357a95SDavid S. Miller 	return NULL;
43477357a95SDavid S. Miller }
43577357a95SDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
43677357a95SDavid S. Miller 
4376b72977bSAl Viro static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
43872a3effaSEric Dumazet 				 const u32 rnd, const u32 synq_hsize)
4393f421baaSArnaldo Carvalho de Melo {
4406b72977bSAl Viro 	return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
4413f421baaSArnaldo Carvalho de Melo }
4423f421baaSArnaldo Carvalho de Melo 
443dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
4443f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) ((fam) == AF_INET)
4453f421baaSArnaldo Carvalho de Melo #else
4463f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) 1
4473f421baaSArnaldo Carvalho de Melo #endif
4483f421baaSArnaldo Carvalho de Melo 
4493f421baaSArnaldo Carvalho de Melo struct request_sock *inet_csk_search_req(const struct sock *sk,
4503f421baaSArnaldo Carvalho de Melo 					 struct request_sock ***prevp,
4516b72977bSAl Viro 					 const __be16 rport, const __be32 raddr,
4527f25afbbSAl Viro 					 const __be32 laddr)
4533f421baaSArnaldo Carvalho de Melo {
4543f421baaSArnaldo Carvalho de Melo 	const struct inet_connection_sock *icsk = inet_csk(sk);
4553f421baaSArnaldo Carvalho de Melo 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
4563f421baaSArnaldo Carvalho de Melo 	struct request_sock *req, **prev;
4573f421baaSArnaldo Carvalho de Melo 
4583f421baaSArnaldo Carvalho de Melo 	for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
4593f421baaSArnaldo Carvalho de Melo 						    lopt->nr_table_entries)];
4603f421baaSArnaldo Carvalho de Melo 	     (req = *prev) != NULL;
4613f421baaSArnaldo Carvalho de Melo 	     prev = &req->dl_next) {
4623f421baaSArnaldo Carvalho de Melo 		const struct inet_request_sock *ireq = inet_rsk(req);
4633f421baaSArnaldo Carvalho de Melo 
4643f421baaSArnaldo Carvalho de Melo 		if (ireq->rmt_port == rport &&
4653f421baaSArnaldo Carvalho de Melo 		    ireq->rmt_addr == raddr &&
4663f421baaSArnaldo Carvalho de Melo 		    ireq->loc_addr == laddr &&
4673f421baaSArnaldo Carvalho de Melo 		    AF_INET_FAMILY(req->rsk_ops->family)) {
468547b792cSIlpo Järvinen 			WARN_ON(req->sk);
4693f421baaSArnaldo Carvalho de Melo 			*prevp = prev;
4703f421baaSArnaldo Carvalho de Melo 			break;
4713f421baaSArnaldo Carvalho de Melo 		}
4723f421baaSArnaldo Carvalho de Melo 	}
4733f421baaSArnaldo Carvalho de Melo 
4743f421baaSArnaldo Carvalho de Melo 	return req;
4753f421baaSArnaldo Carvalho de Melo }
4763f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_search_req);
4773f421baaSArnaldo Carvalho de Melo 
4783f421baaSArnaldo Carvalho de Melo void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
479c2977c22SArnaldo Carvalho de Melo 				   unsigned long timeout)
4803f421baaSArnaldo Carvalho de Melo {
4813f421baaSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
4823f421baaSArnaldo Carvalho de Melo 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
4833f421baaSArnaldo Carvalho de Melo 	const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
4843f421baaSArnaldo Carvalho de Melo 				     lopt->hash_rnd, lopt->nr_table_entries);
4853f421baaSArnaldo Carvalho de Melo 
4863f421baaSArnaldo Carvalho de Melo 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
4873f421baaSArnaldo Carvalho de Melo 	inet_csk_reqsk_queue_added(sk, timeout);
4883f421baaSArnaldo Carvalho de Melo }
4894bc2f18bSEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
4903f421baaSArnaldo Carvalho de Melo 
/* Only thing we need from tcp.h */
extern int sysctl_tcp_synack_retries;
493a019d6feSArnaldo Carvalho de Melo 
4949f1d2604SArnaldo Carvalho de Melo 
4950c3d79bcSJulian Anastasov /* Decide when to expire the request and when to resend SYN-ACK */
4960c3d79bcSJulian Anastasov static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
4970c3d79bcSJulian Anastasov 				  const int max_retries,
4980c3d79bcSJulian Anastasov 				  const u8 rskq_defer_accept,
4990c3d79bcSJulian Anastasov 				  int *expire, int *resend)
5000c3d79bcSJulian Anastasov {
5010c3d79bcSJulian Anastasov 	if (!rskq_defer_accept) {
5020c3d79bcSJulian Anastasov 		*expire = req->retrans >= thresh;
5030c3d79bcSJulian Anastasov 		*resend = 1;
5040c3d79bcSJulian Anastasov 		return;
5050c3d79bcSJulian Anastasov 	}
5060c3d79bcSJulian Anastasov 	*expire = req->retrans >= thresh &&
5070c3d79bcSJulian Anastasov 		  (!inet_rsk(req)->acked || req->retrans >= max_retries);
5080c3d79bcSJulian Anastasov 	/*
5090c3d79bcSJulian Anastasov 	 * Do not resend while waiting for data after ACK,
5100c3d79bcSJulian Anastasov 	 * start to resend on end of deferring period to give
5110c3d79bcSJulian Anastasov 	 * last chance for data or ACK to create established socket.
5120c3d79bcSJulian Anastasov 	 */
5130c3d79bcSJulian Anastasov 	*resend = !inet_rsk(req)->acked ||
5140c3d79bcSJulian Anastasov 		  req->retrans >= rskq_defer_accept - 1;
5150c3d79bcSJulian Anastasov }
5160c3d79bcSJulian Anastasov 
517a019d6feSArnaldo Carvalho de Melo void inet_csk_reqsk_queue_prune(struct sock *parent,
518a019d6feSArnaldo Carvalho de Melo 				const unsigned long interval,
519a019d6feSArnaldo Carvalho de Melo 				const unsigned long timeout,
520a019d6feSArnaldo Carvalho de Melo 				const unsigned long max_rto)
521a019d6feSArnaldo Carvalho de Melo {
522a019d6feSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(parent);
523a019d6feSArnaldo Carvalho de Melo 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
524a019d6feSArnaldo Carvalho de Melo 	struct listen_sock *lopt = queue->listen_opt;
525ec0a1966SDavid S. Miller 	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
526ec0a1966SDavid S. Miller 	int thresh = max_retries;
527a019d6feSArnaldo Carvalho de Melo 	unsigned long now = jiffies;
528a019d6feSArnaldo Carvalho de Melo 	struct request_sock **reqp, *req;
529a019d6feSArnaldo Carvalho de Melo 	int i, budget;
530a019d6feSArnaldo Carvalho de Melo 
531a019d6feSArnaldo Carvalho de Melo 	if (lopt == NULL || lopt->qlen == 0)
532a019d6feSArnaldo Carvalho de Melo 		return;
533a019d6feSArnaldo Carvalho de Melo 
534a019d6feSArnaldo Carvalho de Melo 	/* Normally all the openreqs are young and become mature
535a019d6feSArnaldo Carvalho de Melo 	 * (i.e. converted to established socket) for first timeout.
536fd4f2ceaSEric Dumazet 	 * If synack was not acknowledged for 1 second, it means
537a019d6feSArnaldo Carvalho de Melo 	 * one of the following things: synack was lost, ack was lost,
538a019d6feSArnaldo Carvalho de Melo 	 * rtt is high or nobody planned to ack (i.e. synflood).
539a019d6feSArnaldo Carvalho de Melo 	 * When server is a bit loaded, queue is populated with old
540a019d6feSArnaldo Carvalho de Melo 	 * open requests, reducing effective size of queue.
541a019d6feSArnaldo Carvalho de Melo 	 * When server is well loaded, queue size reduces to zero
542a019d6feSArnaldo Carvalho de Melo 	 * after several minutes of work. It is not synflood,
543a019d6feSArnaldo Carvalho de Melo 	 * it is normal operation. The solution is pruning
544a019d6feSArnaldo Carvalho de Melo 	 * too old entries overriding normal timeout, when
545a019d6feSArnaldo Carvalho de Melo 	 * situation becomes dangerous.
546a019d6feSArnaldo Carvalho de Melo 	 *
547a019d6feSArnaldo Carvalho de Melo 	 * Essentially, we reserve half of room for young
548a019d6feSArnaldo Carvalho de Melo 	 * embrions; and abort old ones without pity, if old
549a019d6feSArnaldo Carvalho de Melo 	 * ones are about to clog our table.
550a019d6feSArnaldo Carvalho de Melo 	 */
551a019d6feSArnaldo Carvalho de Melo 	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
552a019d6feSArnaldo Carvalho de Melo 		int young = (lopt->qlen_young<<1);
553a019d6feSArnaldo Carvalho de Melo 
554a019d6feSArnaldo Carvalho de Melo 		while (thresh > 2) {
555a019d6feSArnaldo Carvalho de Melo 			if (lopt->qlen < young)
556a019d6feSArnaldo Carvalho de Melo 				break;
557a019d6feSArnaldo Carvalho de Melo 			thresh--;
558a019d6feSArnaldo Carvalho de Melo 			young <<= 1;
559a019d6feSArnaldo Carvalho de Melo 		}
560a019d6feSArnaldo Carvalho de Melo 	}
561a019d6feSArnaldo Carvalho de Melo 
562ec0a1966SDavid S. Miller 	if (queue->rskq_defer_accept)
563ec0a1966SDavid S. Miller 		max_retries = queue->rskq_defer_accept;
564ec0a1966SDavid S. Miller 
565a019d6feSArnaldo Carvalho de Melo 	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
566a019d6feSArnaldo Carvalho de Melo 	i = lopt->clock_hand;
567a019d6feSArnaldo Carvalho de Melo 
568a019d6feSArnaldo Carvalho de Melo 	do {
569a019d6feSArnaldo Carvalho de Melo 		reqp=&lopt->syn_table[i];
570a019d6feSArnaldo Carvalho de Melo 		while ((req = *reqp) != NULL) {
571a019d6feSArnaldo Carvalho de Melo 			if (time_after_eq(now, req->expires)) {
5720c3d79bcSJulian Anastasov 				int expire = 0, resend = 0;
5730c3d79bcSJulian Anastasov 
5740c3d79bcSJulian Anastasov 				syn_ack_recalc(req, thresh, max_retries,
5750c3d79bcSJulian Anastasov 					       queue->rskq_defer_accept,
5760c3d79bcSJulian Anastasov 					       &expire, &resend);
57772659eccSOctavian Purdila 				req->rsk_ops->syn_ack_timeout(parent, req);
5780c3d79bcSJulian Anastasov 				if (!expire &&
5790c3d79bcSJulian Anastasov 				    (!resend ||
580e6b4d113SWilliam Allen Simpson 				     !req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
5810c3d79bcSJulian Anastasov 				     inet_rsk(req)->acked)) {
582a019d6feSArnaldo Carvalho de Melo 					unsigned long timeo;
583a019d6feSArnaldo Carvalho de Melo 
584a019d6feSArnaldo Carvalho de Melo 					if (req->retrans++ == 0)
585a019d6feSArnaldo Carvalho de Melo 						lopt->qlen_young--;
586a019d6feSArnaldo Carvalho de Melo 					timeo = min((timeout << req->retrans), max_rto);
587a019d6feSArnaldo Carvalho de Melo 					req->expires = now + timeo;
588a019d6feSArnaldo Carvalho de Melo 					reqp = &req->dl_next;
589a019d6feSArnaldo Carvalho de Melo 					continue;
590a019d6feSArnaldo Carvalho de Melo 				}
591a019d6feSArnaldo Carvalho de Melo 
592a019d6feSArnaldo Carvalho de Melo 				/* Drop this request */
593a019d6feSArnaldo Carvalho de Melo 				inet_csk_reqsk_queue_unlink(parent, req, reqp);
594a019d6feSArnaldo Carvalho de Melo 				reqsk_queue_removed(queue, req);
595a019d6feSArnaldo Carvalho de Melo 				reqsk_free(req);
596a019d6feSArnaldo Carvalho de Melo 				continue;
597a019d6feSArnaldo Carvalho de Melo 			}
598a019d6feSArnaldo Carvalho de Melo 			reqp = &req->dl_next;
599a019d6feSArnaldo Carvalho de Melo 		}
600a019d6feSArnaldo Carvalho de Melo 
601a019d6feSArnaldo Carvalho de Melo 		i = (i + 1) & (lopt->nr_table_entries - 1);
602a019d6feSArnaldo Carvalho de Melo 
603a019d6feSArnaldo Carvalho de Melo 	} while (--budget > 0);
604a019d6feSArnaldo Carvalho de Melo 
605a019d6feSArnaldo Carvalho de Melo 	lopt->clock_hand = i;
606a019d6feSArnaldo Carvalho de Melo 
607a019d6feSArnaldo Carvalho de Melo 	if (lopt->qlen)
608a019d6feSArnaldo Carvalho de Melo 		inet_csk_reset_keepalive_timer(parent, interval);
609a019d6feSArnaldo Carvalho de Melo }
610a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
611a019d6feSArnaldo Carvalho de Melo 
612e56c57d0SEric Dumazet /**
613e56c57d0SEric Dumazet  *	inet_csk_clone_lock - clone an inet socket, and lock its clone
614e56c57d0SEric Dumazet  *	@sk: the socket to clone
615e56c57d0SEric Dumazet  *	@req: request_sock
616e56c57d0SEric Dumazet  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
617e56c57d0SEric Dumazet  *
618e56c57d0SEric Dumazet  *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
619e56c57d0SEric Dumazet  */
620e56c57d0SEric Dumazet struct sock *inet_csk_clone_lock(const struct sock *sk,
621e56c57d0SEric Dumazet 				 const struct request_sock *req,
622dd0fc66fSAl Viro 				 const gfp_t priority)
6239f1d2604SArnaldo Carvalho de Melo {
624e56c57d0SEric Dumazet 	struct sock *newsk = sk_clone_lock(sk, priority);
6259f1d2604SArnaldo Carvalho de Melo 
6269f1d2604SArnaldo Carvalho de Melo 	if (newsk != NULL) {
6279f1d2604SArnaldo Carvalho de Melo 		struct inet_connection_sock *newicsk = inet_csk(newsk);
6289f1d2604SArnaldo Carvalho de Melo 
6299f1d2604SArnaldo Carvalho de Melo 		newsk->sk_state = TCP_SYN_RECV;
6309f1d2604SArnaldo Carvalho de Melo 		newicsk->icsk_bind_hash = NULL;
6319f1d2604SArnaldo Carvalho de Melo 
632c720c7e8SEric Dumazet 		inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
633c720c7e8SEric Dumazet 		inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
634c720c7e8SEric Dumazet 		inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
6359f1d2604SArnaldo Carvalho de Melo 		newsk->sk_write_space = sk_stream_write_space;
6369f1d2604SArnaldo Carvalho de Melo 
6379f1d2604SArnaldo Carvalho de Melo 		newicsk->icsk_retransmits = 0;
6389f1d2604SArnaldo Carvalho de Melo 		newicsk->icsk_backoff	  = 0;
6396687e988SArnaldo Carvalho de Melo 		newicsk->icsk_probes_out  = 0;
6409f1d2604SArnaldo Carvalho de Melo 
6419f1d2604SArnaldo Carvalho de Melo 		/* Deinitialize accept_queue to trap illegal accesses. */
6429f1d2604SArnaldo Carvalho de Melo 		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
6434237c75cSVenkat Yekkirala 
6444237c75cSVenkat Yekkirala 		security_inet_csk_clone(newsk, req);
6459f1d2604SArnaldo Carvalho de Melo 	}
6469f1d2604SArnaldo Carvalho de Melo 	return newsk;
6479f1d2604SArnaldo Carvalho de Melo }
648e56c57d0SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
649a019d6feSArnaldo Carvalho de Melo 
650a019d6feSArnaldo Carvalho de Melo /*
651a019d6feSArnaldo Carvalho de Melo  * At this point, there should be no process reference to this
652a019d6feSArnaldo Carvalho de Melo  * socket, and thus no user references at all.  Therefore we
653a019d6feSArnaldo Carvalho de Melo  * can assume the socket waitqueue is inactive and nobody will
654a019d6feSArnaldo Carvalho de Melo  * try to jump onto it.
655a019d6feSArnaldo Carvalho de Melo  */
656a019d6feSArnaldo Carvalho de Melo void inet_csk_destroy_sock(struct sock *sk)
657a019d6feSArnaldo Carvalho de Melo {
658547b792cSIlpo Järvinen 	WARN_ON(sk->sk_state != TCP_CLOSE);
659547b792cSIlpo Järvinen 	WARN_ON(!sock_flag(sk, SOCK_DEAD));
660a019d6feSArnaldo Carvalho de Melo 
661a019d6feSArnaldo Carvalho de Melo 	/* It cannot be in hash table! */
662547b792cSIlpo Järvinen 	WARN_ON(!sk_unhashed(sk));
663a019d6feSArnaldo Carvalho de Melo 
664c720c7e8SEric Dumazet 	/* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
665c720c7e8SEric Dumazet 	WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);
666a019d6feSArnaldo Carvalho de Melo 
667a019d6feSArnaldo Carvalho de Melo 	sk->sk_prot->destroy(sk);
668a019d6feSArnaldo Carvalho de Melo 
669a019d6feSArnaldo Carvalho de Melo 	sk_stream_kill_queues(sk);
670a019d6feSArnaldo Carvalho de Melo 
671a019d6feSArnaldo Carvalho de Melo 	xfrm_sk_free_policy(sk);
672a019d6feSArnaldo Carvalho de Melo 
673a019d6feSArnaldo Carvalho de Melo 	sk_refcnt_debug_release(sk);
674a019d6feSArnaldo Carvalho de Melo 
675dd24c001SEric Dumazet 	percpu_counter_dec(sk->sk_prot->orphan_count);
676a019d6feSArnaldo Carvalho de Melo 	sock_put(sk);
677a019d6feSArnaldo Carvalho de Melo }
678a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_destroy_sock);
679a019d6feSArnaldo Carvalho de Melo 
680a019d6feSArnaldo Carvalho de Melo int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
681a019d6feSArnaldo Carvalho de Melo {
682a019d6feSArnaldo Carvalho de Melo 	struct inet_sock *inet = inet_sk(sk);
683a019d6feSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
684a019d6feSArnaldo Carvalho de Melo 	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
685a019d6feSArnaldo Carvalho de Melo 
686a019d6feSArnaldo Carvalho de Melo 	if (rc != 0)
687a019d6feSArnaldo Carvalho de Melo 		return rc;
688a019d6feSArnaldo Carvalho de Melo 
689a019d6feSArnaldo Carvalho de Melo 	sk->sk_max_ack_backlog = 0;
690a019d6feSArnaldo Carvalho de Melo 	sk->sk_ack_backlog = 0;
691a019d6feSArnaldo Carvalho de Melo 	inet_csk_delack_init(sk);
692a019d6feSArnaldo Carvalho de Melo 
693a019d6feSArnaldo Carvalho de Melo 	/* There is race window here: we announce ourselves listening,
694a019d6feSArnaldo Carvalho de Melo 	 * but this transition is still not validated by get_port().
695a019d6feSArnaldo Carvalho de Melo 	 * It is OK, because this socket enters to hash table only
696a019d6feSArnaldo Carvalho de Melo 	 * after validation is complete.
697a019d6feSArnaldo Carvalho de Melo 	 */
698a019d6feSArnaldo Carvalho de Melo 	sk->sk_state = TCP_LISTEN;
699c720c7e8SEric Dumazet 	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
700c720c7e8SEric Dumazet 		inet->inet_sport = htons(inet->inet_num);
701a019d6feSArnaldo Carvalho de Melo 
702a019d6feSArnaldo Carvalho de Melo 		sk_dst_reset(sk);
703a019d6feSArnaldo Carvalho de Melo 		sk->sk_prot->hash(sk);
704a019d6feSArnaldo Carvalho de Melo 
705a019d6feSArnaldo Carvalho de Melo 		return 0;
706a019d6feSArnaldo Carvalho de Melo 	}
707a019d6feSArnaldo Carvalho de Melo 
708a019d6feSArnaldo Carvalho de Melo 	sk->sk_state = TCP_CLOSE;
709a019d6feSArnaldo Carvalho de Melo 	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
710a019d6feSArnaldo Carvalho de Melo 	return -EADDRINUSE;
711a019d6feSArnaldo Carvalho de Melo }
712a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_start);
713a019d6feSArnaldo Carvalho de Melo 
714a019d6feSArnaldo Carvalho de Melo /*
715a019d6feSArnaldo Carvalho de Melo  *	This routine closes sockets which have been at least partially
716a019d6feSArnaldo Carvalho de Melo  *	opened, but not yet accepted.
717a019d6feSArnaldo Carvalho de Melo  */
718a019d6feSArnaldo Carvalho de Melo void inet_csk_listen_stop(struct sock *sk)
719a019d6feSArnaldo Carvalho de Melo {
720a019d6feSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
721a019d6feSArnaldo Carvalho de Melo 	struct request_sock *acc_req;
722a019d6feSArnaldo Carvalho de Melo 	struct request_sock *req;
723a019d6feSArnaldo Carvalho de Melo 
724a019d6feSArnaldo Carvalho de Melo 	inet_csk_delete_keepalive_timer(sk);
725a019d6feSArnaldo Carvalho de Melo 
726a019d6feSArnaldo Carvalho de Melo 	/* make all the listen_opt local to us */
727a019d6feSArnaldo Carvalho de Melo 	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
728a019d6feSArnaldo Carvalho de Melo 
729a019d6feSArnaldo Carvalho de Melo 	/* Following specs, it would be better either to send FIN
730a019d6feSArnaldo Carvalho de Melo 	 * (and enter FIN-WAIT-1, it is normal close)
731a019d6feSArnaldo Carvalho de Melo 	 * or to send active reset (abort).
732a019d6feSArnaldo Carvalho de Melo 	 * Certainly, it is pretty dangerous while synflood, but it is
733a019d6feSArnaldo Carvalho de Melo 	 * bad justification for our negligence 8)
734a019d6feSArnaldo Carvalho de Melo 	 * To be honest, we are not able to make either
735a019d6feSArnaldo Carvalho de Melo 	 * of the variants now.			--ANK
736a019d6feSArnaldo Carvalho de Melo 	 */
737a019d6feSArnaldo Carvalho de Melo 	reqsk_queue_destroy(&icsk->icsk_accept_queue);
738a019d6feSArnaldo Carvalho de Melo 
739a019d6feSArnaldo Carvalho de Melo 	while ((req = acc_req) != NULL) {
740a019d6feSArnaldo Carvalho de Melo 		struct sock *child = req->sk;
741a019d6feSArnaldo Carvalho de Melo 
742a019d6feSArnaldo Carvalho de Melo 		acc_req = req->dl_next;
743a019d6feSArnaldo Carvalho de Melo 
744a019d6feSArnaldo Carvalho de Melo 		local_bh_disable();
745a019d6feSArnaldo Carvalho de Melo 		bh_lock_sock(child);
746547b792cSIlpo Järvinen 		WARN_ON(sock_owned_by_user(child));
747a019d6feSArnaldo Carvalho de Melo 		sock_hold(child);
748a019d6feSArnaldo Carvalho de Melo 
749a019d6feSArnaldo Carvalho de Melo 		sk->sk_prot->disconnect(child, O_NONBLOCK);
750a019d6feSArnaldo Carvalho de Melo 
751a019d6feSArnaldo Carvalho de Melo 		sock_orphan(child);
752a019d6feSArnaldo Carvalho de Melo 
753eb4dea58SHerbert Xu 		percpu_counter_inc(sk->sk_prot->orphan_count);
754eb4dea58SHerbert Xu 
755a019d6feSArnaldo Carvalho de Melo 		inet_csk_destroy_sock(child);
756a019d6feSArnaldo Carvalho de Melo 
757a019d6feSArnaldo Carvalho de Melo 		bh_unlock_sock(child);
758a019d6feSArnaldo Carvalho de Melo 		local_bh_enable();
759a019d6feSArnaldo Carvalho de Melo 		sock_put(child);
760a019d6feSArnaldo Carvalho de Melo 
761a019d6feSArnaldo Carvalho de Melo 		sk_acceptq_removed(sk);
762a019d6feSArnaldo Carvalho de Melo 		__reqsk_free(req);
763a019d6feSArnaldo Carvalho de Melo 	}
764547b792cSIlpo Järvinen 	WARN_ON(sk->sk_ack_backlog);
765a019d6feSArnaldo Carvalho de Melo }
766a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
767af05dc93SArnaldo Carvalho de Melo 
768af05dc93SArnaldo Carvalho de Melo void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
769af05dc93SArnaldo Carvalho de Melo {
770af05dc93SArnaldo Carvalho de Melo 	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
771af05dc93SArnaldo Carvalho de Melo 	const struct inet_sock *inet = inet_sk(sk);
772af05dc93SArnaldo Carvalho de Melo 
773af05dc93SArnaldo Carvalho de Melo 	sin->sin_family		= AF_INET;
774c720c7e8SEric Dumazet 	sin->sin_addr.s_addr	= inet->inet_daddr;
775c720c7e8SEric Dumazet 	sin->sin_port		= inet->inet_dport;
776af05dc93SArnaldo Carvalho de Melo }
777af05dc93SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
778c4d93909SArnaldo Carvalho de Melo 
779dec73ff0SArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
780dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
781dec73ff0SArnaldo Carvalho de Melo 			       char __user *optval, int __user *optlen)
782dec73ff0SArnaldo Carvalho de Melo {
783dbeff12bSDavid S. Miller 	const struct inet_connection_sock *icsk = inet_csk(sk);
784dec73ff0SArnaldo Carvalho de Melo 
785dec73ff0SArnaldo Carvalho de Melo 	if (icsk->icsk_af_ops->compat_getsockopt != NULL)
786dec73ff0SArnaldo Carvalho de Melo 		return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
787dec73ff0SArnaldo Carvalho de Melo 							    optval, optlen);
788dec73ff0SArnaldo Carvalho de Melo 	return icsk->icsk_af_ops->getsockopt(sk, level, optname,
789dec73ff0SArnaldo Carvalho de Melo 					     optval, optlen);
790dec73ff0SArnaldo Carvalho de Melo }
791dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
792dec73ff0SArnaldo Carvalho de Melo 
793dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
794b7058842SDavid S. Miller 			       char __user *optval, unsigned int optlen)
795dec73ff0SArnaldo Carvalho de Melo {
796dbeff12bSDavid S. Miller 	const struct inet_connection_sock *icsk = inet_csk(sk);
797dec73ff0SArnaldo Carvalho de Melo 
798dec73ff0SArnaldo Carvalho de Melo 	if (icsk->icsk_af_ops->compat_setsockopt != NULL)
799dec73ff0SArnaldo Carvalho de Melo 		return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
800dec73ff0SArnaldo Carvalho de Melo 							    optval, optlen);
801dec73ff0SArnaldo Carvalho de Melo 	return icsk->icsk_af_ops->setsockopt(sk, level, optname,
802dec73ff0SArnaldo Carvalho de Melo 					     optval, optlen);
803dec73ff0SArnaldo Carvalho de Melo }
804dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
805dec73ff0SArnaldo Carvalho de Melo #endif
806