xref: /linux/net/ipv4/inet_connection_sock.c (revision 07d6bf634bc8f93caf8920c9d61df761645336e2)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Support for INET connection oriented protocols.
 *
 * Authors:	See the TCP sources
 */

#include <linux/module.h>
#include <linux/jhash.h>

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
#include <net/addrconf.h>

#if IS_ENABLED(CONFIG_IPV6)
/* match_sk*_wildcard == true:  IPV6_ADDR_ANY matches any IPv6 address,
 *				and also any IPv4 address if the socket
 *				is not IPv6-only
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *				IPV6_ADDR_ANY matches only IPV6_ADDR_ANY,
 *				and 0.0.0.0 matches only 0.0.0.0
 */
static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
				 const struct in6_addr *sk2_rcv_saddr6,
				 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk1_ipv6only, bool sk2_ipv6only,
				 bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;

	/* if both are mapped, treat as IPv4 */
	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
		if (!sk2_ipv6only) {
			if (sk1_rcv_saddr == sk2_rcv_saddr)
				return true;
			return (match_sk1_wildcard && !sk1_rcv_saddr) ||
				(match_sk2_wildcard && !sk2_rcv_saddr);
		}
		return false;
	}

	if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
		return true;

	if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard &&
	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
		return true;

	if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard &&
	    !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
		return true;

	if (sk2_rcv_saddr6 &&
	    ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
		return true;

	return false;
}
#endif

/* match_sk*_wildcard == true:  0.0.0.0 matches any IPv4 address
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *				0.0.0.0 matches only 0.0.0.0
 */
static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk2_ipv6only, bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	if (!sk2_ipv6only) {
		if (sk1_rcv_saddr == sk2_rcv_saddr)
			return true;
		return (match_sk1_wildcard && !sk1_rcv_saddr) ||
			(match_sk2_wildcard && !sk2_rcv_saddr);
	}
	return false;
}

bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
			  bool match_wildcard)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr,
					    inet6_rcv_saddr(sk2),
					    sk->sk_rcv_saddr,
					    sk2->sk_rcv_saddr,
					    ipv6_only_sock(sk),
					    ipv6_only_sock(sk2),
					    match_wildcard,
					    match_wildcard);
#endif
	return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
				    ipv6_only_sock(sk2), match_wildcard,
				    match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);

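/* Illustration (sketch, not part of the kernel sources): for two IPv4
 * sockets, one bound to 0.0.0.0 (sk_any) and one bound to a specific
 * address (sk_addr), the wildcard rules above give:
 *
 *	inet_rcv_saddr_equal(sk_any, sk_addr, true)   -> true
 *	inet_rcv_saddr_equal(sk_any, sk_addr, false)  -> false
 *	inet_rcv_saddr_equal(sk_addr, sk_addr, true)  -> true
 *
 * sk_any and sk_addr are hypothetical sockets used only for this example.
 */
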
bool inet_rcv_saddr_any(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
#endif
	return !sk->sk_rcv_saddr;
}

/**
 *	inet_sk_get_local_port_range - fetch ephemeral port range
 *	@sk: socket
 *	@low: pointer to low port
 *	@high: pointer to high port
 *
 *	Fetch the netns port range (/proc/sys/net/ipv4/ip_local_port_range).
 *	The range can be overridden if the socket set the IP_LOCAL_PORT_RANGE option.
 *	Returns true if IP_LOCAL_PORT_RANGE was set on this socket.
 */
bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
{
	int lo, hi, sk_lo, sk_hi;
	bool local_range = false;
	u32 sk_range;

	inet_get_local_port_range(sock_net(sk), &lo, &hi);

	sk_range = READ_ONCE(inet_sk(sk)->local_port_range);
	if (unlikely(sk_range)) {
		sk_lo = sk_range & 0xffff;
		sk_hi = sk_range >> 16;

		if (lo <= sk_lo && sk_lo <= hi)
			lo = sk_lo;
		if (lo <= sk_hi && sk_hi <= hi)
			hi = sk_hi;
		local_range = true;
	}

	*low = lo;
	*high = hi;
	return local_range;
}
EXPORT_SYMBOL(inet_sk_get_local_port_range);

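/* Userspace sketch (assumption: IP_LOCAL_PORT_RANGE is available in the
 * toolchain headers): the option value packs the range as high << 16 | low,
 * matching the decoding above.
 *
 *	uint32_t range = (60999 << 16) | 60000;	 low 60000, high 60999
 *	setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
 */
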
static bool inet_use_bhash2_on_bind(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);

		if (addr_type == IPV6_ADDR_ANY)
			return false;

		if (addr_type != IPV6_ADDR_MAPPED)
			return true;
	}
#endif
	return sk->sk_rcv_saddr != htonl(INADDR_ANY);
}

static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
			       kuid_t sk_uid, bool relax,
			       bool reuseport_cb_ok, bool reuseport_ok)
{
	int bound_dev_if2;

	if (sk == sk2)
		return false;

	bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);

	if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
	    sk->sk_bound_dev_if == bound_dev_if2) {
		if (sk->sk_reuse && sk2->sk_reuse &&
		    sk2->sk_state != TCP_LISTEN) {
			if (!relax || (!reuseport_ok && sk->sk_reuseport &&
				       sk2->sk_reuseport && reuseport_cb_ok &&
				       (sk2->sk_state == TCP_TIME_WAIT ||
					uid_eq(sk_uid, sock_i_uid(sk2)))))
				return true;
		} else if (!reuseport_ok || !sk->sk_reuseport ||
			   !sk2->sk_reuseport || !reuseport_cb_ok ||
			   (sk2->sk_state != TCP_TIME_WAIT &&
			    !uid_eq(sk_uid, sock_i_uid(sk2)))) {
			return true;
		}
	}
	return false;
}

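/* Illustration (userspace sketch, hypothetical descriptors fd1/fd2): two
 * sockets bound to the same address and port do not conflict under the
 * rules above when both set SO_REUSEPORT, the owning uids match and
 * reuseport_cb_ok holds; if only one of them sets SO_REUSEPORT, the
 * second bind() fails with EADDRINUSE.
 *
 *	int one = 1;
 *	setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
 *	setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
 *	bind(fd1, ...);		 succeeds
 *	bind(fd2, ...);		 also succeeds
 */
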
static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
				   kuid_t sk_uid, bool relax,
				   bool reuseport_cb_ok, bool reuseport_ok)
{
	if (ipv6_only_sock(sk2)) {
		if (sk->sk_family == AF_INET)
			return false;

#if IS_ENABLED(CONFIG_IPV6)
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			return false;
#endif
	}

	return inet_bind_conflict(sk, sk2, sk_uid, relax,
				  reuseport_cb_ok, reuseport_ok);
}

static bool inet_bhash2_conflict(const struct sock *sk,
				 const struct inet_bind2_bucket *tb2,
				 kuid_t sk_uid,
				 bool relax, bool reuseport_cb_ok,
				 bool reuseport_ok)
{
	struct sock *sk2;

	sk_for_each_bound(sk2, &tb2->owners) {
		if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
					   reuseport_cb_ok, reuseport_ok))
			return true;
	}

	return false;
}

#define sk_for_each_bound_bhash(__sk, __tb2, __tb)			\
	hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node)	\
		sk_for_each_bound((__sk), &(__tb2)->owners)

/* This should be called only when the tb and tb2 hashbuckets' locks are held */
static int inet_csk_bind_conflict(const struct sock *sk,
				  const struct inet_bind_bucket *tb,
				  const struct inet_bind2_bucket *tb2, /* may be null */
				  bool relax, bool reuseport_ok)
{
	kuid_t uid = sock_i_uid((struct sock *)sk);
	struct sock_reuseport *reuseport_cb;
	bool reuseport_cb_ok;
	struct sock *sk2;

	rcu_read_lock();
	reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
	/* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
	reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
	rcu_read_unlock();

	/* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
	 * ipv4) should have been checked already. We need to do these two
	 * checks separately because their spinlocks have to be acquired/released
	 * independently of each other, to prevent possible deadlocks.
	 */
	if (inet_use_bhash2_on_bind(sk))
		return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax,
						   reuseport_cb_ok, reuseport_ok);

	/* Unlike other sk lookup places we do not check
	 * for sk_net here, since _all_ the socks listed
	 * in tb->owners and tb2->owners list belong
	 * to the same net - the one this bucket belongs to.
	 */
	sk_for_each_bound_bhash(sk2, tb2, tb) {
		if (!inet_bind_conflict(sk, sk2, uid, relax, reuseport_cb_ok, reuseport_ok))
			continue;

		if (inet_rcv_saddr_equal(sk, sk2, true))
			return true;
	}

	return false;
}

/* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or
 * INADDR_ANY (if ipv4) socket.
 *
 * The caller must hold the bhash hashbucket lock with local bh disabled, to protect
 * against concurrent binds on the port for the addr-any address.
 */
static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l3mdev,
					  bool relax, bool reuseport_ok)
{
	kuid_t uid = sock_i_uid((struct sock *)sk);
	const struct net *net = sock_net(sk);
	struct sock_reuseport *reuseport_cb;
	struct inet_bind_hashbucket *head2;
	struct inet_bind2_bucket *tb2;
	bool conflict = false;
	bool reuseport_cb_ok;

	rcu_read_lock();
	reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
	/* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
	reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
	rcu_read_unlock();

	head2 = inet_bhash2_addr_any_hashbucket(sk, net, port);

	spin_lock(&head2->lock);

	inet_bind_bucket_for_each(tb2, &head2->chain) {
		if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
			continue;

		if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok))
			continue;

		conflict = true;
		break;
	}

	spin_unlock(&head2->lock);

	return conflict;
}

/*
 * Find an open port number for the socket.  Returns with the
 * inet_bind_hashbucket locks held if successful.
 */
static struct inet_bind_hashbucket *
inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret,
			struct inet_bind2_bucket **tb2_ret,
			struct inet_bind_hashbucket **head2_ret, int *port_ret)
{
	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
	int i, low, high, attempt_half, port, l3mdev;
	struct inet_bind_hashbucket *head, *head2;
	struct net *net = sock_net(sk);
	struct inet_bind2_bucket *tb2;
	struct inet_bind_bucket *tb;
	u32 remaining, offset;
	bool relax = false;

	l3mdev = inet_sk_bound_l3mdev(sk);
ports_exhausted:
	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
	inet_sk_get_local_port_range(sk, &low, &high);
	high++; /* [32768, 60999] -> [32768, 61000[ */
	if (high - low < 4)
		attempt_half = 0;
	if (attempt_half) {
		int half = low + (((high - low) >> 2) << 1);

		if (attempt_half == 1)
			high = half;
		else
			low = half;
	}
	remaining = high - low;
	if (likely(remaining > 1))
		remaining &= ~1U;

	offset = get_random_u32_below(remaining);
	/* __inet_hash_connect() favors ports having @low parity.
	 * We do the opposite to not pollute connect() users.
	 */
	offset |= 1U;

other_parity_scan:
	port = low + offset;
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		if (inet_use_bhash2_on_bind(sk)) {
			if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false))
				goto next_port;
		}

		head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
		spin_lock(&head2->lock);
		tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (inet_bind_bucket_match(tb, net, port, l3mdev)) {
				if (!inet_csk_bind_conflict(sk, tb, tb2,
							    relax, false))
					goto success;
				spin_unlock(&head2->lock);
				goto next_port;
			}
		tb = NULL;
		goto success;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}

	offset--;
	if (!(offset & 1))
		goto other_parity_scan;

	if (attempt_half == 1) {
		/* OK we now try the upper half of the range */
		attempt_half = 2;
		goto other_half_scan;
	}

	if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
		/* We still have a chance to connect to different destinations */
		relax = true;
		goto ports_exhausted;
	}
	return NULL;
success:
	*port_ret = port;
	*tb_ret = tb;
	*tb2_ret = tb2;
	*head2_ret = head2;
	return head;
}

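/* Search-order sketch (made-up numbers): with low = 32768 and high = 61000
 * after the increment, remaining = 28232.  A random offset of, say, 10 is
 * forced odd to 11, so the loop probes 32779, 32781, ... wrapping at @high;
 * once the odd offsets are exhausted, offset-- makes it even and the scan
 * retries 32778, 32780, ...  bind() thus prefers odd ports, leaving even
 * ones for __inet_hash_connect().
 */
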
static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
				     struct sock *sk)
{
	kuid_t uid = sock_i_uid(sk);

	if (tb->fastreuseport <= 0)
		return 0;
	if (!sk->sk_reuseport)
		return 0;
	if (rcu_access_pointer(sk->sk_reuseport_cb))
		return 0;
	if (!uid_eq(tb->fastuid, uid))
		return 0;
	/* We only need to check the rcv_saddr if this tb was once marked
	 * without fastreuseport and then was reset, as we can only know that
	 * the fast_*rcv_saddr doesn't have any conflicts with the socks on the
	 * owners list.
	 */
	if (tb->fastreuseport == FASTREUSEPORT_ANY)
		return 1;
#if IS_ENABLED(CONFIG_IPV6)
	if (tb->fast_sk_family == AF_INET6)
		return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
					    inet6_rcv_saddr(sk),
					    tb->fast_rcv_saddr,
					    sk->sk_rcv_saddr,
					    tb->fast_ipv6_only,
					    ipv6_only_sock(sk), true, false);
#endif
	return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
				    ipv6_only_sock(sk), true, false);
}

void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
			       struct sock *sk)
{
	kuid_t uid = sock_i_uid(sk);
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;

	if (hlist_empty(&tb->bhash2)) {
		tb->fastreuse = reuse;
		if (sk->sk_reuseport) {
			tb->fastreuseport = FASTREUSEPORT_ANY;
			tb->fastuid = uid;
			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
			tb->fast_ipv6_only = ipv6_only_sock(sk);
			tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
			tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
		} else {
			tb->fastreuseport = 0;
		}
	} else {
		if (!reuse)
			tb->fastreuse = 0;
		if (sk->sk_reuseport) {
			/* We didn't match or we don't have fastreuseport set on
			 * the tb, but we have sk_reuseport set on this socket
			 * and we know that there are no bind conflicts with
			 * this socket in this tb, so reset our tb's reuseport
			 * settings so that any subsequent sockets that match
			 * our current socket will be put on the fast path.
			 *
			 * If we reset we need to set FASTREUSEPORT_STRICT so we
			 * do extra checking for all subsequent sk_reuseport
			 * socks.
			 */
			if (!sk_reuseport_match(tb, sk)) {
				tb->fastreuseport = FASTREUSEPORT_STRICT;
				tb->fastuid = uid;
				tb->fast_rcv_saddr = sk->sk_rcv_saddr;
				tb->fast_ipv6_only = ipv6_only_sock(sk);
				tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
				tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
			}
		} else {
			tb->fastreuseport = 0;
		}
	}
}

/* Obtain a reference to a local port for the given sock;
 * if snum is zero it means select any available local port.
 * We try to allocate an odd port (and leave even ports for connect()).
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
	bool found_port = false, check_bind_conflict = true;
	bool bhash_created = false, bhash2_created = false;
	int ret = -EADDRINUSE, port = snum, l3mdev;
	struct inet_bind_hashbucket *head, *head2;
	struct inet_bind2_bucket *tb2 = NULL;
	struct inet_bind_bucket *tb = NULL;
	bool head2_lock_acquired = false;
	struct net *net = sock_net(sk);

	l3mdev = inet_sk_bound_l3mdev(sk);

	if (!port) {
		head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
		if (!head)
			return ret;

		head2_lock_acquired = true;

		if (tb && tb2)
			goto success;
		found_port = true;
	} else {
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (inet_bind_bucket_match(tb, net, port, l3mdev))
				break;
	}

	if (!tb) {
		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
					     head, port, l3mdev);
		if (!tb)
			goto fail_unlock;
		bhash_created = true;
	}

	if (!found_port) {
		if (!hlist_empty(&tb->bhash2)) {
			if (sk->sk_reuse == SK_FORCE_REUSE ||
			    (tb->fastreuse > 0 && reuse) ||
			    sk_reuseport_match(tb, sk))
				check_bind_conflict = false;
		}

		if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) {
			if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true))
				goto fail_unlock;
		}

		head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
		spin_lock(&head2->lock);
		head2_lock_acquired = true;
		tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
	}

	if (!tb2) {
		tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
					       net, head2, tb, sk);
		if (!tb2)
			goto fail_unlock;
		bhash2_created = true;
	}

	if (!found_port && check_bind_conflict) {
		if (inet_csk_bind_conflict(sk, tb, tb2, true, true))
			goto fail_unlock;
	}

success:
	inet_csk_update_fastreuse(tb, sk);

	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, tb2, port);
	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
	WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
	ret = 0;

fail_unlock:
	if (ret) {
		if (bhash2_created)
			inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, tb2);
		if (bhash_created)
			inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
	}
	if (head2_lock_acquired)
		spin_unlock(&head2->lock);
	spin_unlock_bh(&head->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);

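/* Userspace sketch (hypothetical descriptor lfd): binding with port 0
 * reaches inet_csk_get_port() with snum == 0, so an ephemeral port is
 * chosen by inet_csk_find_open_port() from the range fetched above.
 *
 *	struct sockaddr_in a = { .sin_family = AF_INET };	 sin_port = 0
 *	bind(lfd, (struct sockaddr *)&a, sizeof(a));
 */
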
/*
 * Wait for an incoming connection, avoiding race conditions. This must be
 * called with the socket locked.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	DEFINE_WAIT(wait);
	int err;

	/*
	 * True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	 *
	 * Subtle issue: "add_wait_queue_exclusive()" will be added
	 * after any current non-exclusive waiters, and we know that
	 * it will always _stay_ after any new non-exclusive waiters
	 * because all non-exclusive waiters are added at the
	 * beginning of the wait-queue. As such, it's ok to "drop"
	 * our exclusiveness temporarily when we get woken up without
	 * having to remove and re-insert us on the wait queue.
	 */
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		release_sock(sk);
		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
			timeo = schedule_timeout(timeo);
		sched_annotate_sleep();
		lock_sock(sk);
		err = 0;
		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
			break;
		err = -EINVAL;
		if (sk->sk_state != TCP_LISTEN)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/*
 * This will accept the next outstanding connection.
 */
struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *req;
	struct sock *newsk;
	int error;

	lock_sock(sk);

	/* We need to make sure that this socket is listening,
	 * and that it has something pending.
	 */
	error = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out_err;

	/* Find already established connection */
	if (reqsk_queue_empty(queue)) {
		long timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);

		/* If this is a non-blocking socket, don't sleep */
		error = -EAGAIN;
		if (!timeo)
			goto out_err;

		error = inet_csk_wait_for_connect(sk, timeo);
		if (error)
			goto out_err;
	}
	req = reqsk_queue_remove(queue, sk);
	arg->is_empty = reqsk_queue_empty(queue);
	newsk = req->sk;

	if (sk->sk_protocol == IPPROTO_TCP &&
	    tcp_rsk(req)->tfo_listener) {
		spin_lock_bh(&queue->fastopenq.lock);
		if (tcp_rsk(req)->tfo_listener) {
			/* We are still waiting for the final ACK from 3WHS
			 * so can't free req now. Instead, we set req->sk to
			 * NULL to signify that the child socket is taken
			 * so reqsk_fastopen_remove() will free the req
			 * when 3WHS finishes (or is aborted).
			 */
			req->sk = NULL;
			req = NULL;
		}
		spin_unlock_bh(&queue->fastopenq.lock);
	}

out:
	release_sock(sk);
	if (newsk && mem_cgroup_sockets_enabled) {
		gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
		int amt = 0;

		/* atomically get the memory usage, set and charge the
		 * newsk->sk_memcg.
		 */
		lock_sock(newsk);

		mem_cgroup_sk_alloc(newsk);
		if (newsk->sk_memcg) {
			/* The socket has not been accepted yet, no need
			 * to look at newsk->sk_wmem_queued.
			 */
			amt = sk_mem_pages(newsk->sk_forward_alloc +
					   atomic_read(&newsk->sk_rmem_alloc));
		}

		if (amt)
			mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp);
		kmem_cache_charge(newsk, gfp);

		release_sock(newsk);
	}
	if (req)
		reqsk_put(req);

	if (newsk)
		inet_init_csk_locks(newsk);

	return newsk;
out_err:
	newsk = NULL;
	req = NULL;
	arg->err = error;
	goto out;
}
EXPORT_SYMBOL(inet_csk_accept);

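/* Userspace sketch (hypothetical descriptor lfd): with O_NONBLOCK set on
 * the listener and an empty accept queue, the !timeo path above makes
 * accept() fail immediately instead of sleeping in
 * inet_csk_wait_for_connect().
 *
 *	fcntl(lfd, F_SETFL, O_NONBLOCK);
 *	int cfd = accept(lfd, NULL, NULL);	 -1 with errno == EAGAIN
 */
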
/*
 * Using different timers for retransmit, delayed acks and probes.
 * We may wish to use just one timer maintaining a list of expire jiffies
 * to optimize.
 */
void inet_csk_init_xmit_timers(struct sock *sk,
			       void (*retransmit_handler)(struct timer_list *t),
			       void (*delack_handler)(struct timer_list *t),
			       void (*keepalive_handler)(struct timer_list *t))
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
	timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
	timer_setup(&sk->sk_timer, keepalive_handler, 0);
	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);

void inet_csk_clear_xmit_timers(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_pending = icsk->icsk_ack.pending = 0;

	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
	sk_stop_timer(sk, &icsk->icsk_delack_timer);
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);

void inet_csk_clear_xmit_timers_sync(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* ongoing timer handlers need to acquire socket lock. */
	sock_not_owned_by_me(sk);

	icsk->icsk_pending = icsk->icsk_ack.pending = 0;

	sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
	sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
	sk_stop_timer_sync(sk, &sk->sk_timer);
}

void inet_csk_delete_keepalive_timer(struct sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);

void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);

struct dst_entry *inet_csk_route_req(const struct sock *sk,
				     struct flowi4 *fl4,
				     const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct ip_options_rcu *opt;
	struct rtable *rt;

	rcu_read_lock();
	opt = rcu_dereference(ireq->ireq_opt);

	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	rcu_read_unlock();
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	rcu_read_unlock();
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);

struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
					    struct sock *newsk,
					    const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct inet_sock *newinet = inet_sk(newsk);
	struct ip_options_rcu *opt;
	struct flowi4 *fl4;
	struct rtable *rt;

	opt = rcu_dereference(ireq->ireq_opt);
	fl4 = &newinet->cork.fl.u.ip4;

	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);

/* Decide when to expire the request and when to resend SYN-ACK */
static void syn_ack_recalc(struct request_sock *req,
			   const int max_syn_ack_retries,
			   const u8 rskq_defer_accept,
			   int *expire, int *resend)
{
	if (!rskq_defer_accept) {
		*expire = req->num_timeout >= max_syn_ack_retries;
		*resend = 1;
		return;
	}
	*expire = req->num_timeout >= max_syn_ack_retries &&
		  (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept);
	/* Do not resend while waiting for data after ACK;
	 * start resending at the end of the deferring period to give
	 * a last chance for data or an ACK to create an established socket.
	 */
	*resend = !inet_rsk(req)->acked ||
		  req->num_timeout >= rskq_defer_accept - 1;
}

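/* Userspace sketch (hypothetical descriptor lfd): TCP_DEFER_ACCEPT feeds
 * rskq_defer_accept, so a connection that completed the handshake but
 * sent no data is held back from accept() and its SYN-ACK is not resent
 * until the deferring period shown above runs out.
 *
 *	int secs = 5;
 *	setsockopt(lfd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &secs, sizeof(secs));
 */
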
int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
	int err = req->rsk_ops->rtx_syn_ack(parent, req);

	if (!err)
		req->num_retrans++;
	return err;
}
EXPORT_SYMBOL(inet_rtx_syn_ack);

static struct request_sock *
reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener,
		   bool attach_listener)
{
	struct request_sock *req;

	req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
	if (!req)
		return NULL;
	req->rsk_listener = NULL;
	if (attach_listener) {
		if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
			kmem_cache_free(ops->slab, req);
			return NULL;
		}
		req->rsk_listener = sk_listener;
	}
	req->rsk_ops = ops;
	req_to_sk(req)->sk_prot = sk_listener->sk_prot;
	sk_node_init(&req_to_sk(req)->sk_node);
	sk_tx_queue_clear(req_to_sk(req));
	req->saved_syn = NULL;
	req->syncookie = 0;
	req->timeout = 0;
	req->num_timeout = 0;
	req->num_retrans = 0;
	req->sk = NULL;
	refcount_set(&req->rsk_refcnt, 0);

	return req;
}
#define reqsk_alloc(...)	alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__))

struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
				      struct sock *sk_listener,
				      bool attach_listener)
{
	struct request_sock *req = reqsk_alloc(ops, sk_listener,
					       attach_listener);

	if (req) {
		struct inet_request_sock *ireq = inet_rsk(req);

		ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
		ireq->pktopts = NULL;
#endif
		atomic64_set(&ireq->ir_cookie, 0);
		ireq->ireq_state = TCP_NEW_SYN_RECV;
		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
		ireq->ireq_family = sk_listener->sk_family;
		req->timeout = TCP_TIMEOUT_INIT;
	}

	return req;
}
EXPORT_SYMBOL(inet_reqsk_alloc);

inet_reqsk_clone(struct request_sock * req,struct sock * sk)97354b92e84SKuniyuki Iwashima static struct request_sock *inet_reqsk_clone(struct request_sock *req,
97454b92e84SKuniyuki Iwashima 					     struct sock *sk)
97554b92e84SKuniyuki Iwashima {
97654b92e84SKuniyuki Iwashima 	struct sock *req_sk, *nreq_sk;
97754b92e84SKuniyuki Iwashima 	struct request_sock *nreq;
97854b92e84SKuniyuki Iwashima 
97954b92e84SKuniyuki Iwashima 	nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
98054b92e84SKuniyuki Iwashima 	if (!nreq) {
98155d444b3SKuniyuki Iwashima 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
98255d444b3SKuniyuki Iwashima 
98354b92e84SKuniyuki Iwashima 		/* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
98454b92e84SKuniyuki Iwashima 		sock_put(sk);
98554b92e84SKuniyuki Iwashima 		return NULL;
98654b92e84SKuniyuki Iwashima 	}
98754b92e84SKuniyuki Iwashima 
98854b92e84SKuniyuki Iwashima 	req_sk = req_to_sk(req);
98954b92e84SKuniyuki Iwashima 	nreq_sk = req_to_sk(nreq);
99054b92e84SKuniyuki Iwashima 
99154b92e84SKuniyuki Iwashima 	memcpy(nreq_sk, req_sk,
99254b92e84SKuniyuki Iwashima 	       offsetof(struct sock, sk_dontcopy_begin));
993ff73f834SKees Cook 	unsafe_memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
994ff73f834SKees Cook 		      req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end),
995ff73f834SKees Cook 		      /* alloc is larger than struct, see above */);
99654b92e84SKuniyuki Iwashima 
99754b92e84SKuniyuki Iwashima 	sk_node_init(&nreq_sk->sk_node);
99854b92e84SKuniyuki Iwashima 	nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
999a9418924SEric Dumazet #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
100054b92e84SKuniyuki Iwashima 	nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
100154b92e84SKuniyuki Iwashima #endif
100254b92e84SKuniyuki Iwashima 	nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;
100354b92e84SKuniyuki Iwashima 
100454b92e84SKuniyuki Iwashima 	nreq->rsk_listener = sk;
100554b92e84SKuniyuki Iwashima 
100654b92e84SKuniyuki Iwashima 	/* We need not acquire fastopenq->lock
100754b92e84SKuniyuki Iwashima 	 * because the child socket is locked in inet_csk_listen_stop().
100854b92e84SKuniyuki Iwashima 	 */
100954b92e84SKuniyuki Iwashima 	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener)
101054b92e84SKuniyuki Iwashima 		rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq);
101154b92e84SKuniyuki Iwashima 
101254b92e84SKuniyuki Iwashima 	return nreq;
101354b92e84SKuniyuki Iwashima }
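/* The two-step copy above mirrors sock_copy(): everything up to
 * sk_dontcopy_begin is copied, the window between the two markers (hash
 * node linkage, queue mappings, refcount) is skipped and re-initialized
 * by hand, and the rest of the object is copied afterwards:
 *
 *	[0 .................. sk_dontcopy_begin)    memcpy()
 *	[sk_dontcopy_begin .... sk_dontcopy_end)    skipped, re-initialized
 *	[sk_dontcopy_end ... rsk_ops->obj_size)     unsafe_memcpy()
 */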
101454b92e84SKuniyuki Iwashima 
reqsk_queue_migrated(struct request_sock_queue * queue,const struct request_sock * req)1015c905dee6SKuniyuki Iwashima static void reqsk_queue_migrated(struct request_sock_queue *queue,
1016c905dee6SKuniyuki Iwashima 				 const struct request_sock *req)
1017c905dee6SKuniyuki Iwashima {
1018c905dee6SKuniyuki Iwashima 	if (req->num_timeout == 0)
1019c905dee6SKuniyuki Iwashima 		atomic_inc(&queue->young);
1020c905dee6SKuniyuki Iwashima 	atomic_inc(&queue->qlen);
1021c905dee6SKuniyuki Iwashima }
1022c905dee6SKuniyuki Iwashima 
reqsk_migrate_reset(struct request_sock * req)102354b92e84SKuniyuki Iwashima static void reqsk_migrate_reset(struct request_sock *req)
102454b92e84SKuniyuki Iwashima {
1025c905dee6SKuniyuki Iwashima 	req->saved_syn = NULL;
102654b92e84SKuniyuki Iwashima #if IS_ENABLED(CONFIG_IPV6)
102754b92e84SKuniyuki Iwashima 	inet_rsk(req)->ipv6_opt = NULL;
1028c905dee6SKuniyuki Iwashima 	inet_rsk(req)->pktopts = NULL;
1029c905dee6SKuniyuki Iwashima #else
1030c905dee6SKuniyuki Iwashima 	inet_rsk(req)->ireq_opt = NULL;
103154b92e84SKuniyuki Iwashima #endif
103254b92e84SKuniyuki Iwashima }
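/* Right after a migration the original req and its clone briefly share
 * the saved SYN and IP option pointers; whichever request is about to be
 * freed has them cleared here first so the reqsk destructor does not
 * free memory now owned by the survivor.  A sketch of the hand-off,
 * where "loser" names whichever request is being discarded:
 *
 *	reqsk_migrate_reset(loser);
 *	reqsk_queue_removed(queue, loser);
 *	__reqsk_free(loser);	(or reqsk_put())
 */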
103354b92e84SKuniyuki Iwashima 
1034079096f1SEric Dumazet /* return true if req was found in the ehash table */
reqsk_queue_unlink(struct request_sock * req)10358b5e07d7SZhiqiang Liu static bool reqsk_queue_unlink(struct request_sock *req)
1036b357a364SEric Dumazet {
103708eaef90SKuniyuki Iwashima 	struct sock *sk = req_to_sk(req);
10385e0724d0SEric Dumazet 	bool found = false;
1039b357a364SEric Dumazet 
104008eaef90SKuniyuki Iwashima 	if (sk_hashed(sk)) {
1041429e42c1SKuniyuki Iwashima 		struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
1042429e42c1SKuniyuki Iwashima 		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
1043b357a364SEric Dumazet 
1044079096f1SEric Dumazet 		spin_lock(lock);
104508eaef90SKuniyuki Iwashima 		found = __sk_nulls_del_node_init_rcu(sk);
1046079096f1SEric Dumazet 		spin_unlock(lock);
10475e0724d0SEric Dumazet 	}
1048*e8c526f2SKuniyuki Iwashima 
1049b357a364SEric Dumazet 	return found;
1050b357a364SEric Dumazet }
1051b357a364SEric Dumazet 
__inet_csk_reqsk_queue_drop(struct sock * sk,struct request_sock * req,bool from_timer)1052*e8c526f2SKuniyuki Iwashima static bool __inet_csk_reqsk_queue_drop(struct sock *sk,
1053*e8c526f2SKuniyuki Iwashima 					struct request_sock *req,
1054*e8c526f2SKuniyuki Iwashima 					bool from_timer)
1055b357a364SEric Dumazet {
10567233da86SAlexander Ovechkin 	bool unlinked = reqsk_queue_unlink(req);
10577233da86SAlexander Ovechkin 
1058*e8c526f2SKuniyuki Iwashima 	if (!from_timer && timer_delete_sync(&req->rsk_timer))
1059*e8c526f2SKuniyuki Iwashima 		reqsk_put(req);
1060*e8c526f2SKuniyuki Iwashima 
10617233da86SAlexander Ovechkin 	if (unlinked) {
1062b357a364SEric Dumazet 		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
1063b357a364SEric Dumazet 		reqsk_put(req);
1064b357a364SEric Dumazet 	}
1065*e8c526f2SKuniyuki Iwashima 
10667233da86SAlexander Ovechkin 	return unlinked;
1067b357a364SEric Dumazet }
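/* Reference accounting for the drop path: a hashed req holds one
 * reference on behalf of the ehash table and one on behalf of the timer.
 * A successful timer_delete_sync() releases the timer's reference and a
 * successful unlink releases the table's.  When called from
 * reqsk_timer_handler() itself (from_timer == true), timer_delete_sync()
 * must be skipped -- it would wait forever on the running handler -- and
 * the handler drops the timer's reference with its final reqsk_put().
 */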
1068*e8c526f2SKuniyuki Iwashima 
inet_csk_reqsk_queue_drop(struct sock * sk,struct request_sock * req)1069*e8c526f2SKuniyuki Iwashima bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
1070*e8c526f2SKuniyuki Iwashima {
1071*e8c526f2SKuniyuki Iwashima 	return __inet_csk_reqsk_queue_drop(sk, req, false);
1072*e8c526f2SKuniyuki Iwashima }
1073b357a364SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
1074b357a364SEric Dumazet 
inet_csk_reqsk_queue_drop_and_put(struct sock * sk,struct request_sock * req)1075f03f2e15SEric Dumazet void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
1076f03f2e15SEric Dumazet {
1077f03f2e15SEric Dumazet 	inet_csk_reqsk_queue_drop(sk, req);
1078f03f2e15SEric Dumazet 	reqsk_put(req);
1079f03f2e15SEric Dumazet }
1080f03f2e15SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
1081f03f2e15SEric Dumazet 
reqsk_timer_handler(struct timer_list * t)108259f379f9SKees Cook static void reqsk_timer_handler(struct timer_list *t)
1083a019d6feSArnaldo Carvalho de Melo {
108459f379f9SKees Cook 	struct request_sock *req = from_timer(req, t, rsk_timer);
1085c905dee6SKuniyuki Iwashima 	struct request_sock *nreq = NULL, *oreq = req;
1086fa76ce73SEric Dumazet 	struct sock *sk_listener = req->rsk_listener;
1087c905dee6SKuniyuki Iwashima 	struct inet_connection_sock *icsk;
1088c905dee6SKuniyuki Iwashima 	struct request_sock_queue *queue;
1089c905dee6SKuniyuki Iwashima 	struct net *net;
1090a594920fSKuniyuki Iwashima 	int max_syn_ack_retries, qlen, expire = 0, resend = 0;
1091a019d6feSArnaldo Carvalho de Melo 
1092c905dee6SKuniyuki Iwashima 	if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
1093c905dee6SKuniyuki Iwashima 		struct sock *nsk;
1094c905dee6SKuniyuki Iwashima 
1095c905dee6SKuniyuki Iwashima 		nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
1096c905dee6SKuniyuki Iwashima 		if (!nsk)
1097079096f1SEric Dumazet 			goto drop;
1098a019d6feSArnaldo Carvalho de Melo 
1099c905dee6SKuniyuki Iwashima 		nreq = inet_reqsk_clone(req, nsk);
1100c905dee6SKuniyuki Iwashima 		if (!nreq)
1101c905dee6SKuniyuki Iwashima 			goto drop;
1102c905dee6SKuniyuki Iwashima 
1103c905dee6SKuniyuki Iwashima 		/* The new timer for the cloned req can drop the base refcount
1104c905dee6SKuniyuki Iwashima 		 * of 2 by calling inet_csk_reqsk_queue_drop_and_put(), so
1105c905dee6SKuniyuki Iwashima 		 * hold one more reference to prevent use-after-free and
1106c905dee6SKuniyuki Iwashima 		 * drop it with reqsk_put() just before returning.
1107c905dee6SKuniyuki Iwashima 		 */
1108c905dee6SKuniyuki Iwashima 		refcount_set(&nreq->rsk_refcnt, 2 + 1);
1109c905dee6SKuniyuki Iwashima 		timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
1110c905dee6SKuniyuki Iwashima 		reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
1111c905dee6SKuniyuki Iwashima 
1112c905dee6SKuniyuki Iwashima 		req = nreq;
1113c905dee6SKuniyuki Iwashima 		sk_listener = nsk;
1114c905dee6SKuniyuki Iwashima 	}
1115c905dee6SKuniyuki Iwashima 
1116c905dee6SKuniyuki Iwashima 	icsk = inet_csk(sk_listener);
1117c905dee6SKuniyuki Iwashima 	net = sock_net(sk_listener);
11183a037f0fSEric Dumazet 	max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
111920a3b1c0SKuniyuki Iwashima 		READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
1120a019d6feSArnaldo Carvalho de Melo 	/* Normally all the openreqs are young and become mature
1121a019d6feSArnaldo Carvalho de Melo 	 * (i.e. converted to an established socket) before the first timeout.
1122fd4f2ceaSEric Dumazet 	 * If the synack was not acknowledged within 1 second, it means
1123a019d6feSArnaldo Carvalho de Melo 	 * one of the following things: the synack was lost, the ack was lost,
1124a019d6feSArnaldo Carvalho de Melo 	 * the rtt is high, or nobody planned to ack (i.e. synflood).
1125a019d6feSArnaldo Carvalho de Melo 	 * When the server is a bit loaded, the queue is populated with old
1126a019d6feSArnaldo Carvalho de Melo 	 * open requests, reducing the effective size of the queue.
1127a019d6feSArnaldo Carvalho de Melo 	 * When the server is heavily loaded, the queue size drops to zero
1128a019d6feSArnaldo Carvalho de Melo 	 * after several minutes of work. That is not a synflood,
1129a019d6feSArnaldo Carvalho de Melo 	 * it is normal operation. The solution is to prune entries
1130a019d6feSArnaldo Carvalho de Melo 	 * that are too old, overriding the normal timeout, when the
1131a019d6feSArnaldo Carvalho de Melo 	 * situation becomes dangerous.
1132a019d6feSArnaldo Carvalho de Melo 	 *
1133a019d6feSArnaldo Carvalho de Melo 	 * Essentially, we reserve half of the room for young
1134a019d6feSArnaldo Carvalho de Melo 	 * embryos, and abort old ones without pity if they
1135a019d6feSArnaldo Carvalho de Melo 	 * are about to clog our table.
1136a019d6feSArnaldo Carvalho de Melo 	 */
1137c905dee6SKuniyuki Iwashima 	queue = &icsk->icsk_accept_queue;
1138aac065c5SEric Dumazet 	qlen = reqsk_queue_len(queue);
1139099ecf59SEric Dumazet 	if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
1140aac065c5SEric Dumazet 		int young = reqsk_queue_len_young(queue) << 1;
1141a019d6feSArnaldo Carvalho de Melo 
1142a594920fSKuniyuki Iwashima 		while (max_syn_ack_retries > 2) {
11432b41fab7SEric Dumazet 			if (qlen < young)
1144a019d6feSArnaldo Carvalho de Melo 				break;
1145a594920fSKuniyuki Iwashima 			max_syn_ack_retries--;
1146a019d6feSArnaldo Carvalho de Melo 			young <<= 1;
1147a019d6feSArnaldo Carvalho de Melo 		}
1148a019d6feSArnaldo Carvalho de Melo 	}
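/* A worked example of the pruning above, assuming the default
 * sysctl_tcp_synack_retries of 5 and sk_max_ack_backlog == 128: pruning
 * starts once qlen > 64 (qlen << 1 > 128).  With qlen == 100 and 10
 * young entries (young == 20 after the initial shift):
 *
 *	retries 5: 100 >= 20 -> retries 4, young 40
 *	retries 4: 100 >= 40 -> retries 3, young 80
 *	retries 3: 100 >= 80 -> retries 2, loop ends (2 > 2 fails)
 *
 * so old entries get at most 2 SYN+ACK retransmissions instead of 5.
 */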
1149a594920fSKuniyuki Iwashima 	syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept),
11500c3d79bcSJulian Anastasov 		       &expire, &resend);
115142cb80a2SEric Dumazet 	req->rsk_ops->syn_ack_timeout(req);
11520c3d79bcSJulian Anastasov 	if (!expire &&
11530c3d79bcSJulian Anastasov 	    (!resend ||
1154fa76ce73SEric Dumazet 	     !inet_rtx_syn_ack(sk_listener, req) ||
11550c3d79bcSJulian Anastasov 	     inet_rsk(req)->acked)) {
1156e6c022a4SEric Dumazet 		if (req->num_timeout++ == 0)
1157aac065c5SEric Dumazet 			atomic_dec(&queue->young);
11585903123fSAkhmat Karakotov 		mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX));
1159c905dee6SKuniyuki Iwashima 
1160c905dee6SKuniyuki Iwashima 		if (!nreq)
1161c905dee6SKuniyuki Iwashima 			return;
1162c905dee6SKuniyuki Iwashima 
1163c905dee6SKuniyuki Iwashima 		if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
1164c905dee6SKuniyuki Iwashima 			/* delete timer */
1165*e8c526f2SKuniyuki Iwashima 			__inet_csk_reqsk_queue_drop(sk_listener, nreq, true);
116655d444b3SKuniyuki Iwashima 			goto no_ownership;
1167c905dee6SKuniyuki Iwashima 		}
1168c905dee6SKuniyuki Iwashima 
116955d444b3SKuniyuki Iwashima 		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
1170c905dee6SKuniyuki Iwashima 		reqsk_migrate_reset(oreq);
1171c905dee6SKuniyuki Iwashima 		reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
1172c905dee6SKuniyuki Iwashima 		reqsk_put(oreq);
1173c905dee6SKuniyuki Iwashima 
1174c905dee6SKuniyuki Iwashima 		reqsk_put(nreq);
1175fa76ce73SEric Dumazet 		return;
1176a019d6feSArnaldo Carvalho de Melo 	}
1177c905dee6SKuniyuki Iwashima 
1178c905dee6SKuniyuki Iwashima 	/* Even if we can clone the req, we may not need to retransmit any more
1179c905dee6SKuniyuki Iwashima 	 * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc.), or another
1180c905dee6SKuniyuki Iwashima 	 * CPU may win the "own_req" race so that inet_ehash_insert() fails.
1181c905dee6SKuniyuki Iwashima 	 */
1182c905dee6SKuniyuki Iwashima 	if (nreq) {
118355d444b3SKuniyuki Iwashima 		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
118455d444b3SKuniyuki Iwashima no_ownership:
1185c905dee6SKuniyuki Iwashima 		reqsk_migrate_reset(nreq);
1186c905dee6SKuniyuki Iwashima 		reqsk_queue_removed(queue, nreq);
1187c905dee6SKuniyuki Iwashima 		__reqsk_free(nreq);
1188c905dee6SKuniyuki Iwashima 	}
1189c905dee6SKuniyuki Iwashima 
119055d444b3SKuniyuki Iwashima drop:
1191*e8c526f2SKuniyuki Iwashima 	__inet_csk_reqsk_queue_drop(sk_listener, oreq, true);
1192*e8c526f2SKuniyuki Iwashima 	reqsk_put(req);
1193a019d6feSArnaldo Carvalho de Melo }
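/* Summary of the handler's exit paths: the req either survives and its
 * timer is re-armed with an exponentially backed-off timeout
 * (reqsk_timeout() shifts req->timeout left by num_timeout, capped at
 * TCP_RTO_MAX), or it expires and is dropped.  When reuseport migration
 * is involved, the clone replaces the original on success (oreq is
 * released), while on failure the clone is freed and the original is
 * dropped as if it had expired; TCPMigrateReqSuccess/Failure count the
 * outcomes.
 */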
1194fa76ce73SEric Dumazet 
reqsk_queue_hash_req(struct request_sock * req,unsigned long timeout)1195ff46e3b4Sluoxuanqiang static bool reqsk_queue_hash_req(struct request_sock *req,
1196fa76ce73SEric Dumazet 				 unsigned long timeout)
1197fa76ce73SEric Dumazet {
1198ff46e3b4Sluoxuanqiang 	bool found_dup_sk = false;
1199ff46e3b4Sluoxuanqiang 
1200ff46e3b4Sluoxuanqiang 	if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk))
1201ff46e3b4Sluoxuanqiang 		return false;
1202ff46e3b4Sluoxuanqiang 
1203ff46e3b4Sluoxuanqiang 	/* The timer needs to be set up after a successful insertion. */
120459f379f9SKees Cook 	timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
1205f3438bc7SThomas Gleixner 	mod_timer(&req->rsk_timer, jiffies + timeout);
120629c68526SEric Dumazet 
1207fa76ce73SEric Dumazet 	/* before letting lookups find us, make sure all req fields
1208fa76ce73SEric Dumazet 	 * are committed to memory and the refcount is initialized.
1209fa76ce73SEric Dumazet 	 */
1210fa76ce73SEric Dumazet 	smp_wmb();
121141c6d650SReshetova, Elena 	refcount_set(&req->rsk_refcnt, 2 + 1);
1212ff46e3b4Sluoxuanqiang 	return true;
1213a019d6feSArnaldo Carvalho de Melo }
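/* The 2 + 1 above: one reference for the ehash table, one for the armed
 * timer, and one transient reference for the caller's SYN processing
 * path, which drops it with reqsk_put() once the SYN+ACK has been sent
 * (as tcp_conn_request() does).
 */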
1214079096f1SEric Dumazet 
inet_csk_reqsk_queue_hash_add(struct sock * sk,struct request_sock * req,unsigned long timeout)1215ff46e3b4Sluoxuanqiang bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
1216079096f1SEric Dumazet 				   unsigned long timeout)
1217079096f1SEric Dumazet {
1218ff46e3b4Sluoxuanqiang 	if (!reqsk_queue_hash_req(req, timeout))
1219ff46e3b4Sluoxuanqiang 		return false;
1220ff46e3b4Sluoxuanqiang 
1221079096f1SEric Dumazet 	inet_csk_reqsk_queue_added(sk);
1222ff46e3b4Sluoxuanqiang 	return true;
1223079096f1SEric Dumazet }
1224079096f1SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
1225a019d6feSArnaldo Carvalho de Melo 
inet_clone_ulp(const struct request_sock * req,struct sock * newsk,const gfp_t priority)122613230593SMat Martineau static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
122713230593SMat Martineau 			   const gfp_t priority)
122813230593SMat Martineau {
122913230593SMat Martineau 	struct inet_connection_sock *icsk = inet_csk(newsk);
123013230593SMat Martineau 
123113230593SMat Martineau 	if (!icsk->icsk_ulp_ops)
123213230593SMat Martineau 		return;
123313230593SMat Martineau 
123413230593SMat Martineau 	icsk->icsk_ulp_ops->clone(req, newsk, priority);
123513230593SMat Martineau }
123613230593SMat Martineau 
1237e56c57d0SEric Dumazet /**
1238e56c57d0SEric Dumazet  *	inet_csk_clone_lock - clone an inet socket, and lock its clone
1239e56c57d0SEric Dumazet  *	@sk: the socket to clone
1240e56c57d0SEric Dumazet  *	@req: request_sock
1241e56c57d0SEric Dumazet  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1242e56c57d0SEric Dumazet  *
1243e56c57d0SEric Dumazet  *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1244e56c57d0SEric Dumazet  */
inet_csk_clone_lock(const struct sock * sk,const struct request_sock * req,const gfp_t priority)1245e56c57d0SEric Dumazet struct sock *inet_csk_clone_lock(const struct sock *sk,
1246e56c57d0SEric Dumazet 				 const struct request_sock *req,
1247dd0fc66fSAl Viro 				 const gfp_t priority)
12489f1d2604SArnaldo Carvalho de Melo {
1249e56c57d0SEric Dumazet 	struct sock *newsk = sk_clone_lock(sk, priority);
12509f1d2604SArnaldo Carvalho de Melo 
125100db4124SIan Morris 	if (newsk) {
12529f1d2604SArnaldo Carvalho de Melo 		struct inet_connection_sock *newicsk = inet_csk(newsk);
12539f1d2604SArnaldo Carvalho de Melo 
1254563e0bb0SYafang Shao 		inet_sk_set_state(newsk, TCP_SYN_RECV);
12559f1d2604SArnaldo Carvalho de Melo 		newicsk->icsk_bind_hash = NULL;
125628044fc1SJoanne Koong 		newicsk->icsk_bind2_hash = NULL;
12579f1d2604SArnaldo Carvalho de Melo 
1258634fb979SEric Dumazet 		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
1259b44084c2SEric Dumazet 		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
1260b44084c2SEric Dumazet 		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
12619f1d2604SArnaldo Carvalho de Melo 
126285017869SEric Dumazet 		/* listeners have SOCK_RCU_FREE, not the children */
126385017869SEric Dumazet 		sock_reset_flag(newsk, SOCK_RCU_FREE);
126485017869SEric Dumazet 
1265657831ffSEric Dumazet 		inet_sk(newsk)->mc_list = NULL;
1266657831ffSEric Dumazet 
126784f39b08SLorenzo Colitti 		newsk->sk_mark = inet_rsk(req)->ir_mark;
126833cf7c90SEric Dumazet 		atomic64_set(&newsk->sk_cookie,
126933cf7c90SEric Dumazet 			     atomic64_read(&inet_rsk(req)->ir_cookie));
127084f39b08SLorenzo Colitti 
12719f1d2604SArnaldo Carvalho de Melo 		newicsk->icsk_retransmits = 0;
12729f1d2604SArnaldo Carvalho de Melo 		newicsk->icsk_backoff	  = 0;
12736687e988SArnaldo Carvalho de Melo 		newicsk->icsk_probes_out  = 0;
12749d9b1ee0SEnke Chen 		newicsk->icsk_probes_tstamp = 0;
12759f1d2604SArnaldo Carvalho de Melo 
12769f1d2604SArnaldo Carvalho de Melo 		/* Deinitialize accept_queue to trap illegal accesses. */
12779f1d2604SArnaldo Carvalho de Melo 		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
12784237c75cSVenkat Yekkirala 
127913230593SMat Martineau 		inet_clone_ulp(req, newsk, priority);
128013230593SMat Martineau 
12814237c75cSVenkat Yekkirala 		security_inet_csk_clone(newsk, req);
12829f1d2604SArnaldo Carvalho de Melo 	}
12839f1d2604SArnaldo Carvalho de Melo 	return newsk;
12849f1d2604SArnaldo Carvalho de Melo }
1285e56c57d0SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
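/* A sketch of the expected calling pattern, modelled on
 * tcp_create_openreq_child(); the unlock duty noted in the kernel-doc
 * above applies once a clone exists, whether or not later setup fails:
 *
 *	struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
 *
 *	if (!newsk)
 *		return NULL;
 *	... protocol-specific initialization of newsk ...
 *	the caller (or its caller) eventually runs bh_unlock_sock(newsk),
 *	even on the path that discards the clone.
 */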
1286a019d6feSArnaldo Carvalho de Melo 
1287a019d6feSArnaldo Carvalho de Melo /*
1288a019d6feSArnaldo Carvalho de Melo  * At this point, there should be no process reference to this
1289a019d6feSArnaldo Carvalho de Melo  * socket, and thus no user references at all.  Therefore we
1290a019d6feSArnaldo Carvalho de Melo  * can assume the socket waitqueue is inactive and nobody will
1291a019d6feSArnaldo Carvalho de Melo  * try to jump onto it.
1292a019d6feSArnaldo Carvalho de Melo  */
inet_csk_destroy_sock(struct sock * sk)1293a019d6feSArnaldo Carvalho de Melo void inet_csk_destroy_sock(struct sock *sk)
1294a019d6feSArnaldo Carvalho de Melo {
1295547b792cSIlpo Järvinen 	WARN_ON(sk->sk_state != TCP_CLOSE);
1296547b792cSIlpo Järvinen 	WARN_ON(!sock_flag(sk, SOCK_DEAD));
1297a019d6feSArnaldo Carvalho de Melo 
1298a019d6feSArnaldo Carvalho de Melo 	/* It cannot be in hash table! */
1299547b792cSIlpo Järvinen 	WARN_ON(!sk_unhashed(sk));
1300a019d6feSArnaldo Carvalho de Melo 
1301c720c7e8SEric Dumazet 	/* If it has a nonzero inet_sk(sk)->inet_num, it must be bound. */
1302c720c7e8SEric Dumazet 	WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);
1303a019d6feSArnaldo Carvalho de Melo 
1304a019d6feSArnaldo Carvalho de Melo 	sk->sk_prot->destroy(sk);
1305a019d6feSArnaldo Carvalho de Melo 
1306a019d6feSArnaldo Carvalho de Melo 	sk_stream_kill_queues(sk);
1307a019d6feSArnaldo Carvalho de Melo 
1308a019d6feSArnaldo Carvalho de Melo 	xfrm_sk_free_policy(sk);
1309a019d6feSArnaldo Carvalho de Melo 
131019757cebSEric Dumazet 	this_cpu_dec(*sk->sk_prot->orphan_count);
1311c2a2efbbSEric Dumazet 
1312a019d6feSArnaldo Carvalho de Melo 	sock_put(sk);
1313a019d6feSArnaldo Carvalho de Melo }
1314a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_destroy_sock);
1315a019d6feSArnaldo Carvalho de Melo 
1316e337e24dSChristoph Paasch /* This function forces the closure of a socket after the call to
1317e337e24dSChristoph Paasch  * tcp/dccp_create_openreq_child().
1318e337e24dSChristoph Paasch  */
inet_csk_prepare_forced_close(struct sock * sk)1319e337e24dSChristoph Paasch void inet_csk_prepare_forced_close(struct sock *sk)
1320c10cb5fcSChristoph Paasch 	__releases(&sk->sk_lock.slock)
1321e337e24dSChristoph Paasch {
1322e337e24dSChristoph Paasch 	/* sk_clone_lock locked the socket and set refcnt to 2 */
1323e337e24dSChristoph Paasch 	bh_unlock_sock(sk);
1324e337e24dSChristoph Paasch 	sock_put(sk);
13252f8a397dSPaolo Abeni 	inet_csk_prepare_for_destroy_sock(sk);
13266761893eSPaolo Abeni 	inet_sk(sk)->inet_num = 0;
1327e337e24dSChristoph Paasch }
1328e337e24dSChristoph Paasch EXPORT_SYMBOL(inet_csk_prepare_forced_close);
1329e337e24dSChristoph Paasch 
inet_ulp_can_listen(const struct sock * sk)13302c02d41dSPaolo Abeni static int inet_ulp_can_listen(const struct sock *sk)
13312c02d41dSPaolo Abeni {
13322c02d41dSPaolo Abeni 	const struct inet_connection_sock *icsk = inet_csk(sk);
13332c02d41dSPaolo Abeni 
13342c02d41dSPaolo Abeni 	if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
13352c02d41dSPaolo Abeni 		return -EINVAL;
13362c02d41dSPaolo Abeni 
13372c02d41dSPaolo Abeni 	return 0;
13382c02d41dSPaolo Abeni }
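/* A ULP without a ->clone operation cannot hand its per-connection state
 * to the children a listener would create: every accepted socket would
 * inherit icsk_ulp_ops pointing at state owned by the parent.  Refusing
 * the transition to TCP_LISTEN with -EINVAL closes that hole.
 */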
13392c02d41dSPaolo Abeni 
inet_csk_listen_start(struct sock * sk)1340e7049395SKuniyuki Iwashima int inet_csk_listen_start(struct sock *sk)
1341a019d6feSArnaldo Carvalho de Melo {
1342a019d6feSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
134310cbc8f1SEric Dumazet 	struct inet_sock *inet = inet_sk(sk);
13447a7160edSKuniyuki Iwashima 	int err;
1345a019d6feSArnaldo Carvalho de Melo 
13462c02d41dSPaolo Abeni 	err = inet_ulp_can_listen(sk);
13472c02d41dSPaolo Abeni 	if (unlikely(err))
13482c02d41dSPaolo Abeni 		return err;
13492c02d41dSPaolo Abeni 
1350ef547f2aSEric Dumazet 	reqsk_queue_alloc(&icsk->icsk_accept_queue);
1351a019d6feSArnaldo Carvalho de Melo 
1352a019d6feSArnaldo Carvalho de Melo 	sk->sk_ack_backlog = 0;
1353a019d6feSArnaldo Carvalho de Melo 	inet_csk_delack_init(sk);
1354a019d6feSArnaldo Carvalho de Melo 
1355a019d6feSArnaldo Carvalho de Melo 	/* There is a race window here: we announce ourselves as listening,
1356a019d6feSArnaldo Carvalho de Melo 	 * but this transition is still not validated by get_port().
1357a019d6feSArnaldo Carvalho de Melo 	 * It is OK, because this socket enters the hash table only
1358a019d6feSArnaldo Carvalho de Melo 	 * after validation is complete.
1359a019d6feSArnaldo Carvalho de Melo 	 */
1360563e0bb0SYafang Shao 	inet_sk_state_store(sk, TCP_LISTEN);
13617a7160edSKuniyuki Iwashima 	err = sk->sk_prot->get_port(sk, inet->inet_num);
13627a7160edSKuniyuki Iwashima 	if (!err) {
1363c720c7e8SEric Dumazet 		inet->inet_sport = htons(inet->inet_num);
1364a019d6feSArnaldo Carvalho de Melo 
1365a019d6feSArnaldo Carvalho de Melo 		sk_dst_reset(sk);
1366086c653fSCraig Gallek 		err = sk->sk_prot->hash(sk);
1367a019d6feSArnaldo Carvalho de Melo 
1368086c653fSCraig Gallek 		if (likely(!err))
1369a019d6feSArnaldo Carvalho de Melo 			return 0;
1370a019d6feSArnaldo Carvalho de Melo 	}
1371a019d6feSArnaldo Carvalho de Melo 
1372563e0bb0SYafang Shao 	inet_sk_set_state(sk, TCP_CLOSE);
1373086c653fSCraig Gallek 	return err;
1374a019d6feSArnaldo Carvalho de Melo }
1375a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_start);
1376a019d6feSArnaldo Carvalho de Melo 
inet_child_forget(struct sock * sk,struct request_sock * req,struct sock * child)1377ebb516afSEric Dumazet static void inet_child_forget(struct sock *sk, struct request_sock *req,
1378ebb516afSEric Dumazet 			      struct sock *child)
1379ebb516afSEric Dumazet {
1380ebb516afSEric Dumazet 	sk->sk_prot->disconnect(child, O_NONBLOCK);
1381ebb516afSEric Dumazet 
1382ebb516afSEric Dumazet 	sock_orphan(child);
1383ebb516afSEric Dumazet 
138419757cebSEric Dumazet 	this_cpu_inc(*sk->sk_prot->orphan_count);
1385ebb516afSEric Dumazet 
1386ebb516afSEric Dumazet 	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
1387d983ea6fSEric Dumazet 		BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
1388ebb516afSEric Dumazet 		BUG_ON(sk != req->rsk_listener);
1389ebb516afSEric Dumazet 
1390ebb516afSEric Dumazet 		/* Paranoid, to prevent a race condition if
1391ebb516afSEric Dumazet 		 * an inbound pkt destined for the child is
1392ebb516afSEric Dumazet 		 * blocked by the sock lock in tcp_v4_rcv().
1393ebb516afSEric Dumazet 		 * Also to satisfy an assertion in
1394ebb516afSEric Dumazet 		 * tcp_v4_destroy_sock().
1395ebb516afSEric Dumazet 		 */
1396d983ea6fSEric Dumazet 		RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
1397ebb516afSEric Dumazet 	}
1398ebb516afSEric Dumazet 	inet_csk_destroy_sock(child);
1399ebb516afSEric Dumazet }
1400ebb516afSEric Dumazet 
inet_csk_reqsk_queue_add(struct sock * sk,struct request_sock * req,struct sock * child)14017716682cSEric Dumazet struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
14027716682cSEric Dumazet 				      struct request_sock *req,
1403ebb516afSEric Dumazet 				      struct sock *child)
1404ebb516afSEric Dumazet {
1405ebb516afSEric Dumazet 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
1406ebb516afSEric Dumazet 
1407ebb516afSEric Dumazet 	spin_lock(&queue->rskq_lock);
1408ebb516afSEric Dumazet 	if (unlikely(sk->sk_state != TCP_LISTEN)) {
1409ebb516afSEric Dumazet 		inet_child_forget(sk, req, child);
14107716682cSEric Dumazet 		child = NULL;
1411ebb516afSEric Dumazet 	} else {
1412ebb516afSEric Dumazet 		req->sk = child;
1413ebb516afSEric Dumazet 		req->dl_next = NULL;
1414ebb516afSEric Dumazet 		if (queue->rskq_accept_head == NULL)
141560b173caSEric Dumazet 			WRITE_ONCE(queue->rskq_accept_head, req);
1416ebb516afSEric Dumazet 		else
1417ebb516afSEric Dumazet 			queue->rskq_accept_tail->dl_next = req;
1418ebb516afSEric Dumazet 		queue->rskq_accept_tail = req;
1419ebb516afSEric Dumazet 		sk_acceptq_added(sk);
1420ebb516afSEric Dumazet 	}
1421ebb516afSEric Dumazet 	spin_unlock(&queue->rskq_lock);
14227716682cSEric Dumazet 	return child;
1423ebb516afSEric Dumazet }
1424ebb516afSEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
1425ebb516afSEric Dumazet 
inet_csk_complete_hashdance(struct sock * sk,struct sock * child,struct request_sock * req,bool own_req)14265e0724d0SEric Dumazet struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
14275e0724d0SEric Dumazet 					 struct request_sock *req, bool own_req)
14285e0724d0SEric Dumazet {
14295e0724d0SEric Dumazet 	if (own_req) {
1430d4f2c86bSKuniyuki Iwashima 		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
1431d4f2c86bSKuniyuki Iwashima 		reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
1432d4f2c86bSKuniyuki Iwashima 
1433d4f2c86bSKuniyuki Iwashima 		if (sk != req->rsk_listener) {
1434d4f2c86bSKuniyuki Iwashima 			/* another listening sk has been selected,
1435d4f2c86bSKuniyuki Iwashima 			 * migrate the req to it.
1436d4f2c86bSKuniyuki Iwashima 			 */
1437d4f2c86bSKuniyuki Iwashima 			struct request_sock *nreq;
1438d4f2c86bSKuniyuki Iwashima 
1439d4f2c86bSKuniyuki Iwashima 			/* hold a refcnt for the nreq->rsk_listener
1440d4f2c86bSKuniyuki Iwashima 			 * which is assigned in inet_reqsk_clone()
1441d4f2c86bSKuniyuki Iwashima 			 */
1442d4f2c86bSKuniyuki Iwashima 			sock_hold(sk);
1443d4f2c86bSKuniyuki Iwashima 			nreq = inet_reqsk_clone(req, sk);
1444d4f2c86bSKuniyuki Iwashima 			if (!nreq) {
1445d4f2c86bSKuniyuki Iwashima 				inet_child_forget(sk, req, child);
1446d4f2c86bSKuniyuki Iwashima 				goto child_put;
1447d4f2c86bSKuniyuki Iwashima 			}
1448d4f2c86bSKuniyuki Iwashima 
1449d4f2c86bSKuniyuki Iwashima 			refcount_set(&nreq->rsk_refcnt, 1);
1450d4f2c86bSKuniyuki Iwashima 			if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
145155d444b3SKuniyuki Iwashima 				__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS);
1452d4f2c86bSKuniyuki Iwashima 				reqsk_migrate_reset(req);
1453d4f2c86bSKuniyuki Iwashima 				reqsk_put(req);
14545e0724d0SEric Dumazet 				return child;
14555e0724d0SEric Dumazet 			}
1456d4f2c86bSKuniyuki Iwashima 
145755d444b3SKuniyuki Iwashima 			__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
1458d4f2c86bSKuniyuki Iwashima 			reqsk_migrate_reset(nreq);
1459d4f2c86bSKuniyuki Iwashima 			__reqsk_free(nreq);
1460d4f2c86bSKuniyuki Iwashima 		} else if (inet_csk_reqsk_queue_add(sk, req, child)) {
1461d4f2c86bSKuniyuki Iwashima 			return child;
1462d4f2c86bSKuniyuki Iwashima 		}
1463d4f2c86bSKuniyuki Iwashima 	}
14645e0724d0SEric Dumazet 	/* Too bad, another child took ownership of the request, undo. */
1465d4f2c86bSKuniyuki Iwashima child_put:
14665e0724d0SEric Dumazet 	bh_unlock_sock(child);
14675e0724d0SEric Dumazet 	sock_put(child);
14685e0724d0SEric Dumazet 	return NULL;
14695e0724d0SEric Dumazet }
14705e0724d0SEric Dumazet EXPORT_SYMBOL(inet_csk_complete_hashdance);
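/* own_req == true means this CPU won the ehash insertion race and the
 * child owns the request.  The plain path then queues the child on the
 * listener's accept queue; the sk != req->rsk_listener branch covers a
 * reuseport migration that picked a different listener after the SYN
 * arrived, cloning the req so the child lands on the new listener's
 * accept queue instead.
 */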
14715e0724d0SEric Dumazet 
1472a019d6feSArnaldo Carvalho de Melo /*
1473a019d6feSArnaldo Carvalho de Melo  *	This routine closes sockets which have been at least partially
1474a019d6feSArnaldo Carvalho de Melo  *	opened, but not yet accepted.
1475a019d6feSArnaldo Carvalho de Melo  */
inet_csk_listen_stop(struct sock * sk)1476a019d6feSArnaldo Carvalho de Melo void inet_csk_listen_stop(struct sock *sk)
1477a019d6feSArnaldo Carvalho de Melo {
1478a019d6feSArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
14798336886fSJerry Chu 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
1480fff1f300SEric Dumazet 	struct request_sock *next, *req;
1481a019d6feSArnaldo Carvalho de Melo 
1482a019d6feSArnaldo Carvalho de Melo 	/* Following the specs, it would be better either to send a FIN
1483a019d6feSArnaldo Carvalho de Melo 	 * (and enter FIN-WAIT-1, the normal close)
1484a019d6feSArnaldo Carvalho de Melo 	 * or to send an active reset (abort).
1485a019d6feSArnaldo Carvalho de Melo 	 * Certainly, it is pretty dangerous during a synflood, but that is
1486a019d6feSArnaldo Carvalho de Melo 	 * a bad justification for our negligence 8)
1487a019d6feSArnaldo Carvalho de Melo 	 * To be honest, we are not able to implement either
1488a019d6feSArnaldo Carvalho de Melo 	 * of the variants now.			--ANK
1489a019d6feSArnaldo Carvalho de Melo 	 */
1490fff1f300SEric Dumazet 	while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
149154b92e84SKuniyuki Iwashima 		struct sock *child = req->sk, *nsk;
149254b92e84SKuniyuki Iwashima 		struct request_sock *nreq;
1493a019d6feSArnaldo Carvalho de Melo 
1494a019d6feSArnaldo Carvalho de Melo 		local_bh_disable();
1495a019d6feSArnaldo Carvalho de Melo 		bh_lock_sock(child);
1496547b792cSIlpo Järvinen 		WARN_ON(sock_owned_by_user(child));
1497a019d6feSArnaldo Carvalho de Melo 		sock_hold(child);
1498a019d6feSArnaldo Carvalho de Melo 
149954b92e84SKuniyuki Iwashima 		nsk = reuseport_migrate_sock(sk, child, NULL);
150054b92e84SKuniyuki Iwashima 		if (nsk) {
150154b92e84SKuniyuki Iwashima 			nreq = inet_reqsk_clone(req, nsk);
150254b92e84SKuniyuki Iwashima 			if (nreq) {
150354b92e84SKuniyuki Iwashima 				refcount_set(&nreq->rsk_refcnt, 1);
150454b92e84SKuniyuki Iwashima 
150554b92e84SKuniyuki Iwashima 				if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
150655d444b3SKuniyuki Iwashima 					__NET_INC_STATS(sock_net(nsk),
150755d444b3SKuniyuki Iwashima 							LINUX_MIB_TCPMIGRATEREQSUCCESS);
150854b92e84SKuniyuki Iwashima 					reqsk_migrate_reset(req);
150954b92e84SKuniyuki Iwashima 				} else {
151055d444b3SKuniyuki Iwashima 					__NET_INC_STATS(sock_net(nsk),
151155d444b3SKuniyuki Iwashima 							LINUX_MIB_TCPMIGRATEREQFAILURE);
151254b92e84SKuniyuki Iwashima 					reqsk_migrate_reset(nreq);
151354b92e84SKuniyuki Iwashima 					__reqsk_free(nreq);
151454b92e84SKuniyuki Iwashima 				}
151554b92e84SKuniyuki Iwashima 
151654b92e84SKuniyuki Iwashima 				/* inet_csk_reqsk_queue_add() has already
151754b92e84SKuniyuki Iwashima 				 * called inet_child_forget() in the failure case.
151854b92e84SKuniyuki Iwashima 				 */
151954b92e84SKuniyuki Iwashima 				goto skip_child_forget;
152054b92e84SKuniyuki Iwashima 			}
152154b92e84SKuniyuki Iwashima 		}
152254b92e84SKuniyuki Iwashima 
1523ebb516afSEric Dumazet 		inet_child_forget(sk, req, child);
152454b92e84SKuniyuki Iwashima skip_child_forget:
1525da8ab578SEric Dumazet 		reqsk_put(req);
1526a019d6feSArnaldo Carvalho de Melo 		bh_unlock_sock(child);
1527a019d6feSArnaldo Carvalho de Melo 		local_bh_enable();
1528a019d6feSArnaldo Carvalho de Melo 		sock_put(child);
1529a019d6feSArnaldo Carvalho de Melo 
153092d6f176SEric Dumazet 		cond_resched();
1531a019d6feSArnaldo Carvalho de Melo 	}
15320536fcc0SEric Dumazet 	if (queue->fastopenq.rskq_rst_head) {
15338336886fSJerry Chu 		/* Free all the reqs queued in rskq_rst_head. */
15340536fcc0SEric Dumazet 		spin_lock_bh(&queue->fastopenq.lock);
1535fff1f300SEric Dumazet 		req = queue->fastopenq.rskq_rst_head;
15360536fcc0SEric Dumazet 		queue->fastopenq.rskq_rst_head = NULL;
15370536fcc0SEric Dumazet 		spin_unlock_bh(&queue->fastopenq.lock);
1538fff1f300SEric Dumazet 		while (req != NULL) {
1539fff1f300SEric Dumazet 			next = req->dl_next;
154013854e5aSEric Dumazet 			reqsk_put(req);
1541fff1f300SEric Dumazet 			req = next;
15428336886fSJerry Chu 		}
15438336886fSJerry Chu 	}
1544ebb516afSEric Dumazet 	WARN_ON_ONCE(sk->sk_ack_backlog);
1545a019d6feSArnaldo Carvalho de Melo }
1546a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
1547af05dc93SArnaldo Carvalho de Melo 
inet_csk_addr2sockaddr(struct sock * sk,struct sockaddr * uaddr)1548af05dc93SArnaldo Carvalho de Melo void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
1549af05dc93SArnaldo Carvalho de Melo {
1550af05dc93SArnaldo Carvalho de Melo 	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
1551af05dc93SArnaldo Carvalho de Melo 	const struct inet_sock *inet = inet_sk(sk);
1552af05dc93SArnaldo Carvalho de Melo 
1553af05dc93SArnaldo Carvalho de Melo 	sin->sin_family		= AF_INET;
1554c720c7e8SEric Dumazet 	sin->sin_addr.s_addr	= inet->inet_daddr;
1555c720c7e8SEric Dumazet 	sin->sin_port		= inet->inet_dport;
1556af05dc93SArnaldo Carvalho de Melo }
1557af05dc93SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
1558c4d93909SArnaldo Carvalho de Melo 
inet_csk_rebuild_route(struct sock * sk,struct flowi * fl)155980d0a69fSDavid S. Miller static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
156080d0a69fSDavid S. Miller {
15615abf7f7eSEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
15625abf7f7eSEric Dumazet 	const struct ip_options_rcu *inet_opt;
156380d0a69fSDavid S. Miller 	__be32 daddr = inet->inet_daddr;
156480d0a69fSDavid S. Miller 	struct flowi4 *fl4;
156580d0a69fSDavid S. Miller 	struct rtable *rt;
156680d0a69fSDavid S. Miller 
156780d0a69fSDavid S. Miller 	rcu_read_lock();
156880d0a69fSDavid S. Miller 	inet_opt = rcu_dereference(inet->inet_opt);
156980d0a69fSDavid S. Miller 	if (inet_opt && inet_opt->opt.srr)
157080d0a69fSDavid S. Miller 		daddr = inet_opt->opt.faddr;
157180d0a69fSDavid S. Miller 	fl4 = &fl->u.ip4;
157280d0a69fSDavid S. Miller 	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
157380d0a69fSDavid S. Miller 				   inet->inet_saddr, inet->inet_dport,
157480d0a69fSDavid S. Miller 				   inet->inet_sport, sk->sk_protocol,
1575a3522a2eSGuillaume Nault 				   ip_sock_rt_tos(sk), sk->sk_bound_dev_if);
157680d0a69fSDavid S. Miller 	if (IS_ERR(rt))
157780d0a69fSDavid S. Miller 		rt = NULL;
157880d0a69fSDavid S. Miller 	if (rt)
157980d0a69fSDavid S. Miller 		sk_setup_caps(sk, &rt->dst);
158080d0a69fSDavid S. Miller 	rcu_read_unlock();
158180d0a69fSDavid S. Miller 
158280d0a69fSDavid S. Miller 	return &rt->dst;
158380d0a69fSDavid S. Miller }
158480d0a69fSDavid S. Miller 
inet_csk_update_pmtu(struct sock * sk,u32 mtu)158580d0a69fSDavid S. Miller struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
158680d0a69fSDavid S. Miller {
158780d0a69fSDavid S. Miller 	struct dst_entry *dst = __sk_dst_check(sk, 0);
158880d0a69fSDavid S. Miller 	struct inet_sock *inet = inet_sk(sk);
158980d0a69fSDavid S. Miller 
159080d0a69fSDavid S. Miller 	if (!dst) {
159180d0a69fSDavid S. Miller 		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
159280d0a69fSDavid S. Miller 		if (!dst)
159380d0a69fSDavid S. Miller 			goto out;
159480d0a69fSDavid S. Miller 	}
1595bd085ef6SHangbin Liu 	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
159680d0a69fSDavid S. Miller 
159780d0a69fSDavid S. Miller 	dst = __sk_dst_check(sk, 0);
159880d0a69fSDavid S. Miller 	if (!dst)
159980d0a69fSDavid S. Miller 		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
160080d0a69fSDavid S. Miller out:
160180d0a69fSDavid S. Miller 	return dst;
160280d0a69fSDavid S. Miller }
160380d0a69fSDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);
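/* The double __sk_dst_check() above exists because ->update_pmtu() may
 * invalidate the cached route while shrinking the path MTU, so the dst
 * is re-checked and rebuilt from the socket's flow if needed.  A sketch
 * of a caller, modelled on tcp_v4_mtu_reduced():
 *
 *	u32 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
 *	struct dst_entry *dst = inet_csk_update_pmtu(sk, mtu);
 *
 *	if (!dst)
 *		return;
 *	... shrink the cached MSS if the new path MTU is smaller ...
 */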
1604