// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Support for INET connection oriented protocols.
 *
 * Authors:	See the TCP sources
 */

#include <linux/module.h>
#include <linux/jhash.h>

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
#include <net/addrconf.h>

#if IS_ENABLED(CONFIG_IPV6)
/* match_sk*_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses
 *				if IPv6 only, and any IPv4 addresses
 *				if not IPv6 only
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *				IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
 *				and 0.0.0.0 equals to 0.0.0.0 only
 */
static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
				 const struct in6_addr *sk2_rcv_saddr6,
				 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk1_ipv6only, bool sk2_ipv6only,
				 bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;

	/* if both are mapped, treat as IPv4 */
	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
		if (!sk2_ipv6only) {
			if (sk1_rcv_saddr == sk2_rcv_saddr)
				return true;
			return (match_sk1_wildcard && !sk1_rcv_saddr) ||
				(match_sk2_wildcard && !sk2_rcv_saddr);
		}
		return false;
	}

	if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
		return true;

	if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard &&
	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
		return true;

	if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard &&
	    !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
		return true;

	if (sk2_rcv_saddr6 &&
	    ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
		return true;

	return false;
}
#endif

/* match_sk*_wildcard == true:  0.0.0.0 equals to any IPv4 addresses
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *				0.0.0.0 only equals to 0.0.0.0
 */
static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk2_ipv6only, bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	if (!sk2_ipv6only) {
		if (sk1_rcv_saddr == sk2_rcv_saddr)
			return true;
		return (match_sk1_wildcard && !sk1_rcv_saddr) ||
			(match_sk2_wildcard && !sk2_rcv_saddr);
	}
	return false;
}
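
/* Illustrative truth table for the helpers above (an editor's sketch,
 * not exhaustive), for two AF_INET sockets with
 * match_sk1_wildcard == match_sk2_wildcard == true:
 *
 *	sk1_rcv_saddr	sk2_rcv_saddr	sk2_ipv6only	result
 *	1.2.3.4		1.2.3.4		false		true  (exact match)
 *	1.2.3.4		5.6.7.8		false		false
 *	0.0.0.0		5.6.7.8		false		true  (wildcard match)
 *	1.2.3.4		any		true		false (v6-only peer)
 *
 * With both match_sk*_wildcard == false, only the exact-match row holds.
 */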

bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
			  bool match_wildcard)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr,
					    inet6_rcv_saddr(sk2),
					    sk->sk_rcv_saddr,
					    sk2->sk_rcv_saddr,
					    ipv6_only_sock(sk),
					    ipv6_only_sock(sk2),
					    match_wildcard,
					    match_wildcard);
#endif
	return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
				    ipv6_only_sock(sk2), match_wildcard,
				    match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);

bool inet_rcv_saddr_any(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
#endif
	return !sk->sk_rcv_saddr;
}

/**
 * inet_sk_get_local_port_range - fetch ephemeral ports range
 * @sk: socket
 * @low: pointer to low port
 * @high: pointer to high port
 *
 * Fetch netns port range (/proc/sys/net/ipv4/ip_local_port_range)
 * Range can be overridden if socket got IP_LOCAL_PORT_RANGE option.
 * Returns true if IP_LOCAL_PORT_RANGE was set on this socket.
 */
bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
{
	int lo, hi, sk_lo, sk_hi;
	bool local_range = false;
	u32 sk_range;

	inet_get_local_port_range(sock_net(sk), &lo, &hi);

	sk_range = READ_ONCE(inet_sk(sk)->local_port_range);
	if (unlikely(sk_range)) {
		sk_lo = sk_range & 0xffff;
		sk_hi = sk_range >> 16;

		if (lo <= sk_lo && sk_lo <= hi)
			lo = sk_lo;
		if (lo <= sk_hi && sk_hi <= hi)
			hi = sk_hi;
		local_range = true;
	}

	*low = lo;
	*high = hi;
	return local_range;
}
EXPORT_SYMBOL(inet_sk_get_local_port_range);
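
/* Userspace sketch of the encoding decoded above (illustrative, error
 * handling omitted): IP_LOCAL_PORT_RANGE packs the low bound into the
 * lower 16 bits and the high bound into the upper 16 bits of a u32:
 *
 *	uint32_t range = 40000 | (45000 << 16);
 *	setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
 *
 * The per-socket bounds only take effect where they fall inside the
 * netns ip_local_port_range, as the clamping above shows.
 */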

static bool inet_use_bhash2_on_bind(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);

		if (addr_type == IPV6_ADDR_ANY)
			return false;

		if (addr_type != IPV6_ADDR_MAPPED)
			return true;
	}
#endif
	return sk->sk_rcv_saddr != htonl(INADDR_ANY);
}
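
/* Examples of the rule above: a socket bound to a specific address
 * (192.0.2.1, 2001:db8::1, or the v4-mapped ::ffff:192.0.2.1) takes the
 * bhash2 (port+address) conflict path, while one bound to the wildcard
 * (0.0.0.0 or ::) stays on the classic per-port bhash path.
 */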

static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
			       kuid_t sk_uid, bool relax,
			       bool reuseport_cb_ok, bool reuseport_ok)
{
	int bound_dev_if2;

	if (sk == sk2)
		return false;

	bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);

	if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
	    sk->sk_bound_dev_if == bound_dev_if2) {
		if (sk->sk_reuse && sk2->sk_reuse &&
		    sk2->sk_state != TCP_LISTEN) {
			if (!relax || (!reuseport_ok && sk->sk_reuseport &&
				       sk2->sk_reuseport && reuseport_cb_ok &&
				       (sk2->sk_state == TCP_TIME_WAIT ||
					uid_eq(sk_uid, sock_i_uid(sk2)))))
				return true;
		} else if (!reuseport_ok || !sk->sk_reuseport ||
			   !sk2->sk_reuseport || !reuseport_cb_ok ||
			   (sk2->sk_state != TCP_TIME_WAIT &&
			    !uid_eq(sk_uid, sock_i_uid(sk2)))) {
			return true;
		}
	}
	return false;
}
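
/* Rough intuition for the check above (an editor's summary): sockets
 * bound to different devices never conflict. Otherwise, in the relaxed
 * pass two SO_REUSEADDR sockets may share the port as long as the
 * existing one is not listening, and two SO_REUSEPORT sockets may share
 * it when the existing one is in TIME_WAIT or owned by the same user;
 * everything else is reported as a conflict.
 */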

static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
				   kuid_t sk_uid, bool relax,
				   bool reuseport_cb_ok, bool reuseport_ok)
{
	if (ipv6_only_sock(sk2)) {
		if (sk->sk_family == AF_INET)
			return false;

#if IS_ENABLED(CONFIG_IPV6)
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			return false;
#endif
	}

	return inet_bind_conflict(sk, sk2, sk_uid, relax,
				  reuseport_cb_ok, reuseport_ok);
}

static bool inet_bhash2_conflict(const struct sock *sk,
				 const struct inet_bind2_bucket *tb2,
				 kuid_t sk_uid,
				 bool relax, bool reuseport_cb_ok,
				 bool reuseport_ok)
{
	struct sock *sk2;

	sk_for_each_bound(sk2, &tb2->owners) {
		if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
					   reuseport_cb_ok, reuseport_ok))
			return true;
	}

	return false;
}

#define sk_for_each_bound_bhash(__sk, __tb2, __tb)			\
	hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node)	\
		sk_for_each_bound((__sk), &(__tb2)->owners)

/* This should be called only when the tb and tb2 hashbuckets' locks are held */
static int inet_csk_bind_conflict(const struct sock *sk,
				  const struct inet_bind_bucket *tb,
				  const struct inet_bind2_bucket *tb2, /* may be null */
				  bool relax, bool reuseport_ok)
{
	kuid_t uid = sock_i_uid((struct sock *)sk);
	struct sock_reuseport *reuseport_cb;
	bool reuseport_cb_ok;
	struct sock *sk2;

	rcu_read_lock();
	reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
	/* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
	reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
	rcu_read_unlock();

	/* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
	 * ipv4) should have been checked already. We need to do these two
	 * checks separately because their spinlocks have to be acquired/released
	 * independently of each other, to prevent possible deadlocks
	 */
	if (inet_use_bhash2_on_bind(sk))
		return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax,
						   reuseport_cb_ok, reuseport_ok);

	/* Unlike other sk lookup places we do not check
	 * for sk_net here, since _all_ the socks listed
	 * in tb->owners and tb2->owners list belong
	 * to the same net - the one this bucket belongs to.
	 */
	sk_for_each_bound_bhash(sk2, tb2, tb) {
		if (!inet_bind_conflict(sk, sk2, uid, relax, reuseport_cb_ok, reuseport_ok))
			continue;

		if (inet_rcv_saddr_equal(sk, sk2, true))
			return true;
	}

	return false;
}

/* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or
 * INADDR_ANY (if ipv4) socket.
 *
 * Caller must hold bhash hashbucket lock with local bh disabled, to protect
 * against concurrent binds on the port for addr any
 */
static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l3mdev,
					  bool relax, bool reuseport_ok)
{
	kuid_t uid = sock_i_uid((struct sock *)sk);
	const struct net *net = sock_net(sk);
	struct sock_reuseport *reuseport_cb;
	struct inet_bind_hashbucket *head2;
	struct inet_bind2_bucket *tb2;
	bool conflict = false;
	bool reuseport_cb_ok;

	rcu_read_lock();
	reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
	/* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
	reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
	rcu_read_unlock();

	head2 = inet_bhash2_addr_any_hashbucket(sk, net, port);

	spin_lock(&head2->lock);

	inet_bind_bucket_for_each(tb2, &head2->chain) {
		if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
			continue;

		if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok))
			continue;

		conflict = true;
		break;
	}

	spin_unlock(&head2->lock);

	return conflict;
}

/*
 * Find an open port number for the socket.  Returns with the
 * inet_bind_hashbucket locks held if successful.
 */
static struct inet_bind_hashbucket *
inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret,
			struct inet_bind2_bucket **tb2_ret,
			struct inet_bind_hashbucket **head2_ret, int *port_ret)
{
	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
	int i, low, high, attempt_half, port, l3mdev;
	struct inet_bind_hashbucket *head, *head2;
	struct net *net = sock_net(sk);
	struct inet_bind2_bucket *tb2;
	struct inet_bind_bucket *tb;
	u32 remaining, offset;
	bool relax = false;

	l3mdev = inet_sk_bound_l3mdev(sk);
ports_exhausted:
	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
	inet_sk_get_local_port_range(sk, &low, &high);
	high++; /* [32768, 60999] -> [32768, 61000[ */
	if (high - low < 4)
		attempt_half = 0;
	if (attempt_half) {
		int half = low + (((high - low) >> 2) << 1);

		if (attempt_half == 1)
			high = half;
		else
			low = half;
	}
	remaining = high - low;
	if (likely(remaining > 1))
		remaining &= ~1U;

	offset = get_random_u32_below(remaining);
	/* __inet_hash_connect() favors ports having @low parity
	 * We do the opposite to not pollute connect() users.
	 */
	offset |= 1U;

other_parity_scan:
	port = low + offset;
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		if (inet_use_bhash2_on_bind(sk)) {
			if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false))
				goto next_port;
		}

		head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
		spin_lock(&head2->lock);
		tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (inet_bind_bucket_match(tb, net, port, l3mdev)) {
				if (!inet_csk_bind_conflict(sk, tb, tb2,
							    relax, false))
					goto success;
				spin_unlock(&head2->lock);
				goto next_port;
			}
		tb = NULL;
		goto success;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}

	offset--;
	if (!(offset & 1))
		goto other_parity_scan;

	if (attempt_half == 1) {
		/* OK we now try the upper half of the range */
		attempt_half = 2;
		goto other_half_scan;
	}

	if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
		/* We still have a chance to connect to different destinations */
		relax = true;
		goto ports_exhausted;
	}
	return NULL;
success:
	*port_ret = port;
	*tb_ret = tb;
	*tb2_ret = tb2;
	*head2_ret = head2;
	return head;
}
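
/* Example of the parity trick above: offset is forced odd and the scan
 * steps by two, so with the default [32768, 61000) range bind(port == 0)
 * only ever probes odd ports (e.g. 45333, 45335, ..., wrapping at the
 * top), leaving even ports for __inet_hash_connect() on the active side.
 */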

static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
				     struct sock *sk)
{
	kuid_t uid = sock_i_uid(sk);

	if (tb->fastreuseport <= 0)
		return 0;
	if (!sk->sk_reuseport)
		return 0;
	if (rcu_access_pointer(sk->sk_reuseport_cb))
		return 0;
	if (!uid_eq(tb->fastuid, uid))
		return 0;
	/* We only need to check the rcv_saddr if this tb was once marked
	 * without fastreuseport and then was reset, as we can only know that
	 * the fast_*rcv_saddr doesn't have any conflicts with the socks on the
	 * owners list.
	 */
	if (tb->fastreuseport == FASTREUSEPORT_ANY)
		return 1;
#if IS_ENABLED(CONFIG_IPV6)
	if (tb->fast_sk_family == AF_INET6)
		return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
					    inet6_rcv_saddr(sk),
					    tb->fast_rcv_saddr,
					    sk->sk_rcv_saddr,
					    tb->fast_ipv6_only,
					    ipv6_only_sock(sk), true, false);
#endif
	return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
				    ipv6_only_sock(sk), true, false);
}

void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
			       struct sock *sk)
{
	kuid_t uid = sock_i_uid(sk);
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;

	if (hlist_empty(&tb->bhash2)) {
		tb->fastreuse = reuse;
		if (sk->sk_reuseport) {
			tb->fastreuseport = FASTREUSEPORT_ANY;
			tb->fastuid = uid;
			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
			tb->fast_ipv6_only = ipv6_only_sock(sk);
			tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
			tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
		} else {
			tb->fastreuseport = 0;
		}
	} else {
		if (!reuse)
			tb->fastreuse = 0;
		if (sk->sk_reuseport) {
			/* We didn't match or we don't have fastreuseport set on
			 * the tb, but we have sk_reuseport set on this socket
			 * and we know that there are no bind conflicts with
			 * this socket in this tb, so reset our tb's reuseport
			 * settings so that any subsequent sockets that match
			 * our current socket will be put on the fast path.
			 *
			 * If we reset we need to set FASTREUSEPORT_STRICT so we
			 * do extra checking for all subsequent sk_reuseport
			 * socks.
			 */
			if (!sk_reuseport_match(tb, sk)) {
				tb->fastreuseport = FASTREUSEPORT_STRICT;
				tb->fastuid = uid;
				tb->fast_rcv_saddr = sk->sk_rcv_saddr;
				tb->fast_ipv6_only = ipv6_only_sock(sk);
				tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
				tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
			}
		} else {
			tb->fastreuseport = 0;
		}
	}
}

/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 * We try to allocate an odd port (and leave even ports for connect())
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
	bool found_port = false, check_bind_conflict = true;
	bool bhash_created = false, bhash2_created = false;
	int ret = -EADDRINUSE, port = snum, l3mdev;
	struct inet_bind_hashbucket *head, *head2;
	struct inet_bind2_bucket *tb2 = NULL;
	struct inet_bind_bucket *tb = NULL;
	bool head2_lock_acquired = false;
	struct net *net = sock_net(sk);

	l3mdev = inet_sk_bound_l3mdev(sk);

	if (!port) {
		head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
		if (!head)
			return ret;

		head2_lock_acquired = true;

		if (tb && tb2)
			goto success;
		found_port = true;
	} else {
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (inet_bind_bucket_match(tb, net, port, l3mdev))
				break;
	}

	if (!tb) {
		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
					     head, port, l3mdev);
		if (!tb)
			goto fail_unlock;
		bhash_created = true;
	}

	if (!found_port) {
		if (!hlist_empty(&tb->bhash2)) {
			if (sk->sk_reuse == SK_FORCE_REUSE ||
			    (tb->fastreuse > 0 && reuse) ||
			    sk_reuseport_match(tb, sk))
				check_bind_conflict = false;
		}

		if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) {
			if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true))
				goto fail_unlock;
		}

		head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
		spin_lock(&head2->lock);
		head2_lock_acquired = true;
		tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
	}

	if (!tb2) {
		tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
					       net, head2, tb, sk);
		if (!tb2)
			goto fail_unlock;
		bhash2_created = true;
	}

	if (!found_port && check_bind_conflict) {
		if (inet_csk_bind_conflict(sk, tb, tb2, true, true))
			goto fail_unlock;
	}

success:
	inet_csk_update_fastreuse(tb, sk);

	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, tb2, port);
	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
	WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
	ret = 0;

fail_unlock:
	if (ret) {
		if (bhash2_created)
			inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, tb2);
		if (bhash_created)
			inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
	}
	if (head2_lock_acquired)
		spin_unlock(&head2->lock);
	spin_unlock_bh(&head->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);
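
/* Userspace view of the allocator above (illustrative): binding with
 * sin_port == 0 reaches inet_csk_get_port() with snum == 0, and the
 * chosen ephemeral port can be read back afterwards:
 *
 *	struct sockaddr_in a = { .sin_family = AF_INET };	// port 0
 *	socklen_t len = sizeof(a);
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *	getsockname(fd, (struct sockaddr *)&a, &len);		// chosen port
 */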

/*
 * Wait for an incoming connection, avoid race conditions. This must be called
 * with the socket locked.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	DEFINE_WAIT(wait);
	int err;

	/*
	 * True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	 *
	 * Subtle issue: "add_wait_queue_exclusive()" will be added
	 * after any current non-exclusive waiters, and we know that
	 * it will always _stay_ after any new non-exclusive waiters
	 * because all non-exclusive waiters are added at the
	 * beginning of the wait-queue. As such, it's ok to "drop"
	 * our exclusiveness temporarily when we get woken up without
	 * having to remove and re-insert us on the wait queue.
	 */
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		release_sock(sk);
		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
			timeo = schedule_timeout(timeo);
		sched_annotate_sleep();
		lock_sock(sk);
		err = 0;
		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
			break;
		err = -EINVAL;
		if (sk->sk_state != TCP_LISTEN)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/*
 * This will accept the next outstanding connection.
 */
struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *req;
	struct sock *newsk;
	int error;

	lock_sock(sk);

	/* We need to make sure that this socket is listening,
	 * and that it has something pending.
	 */
	error = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out_err;

	/* Find already established connection */
	if (reqsk_queue_empty(queue)) {
		long timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);

		/* If this is a non blocking socket don't sleep */
		error = -EAGAIN;
		if (!timeo)
			goto out_err;

		error = inet_csk_wait_for_connect(sk, timeo);
		if (error)
			goto out_err;
	}
	req = reqsk_queue_remove(queue, sk);
	arg->is_empty = reqsk_queue_empty(queue);
	newsk = req->sk;

	if (sk->sk_protocol == IPPROTO_TCP &&
	    tcp_rsk(req)->tfo_listener) {
		spin_lock_bh(&queue->fastopenq.lock);
		if (tcp_rsk(req)->tfo_listener) {
			/* We are still waiting for the final ACK from 3WHS
			 * so can't free req now. Instead, we set req->sk to
			 * NULL to signify that the child socket is taken
			 * so reqsk_fastopen_remove() will free the req
			 * when 3WHS finishes (or is aborted).
			 */
			req->sk = NULL;
			req = NULL;
		}
		spin_unlock_bh(&queue->fastopenq.lock);
	}

out:
	release_sock(sk);
	if (newsk && mem_cgroup_sockets_enabled) {
		gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
		int amt = 0;

		/* atomically get the memory usage, set and charge the
		 * newsk->sk_memcg.
		 */
		lock_sock(newsk);

		mem_cgroup_sk_alloc(newsk);
		if (newsk->sk_memcg) {
			/* The socket has not been accepted yet, no need
			 * to look at newsk->sk_wmem_queued.
			 */
			amt = sk_mem_pages(newsk->sk_forward_alloc +
					   atomic_read(&newsk->sk_rmem_alloc));
		}

		if (amt)
			mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp);
		kmem_cache_charge(newsk, gfp);

		release_sock(newsk);
	}
	if (req)
		reqsk_put(req);

	if (newsk)
		inet_init_csk_locks(newsk);

	return newsk;
out_err:
	newsk = NULL;
	req = NULL;
	arg->err = error;
	goto out;
}
EXPORT_SYMBOL(inet_csk_accept);
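
/* Note on the -EAGAIN path above: a non-blocking listener has
 * timeo == 0, so accept() returns immediately with EAGAIN when the
 * queue is empty instead of sleeping in inet_csk_wait_for_connect().
 */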

/*
 * Using different timers for retransmit, delayed acks and probes
 * We may wish use just one timer maintaining a list of expire jiffies
 * to optimize.
 */
void inet_csk_init_xmit_timers(struct sock *sk,
			       void (*retransmit_handler)(struct timer_list *t),
			       void (*delack_handler)(struct timer_list *t),
			       void (*keepalive_handler)(struct timer_list *t))
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
	timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
	timer_setup(&sk->sk_timer, keepalive_handler, 0);
	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);

void inet_csk_clear_xmit_timers(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_pending = icsk->icsk_ack.pending = 0;

	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
	sk_stop_timer(sk, &icsk->icsk_delack_timer);
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);

void inet_csk_clear_xmit_timers_sync(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* ongoing timer handlers need to acquire socket lock. */
	sock_not_owned_by_me(sk);

	icsk->icsk_pending = icsk->icsk_ack.pending = 0;

	sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
	sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
	sk_stop_timer_sync(sk, &sk->sk_timer);
}

void inet_csk_delete_keepalive_timer(struct sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);

void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);

struct dst_entry *inet_csk_route_req(const struct sock *sk,
				     struct flowi4 *fl4,
				     const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct ip_options_rcu *opt;
	struct rtable *rt;

	rcu_read_lock();
	opt = rcu_dereference(ireq->ireq_opt);

	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	rcu_read_unlock();
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	rcu_read_unlock();
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);

struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
					    struct sock *newsk,
					    const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct inet_sock *newinet = inet_sk(newsk);
	struct ip_options_rcu *opt;
	struct flowi4 *fl4;
	struct rtable *rt;

	opt = rcu_dereference(ireq->ireq_opt);
	fl4 = &newinet->cork.fl.u.ip4;

	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);

/* Decide when to expire the request and when to resend SYN-ACK */
static void syn_ack_recalc(struct request_sock *req,
			   const int max_syn_ack_retries,
			   const u8 rskq_defer_accept,
			   int *expire, int *resend)
{
	if (!rskq_defer_accept) {
		*expire = req->num_timeout >= max_syn_ack_retries;
		*resend = 1;
		return;
	}
	*expire = req->num_timeout >= max_syn_ack_retries &&
		  (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept);
	/* Do not resend while waiting for data after ACK,
	 * start to resend on end of deferring period to give
	 * last chance for data or ACK to create established socket.
	 */
	*resend = !inet_rsk(req)->acked ||
		  req->num_timeout >= rskq_defer_accept - 1;
}

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
	int err = req->rsk_ops->rtx_syn_ack(parent, req);

	if (!err)
		req->num_retrans++;
	return err;
}
EXPORT_SYMBOL(inet_rtx_syn_ack);

static struct request_sock *
reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener,
		   bool attach_listener)
{
	struct request_sock *req;

	req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
	if (!req)
		return NULL;
	req->rsk_listener = NULL;
	if (attach_listener) {
		if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
			kmem_cache_free(ops->slab, req);
			return NULL;
		}
		req->rsk_listener = sk_listener;
	}
	req->rsk_ops = ops;
	req_to_sk(req)->sk_prot = sk_listener->sk_prot;
	sk_node_init(&req_to_sk(req)->sk_node);
	sk_tx_queue_clear(req_to_sk(req));
	req->saved_syn = NULL;
	req->syncookie = 0;
	req->timeout = 0;
	req->num_timeout = 0;
	req->num_retrans = 0;
	req->sk = NULL;
	refcount_set(&req->rsk_refcnt, 0);

	return req;
}
#define reqsk_alloc(...)	alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__))

struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
				      struct sock *sk_listener,
				      bool attach_listener)
{
	struct request_sock *req = reqsk_alloc(ops, sk_listener,
					       attach_listener);

	if (req) {
		struct inet_request_sock *ireq = inet_rsk(req);

		ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
		ireq->pktopts = NULL;
#endif
		atomic64_set(&ireq->ir_cookie, 0);
		ireq->ireq_state = TCP_NEW_SYN_RECV;
		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
		ireq->ireq_family = sk_listener->sk_family;
		req->timeout = TCP_TIMEOUT_INIT;
	}

	return req;
}
EXPORT_SYMBOL(inet_reqsk_alloc);

static struct request_sock *inet_reqsk_clone(struct request_sock *req,
					     struct sock *sk)
{
	struct sock *req_sk, *nreq_sk;
	struct request_sock *nreq;

	nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
	if (!nreq) {
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);

		/* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
		sock_put(sk);
		return NULL;
	}

	req_sk = req_to_sk(req);
	nreq_sk = req_to_sk(nreq);

	memcpy(nreq_sk, req_sk,
	       offsetof(struct sock, sk_dontcopy_begin));
	unsafe_memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
		      req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end),
		      /* alloc is larger than struct, see above */);

	sk_node_init(&nreq_sk->sk_node);
	nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
	nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
#endif
	nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;

	nreq->rsk_listener = sk;

	/* We need not acquire fastopenq->lock
	 * because the child socket is locked in inet_csk_listen_stop().
	 */
	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener)
		rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq);

	return nreq;
}
101454b92e84SKuniyuki Iwashima
1015c905dee6SKuniyuki Iwashima static void reqsk_queue_migrated(struct request_sock_queue *queue,
1016c905dee6SKuniyuki Iwashima const struct request_sock *req)
1017c905dee6SKuniyuki Iwashima {
1018c905dee6SKuniyuki Iwashima if (req->num_timeout == 0)
1019c905dee6SKuniyuki Iwashima atomic_inc(&queue->young);
1020c905dee6SKuniyuki Iwashima atomic_inc(&queue->qlen);
1021c905dee6SKuniyuki Iwashima }
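/* Counterpart of reqsk_queue_removed() for the destination listener: the
 * migrated req joins the new accept queue's qlen, and its young count too
 * if the SYN+ACK has never been retransmitted (num_timeout == 0).  The
 * source listener's counters are decremented separately by the caller.
 */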
1022c905dee6SKuniyuki Iwashima
102354b92e84SKuniyuki Iwashima static void reqsk_migrate_reset(struct request_sock *req)
102454b92e84SKuniyuki Iwashima {
1025c905dee6SKuniyuki Iwashima req->saved_syn = NULL;
102654b92e84SKuniyuki Iwashima #if IS_ENABLED(CONFIG_IPV6)
102754b92e84SKuniyuki Iwashima inet_rsk(req)->ipv6_opt = NULL;
1028c905dee6SKuniyuki Iwashima inet_rsk(req)->pktopts = NULL;
1029c905dee6SKuniyuki Iwashima #else
1030c905dee6SKuniyuki Iwashima inet_rsk(req)->ireq_opt = NULL;
103154b92e84SKuniyuki Iwashima #endif
103254b92e84SKuniyuki Iwashima }
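/* On a successful migration the clone inherits these pointers, so they are
 * cleared in the request that is about to be freed; otherwise reqsk_put()
 * or __reqsk_free() on it would free option data that the surviving request
 * still uses.  The failure paths do the same on the clone before freeing it.
 */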
103354b92e84SKuniyuki Iwashima
1034079096f1SEric Dumazet /* return true if req was found in the ehash table */
10358b5e07d7SZhiqiang Liu static bool reqsk_queue_unlink(struct request_sock *req)
1036b357a364SEric Dumazet {
103708eaef90SKuniyuki Iwashima struct sock *sk = req_to_sk(req);
10385e0724d0SEric Dumazet bool found = false;
1039b357a364SEric Dumazet
104008eaef90SKuniyuki Iwashima if (sk_hashed(sk)) {
1041429e42c1SKuniyuki Iwashima struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
1042429e42c1SKuniyuki Iwashima spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
1043b357a364SEric Dumazet
1044079096f1SEric Dumazet spin_lock(lock);
104508eaef90SKuniyuki Iwashima found = __sk_nulls_del_node_init_rcu(sk);
1046079096f1SEric Dumazet spin_unlock(lock);
10475e0724d0SEric Dumazet }
1048*e8c526f2SKuniyuki Iwashima
1049b357a364SEric Dumazet return found;
1050b357a364SEric Dumazet }
1051b357a364SEric Dumazet
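/* from_timer == true means the caller runs inside reqsk_timer_handler(),
 * which presumably manages the timer's reference itself (note the
 * reqsk_put() at the end of that handler); timer_delete_sync() is
 * therefore only attempted in the !from_timer case, where waiting for a
 * running handler is safe.
 */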
1052*e8c526f2SKuniyuki Iwashima static bool __inet_csk_reqsk_queue_drop(struct sock *sk,
1053*e8c526f2SKuniyuki Iwashima struct request_sock *req,
1054*e8c526f2SKuniyuki Iwashima bool from_timer)
1055b357a364SEric Dumazet {
10567233da86SAlexander Ovechkin bool unlinked = reqsk_queue_unlink(req);
10577233da86SAlexander Ovechkin
1058*e8c526f2SKuniyuki Iwashima if (!from_timer && timer_delete_sync(&req->rsk_timer))
1059*e8c526f2SKuniyuki Iwashima reqsk_put(req);
1060*e8c526f2SKuniyuki Iwashima
10617233da86SAlexander Ovechkin if (unlinked) {
1062b357a364SEric Dumazet reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
1063b357a364SEric Dumazet reqsk_put(req);
1064b357a364SEric Dumazet }
1065*e8c526f2SKuniyuki Iwashima
10667233da86SAlexander Ovechkin return unlinked;
1067b357a364SEric Dumazet }
1068*e8c526f2SKuniyuki Iwashima
1069*e8c526f2SKuniyuki Iwashima bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
1070*e8c526f2SKuniyuki Iwashima {
1071*e8c526f2SKuniyuki Iwashima return __inet_csk_reqsk_queue_drop(sk, req, false);
1072*e8c526f2SKuniyuki Iwashima }
1073b357a364SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
1074b357a364SEric Dumazet
1075f03f2e15SEric Dumazet void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
1076f03f2e15SEric Dumazet {
1077f03f2e15SEric Dumazet inet_csk_reqsk_queue_drop(sk, req);
1078f03f2e15SEric Dumazet reqsk_put(req);
1079f03f2e15SEric Dumazet }
1080f03f2e15SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
1081f03f2e15SEric Dumazet
108259f379f9SKees Cook static void reqsk_timer_handler(struct timer_list *t)
1083a019d6feSArnaldo Carvalho de Melo {
108459f379f9SKees Cook struct request_sock *req = from_timer(req, t, rsk_timer);
1085c905dee6SKuniyuki Iwashima struct request_sock *nreq = NULL, *oreq = req;
1086fa76ce73SEric Dumazet struct sock *sk_listener = req->rsk_listener;
1087c905dee6SKuniyuki Iwashima struct inet_connection_sock *icsk;
1088c905dee6SKuniyuki Iwashima struct request_sock_queue *queue;
1089c905dee6SKuniyuki Iwashima struct net *net;
1090a594920fSKuniyuki Iwashima int max_syn_ack_retries, qlen, expire = 0, resend = 0;
1091a019d6feSArnaldo Carvalho de Melo
1092c905dee6SKuniyuki Iwashima if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
1093c905dee6SKuniyuki Iwashima struct sock *nsk;
1094c905dee6SKuniyuki Iwashima
1095c905dee6SKuniyuki Iwashima nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
1096c905dee6SKuniyuki Iwashima if (!nsk)
1097079096f1SEric Dumazet goto drop;
1098a019d6feSArnaldo Carvalho de Melo
1099c905dee6SKuniyuki Iwashima nreq = inet_reqsk_clone(req, nsk);
1100c905dee6SKuniyuki Iwashima if (!nreq)
1101c905dee6SKuniyuki Iwashima goto drop;
1102c905dee6SKuniyuki Iwashima
1103c905dee6SKuniyuki Iwashima /* The new timer for the cloned req can drop both references
1104c905dee6SKuniyuki Iwashima * by calling inet_csk_reqsk_queue_drop_and_put(), so
1105c905dee6SKuniyuki Iwashima * hold a third count to prevent use-after-free and
1106c905dee6SKuniyuki Iwashima * call reqsk_put() just before return.
1107c905dee6SKuniyuki Iwashima */
1108c905dee6SKuniyuki Iwashima refcount_set(&nreq->rsk_refcnt, 2 + 1);
1109c905dee6SKuniyuki Iwashima timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
1110c905dee6SKuniyuki Iwashima reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
1111c905dee6SKuniyuki Iwashima
1112c905dee6SKuniyuki Iwashima req = nreq;
1113c905dee6SKuniyuki Iwashima sk_listener = nsk;
1114c905dee6SKuniyuki Iwashima }
1115c905dee6SKuniyuki Iwashima
1116c905dee6SKuniyuki Iwashima icsk = inet_csk(sk_listener);
1117c905dee6SKuniyuki Iwashima net = sock_net(sk_listener);
11183a037f0fSEric Dumazet max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
111920a3b1c0SKuniyuki Iwashima READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
1120a019d6feSArnaldo Carvalho de Melo /* Normally all the openreqs are young and become mature
1121a019d6feSArnaldo Carvalho de Melo * (i.e. converted to an established socket) before the first timeout.
1122fd4f2ceaSEric Dumazet * If the synack was not acknowledged within 1 second, it means
1123a019d6feSArnaldo Carvalho de Melo * one of the following: the synack was lost, the ack was lost,
1124a019d6feSArnaldo Carvalho de Melo * the rtt is high, or nobody planned to ack (i.e. synflood).
1125a019d6feSArnaldo Carvalho de Melo * When the server is somewhat loaded, the queue is populated with old
1126a019d6feSArnaldo Carvalho de Melo * open requests, reducing the effective size of the queue.
1127a019d6feSArnaldo Carvalho de Melo * When the server is heavily loaded, the effective queue size drops
1128a019d6feSArnaldo Carvalho de Melo * to zero after several minutes of work. This is not a synflood,
1129a019d6feSArnaldo Carvalho de Melo * it is normal operation. The solution is to prune entries that
1130a019d6feSArnaldo Carvalho de Melo * are too old, overriding the normal timeout, when the
1131a019d6feSArnaldo Carvalho de Melo * situation becomes dangerous.
1132a019d6feSArnaldo Carvalho de Melo *
1133a019d6feSArnaldo Carvalho de Melo * Essentially, we reserve half of the room for young
1134a019d6feSArnaldo Carvalho de Melo * embryos; and abort old ones without pity, if old
1135a019d6feSArnaldo Carvalho de Melo * ones are about to clog our table.
1136a019d6feSArnaldo Carvalho de Melo */
1137c905dee6SKuniyuki Iwashima queue = &icsk->icsk_accept_queue;
1138aac065c5SEric Dumazet qlen = reqsk_queue_len(queue);
1139099ecf59SEric Dumazet if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
1140aac065c5SEric Dumazet int young = reqsk_queue_len_young(queue) << 1;
1141a019d6feSArnaldo Carvalho de Melo
1142a594920fSKuniyuki Iwashima while (max_syn_ack_retries > 2) {
11432b41fab7SEric Dumazet if (qlen < young)
1144a019d6feSArnaldo Carvalho de Melo break;
1145a594920fSKuniyuki Iwashima max_syn_ack_retries--;
1146a019d6feSArnaldo Carvalho de Melo young <<= 1;
1147a019d6feSArnaldo Carvalho de Melo }
1148a019d6feSArnaldo Carvalho de Melo }
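/* Worked example of the thinning loop above, with hypothetical numbers:
 * sk_max_ack_backlog = 8, qlen = 10 and one young req, so young = 1 << 1 = 2
 * and the queue counts as loaded (10 * 2 > 8).  Starting from the default of
 * 5 SYN+ACK retries:
 *
 *	10 >= 2  -> max_syn_ack_retries = 4, young = 4
 *	10 >= 4  -> max_syn_ack_retries = 3, young = 8
 *	10 >= 8  -> max_syn_ack_retries = 2, young = 16
 *
 * so old entries are allowed only 2 retries instead of 5.
 */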
1149a594920fSKuniyuki Iwashima syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept),
11500c3d79bcSJulian Anastasov &expire, &resend);
115142cb80a2SEric Dumazet req->rsk_ops->syn_ack_timeout(req);
11520c3d79bcSJulian Anastasov if (!expire &&
11530c3d79bcSJulian Anastasov (!resend ||
1154fa76ce73SEric Dumazet !inet_rtx_syn_ack(sk_listener, req) ||
11550c3d79bcSJulian Anastasov inet_rsk(req)->acked)) {
1156e6c022a4SEric Dumazet if (req->num_timeout++ == 0)
1157aac065c5SEric Dumazet atomic_dec(&queue->young);
11585903123fSAkhmat Karakotov mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX));
1159c905dee6SKuniyuki Iwashima
1160c905dee6SKuniyuki Iwashima if (!nreq)
1161c905dee6SKuniyuki Iwashima return;
1162c905dee6SKuniyuki Iwashima
1163c905dee6SKuniyuki Iwashima if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
1164c905dee6SKuniyuki Iwashima /* unwind the failed migration; the drop skips timer_delete_sync() as we run in timer context */
1165*e8c526f2SKuniyuki Iwashima __inet_csk_reqsk_queue_drop(sk_listener, nreq, true);
116655d444b3SKuniyuki Iwashima goto no_ownership;
1167c905dee6SKuniyuki Iwashima }
1168c905dee6SKuniyuki Iwashima
116955d444b3SKuniyuki Iwashima __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
1170c905dee6SKuniyuki Iwashima reqsk_migrate_reset(oreq);
1171c905dee6SKuniyuki Iwashima reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
1172c905dee6SKuniyuki Iwashima reqsk_put(oreq);
1173c905dee6SKuniyuki Iwashima
1174c905dee6SKuniyuki Iwashima reqsk_put(nreq);
1175fa76ce73SEric Dumazet return;
1176a019d6feSArnaldo Carvalho de Melo }
1177c905dee6SKuniyuki Iwashima
1178c905dee6SKuniyuki Iwashima /* Even if we can clone the req, we may not need to retransmit any more
1179c905dee6SKuniyuki Iwashima * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
1180c905dee6SKuniyuki Iwashima * CPU may win the "own_req" race so that inet_ehash_insert() fails.
1181c905dee6SKuniyuki Iwashima */
1182c905dee6SKuniyuki Iwashima if (nreq) {
118355d444b3SKuniyuki Iwashima __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
118455d444b3SKuniyuki Iwashima no_ownership:
1185c905dee6SKuniyuki Iwashima reqsk_migrate_reset(nreq);
1186c905dee6SKuniyuki Iwashima reqsk_queue_removed(queue, nreq);
1187c905dee6SKuniyuki Iwashima __reqsk_free(nreq);
1188c905dee6SKuniyuki Iwashima }
1189c905dee6SKuniyuki Iwashima
119055d444b3SKuniyuki Iwashima drop:
1191*e8c526f2SKuniyuki Iwashima __inet_csk_reqsk_queue_drop(sk_listener, oreq, true);
1192*e8c526f2SKuniyuki Iwashima reqsk_put(req);
1193a019d6feSArnaldo Carvalho de Melo }
1194fa76ce73SEric Dumazet
1195ff46e3b4Sluoxuanqiang static bool reqsk_queue_hash_req(struct request_sock *req,
1196fa76ce73SEric Dumazet unsigned long timeout)
1197fa76ce73SEric Dumazet {
1198ff46e3b4Sluoxuanqiang bool found_dup_sk = false;
1199ff46e3b4Sluoxuanqiang
1200ff46e3b4Sluoxuanqiang if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk))
1201ff46e3b4Sluoxuanqiang return false;
1202ff46e3b4Sluoxuanqiang
1203ff46e3b4Sluoxuanqiang /* The timer needs to be set up after a successful insertion. */
120459f379f9SKees Cook timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
1205f3438bc7SThomas Gleixner mod_timer(&req->rsk_timer, jiffies + timeout);
120629c68526SEric Dumazet
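/* A note on the 2 + 1 below: presumably one reference is for the ehash
 * entry, one for the armed timer, and one for the caller, which drops it
 * with reqsk_put() once the SYN+ACK has been sent.
 */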
1207fa76ce73SEric Dumazet /* before letting lookups find us, make sure all req fields
1208fa76ce73SEric Dumazet * are committed to memory and refcnt initialized.
1209fa76ce73SEric Dumazet */
1210fa76ce73SEric Dumazet smp_wmb();
121141c6d650SReshetova, Elena refcount_set(&req->rsk_refcnt, 2 + 1);
1212ff46e3b4Sluoxuanqiang return true;
1213a019d6feSArnaldo Carvalho de Melo }
1214079096f1SEric Dumazet
1215ff46e3b4Sluoxuanqiang bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
1216079096f1SEric Dumazet unsigned long timeout)
1217079096f1SEric Dumazet {
1218ff46e3b4Sluoxuanqiang if (!reqsk_queue_hash_req(req, timeout))
1219ff46e3b4Sluoxuanqiang return false;
1220ff46e3b4Sluoxuanqiang
1221079096f1SEric Dumazet inet_csk_reqsk_queue_added(sk);
1222ff46e3b4Sluoxuanqiang return true;
1223079096f1SEric Dumazet }
1224079096f1SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
1225a019d6feSArnaldo Carvalho de Melo
122613230593SMat Martineau static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
122713230593SMat Martineau const gfp_t priority)
122813230593SMat Martineau {
122913230593SMat Martineau struct inet_connection_sock *icsk = inet_csk(newsk);
123013230593SMat Martineau
123113230593SMat Martineau if (!icsk->icsk_ulp_ops)
123213230593SMat Martineau return;
123313230593SMat Martineau
123413230593SMat Martineau icsk->icsk_ulp_ops->clone(req, newsk, priority);
123513230593SMat Martineau }
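/* Only ULPs that provide a clone() handler can reach this point on a child
 * socket: inet_ulp_can_listen() below rejects listen() otherwise, so the
 * indirect call here should be safe for every child created from a listener.
 */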
123613230593SMat Martineau
1237e56c57d0SEric Dumazet /**
1238e56c57d0SEric Dumazet * inet_csk_clone_lock - clone an inet socket, and lock its clone
1239e56c57d0SEric Dumazet * @sk: the socket to clone
1240e56c57d0SEric Dumazet * @req: request_sock
1241e56c57d0SEric Dumazet * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1242e56c57d0SEric Dumazet *
1243e56c57d0SEric Dumazet * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1244e56c57d0SEric Dumazet */
1245e56c57d0SEric Dumazet struct sock *inet_csk_clone_lock(const struct sock *sk,
1246e56c57d0SEric Dumazet const struct request_sock *req,
1247dd0fc66fSAl Viro const gfp_t priority)
12489f1d2604SArnaldo Carvalho de Melo {
1249e56c57d0SEric Dumazet struct sock *newsk = sk_clone_lock(sk, priority);
12509f1d2604SArnaldo Carvalho de Melo
125100db4124SIan Morris if (newsk) {
12529f1d2604SArnaldo Carvalho de Melo struct inet_connection_sock *newicsk = inet_csk(newsk);
12539f1d2604SArnaldo Carvalho de Melo
1254563e0bb0SYafang Shao inet_sk_set_state(newsk, TCP_SYN_RECV);
12559f1d2604SArnaldo Carvalho de Melo newicsk->icsk_bind_hash = NULL;
125628044fc1SJoanne Koong newicsk->icsk_bind2_hash = NULL;
12579f1d2604SArnaldo Carvalho de Melo
1258634fb979SEric Dumazet inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
1259b44084c2SEric Dumazet inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
1260b44084c2SEric Dumazet inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
12619f1d2604SArnaldo Carvalho de Melo
126285017869SEric Dumazet /* listeners have SOCK_RCU_FREE, not the children */
126385017869SEric Dumazet sock_reset_flag(newsk, SOCK_RCU_FREE);
126485017869SEric Dumazet
1265657831ffSEric Dumazet inet_sk(newsk)->mc_list = NULL;
1266657831ffSEric Dumazet
126784f39b08SLorenzo Colitti newsk->sk_mark = inet_rsk(req)->ir_mark;
126833cf7c90SEric Dumazet atomic64_set(&newsk->sk_cookie,
126933cf7c90SEric Dumazet atomic64_read(&inet_rsk(req)->ir_cookie));
127084f39b08SLorenzo Colitti
12719f1d2604SArnaldo Carvalho de Melo newicsk->icsk_retransmits = 0;
12729f1d2604SArnaldo Carvalho de Melo newicsk->icsk_backoff = 0;
12736687e988SArnaldo Carvalho de Melo newicsk->icsk_probes_out = 0;
12749d9b1ee0SEnke Chen newicsk->icsk_probes_tstamp = 0;
12759f1d2604SArnaldo Carvalho de Melo
12769f1d2604SArnaldo Carvalho de Melo /* Deinitialize accept_queue to trap illegal accesses. */
12779f1d2604SArnaldo Carvalho de Melo memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
12784237c75cSVenkat Yekkirala
127913230593SMat Martineau inet_clone_ulp(req, newsk, priority);
128013230593SMat Martineau
12814237c75cSVenkat Yekkirala security_inet_csk_clone(newsk, req);
12829f1d2604SArnaldo Carvalho de Melo }
12839f1d2604SArnaldo Carvalho de Melo return newsk;
12849f1d2604SArnaldo Carvalho de Melo }
1285e56c57d0SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
1286a019d6feSArnaldo Carvalho de Melo
1287a019d6feSArnaldo Carvalho de Melo /*
1288a019d6feSArnaldo Carvalho de Melo * At this point, there should be no process reference to this
1289a019d6feSArnaldo Carvalho de Melo * socket, and thus no user references at all. Therefore we
1290a019d6feSArnaldo Carvalho de Melo * can assume the socket waitqueue is inactive and nobody will
1291a019d6feSArnaldo Carvalho de Melo * try to jump onto it.
1292a019d6feSArnaldo Carvalho de Melo */
1293a019d6feSArnaldo Carvalho de Melo void inet_csk_destroy_sock(struct sock *sk)
1294a019d6feSArnaldo Carvalho de Melo {
1295547b792cSIlpo Järvinen WARN_ON(sk->sk_state != TCP_CLOSE);
1296547b792cSIlpo Järvinen WARN_ON(!sock_flag(sk, SOCK_DEAD));
1297a019d6feSArnaldo Carvalho de Melo
1298a019d6feSArnaldo Carvalho de Melo /* It cannot be in the hash table! */
1299547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk));
1300a019d6feSArnaldo Carvalho de Melo
1301c720c7e8SEric Dumazet /* If inet_sk(sk)->inet_num is not 0, the socket must be bound */
1302c720c7e8SEric Dumazet WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);
1303a019d6feSArnaldo Carvalho de Melo
1304a019d6feSArnaldo Carvalho de Melo sk->sk_prot->destroy(sk);
1305a019d6feSArnaldo Carvalho de Melo
1306a019d6feSArnaldo Carvalho de Melo sk_stream_kill_queues(sk);
1307a019d6feSArnaldo Carvalho de Melo
1308a019d6feSArnaldo Carvalho de Melo xfrm_sk_free_policy(sk);
1309a019d6feSArnaldo Carvalho de Melo
131019757cebSEric Dumazet this_cpu_dec(*sk->sk_prot->orphan_count);
1311c2a2efbbSEric Dumazet
1312a019d6feSArnaldo Carvalho de Melo sock_put(sk);
1313a019d6feSArnaldo Carvalho de Melo }
1314a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_destroy_sock);
1315a019d6feSArnaldo Carvalho de Melo
1316e337e24dSChristoph Paasch /* This function makes it possible to force the closure of a socket after
1317e337e24dSChristoph Paasch * the call to tcp/dccp_create_openreq_child().
1318e337e24dSChristoph Paasch */
1319e337e24dSChristoph Paasch void inet_csk_prepare_forced_close(struct sock *sk)
1320c10cb5fcSChristoph Paasch __releases(&sk->sk_lock.slock)
1321e337e24dSChristoph Paasch {
1322e337e24dSChristoph Paasch /* sk_clone_lock locked the socket and set refcnt to 2 */
1323e337e24dSChristoph Paasch bh_unlock_sock(sk);
1324e337e24dSChristoph Paasch sock_put(sk);
13252f8a397dSPaolo Abeni inet_csk_prepare_for_destroy_sock(sk);
13266761893eSPaolo Abeni inet_sk(sk)->inet_num = 0;
1327e337e24dSChristoph Paasch }
1328e337e24dSChristoph Paasch EXPORT_SYMBOL(inet_csk_prepare_forced_close);
1329e337e24dSChristoph Paasch
13302c02d41dSPaolo Abeni static int inet_ulp_can_listen(const struct sock *sk)
13312c02d41dSPaolo Abeni {
13322c02d41dSPaolo Abeni const struct inet_connection_sock *icsk = inet_csk(sk);
13332c02d41dSPaolo Abeni
13342c02d41dSPaolo Abeni if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
13352c02d41dSPaolo Abeni return -EINVAL;
13362c02d41dSPaolo Abeni
13372c02d41dSPaolo Abeni return 0;
13382c02d41dSPaolo Abeni }
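/* Likely rationale for the check above: sk_clone_lock() copies the
 * listener's ULP context pointer into every child, and only a ULP clone()
 * handler can fix that up; without one, listener and children would share
 * (and eventually double-free) the same context, so listen() is refused
 * up front.
 */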
13392c02d41dSPaolo Abeni
1340e7049395SKuniyuki Iwashima int inet_csk_listen_start(struct sock *sk)
1341a019d6feSArnaldo Carvalho de Melo {
1342a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk);
134310cbc8f1SEric Dumazet struct inet_sock *inet = inet_sk(sk);
13447a7160edSKuniyuki Iwashima int err;
1345a019d6feSArnaldo Carvalho de Melo
13462c02d41dSPaolo Abeni err = inet_ulp_can_listen(sk);
13472c02d41dSPaolo Abeni if (unlikely(err))
13482c02d41dSPaolo Abeni return err;
13492c02d41dSPaolo Abeni
1350ef547f2aSEric Dumazet reqsk_queue_alloc(&icsk->icsk_accept_queue);
1351a019d6feSArnaldo Carvalho de Melo
1352a019d6feSArnaldo Carvalho de Melo sk->sk_ack_backlog = 0;
1353a019d6feSArnaldo Carvalho de Melo inet_csk_delack_init(sk);
1354a019d6feSArnaldo Carvalho de Melo
1355a019d6feSArnaldo Carvalho de Melo /* There is a race window here: we announce ourselves as listening,
1356a019d6feSArnaldo Carvalho de Melo * but this transition has not yet been validated by get_port().
1357a019d6feSArnaldo Carvalho de Melo * That is OK, because this socket enters the hash table only
1358a019d6feSArnaldo Carvalho de Melo * after validation is complete.
1359a019d6feSArnaldo Carvalho de Melo */
1360563e0bb0SYafang Shao inet_sk_state_store(sk, TCP_LISTEN);
13617a7160edSKuniyuki Iwashima err = sk->sk_prot->get_port(sk, inet->inet_num);
13627a7160edSKuniyuki Iwashima if (!err) {
1363c720c7e8SEric Dumazet inet->inet_sport = htons(inet->inet_num);
1364a019d6feSArnaldo Carvalho de Melo
1365a019d6feSArnaldo Carvalho de Melo sk_dst_reset(sk);
1366086c653fSCraig Gallek err = sk->sk_prot->hash(sk);
1367a019d6feSArnaldo Carvalho de Melo
1368086c653fSCraig Gallek if (likely(!err))
1369a019d6feSArnaldo Carvalho de Melo return 0;
1370a019d6feSArnaldo Carvalho de Melo }
1371a019d6feSArnaldo Carvalho de Melo
1372563e0bb0SYafang Shao inet_sk_set_state(sk, TCP_CLOSE);
1373086c653fSCraig Gallek return err;
1374a019d6feSArnaldo Carvalho de Melo }
1375a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_start);
1376a019d6feSArnaldo Carvalho de Melo
1377ebb516afSEric Dumazet static void inet_child_forget(struct sock *sk, struct request_sock *req,
1378ebb516afSEric Dumazet struct sock *child)
1379ebb516afSEric Dumazet {
1380ebb516afSEric Dumazet sk->sk_prot->disconnect(child, O_NONBLOCK);
1381ebb516afSEric Dumazet
1382ebb516afSEric Dumazet sock_orphan(child);
1383ebb516afSEric Dumazet
138419757cebSEric Dumazet this_cpu_inc(*sk->sk_prot->orphan_count);
1385ebb516afSEric Dumazet
1386ebb516afSEric Dumazet if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
1387d983ea6fSEric Dumazet BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
1388ebb516afSEric Dumazet BUG_ON(sk != req->rsk_listener);
1389ebb516afSEric Dumazet
1390ebb516afSEric Dumazet /* Paranoid, to prevent a race condition if
1391ebb516afSEric Dumazet * an inbound pkt destined for the child is
1392ebb516afSEric Dumazet * blocked by the sock lock in tcp_v4_rcv().
1393ebb516afSEric Dumazet * Also to satisfy an assertion in
1394ebb516afSEric Dumazet * tcp_v4_destroy_sock().
1395ebb516afSEric Dumazet */
1396d983ea6fSEric Dumazet RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
1397ebb516afSEric Dumazet }
1398ebb516afSEric Dumazet inet_csk_destroy_sock(child);
1399ebb516afSEric Dumazet }
1400ebb516afSEric Dumazet
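/* Queue a fully set-up child on sk's accept queue.  Returns the child on
 * success.  If sk stopped listening in the meantime, the child is handed to
 * inet_child_forget() and NULL is returned; the caller still holds its own
 * reference (and the socket lock) on the child and must release them.
 */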
14017716682cSEric Dumazet struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
14027716682cSEric Dumazet struct request_sock *req,
1403ebb516afSEric Dumazet struct sock *child)
1404ebb516afSEric Dumazet {
1405ebb516afSEric Dumazet struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
1406ebb516afSEric Dumazet
1407ebb516afSEric Dumazet spin_lock(&queue->rskq_lock);
1408ebb516afSEric Dumazet if (unlikely(sk->sk_state != TCP_LISTEN)) {
1409ebb516afSEric Dumazet inet_child_forget(sk, req, child);
14107716682cSEric Dumazet child = NULL;
1411ebb516afSEric Dumazet } else {
1412ebb516afSEric Dumazet req->sk = child;
1413ebb516afSEric Dumazet req->dl_next = NULL;
1414ebb516afSEric Dumazet if (queue->rskq_accept_head == NULL)
141560b173caSEric Dumazet WRITE_ONCE(queue->rskq_accept_head, req);
1416ebb516afSEric Dumazet else
1417ebb516afSEric Dumazet queue->rskq_accept_tail->dl_next = req;
1418ebb516afSEric Dumazet queue->rskq_accept_tail = req;
1419ebb516afSEric Dumazet sk_acceptq_added(sk);
1420ebb516afSEric Dumazet }
1421ebb516afSEric Dumazet spin_unlock(&queue->rskq_lock);
14227716682cSEric Dumazet return child;
1423ebb516afSEric Dumazet }
1424ebb516afSEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
1425ebb516afSEric Dumazet
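/* Last step of the handshake dance: own_req says whether this CPU won the
 * race to create the child.  Roughly three outcomes follow: the child is
 * queued on the original listener, the req is cloned and the child queued
 * on another listener chosen by reuseport migration, or the race was lost
 * and the child is released again.  Only the first two return a non-NULL
 * child.
 */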
14265e0724d0SEric Dumazet struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
14275e0724d0SEric Dumazet struct request_sock *req, bool own_req)
14285e0724d0SEric Dumazet {
14295e0724d0SEric Dumazet if (own_req) {
1430d4f2c86bSKuniyuki Iwashima inet_csk_reqsk_queue_drop(req->rsk_listener, req);
1431d4f2c86bSKuniyuki Iwashima reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
1432d4f2c86bSKuniyuki Iwashima
1433d4f2c86bSKuniyuki Iwashima if (sk != req->rsk_listener) {
1434d4f2c86bSKuniyuki Iwashima /* another listening sk has been selected,
1435d4f2c86bSKuniyuki Iwashima * migrate the req to it.
1436d4f2c86bSKuniyuki Iwashima */
1437d4f2c86bSKuniyuki Iwashima struct request_sock *nreq;
1438d4f2c86bSKuniyuki Iwashima
1439d4f2c86bSKuniyuki Iwashima /* hold a refcnt for the nreq->rsk_listener
1440d4f2c86bSKuniyuki Iwashima * which is assigned in inet_reqsk_clone()
1441d4f2c86bSKuniyuki Iwashima */
1442d4f2c86bSKuniyuki Iwashima sock_hold(sk);
1443d4f2c86bSKuniyuki Iwashima nreq = inet_reqsk_clone(req, sk);
1444d4f2c86bSKuniyuki Iwashima if (!nreq) {
1445d4f2c86bSKuniyuki Iwashima inet_child_forget(sk, req, child);
1446d4f2c86bSKuniyuki Iwashima goto child_put;
1447d4f2c86bSKuniyuki Iwashima }
1448d4f2c86bSKuniyuki Iwashima
1449d4f2c86bSKuniyuki Iwashima refcount_set(&nreq->rsk_refcnt, 1);
1450d4f2c86bSKuniyuki Iwashima if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
145155d444b3SKuniyuki Iwashima __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS);
1452d4f2c86bSKuniyuki Iwashima reqsk_migrate_reset(req);
1453d4f2c86bSKuniyuki Iwashima reqsk_put(req);
14545e0724d0SEric Dumazet return child;
14555e0724d0SEric Dumazet }
1456d4f2c86bSKuniyuki Iwashima
145755d444b3SKuniyuki Iwashima __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
1458d4f2c86bSKuniyuki Iwashima reqsk_migrate_reset(nreq);
1459d4f2c86bSKuniyuki Iwashima __reqsk_free(nreq);
1460d4f2c86bSKuniyuki Iwashima } else if (inet_csk_reqsk_queue_add(sk, req, child)) {
1461d4f2c86bSKuniyuki Iwashima return child;
1462d4f2c86bSKuniyuki Iwashima }
1463d4f2c86bSKuniyuki Iwashima }
14645e0724d0SEric Dumazet /* Too bad, another child took ownership of the request, undo. */
1465d4f2c86bSKuniyuki Iwashima child_put:
14665e0724d0SEric Dumazet bh_unlock_sock(child);
14675e0724d0SEric Dumazet sock_put(child);
14685e0724d0SEric Dumazet return NULL;
14695e0724d0SEric Dumazet }
14705e0724d0SEric Dumazet EXPORT_SYMBOL(inet_csk_complete_hashdance);
14715e0724d0SEric Dumazet
1472a019d6feSArnaldo Carvalho de Melo /*
1473a019d6feSArnaldo Carvalho de Melo * This routine closes sockets which have been at least partially
1474a019d6feSArnaldo Carvalho de Melo * opened, but not yet accepted.
1475a019d6feSArnaldo Carvalho de Melo */
1476a019d6feSArnaldo Carvalho de Melo void inet_csk_listen_stop(struct sock *sk)
1477a019d6feSArnaldo Carvalho de Melo {
1478a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk);
14798336886fSJerry Chu struct request_sock_queue *queue = &icsk->icsk_accept_queue;
1480fff1f300SEric Dumazet struct request_sock *next, *req;
1481a019d6feSArnaldo Carvalho de Melo
1482a019d6feSArnaldo Carvalho de Melo /* Following the specs, it would be better either to send a FIN
1483a019d6feSArnaldo Carvalho de Melo * (and enter FIN-WAIT-1, which is a normal close)
1484a019d6feSArnaldo Carvalho de Melo * or to send an active reset (abort).
1485a019d6feSArnaldo Carvalho de Melo * Certainly, this is pretty dangerous during a synflood, but it is
1486a019d6feSArnaldo Carvalho de Melo * a bad justification for our negligence 8)
1487a019d6feSArnaldo Carvalho de Melo * To be honest, we are not able to implement either
1488a019d6feSArnaldo Carvalho de Melo * of the variants now. --ANK
1489a019d6feSArnaldo Carvalho de Melo */
1490fff1f300SEric Dumazet while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
149154b92e84SKuniyuki Iwashima struct sock *child = req->sk, *nsk;
149254b92e84SKuniyuki Iwashima struct request_sock *nreq;
1493a019d6feSArnaldo Carvalho de Melo
1494a019d6feSArnaldo Carvalho de Melo local_bh_disable();
1495a019d6feSArnaldo Carvalho de Melo bh_lock_sock(child);
1496547b792cSIlpo Järvinen WARN_ON(sock_owned_by_user(child));
1497a019d6feSArnaldo Carvalho de Melo sock_hold(child);
1498a019d6feSArnaldo Carvalho de Melo
149954b92e84SKuniyuki Iwashima nsk = reuseport_migrate_sock(sk, child, NULL);
150054b92e84SKuniyuki Iwashima if (nsk) {
150154b92e84SKuniyuki Iwashima nreq = inet_reqsk_clone(req, nsk);
150254b92e84SKuniyuki Iwashima if (nreq) {
150354b92e84SKuniyuki Iwashima refcount_set(&nreq->rsk_refcnt, 1);
150454b92e84SKuniyuki Iwashima
150554b92e84SKuniyuki Iwashima if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
150655d444b3SKuniyuki Iwashima __NET_INC_STATS(sock_net(nsk),
150755d444b3SKuniyuki Iwashima LINUX_MIB_TCPMIGRATEREQSUCCESS);
150854b92e84SKuniyuki Iwashima reqsk_migrate_reset(req);
150954b92e84SKuniyuki Iwashima } else {
151055d444b3SKuniyuki Iwashima __NET_INC_STATS(sock_net(nsk),
151155d444b3SKuniyuki Iwashima LINUX_MIB_TCPMIGRATEREQFAILURE);
151254b92e84SKuniyuki Iwashima reqsk_migrate_reset(nreq);
151354b92e84SKuniyuki Iwashima __reqsk_free(nreq);
151454b92e84SKuniyuki Iwashima }
151554b92e84SKuniyuki Iwashima
151654b92e84SKuniyuki Iwashima /* inet_csk_reqsk_queue_add() has already
151754b92e84SKuniyuki Iwashima * called inet_child_forget() in the failure case.
151854b92e84SKuniyuki Iwashima */
151954b92e84SKuniyuki Iwashima goto skip_child_forget;
152054b92e84SKuniyuki Iwashima }
152154b92e84SKuniyuki Iwashima }
152254b92e84SKuniyuki Iwashima
1523ebb516afSEric Dumazet inet_child_forget(sk, req, child);
152454b92e84SKuniyuki Iwashima skip_child_forget:
1525da8ab578SEric Dumazet reqsk_put(req);
1526a019d6feSArnaldo Carvalho de Melo bh_unlock_sock(child);
1527a019d6feSArnaldo Carvalho de Melo local_bh_enable();
1528a019d6feSArnaldo Carvalho de Melo sock_put(child);
1529a019d6feSArnaldo Carvalho de Melo
153092d6f176SEric Dumazet cond_resched();
1531a019d6feSArnaldo Carvalho de Melo }
15320536fcc0SEric Dumazet if (queue->fastopenq.rskq_rst_head) {
15338336886fSJerry Chu /* Free all the reqs queued in rskq_rst_head. */
15340536fcc0SEric Dumazet spin_lock_bh(&queue->fastopenq.lock);
1535fff1f300SEric Dumazet req = queue->fastopenq.rskq_rst_head;
15360536fcc0SEric Dumazet queue->fastopenq.rskq_rst_head = NULL;
15370536fcc0SEric Dumazet spin_unlock_bh(&queue->fastopenq.lock);
1538fff1f300SEric Dumazet while (req != NULL) {
1539fff1f300SEric Dumazet next = req->dl_next;
154013854e5aSEric Dumazet reqsk_put(req);
1541fff1f300SEric Dumazet req = next;
15428336886fSJerry Chu }
15438336886fSJerry Chu }
1544ebb516afSEric Dumazet WARN_ON_ONCE(sk->sk_ack_backlog);
1545a019d6feSArnaldo Carvalho de Melo }
1546a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
1547af05dc93SArnaldo Carvalho de Melo
1548af05dc93SArnaldo Carvalho de Melo void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
1549af05dc93SArnaldo Carvalho de Melo {
1550af05dc93SArnaldo Carvalho de Melo struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
1551af05dc93SArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk);
1552af05dc93SArnaldo Carvalho de Melo
1553af05dc93SArnaldo Carvalho de Melo sin->sin_family = AF_INET;
1554c720c7e8SEric Dumazet sin->sin_addr.s_addr = inet->inet_daddr;
1555c720c7e8SEric Dumazet sin->sin_port = inet->inet_dport;
1556af05dc93SArnaldo Carvalho de Melo }
1557af05dc93SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
1558c4d93909SArnaldo Carvalho de Melo
155980d0a69fSDavid S. Miller static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
156080d0a69fSDavid S. Miller {
15615abf7f7eSEric Dumazet const struct inet_sock *inet = inet_sk(sk);
15625abf7f7eSEric Dumazet const struct ip_options_rcu *inet_opt;
156380d0a69fSDavid S. Miller __be32 daddr = inet->inet_daddr;
156480d0a69fSDavid S. Miller struct flowi4 *fl4;
156580d0a69fSDavid S. Miller struct rtable *rt;
156680d0a69fSDavid S. Miller
156780d0a69fSDavid S. Miller rcu_read_lock();
156880d0a69fSDavid S. Miller inet_opt = rcu_dereference(inet->inet_opt);
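/* With an IP source-route option, the first hop (opt.faddr) is what we
 * must route to, rather than the final destination.
 */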
156980d0a69fSDavid S. Miller if (inet_opt && inet_opt->opt.srr)
157080d0a69fSDavid S. Miller daddr = inet_opt->opt.faddr;
157180d0a69fSDavid S. Miller fl4 = &fl->u.ip4;
157280d0a69fSDavid S. Miller rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
157380d0a69fSDavid S. Miller inet->inet_saddr, inet->inet_dport,
157480d0a69fSDavid S. Miller inet->inet_sport, sk->sk_protocol,
1575a3522a2eSGuillaume Nault ip_sock_rt_tos(sk), sk->sk_bound_dev_if);
157680d0a69fSDavid S. Miller if (IS_ERR(rt))
157780d0a69fSDavid S. Miller rt = NULL;
157880d0a69fSDavid S. Miller if (rt)
157980d0a69fSDavid S. Miller sk_setup_caps(sk, &rt->dst);
158080d0a69fSDavid S. Miller rcu_read_unlock();
158180d0a69fSDavid S. Miller
158280d0a69fSDavid S. Miller return &rt->dst;
158380d0a69fSDavid S. Miller }
158480d0a69fSDavid S. Miller
158580d0a69fSDavid S. Miller struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
158680d0a69fSDavid S. Miller {
158780d0a69fSDavid S. Miller struct dst_entry *dst = __sk_dst_check(sk, 0);
158880d0a69fSDavid S. Miller struct inet_sock *inet = inet_sk(sk);
158980d0a69fSDavid S. Miller
159080d0a69fSDavid S. Miller if (!dst) {
159180d0a69fSDavid S. Miller dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
159280d0a69fSDavid S. Miller if (!dst)
159380d0a69fSDavid S. Miller goto out;
159480d0a69fSDavid S. Miller }
1595bd085ef6SHangbin Liu dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
159680d0a69fSDavid S. Miller
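/* update_pmtu() may have invalidated the cached route, so re-check it and,
 * if necessary, rebuild a fresh one before returning.
 */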
159780d0a69fSDavid S. Miller dst = __sk_dst_check(sk, 0);
159880d0a69fSDavid S. Miller if (!dst)
159980d0a69fSDavid S. Miller dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
160080d0a69fSDavid S. Miller out:
160180d0a69fSDavid S. Miller return dst;
160280d0a69fSDavid S. Miller }
160380d0a69fSDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);
1604