12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 23f421baaSArnaldo Carvalho de Melo /* 33f421baaSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 43f421baaSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 53f421baaSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 63f421baaSArnaldo Carvalho de Melo * 73f421baaSArnaldo Carvalho de Melo * Support for INET connection oriented protocols. 83f421baaSArnaldo Carvalho de Melo * 93f421baaSArnaldo Carvalho de Melo * Authors: See the TCP sources 103f421baaSArnaldo Carvalho de Melo */ 113f421baaSArnaldo Carvalho de Melo 123f421baaSArnaldo Carvalho de Melo #include <linux/module.h> 133f421baaSArnaldo Carvalho de Melo #include <linux/jhash.h> 143f421baaSArnaldo Carvalho de Melo 153f421baaSArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 163f421baaSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 173f421baaSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 183f421baaSArnaldo Carvalho de Melo #include <net/ip.h> 193f421baaSArnaldo Carvalho de Melo #include <net/route.h> 203f421baaSArnaldo Carvalho de Melo #include <net/tcp_states.h> 21a019d6feSArnaldo Carvalho de Melo #include <net/xfrm.h> 22fa76ce73SEric Dumazet #include <net/tcp.h> 23c125e80bSCraig Gallek #include <net/sock_reuseport.h> 249691724eSstephen hemminger #include <net/addrconf.h> 253f421baaSArnaldo Carvalho de Melo 26fe38d2a1SJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 2788d7fcfaSMartin KaFai Lau /* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses 2888d7fcfaSMartin KaFai Lau * if IPv6 only, and any IPv4 addresses 2988d7fcfaSMartin KaFai Lau * if not IPv6 only 3088d7fcfaSMartin KaFai Lau * match_sk*_wildcard == false: addresses must be exactly the same, i.e. 31fe38d2a1SJosef Bacik * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, 32fe38d2a1SJosef Bacik * and 0.0.0.0 equals to 0.0.0.0 only 33fe38d2a1SJosef Bacik */ 347016e062SJoe Perches static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, 35637bc8bbSJosef Bacik const struct in6_addr *sk2_rcv_saddr6, 36637bc8bbSJosef Bacik __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 37637bc8bbSJosef Bacik bool sk1_ipv6only, bool sk2_ipv6only, 3888d7fcfaSMartin KaFai Lau bool match_sk1_wildcard, 3988d7fcfaSMartin KaFai Lau bool match_sk2_wildcard) 40fe38d2a1SJosef Bacik { 41637bc8bbSJosef Bacik int addr_type = ipv6_addr_type(sk1_rcv_saddr6); 42fe38d2a1SJosef Bacik int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; 43fe38d2a1SJosef Bacik 44fe38d2a1SJosef Bacik /* if both are mapped, treat as IPv4 */ 45fe38d2a1SJosef Bacik if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { 46fe38d2a1SJosef Bacik if (!sk2_ipv6only) { 47637bc8bbSJosef Bacik if (sk1_rcv_saddr == sk2_rcv_saddr) 487016e062SJoe Perches return true; 4988d7fcfaSMartin KaFai Lau return (match_sk1_wildcard && !sk1_rcv_saddr) || 5088d7fcfaSMartin KaFai Lau (match_sk2_wildcard && !sk2_rcv_saddr); 51fe38d2a1SJosef Bacik } 527016e062SJoe Perches return false; 53fe38d2a1SJosef Bacik } 54fe38d2a1SJosef Bacik 55fe38d2a1SJosef Bacik if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) 567016e062SJoe Perches return true; 57fe38d2a1SJosef Bacik 5888d7fcfaSMartin KaFai Lau if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard && 59fe38d2a1SJosef Bacik !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) 607016e062SJoe Perches return true; 61fe38d2a1SJosef Bacik 6288d7fcfaSMartin KaFai Lau if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard && 63637bc8bbSJosef Bacik !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) 647016e062SJoe Perches return true; 65fe38d2a1SJosef Bacik 66fe38d2a1SJosef Bacik if (sk2_rcv_saddr6 && 67637bc8bbSJosef Bacik ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) 687016e062SJoe Perches return true; 69fe38d2a1SJosef Bacik 707016e062SJoe Perches return false; 71fe38d2a1SJosef Bacik } 72fe38d2a1SJosef Bacik #endif 73fe38d2a1SJosef Bacik 7488d7fcfaSMartin KaFai Lau /* match_sk*_wildcard == true: 0.0.0.0 equals to any IPv4 addresses 7588d7fcfaSMartin KaFai Lau * match_sk*_wildcard == false: addresses must be exactly the same, i.e. 76fe38d2a1SJosef Bacik * 0.0.0.0 only equals to 0.0.0.0 77fe38d2a1SJosef Bacik */ 787016e062SJoe Perches static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 7988d7fcfaSMartin KaFai Lau bool sk2_ipv6only, bool match_sk1_wildcard, 8088d7fcfaSMartin KaFai Lau bool match_sk2_wildcard) 81fe38d2a1SJosef Bacik { 82637bc8bbSJosef Bacik if (!sk2_ipv6only) { 83637bc8bbSJosef Bacik if (sk1_rcv_saddr == sk2_rcv_saddr) 847016e062SJoe Perches return true; 8588d7fcfaSMartin KaFai Lau return (match_sk1_wildcard && !sk1_rcv_saddr) || 8688d7fcfaSMartin KaFai Lau (match_sk2_wildcard && !sk2_rcv_saddr); 87fe38d2a1SJosef Bacik } 887016e062SJoe Perches return false; 89fe38d2a1SJosef Bacik } 90fe38d2a1SJosef Bacik 917016e062SJoe Perches bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, 92fe38d2a1SJosef Bacik bool match_wildcard) 93fe38d2a1SJosef Bacik { 94fe38d2a1SJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 95fe38d2a1SJosef Bacik if (sk->sk_family == AF_INET6) 96637bc8bbSJosef Bacik return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr, 97319554f2SJosef Bacik inet6_rcv_saddr(sk2), 98637bc8bbSJosef Bacik sk->sk_rcv_saddr, 99637bc8bbSJosef Bacik sk2->sk_rcv_saddr, 100637bc8bbSJosef Bacik ipv6_only_sock(sk), 101637bc8bbSJosef Bacik ipv6_only_sock(sk2), 10288d7fcfaSMartin KaFai Lau match_wildcard, 103637bc8bbSJosef Bacik match_wildcard); 104fe38d2a1SJosef Bacik #endif 105637bc8bbSJosef Bacik return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr, 10688d7fcfaSMartin KaFai Lau ipv6_only_sock(sk2), match_wildcard, 10788d7fcfaSMartin KaFai Lau match_wildcard); 108fe38d2a1SJosef Bacik } 109fe38d2a1SJosef Bacik EXPORT_SYMBOL(inet_rcv_saddr_equal); 110fe38d2a1SJosef Bacik 1112dbb9b9eSMartin KaFai Lau bool inet_rcv_saddr_any(const struct sock *sk) 1122dbb9b9eSMartin KaFai Lau { 1132dbb9b9eSMartin KaFai Lau #if 
IS_ENABLED(CONFIG_IPV6) 1142dbb9b9eSMartin KaFai Lau if (sk->sk_family == AF_INET6) 1152dbb9b9eSMartin KaFai Lau return ipv6_addr_any(&sk->sk_v6_rcv_saddr); 1162dbb9b9eSMartin KaFai Lau #endif 1172dbb9b9eSMartin KaFai Lau return !sk->sk_rcv_saddr; 1182dbb9b9eSMartin KaFai Lau } 1192dbb9b9eSMartin KaFai Lau 12091d0b78cSJakub Sitnicki void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) 12191d0b78cSJakub Sitnicki { 12291d0b78cSJakub Sitnicki const struct inet_sock *inet = inet_sk(sk); 12391d0b78cSJakub Sitnicki const struct net *net = sock_net(sk); 12491d0b78cSJakub Sitnicki int lo, hi, sk_lo, sk_hi; 125*d9f28735SDavid Laight u32 sk_range; 12691d0b78cSJakub Sitnicki 12791d0b78cSJakub Sitnicki inet_get_local_port_range(net, &lo, &hi); 12891d0b78cSJakub Sitnicki 129*d9f28735SDavid Laight sk_range = READ_ONCE(inet->local_port_range); 130*d9f28735SDavid Laight if (unlikely(sk_range)) { 131*d9f28735SDavid Laight sk_lo = sk_range & 0xffff; 132*d9f28735SDavid Laight sk_hi = sk_range >> 16; 13391d0b78cSJakub Sitnicki 134*d9f28735SDavid Laight if (lo <= sk_lo && sk_lo <= hi) 13591d0b78cSJakub Sitnicki lo = sk_lo; 136*d9f28735SDavid Laight if (lo <= sk_hi && sk_hi <= hi) 13791d0b78cSJakub Sitnicki hi = sk_hi; 138*d9f28735SDavid Laight } 13991d0b78cSJakub Sitnicki 14091d0b78cSJakub Sitnicki *low = lo; 14191d0b78cSJakub Sitnicki *high = hi; 14291d0b78cSJakub Sitnicki } 14391d0b78cSJakub Sitnicki EXPORT_SYMBOL(inet_sk_get_local_port_range); 14491d0b78cSJakub Sitnicki 14528044fc1SJoanne Koong static bool inet_use_bhash2_on_bind(const struct sock *sk) 14628044fc1SJoanne Koong { 14728044fc1SJoanne Koong #if IS_ENABLED(CONFIG_IPV6) 14828044fc1SJoanne Koong if (sk->sk_family == AF_INET6) { 14928044fc1SJoanne Koong int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); 15028044fc1SJoanne Koong 15128044fc1SJoanne Koong return addr_type != IPV6_ADDR_ANY && 15228044fc1SJoanne Koong addr_type != IPV6_ADDR_MAPPED; 15328044fc1SJoanne Koong } 15428044fc1SJoanne Koong #endif 15528044fc1SJoanne Koong return sk->sk_rcv_saddr != htonl(INADDR_ANY); 15628044fc1SJoanne Koong } 15728044fc1SJoanne Koong 15828044fc1SJoanne Koong static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, 15928044fc1SJoanne Koong kuid_t sk_uid, bool relax, 16028044fc1SJoanne Koong bool reuseport_cb_ok, bool reuseport_ok) 16128044fc1SJoanne Koong { 16228044fc1SJoanne Koong int bound_dev_if2; 16328044fc1SJoanne Koong 16428044fc1SJoanne Koong if (sk == sk2) 16528044fc1SJoanne Koong return false; 16628044fc1SJoanne Koong 16728044fc1SJoanne Koong bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); 16828044fc1SJoanne Koong 16928044fc1SJoanne Koong if (!sk->sk_bound_dev_if || !bound_dev_if2 || 17028044fc1SJoanne Koong sk->sk_bound_dev_if == bound_dev_if2) { 17128044fc1SJoanne Koong if (sk->sk_reuse && sk2->sk_reuse && 17228044fc1SJoanne Koong sk2->sk_state != TCP_LISTEN) { 17328044fc1SJoanne Koong if (!relax || (!reuseport_ok && sk->sk_reuseport && 17428044fc1SJoanne Koong sk2->sk_reuseport && reuseport_cb_ok && 17528044fc1SJoanne Koong (sk2->sk_state == TCP_TIME_WAIT || 17628044fc1SJoanne Koong uid_eq(sk_uid, sock_i_uid(sk2))))) 17728044fc1SJoanne Koong return true; 17828044fc1SJoanne Koong } else if (!reuseport_ok || !sk->sk_reuseport || 17928044fc1SJoanne Koong !sk2->sk_reuseport || !reuseport_cb_ok || 18028044fc1SJoanne Koong (sk2->sk_state != TCP_TIME_WAIT && 18128044fc1SJoanne Koong !uid_eq(sk_uid, sock_i_uid(sk2)))) { 18228044fc1SJoanne Koong return true; 18328044fc1SJoanne Koong } 18428044fc1SJoanne Koong } 
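/* No conflict was found above: the sockets are bound to different
 * devices, or one of the SO_REUSEADDR / SO_REUSEPORT rules matched.
 * Illustrative userspace sketch of the SO_REUSEPORT case: two
 * sockets owned by the same uid may share a port when both set the
 * option before bind(), subject to the reuseport_ok and
 * reuseport_cb_ok conditions checked above:
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 */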
18528044fc1SJoanne Koong return false; 18628044fc1SJoanne Koong } 18728044fc1SJoanne Koong 188936a192fSKuniyuki Iwashima static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, 189936a192fSKuniyuki Iwashima kuid_t sk_uid, bool relax, 190936a192fSKuniyuki Iwashima bool reuseport_cb_ok, bool reuseport_ok) 191936a192fSKuniyuki Iwashima { 192936a192fSKuniyuki Iwashima if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) 193936a192fSKuniyuki Iwashima return false; 194936a192fSKuniyuki Iwashima 195936a192fSKuniyuki Iwashima return inet_bind_conflict(sk, sk2, sk_uid, relax, 196936a192fSKuniyuki Iwashima reuseport_cb_ok, reuseport_ok); 197936a192fSKuniyuki Iwashima } 198936a192fSKuniyuki Iwashima 19928044fc1SJoanne Koong static bool inet_bhash2_conflict(const struct sock *sk, 20028044fc1SJoanne Koong const struct inet_bind2_bucket *tb2, 20128044fc1SJoanne Koong kuid_t sk_uid, 20228044fc1SJoanne Koong bool relax, bool reuseport_cb_ok, 20328044fc1SJoanne Koong bool reuseport_ok) 204d5a42de8SJoanne Koong { 205936a192fSKuniyuki Iwashima struct inet_timewait_sock *tw2; 206d5a42de8SJoanne Koong struct sock *sk2; 20728044fc1SJoanne Koong 20828044fc1SJoanne Koong sk_for_each_bound_bhash2(sk2, &tb2->owners) { 209936a192fSKuniyuki Iwashima if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, 21028044fc1SJoanne Koong reuseport_cb_ok, reuseport_ok)) 21128044fc1SJoanne Koong return true; 21228044fc1SJoanne Koong } 213936a192fSKuniyuki Iwashima 214936a192fSKuniyuki Iwashima twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) { 215936a192fSKuniyuki Iwashima sk2 = (struct sock *)tw2; 216936a192fSKuniyuki Iwashima 217936a192fSKuniyuki Iwashima if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, 218936a192fSKuniyuki Iwashima reuseport_cb_ok, reuseport_ok)) 219936a192fSKuniyuki Iwashima return true; 220936a192fSKuniyuki Iwashima } 221936a192fSKuniyuki Iwashima 22228044fc1SJoanne Koong return false; 22328044fc1SJoanne Koong } 22428044fc1SJoanne Koong 22528044fc1SJoanne Koong /* This should be called only when the tb and tb2 hashbuckets' locks are held */ 22628044fc1SJoanne Koong static int inet_csk_bind_conflict(const struct sock *sk, 22728044fc1SJoanne Koong const struct inet_bind_bucket *tb, 22828044fc1SJoanne Koong const struct inet_bind2_bucket *tb2, /* may be null */ 22928044fc1SJoanne Koong bool relax, bool reuseport_ok) 23028044fc1SJoanne Koong { 231593d1ebeSJoanne Koong bool reuseport_cb_ok; 232593d1ebeSJoanne Koong struct sock_reuseport *reuseport_cb; 233593d1ebeSJoanne Koong kuid_t uid = sock_i_uid((struct sock *)sk); 2343f421baaSArnaldo Carvalho de Melo 235333bb73fSKuniyuki Iwashima rcu_read_lock(); 236333bb73fSKuniyuki Iwashima reuseport_cb = rcu_dereference(sk->sk_reuseport_cb); 237333bb73fSKuniyuki Iwashima /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */ 238333bb73fSKuniyuki Iwashima reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks); 239333bb73fSKuniyuki Iwashima rcu_read_unlock(); 240333bb73fSKuniyuki Iwashima 2417477fd2eSPavel Emelyanov /* 2427477fd2eSPavel Emelyanov * Unlike other sk lookup places we do not check 2437477fd2eSPavel Emelyanov * for sk_net here, since _all_ the socks listed 24428044fc1SJoanne Koong * in tb->owners and tb2->owners list belong 24528044fc1SJoanne Koong * to the same net - the one this bucket belongs to. 
2467477fd2eSPavel Emelyanov */ 2477477fd2eSPavel Emelyanov 24828044fc1SJoanne Koong if (!inet_use_bhash2_on_bind(sk)) { 24928044fc1SJoanne Koong struct sock *sk2; 250593d1ebeSJoanne Koong 25128044fc1SJoanne Koong sk_for_each_bound(sk2, &tb->owners) 25228044fc1SJoanne Koong if (inet_bind_conflict(sk, sk2, uid, relax, 25328044fc1SJoanne Koong reuseport_cb_ok, reuseport_ok) && 25416f6c251SKuniyuki Iwashima inet_rcv_saddr_equal(sk, sk2, true)) 25528044fc1SJoanne Koong return true; 25628044fc1SJoanne Koong 25728044fc1SJoanne Koong return false; 25828044fc1SJoanne Koong } 25928044fc1SJoanne Koong 26028044fc1SJoanne Koong /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if 26128044fc1SJoanne Koong * ipv4) should have been checked already. We need to do these two 26228044fc1SJoanne Koong * checks separately because their spinlocks have to be acquired/released 26328044fc1SJoanne Koong * independently of each other, to prevent possible deadlocks 26428044fc1SJoanne Koong */ 26528044fc1SJoanne Koong return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, 26628044fc1SJoanne Koong reuseport_ok); 26728044fc1SJoanne Koong } 26828044fc1SJoanne Koong 26928044fc1SJoanne Koong /* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or 27028044fc1SJoanne Koong * INADDR_ANY (if ipv4) socket. 27128044fc1SJoanne Koong * 27228044fc1SJoanne Koong * Caller must hold bhash hashbucket lock with local bh disabled, to protect 27328044fc1SJoanne Koong * against concurrent binds on the port for addr any 27428044fc1SJoanne Koong */ 27528044fc1SJoanne Koong static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l3mdev, 27628044fc1SJoanne Koong bool relax, bool reuseport_ok) 27728044fc1SJoanne Koong { 27828044fc1SJoanne Koong kuid_t uid = sock_i_uid((struct sock *)sk); 27928044fc1SJoanne Koong const struct net *net = sock_net(sk); 28028044fc1SJoanne Koong struct sock_reuseport *reuseport_cb; 28128044fc1SJoanne Koong struct inet_bind_hashbucket *head2; 28228044fc1SJoanne Koong struct inet_bind2_bucket *tb2; 28328044fc1SJoanne Koong bool reuseport_cb_ok; 28428044fc1SJoanne Koong 28528044fc1SJoanne Koong rcu_read_lock(); 28628044fc1SJoanne Koong reuseport_cb = rcu_dereference(sk->sk_reuseport_cb); 28728044fc1SJoanne Koong /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */ 28828044fc1SJoanne Koong reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks); 28928044fc1SJoanne Koong rcu_read_unlock(); 29028044fc1SJoanne Koong 29128044fc1SJoanne Koong head2 = inet_bhash2_addr_any_hashbucket(sk, net, port); 29228044fc1SJoanne Koong 29328044fc1SJoanne Koong spin_lock(&head2->lock); 29428044fc1SJoanne Koong 29528044fc1SJoanne Koong inet_bind_bucket_for_each(tb2, &head2->chain) 29628044fc1SJoanne Koong if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) 29716f6c251SKuniyuki Iwashima break; 29828044fc1SJoanne Koong 29928044fc1SJoanne Koong if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, 30028044fc1SJoanne Koong reuseport_ok)) { 30128044fc1SJoanne Koong spin_unlock(&head2->lock); 30228044fc1SJoanne Koong return true; 303593d1ebeSJoanne Koong } 30428044fc1SJoanne Koong 30528044fc1SJoanne Koong spin_unlock(&head2->lock); 30628044fc1SJoanne Koong return false; 3073f421baaSArnaldo Carvalho de Melo } 308971af18bSArnaldo Carvalho de Melo 309289141b7SJosef Bacik /* 310289141b7SJosef Bacik * Find an open port number for the socket. 
Returns with the 31128044fc1SJoanne Koong * inet_bind_hashbucket locks held if successful. 3123f421baaSArnaldo Carvalho de Melo */ 313289141b7SJosef Bacik static struct inet_bind_hashbucket * 31428044fc1SJoanne Koong inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret, 31528044fc1SJoanne Koong struct inet_bind2_bucket **tb2_ret, 31628044fc1SJoanne Koong struct inet_bind_hashbucket **head2_ret, int *port_ret) 3173f421baaSArnaldo Carvalho de Melo { 318429e42c1SKuniyuki Iwashima struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); 31908eaef90SKuniyuki Iwashima int i, low, high, attempt_half, port, l3mdev; 32028044fc1SJoanne Koong struct inet_bind_hashbucket *head, *head2; 3213b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(sk); 32228044fc1SJoanne Koong struct inet_bind2_bucket *tb2; 323ea8add2bSEric Dumazet struct inet_bind_bucket *tb; 324ea8add2bSEric Dumazet u32 remaining, offset; 32508eaef90SKuniyuki Iwashima bool relax = false; 3263f421baaSArnaldo Carvalho de Melo 3273c82a21fSRobert Shearman l3mdev = inet_sk_bound_l3mdev(sk); 3284b01a967SKuniyuki Iwashima ports_exhausted: 329ea8add2bSEric Dumazet attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; 330ea8add2bSEric Dumazet other_half_scan: 33191d0b78cSJakub Sitnicki inet_sk_get_local_port_range(sk, &low, &high); 332ea8add2bSEric Dumazet high++; /* [32768, 60999] -> [32768, 61000[ */ 333ea8add2bSEric Dumazet if (high - low < 4) 334ea8add2bSEric Dumazet attempt_half = 0; 335946f9eb2SEric Dumazet if (attempt_half) { 336ea8add2bSEric Dumazet int half = low + (((high - low) >> 2) << 1); 337946f9eb2SEric Dumazet 338946f9eb2SEric Dumazet if (attempt_half == 1) 339946f9eb2SEric Dumazet high = half; 340946f9eb2SEric Dumazet else 341946f9eb2SEric Dumazet low = half; 342946f9eb2SEric Dumazet } 343ea8add2bSEric Dumazet remaining = high - low; 344ea8add2bSEric Dumazet if (likely(remaining > 1)) 345ea8add2bSEric Dumazet remaining &= ~1U; 3463f421baaSArnaldo Carvalho de Melo 3478032bf12SJason A. Donenfeld offset = get_random_u32_below(remaining); 348ea8add2bSEric Dumazet /* __inet_hash_connect() favors ports having @low parity 349ea8add2bSEric Dumazet * We do the opposite to not pollute connect() users. 
350ea8add2bSEric Dumazet */ 351ea8add2bSEric Dumazet offset |= 1U; 352ea8add2bSEric Dumazet 353ea8add2bSEric Dumazet other_parity_scan: 354ea8add2bSEric Dumazet port = low + offset; 355ea8add2bSEric Dumazet for (i = 0; i < remaining; i += 2, port += 2) { 356ea8add2bSEric Dumazet if (unlikely(port >= high)) 357ea8add2bSEric Dumazet port -= remaining; 358ea8add2bSEric Dumazet if (inet_is_local_reserved_port(net, port)) 359ea8add2bSEric Dumazet continue; 360ea8add2bSEric Dumazet head = &hinfo->bhash[inet_bhashfn(net, port, 361ea8add2bSEric Dumazet hinfo->bhash_size)]; 362ea8add2bSEric Dumazet spin_lock_bh(&head->lock); 36328044fc1SJoanne Koong if (inet_use_bhash2_on_bind(sk)) { 36428044fc1SJoanne Koong if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false)) 36528044fc1SJoanne Koong goto next_port; 36628044fc1SJoanne Koong } 36728044fc1SJoanne Koong 36828044fc1SJoanne Koong head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); 36928044fc1SJoanne Koong spin_lock(&head2->lock); 37028044fc1SJoanne Koong tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); 371b67bfe0dSSasha Levin inet_bind_bucket_for_each(tb, &head->chain) 37228044fc1SJoanne Koong if (inet_bind_bucket_match(tb, net, port, l3mdev)) { 37328044fc1SJoanne Koong if (!inet_csk_bind_conflict(sk, tb, tb2, 37428044fc1SJoanne Koong relax, false)) 3756cd66616SJosef Bacik goto success; 37628044fc1SJoanne Koong spin_unlock(&head2->lock); 377ea8add2bSEric Dumazet goto next_port; 3782b05ad33SFlavio Leitner } 379289141b7SJosef Bacik tb = NULL; 380289141b7SJosef Bacik goto success; 381ea8add2bSEric Dumazet next_port: 382ea8add2bSEric Dumazet spin_unlock_bh(&head->lock); 383ea8add2bSEric Dumazet cond_resched(); 384a9d8f911SEvgeniy Polyakov } 3853f421baaSArnaldo Carvalho de Melo 386ea8add2bSEric Dumazet offset--; 387ea8add2bSEric Dumazet if (!(offset & 1)) 388ea8add2bSEric Dumazet goto other_parity_scan; 389ea8add2bSEric Dumazet 390946f9eb2SEric Dumazet if (attempt_half == 1) { 391946f9eb2SEric Dumazet /* OK we now try the upper half of the range */ 392946f9eb2SEric Dumazet attempt_half = 2; 393ea8add2bSEric Dumazet goto other_half_scan; 394946f9eb2SEric Dumazet } 3954b01a967SKuniyuki Iwashima 3960db23276SKuniyuki Iwashima if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) { 3974b01a967SKuniyuki Iwashima /* We still have a chance to connect to different destinations */ 3984b01a967SKuniyuki Iwashima relax = true; 3994b01a967SKuniyuki Iwashima goto ports_exhausted; 4004b01a967SKuniyuki Iwashima } 401289141b7SJosef Bacik return NULL; 402289141b7SJosef Bacik success: 403289141b7SJosef Bacik *port_ret = port; 404289141b7SJosef Bacik *tb_ret = tb; 40528044fc1SJoanne Koong *tb2_ret = tb2; 40628044fc1SJoanne Koong *head2_ret = head2; 407289141b7SJosef Bacik return head; 408289141b7SJosef Bacik } 409ea8add2bSEric Dumazet 410637bc8bbSJosef Bacik static inline int sk_reuseport_match(struct inet_bind_bucket *tb, 411637bc8bbSJosef Bacik struct sock *sk) 412637bc8bbSJosef Bacik { 413637bc8bbSJosef Bacik kuid_t uid = sock_i_uid(sk); 414637bc8bbSJosef Bacik 415637bc8bbSJosef Bacik if (tb->fastreuseport <= 0) 416637bc8bbSJosef Bacik return 0; 417637bc8bbSJosef Bacik if (!sk->sk_reuseport) 418637bc8bbSJosef Bacik return 0; 419637bc8bbSJosef Bacik if (rcu_access_pointer(sk->sk_reuseport_cb)) 420637bc8bbSJosef Bacik return 0; 421637bc8bbSJosef Bacik if (!uid_eq(tb->fastuid, uid)) 422637bc8bbSJosef Bacik return 0; 423637bc8bbSJosef Bacik /* We only need to check the rcv_saddr if this tb was once marked 424637bc8bbSJosef Bacik * without 
fastreuseport and then was reset, as we can only know that 425637bc8bbSJosef Bacik * the fast_*rcv_saddr doesn't have any conflicts with the socks on the 426637bc8bbSJosef Bacik * owners list. 427637bc8bbSJosef Bacik */ 428637bc8bbSJosef Bacik if (tb->fastreuseport == FASTREUSEPORT_ANY) 429637bc8bbSJosef Bacik return 1; 430637bc8bbSJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 431637bc8bbSJosef Bacik if (tb->fast_sk_family == AF_INET6) 432637bc8bbSJosef Bacik return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr, 4337a56673bSJosef Bacik inet6_rcv_saddr(sk), 434637bc8bbSJosef Bacik tb->fast_rcv_saddr, 435637bc8bbSJosef Bacik sk->sk_rcv_saddr, 436637bc8bbSJosef Bacik tb->fast_ipv6_only, 43788d7fcfaSMartin KaFai Lau ipv6_only_sock(sk), true, false); 438637bc8bbSJosef Bacik #endif 439637bc8bbSJosef Bacik return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr, 44088d7fcfaSMartin KaFai Lau ipv6_only_sock(sk), true, false); 441637bc8bbSJosef Bacik } 442637bc8bbSJosef Bacik 44362ffc589STim Froidcoeur void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, 44462ffc589STim Froidcoeur struct sock *sk) 445289141b7SJosef Bacik { 446289141b7SJosef Bacik kuid_t uid = sock_i_uid(sk); 44762ffc589STim Froidcoeur bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; 4483c82a21fSRobert Shearman 449fbed24bcSJosef Bacik if (hlist_empty(&tb->owners)) { 450ea8add2bSEric Dumazet tb->fastreuse = reuse; 451da5e3630STom Herbert if (sk->sk_reuseport) { 452637bc8bbSJosef Bacik tb->fastreuseport = FASTREUSEPORT_ANY; 453da5e3630STom Herbert tb->fastuid = uid; 454637bc8bbSJosef Bacik tb->fast_rcv_saddr = sk->sk_rcv_saddr; 455637bc8bbSJosef Bacik tb->fast_ipv6_only = ipv6_only_sock(sk); 456cbb2fb5cSJosef Bacik tb->fast_sk_family = sk->sk_family; 457637bc8bbSJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 458637bc8bbSJosef Bacik tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 459637bc8bbSJosef Bacik #endif 460da5e3630STom Herbert } else { 461da5e3630STom Herbert tb->fastreuseport = 0; 462da5e3630STom Herbert } 4636cd66616SJosef Bacik } else { 4646cd66616SJosef Bacik if (!reuse) 4656cd66616SJosef Bacik tb->fastreuse = 0; 466637bc8bbSJosef Bacik if (sk->sk_reuseport) { 467637bc8bbSJosef Bacik /* We didn't match or we don't have fastreuseport set on 468637bc8bbSJosef Bacik * the tb, but we have sk_reuseport set on this socket 469637bc8bbSJosef Bacik * and we know that there are no bind conflicts with 470637bc8bbSJosef Bacik * this socket in this tb, so reset our tb's reuseport 471637bc8bbSJosef Bacik * settings so that any subsequent sockets that match 472637bc8bbSJosef Bacik * our current socket will be put on the fast path. 473637bc8bbSJosef Bacik * 474637bc8bbSJosef Bacik * If we reset we need to set FASTREUSEPORT_STRICT so we 475637bc8bbSJosef Bacik * do extra checking for all subsequent sk_reuseport 476637bc8bbSJosef Bacik * socks. 
477637bc8bbSJosef Bacik */ 478637bc8bbSJosef Bacik if (!sk_reuseport_match(tb, sk)) { 479637bc8bbSJosef Bacik tb->fastreuseport = FASTREUSEPORT_STRICT; 480637bc8bbSJosef Bacik tb->fastuid = uid; 481637bc8bbSJosef Bacik tb->fast_rcv_saddr = sk->sk_rcv_saddr; 482637bc8bbSJosef Bacik tb->fast_ipv6_only = ipv6_only_sock(sk); 483cbb2fb5cSJosef Bacik tb->fast_sk_family = sk->sk_family; 484637bc8bbSJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 485637bc8bbSJosef Bacik tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 486637bc8bbSJosef Bacik #endif 487637bc8bbSJosef Bacik } 488637bc8bbSJosef Bacik } else { 4896cd66616SJosef Bacik tb->fastreuseport = 0; 490ea8add2bSEric Dumazet } 491637bc8bbSJosef Bacik } 49262ffc589STim Froidcoeur } 49362ffc589STim Froidcoeur 49462ffc589STim Froidcoeur /* Obtain a reference to a local port for the given sock, 49562ffc589STim Froidcoeur * if snum is zero it means select any available local port. 49662ffc589STim Froidcoeur * We try to allocate an odd port (and leave even ports for connect()) 49762ffc589STim Froidcoeur */ 49862ffc589STim Froidcoeur int inet_csk_get_port(struct sock *sk, unsigned short snum) 49962ffc589STim Froidcoeur { 500429e42c1SKuniyuki Iwashima struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); 50162ffc589STim Froidcoeur bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; 50228044fc1SJoanne Koong bool found_port = false, check_bind_conflict = true; 50328044fc1SJoanne Koong bool bhash_created = false, bhash2_created = false; 5047a7160edSKuniyuki Iwashima int ret = -EADDRINUSE, port = snum, l3mdev; 50528044fc1SJoanne Koong struct inet_bind_hashbucket *head, *head2; 50628044fc1SJoanne Koong struct inet_bind2_bucket *tb2 = NULL; 507593d1ebeSJoanne Koong struct inet_bind_bucket *tb = NULL; 50828044fc1SJoanne Koong bool head2_lock_acquired = false; 50908eaef90SKuniyuki Iwashima struct net *net = sock_net(sk); 51062ffc589STim Froidcoeur 51162ffc589STim Froidcoeur l3mdev = inet_sk_bound_l3mdev(sk); 51262ffc589STim Froidcoeur 51362ffc589STim Froidcoeur if (!port) { 51428044fc1SJoanne Koong head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port); 51562ffc589STim Froidcoeur if (!head) 51662ffc589STim Froidcoeur return ret; 51728044fc1SJoanne Koong 51828044fc1SJoanne Koong head2_lock_acquired = true; 51928044fc1SJoanne Koong 52028044fc1SJoanne Koong if (tb && tb2) 52162ffc589STim Froidcoeur goto success; 52228044fc1SJoanne Koong found_port = true; 52328044fc1SJoanne Koong } else { 52462ffc589STim Froidcoeur head = &hinfo->bhash[inet_bhashfn(net, port, 52562ffc589STim Froidcoeur hinfo->bhash_size)]; 52662ffc589STim Froidcoeur spin_lock_bh(&head->lock); 52762ffc589STim Froidcoeur inet_bind_bucket_for_each(tb, &head->chain) 52828044fc1SJoanne Koong if (inet_bind_bucket_match(tb, net, port, l3mdev)) 52928044fc1SJoanne Koong break; 53028044fc1SJoanne Koong } 53128044fc1SJoanne Koong 53228044fc1SJoanne Koong if (!tb) { 53328044fc1SJoanne Koong tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net, 53428044fc1SJoanne Koong head, port, l3mdev); 53562ffc589STim Froidcoeur if (!tb) 53662ffc589STim Froidcoeur goto fail_unlock; 53728044fc1SJoanne Koong bhash_created = true; 53828044fc1SJoanne Koong } 53962ffc589STim Froidcoeur 54028044fc1SJoanne Koong if (!found_port) { 54128044fc1SJoanne Koong if (!hlist_empty(&tb->owners)) { 54228044fc1SJoanne Koong if (sk->sk_reuse == SK_FORCE_REUSE || 54328044fc1SJoanne Koong (tb->fastreuse > 0 && reuse) || 54462ffc589STim Froidcoeur sk_reuseport_match(tb, sk)) 54528044fc1SJoanne Koong check_bind_conflict = false; 
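/* tb->fastreuse and tb->fastreuseport cache a summary of the
 * sockets already bound into this bucket: they stay set only while
 * every owner was bound with SO_REUSEADDR (or a compatible
 * SO_REUSEPORT setup, see sk_reuseport_match() and
 * inet_csk_update_fastreuse() above), so a compatible newcomer can
 * skip the O(n) bind-conflict walk of tb->owners.
 */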
54628044fc1SJoanne Koong } 54728044fc1SJoanne Koong 54828044fc1SJoanne Koong if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) { 54928044fc1SJoanne Koong if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true)) 55062ffc589STim Froidcoeur goto fail_unlock; 55162ffc589STim Froidcoeur } 55228044fc1SJoanne Koong 55328044fc1SJoanne Koong head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); 55428044fc1SJoanne Koong spin_lock(&head2->lock); 55528044fc1SJoanne Koong head2_lock_acquired = true; 55628044fc1SJoanne Koong tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); 55728044fc1SJoanne Koong } 55828044fc1SJoanne Koong 55928044fc1SJoanne Koong if (!tb2) { 56028044fc1SJoanne Koong tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, 56128044fc1SJoanne Koong net, head2, port, l3mdev, sk); 56228044fc1SJoanne Koong if (!tb2) 56328044fc1SJoanne Koong goto fail_unlock; 56428044fc1SJoanne Koong bhash2_created = true; 56528044fc1SJoanne Koong } 56628044fc1SJoanne Koong 56728044fc1SJoanne Koong if (!found_port && check_bind_conflict) { 56828044fc1SJoanne Koong if (inet_csk_bind_conflict(sk, tb, tb2, true, true)) 56928044fc1SJoanne Koong goto fail_unlock; 57028044fc1SJoanne Koong } 57128044fc1SJoanne Koong 57262ffc589STim Froidcoeur success: 57362ffc589STim Froidcoeur inet_csk_update_fastreuse(tb, sk); 57462ffc589STim Froidcoeur 5753f421baaSArnaldo Carvalho de Melo if (!inet_csk(sk)->icsk_bind_hash) 57628044fc1SJoanne Koong inet_bind_hash(sk, tb, tb2, port); 577547b792cSIlpo Järvinen WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 57828044fc1SJoanne Koong WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2); 5793f421baaSArnaldo Carvalho de Melo ret = 0; 5803f421baaSArnaldo Carvalho de Melo 5813f421baaSArnaldo Carvalho de Melo fail_unlock: 58228044fc1SJoanne Koong if (ret) { 58328044fc1SJoanne Koong if (bhash_created) 58428044fc1SJoanne Koong inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); 58528044fc1SJoanne Koong if (bhash2_created) 58628044fc1SJoanne Koong inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, 58728044fc1SJoanne Koong tb2); 58828044fc1SJoanne Koong } 58928044fc1SJoanne Koong if (head2_lock_acquired) 59028044fc1SJoanne Koong spin_unlock(&head2->lock); 591ea8add2bSEric Dumazet spin_unlock_bh(&head->lock); 5923f421baaSArnaldo Carvalho de Melo return ret; 5933f421baaSArnaldo Carvalho de Melo } 5943f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_get_port); 5953f421baaSArnaldo Carvalho de Melo 5963f421baaSArnaldo Carvalho de Melo /* 5973f421baaSArnaldo Carvalho de Melo * Wait for an incoming connection, avoid race conditions. This must be called 5983f421baaSArnaldo Carvalho de Melo * with the socket locked. 5993f421baaSArnaldo Carvalho de Melo */ 6003f421baaSArnaldo Carvalho de Melo static int inet_csk_wait_for_connect(struct sock *sk, long timeo) 6013f421baaSArnaldo Carvalho de Melo { 6023f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 6033f421baaSArnaldo Carvalho de Melo DEFINE_WAIT(wait); 6043f421baaSArnaldo Carvalho de Melo int err; 6053f421baaSArnaldo Carvalho de Melo 6063f421baaSArnaldo Carvalho de Melo /* 6073f421baaSArnaldo Carvalho de Melo * True wake-one mechanism for incoming connections: only 6083f421baaSArnaldo Carvalho de Melo * one process gets woken up, not the 'whole herd'. 6093f421baaSArnaldo Carvalho de Melo * Since we do not 'race & poll' for established sockets 6103f421baaSArnaldo Carvalho de Melo * anymore, the common case will execute the loop only once. 
6113f421baaSArnaldo Carvalho de Melo * 6123f421baaSArnaldo Carvalho de Melo * Subtle issue: "add_wait_queue_exclusive()" will be added 6133f421baaSArnaldo Carvalho de Melo * after any current non-exclusive waiters, and we know that 6143f421baaSArnaldo Carvalho de Melo * it will always _stay_ after any new non-exclusive waiters 6153f421baaSArnaldo Carvalho de Melo * because all non-exclusive waiters are added at the 6163f421baaSArnaldo Carvalho de Melo * beginning of the wait-queue. As such, it's ok to "drop" 6173f421baaSArnaldo Carvalho de Melo * our exclusiveness temporarily when we get woken up without 6183f421baaSArnaldo Carvalho de Melo * having to remove and re-insert us on the wait queue. 6193f421baaSArnaldo Carvalho de Melo */ 6203f421baaSArnaldo Carvalho de Melo for (;;) { 621aa395145SEric Dumazet prepare_to_wait_exclusive(sk_sleep(sk), &wait, 6223f421baaSArnaldo Carvalho de Melo TASK_INTERRUPTIBLE); 6233f421baaSArnaldo Carvalho de Melo release_sock(sk); 6243f421baaSArnaldo Carvalho de Melo if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 6253f421baaSArnaldo Carvalho de Melo timeo = schedule_timeout(timeo); 626cb7cf8a3SEric Dumazet sched_annotate_sleep(); 6273f421baaSArnaldo Carvalho de Melo lock_sock(sk); 6283f421baaSArnaldo Carvalho de Melo err = 0; 6293f421baaSArnaldo Carvalho de Melo if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) 6303f421baaSArnaldo Carvalho de Melo break; 6313f421baaSArnaldo Carvalho de Melo err = -EINVAL; 6323f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 6333f421baaSArnaldo Carvalho de Melo break; 6343f421baaSArnaldo Carvalho de Melo err = sock_intr_errno(timeo); 6353f421baaSArnaldo Carvalho de Melo if (signal_pending(current)) 6363f421baaSArnaldo Carvalho de Melo break; 6373f421baaSArnaldo Carvalho de Melo err = -EAGAIN; 6383f421baaSArnaldo Carvalho de Melo if (!timeo) 6393f421baaSArnaldo Carvalho de Melo break; 6403f421baaSArnaldo Carvalho de Melo } 641aa395145SEric Dumazet finish_wait(sk_sleep(sk), &wait); 6423f421baaSArnaldo Carvalho de Melo return err; 6433f421baaSArnaldo Carvalho de Melo } 6443f421baaSArnaldo Carvalho de Melo 6453f421baaSArnaldo Carvalho de Melo /* 6463f421baaSArnaldo Carvalho de Melo * This will accept the next outstanding connection. 6473f421baaSArnaldo Carvalho de Melo */ 648cdfbabfbSDavid Howells struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) 6493f421baaSArnaldo Carvalho de Melo { 6503f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 6518336886fSJerry Chu struct request_sock_queue *queue = &icsk->icsk_accept_queue; 6528336886fSJerry Chu struct request_sock *req; 653e3d95ad7SEric Dumazet struct sock *newsk; 6543f421baaSArnaldo Carvalho de Melo int error; 6553f421baaSArnaldo Carvalho de Melo 6563f421baaSArnaldo Carvalho de Melo lock_sock(sk); 6573f421baaSArnaldo Carvalho de Melo 6583f421baaSArnaldo Carvalho de Melo /* We need to make sure that this socket is listening, 6593f421baaSArnaldo Carvalho de Melo * and that it has something pending. 
6603f421baaSArnaldo Carvalho de Melo */ 6613f421baaSArnaldo Carvalho de Melo error = -EINVAL; 6623f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 6633f421baaSArnaldo Carvalho de Melo goto out_err; 6643f421baaSArnaldo Carvalho de Melo 6653f421baaSArnaldo Carvalho de Melo /* Find already established connection */ 6668336886fSJerry Chu if (reqsk_queue_empty(queue)) { 6673f421baaSArnaldo Carvalho de Melo long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 6683f421baaSArnaldo Carvalho de Melo 6693f421baaSArnaldo Carvalho de Melo /* If this is a non blocking socket don't sleep */ 6703f421baaSArnaldo Carvalho de Melo error = -EAGAIN; 6713f421baaSArnaldo Carvalho de Melo if (!timeo) 6723f421baaSArnaldo Carvalho de Melo goto out_err; 6733f421baaSArnaldo Carvalho de Melo 6743f421baaSArnaldo Carvalho de Melo error = inet_csk_wait_for_connect(sk, timeo); 6753f421baaSArnaldo Carvalho de Melo if (error) 6763f421baaSArnaldo Carvalho de Melo goto out_err; 6773f421baaSArnaldo Carvalho de Melo } 678fff1f300SEric Dumazet req = reqsk_queue_remove(queue, sk); 6798336886fSJerry Chu newsk = req->sk; 6803f421baaSArnaldo Carvalho de Melo 681e3d95ad7SEric Dumazet if (sk->sk_protocol == IPPROTO_TCP && 6820536fcc0SEric Dumazet tcp_rsk(req)->tfo_listener) { 6830536fcc0SEric Dumazet spin_lock_bh(&queue->fastopenq.lock); 6849439ce00SEric Dumazet if (tcp_rsk(req)->tfo_listener) { 6858336886fSJerry Chu /* We are still waiting for the final ACK from 3WHS 6868336886fSJerry Chu * so can't free req now. Instead, we set req->sk to 6878336886fSJerry Chu * NULL to signify that the child socket is taken 6888336886fSJerry Chu * so reqsk_fastopen_remove() will free the req 6898336886fSJerry Chu * when 3WHS finishes (or is aborted). 6908336886fSJerry Chu */ 6918336886fSJerry Chu req->sk = NULL; 6928336886fSJerry Chu req = NULL; 6938336886fSJerry Chu } 6940536fcc0SEric Dumazet spin_unlock_bh(&queue->fastopenq.lock); 6958336886fSJerry Chu } 696d752a498SShakeel Butt 6973f421baaSArnaldo Carvalho de Melo out: 6983f421baaSArnaldo Carvalho de Melo release_sock(sk); 69906669ea3SEric Dumazet if (newsk && mem_cgroup_sockets_enabled) { 70053bf9164SAbel Wu int amt = 0; 701d752a498SShakeel Butt 702d752a498SShakeel Butt /* atomically get the memory usage, set and charge the 70306669ea3SEric Dumazet * newsk->sk_memcg. 704d752a498SShakeel Butt */ 705d752a498SShakeel Butt lock_sock(newsk); 706d752a498SShakeel Butt 70753bf9164SAbel Wu mem_cgroup_sk_alloc(newsk); 70853bf9164SAbel Wu if (newsk->sk_memcg) { 70953bf9164SAbel Wu /* The socket has not been accepted yet, no need 71053bf9164SAbel Wu * to look at newsk->sk_wmem_queued. 
711d752a498SShakeel Butt */ 712d752a498SShakeel Butt amt = sk_mem_pages(newsk->sk_forward_alloc + 71306669ea3SEric Dumazet atomic_read(&newsk->sk_rmem_alloc)); 71453bf9164SAbel Wu } 71553bf9164SAbel Wu 71653bf9164SAbel Wu if (amt) 7174b1327beSWei Wang mem_cgroup_charge_skmem(newsk->sk_memcg, amt, 7184b1327beSWei Wang GFP_KERNEL | __GFP_NOFAIL); 719d752a498SShakeel Butt 720d752a498SShakeel Butt release_sock(newsk); 721d752a498SShakeel Butt } 7228336886fSJerry Chu if (req) 72313854e5aSEric Dumazet reqsk_put(req); 7243f421baaSArnaldo Carvalho de Melo return newsk; 7253f421baaSArnaldo Carvalho de Melo out_err: 7263f421baaSArnaldo Carvalho de Melo newsk = NULL; 7278336886fSJerry Chu req = NULL; 7283f421baaSArnaldo Carvalho de Melo *err = error; 7293f421baaSArnaldo Carvalho de Melo goto out; 7303f421baaSArnaldo Carvalho de Melo } 7313f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_accept); 7323f421baaSArnaldo Carvalho de Melo 7333f421baaSArnaldo Carvalho de Melo /* 7343f421baaSArnaldo Carvalho de Melo * Using different timers for retransmit, delayed acks and probes 7353f421baaSArnaldo Carvalho de Melo * We may wish to use just one timer maintaining a list of expire jiffies 7363f421baaSArnaldo Carvalho de Melo * to optimize. 7373f421baaSArnaldo Carvalho de Melo */ 7383f421baaSArnaldo Carvalho de Melo void inet_csk_init_xmit_timers(struct sock *sk, 73959f379f9SKees Cook void (*retransmit_handler)(struct timer_list *t), 74059f379f9SKees Cook void (*delack_handler)(struct timer_list *t), 74159f379f9SKees Cook void (*keepalive_handler)(struct timer_list *t)) 7423f421baaSArnaldo Carvalho de Melo { 7433f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 7443f421baaSArnaldo Carvalho de Melo 74559f379f9SKees Cook timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0); 74659f379f9SKees Cook timer_setup(&icsk->icsk_delack_timer, delack_handler, 0); 74759f379f9SKees Cook timer_setup(&sk->sk_timer, keepalive_handler, 0); 7483f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = 0; 7493f421baaSArnaldo Carvalho de Melo } 7503f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_init_xmit_timers); 7513f421baaSArnaldo Carvalho de Melo 7523f421baaSArnaldo Carvalho de Melo void inet_csk_clear_xmit_timers(struct sock *sk) 7533f421baaSArnaldo Carvalho de Melo { 7543f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 7553f421baaSArnaldo Carvalho de Melo 756b6b6d653SEric Dumazet icsk->icsk_pending = icsk->icsk_ack.pending = 0; 7573f421baaSArnaldo Carvalho de Melo 7583f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 7593f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_delack_timer); 7603f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 7613f421baaSArnaldo Carvalho de Melo } 7623f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 7633f421baaSArnaldo Carvalho de Melo 7643f421baaSArnaldo Carvalho de Melo void inet_csk_delete_keepalive_timer(struct sock *sk) 7653f421baaSArnaldo Carvalho de Melo { 7663f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 7673f421baaSArnaldo Carvalho de Melo } 7683f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 7693f421baaSArnaldo Carvalho de Melo 7703f421baaSArnaldo Carvalho de Melo void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 7713f421baaSArnaldo Carvalho de Melo { 7723f421baaSArnaldo Carvalho de Melo sk_reset_timer(sk,
&sk->sk_timer, jiffies + len); 7733f421baaSArnaldo Carvalho de Melo } 7743f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 7753f421baaSArnaldo Carvalho de Melo 776e5895bc6SEric Dumazet struct dst_entry *inet_csk_route_req(const struct sock *sk, 7776bd023f3SDavid S. Miller struct flowi4 *fl4, 778ba3f7f04SDavid S. Miller const struct request_sock *req) 7793f421baaSArnaldo Carvalho de Melo { 7803f421baaSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 7818b929ab1SEric Dumazet struct net *net = read_pnet(&ireq->ireq_net); 782c92e8c02SEric Dumazet struct ip_options_rcu *opt; 7838b929ab1SEric Dumazet struct rtable *rt; 7843f421baaSArnaldo Carvalho de Melo 7852ab2ddd3SEric Dumazet rcu_read_lock(); 7862ab2ddd3SEric Dumazet opt = rcu_dereference(ireq->ireq_opt); 78706f877d6SEric Dumazet 7888b929ab1SEric Dumazet flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 7894b095281SGuillaume Nault ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), 7908b929ab1SEric Dumazet sk->sk_protocol, inet_sk_flowi_flags(sk), 791634fb979SEric Dumazet (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 7928b929ab1SEric Dumazet ireq->ir_loc_addr, ireq->ir_rmt_port, 793e2d118a1SLorenzo Colitti htons(ireq->ir_num), sk->sk_uid); 7943df98d79SPaul Moore security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); 7956bd023f3SDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 796b23dd4feSDavid S. Miller if (IS_ERR(rt)) 797857a6e0aSIlpo Järvinen goto no_route; 79877d5bc7eSDavid Ahern if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 799857a6e0aSIlpo Järvinen goto route_err; 8002ab2ddd3SEric Dumazet rcu_read_unlock(); 801d8d1f30bSChangli Gao return &rt->dst; 802857a6e0aSIlpo Järvinen 803857a6e0aSIlpo Järvinen route_err: 804857a6e0aSIlpo Järvinen ip_rt_put(rt); 805857a6e0aSIlpo Järvinen no_route: 8062ab2ddd3SEric Dumazet rcu_read_unlock(); 807b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 808857a6e0aSIlpo Järvinen return NULL; 8093f421baaSArnaldo Carvalho de Melo } 8103f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_route_req); 8113f421baaSArnaldo Carvalho de Melo 812a2432c4fSEric Dumazet struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, 81377357a95SDavid S. Miller struct sock *newsk, 81477357a95SDavid S. Miller const struct request_sock *req) 81577357a95SDavid S. Miller { 81677357a95SDavid S. Miller const struct inet_request_sock *ireq = inet_rsk(req); 8178b929ab1SEric Dumazet struct net *net = read_pnet(&ireq->ireq_net); 81877357a95SDavid S. Miller struct inet_sock *newinet = inet_sk(newsk); 8191a7b27c9SChristoph Paasch struct ip_options_rcu *opt; 82077357a95SDavid S. Miller struct flowi4 *fl4; 82177357a95SDavid S. Miller struct rtable *rt; 82277357a95SDavid S. Miller 823c92e8c02SEric Dumazet opt = rcu_dereference(ireq->ireq_opt); 82477357a95SDavid S. Miller fl4 = &newinet->cork.fl.u.ip4; 8251a7b27c9SChristoph Paasch 8268b929ab1SEric Dumazet flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 8274b095281SGuillaume Nault ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), 82877357a95SDavid S. Miller sk->sk_protocol, inet_sk_flowi_flags(sk), 829634fb979SEric Dumazet (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 8308b929ab1SEric Dumazet ireq->ir_loc_addr, ireq->ir_rmt_port, 831e2d118a1SLorenzo Colitti htons(ireq->ir_num), sk->sk_uid); 8323df98d79SPaul Moore security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); 83377357a95SDavid S. 
Miller rt = ip_route_output_flow(net, fl4, sk); 83477357a95SDavid S. Miller if (IS_ERR(rt)) 83577357a95SDavid S. Miller goto no_route; 83677d5bc7eSDavid Ahern if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 83777357a95SDavid S. Miller goto route_err; 83877357a95SDavid S. Miller return &rt->dst; 83977357a95SDavid S. Miller 84077357a95SDavid S. Miller route_err: 84177357a95SDavid S. Miller ip_rt_put(rt); 84277357a95SDavid S. Miller no_route: 843b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 84477357a95SDavid S. Miller return NULL; 84577357a95SDavid S. Miller } 84677357a95SDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); 84777357a95SDavid S. Miller 8480c3d79bcSJulian Anastasov /* Decide when to expire the request and when to resend SYN-ACK */ 849a594920fSKuniyuki Iwashima static void syn_ack_recalc(struct request_sock *req, 850a594920fSKuniyuki Iwashima const int max_syn_ack_retries, 8510c3d79bcSJulian Anastasov const u8 rskq_defer_accept, 8520c3d79bcSJulian Anastasov int *expire, int *resend) 8530c3d79bcSJulian Anastasov { 8540c3d79bcSJulian Anastasov if (!rskq_defer_accept) { 855a594920fSKuniyuki Iwashima *expire = req->num_timeout >= max_syn_ack_retries; 8560c3d79bcSJulian Anastasov *resend = 1; 8570c3d79bcSJulian Anastasov return; 8580c3d79bcSJulian Anastasov } 859a594920fSKuniyuki Iwashima *expire = req->num_timeout >= max_syn_ack_retries && 860a594920fSKuniyuki Iwashima (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept); 861a594920fSKuniyuki Iwashima /* Do not resend while waiting for data after ACK, 8620c3d79bcSJulian Anastasov * start to resend on end of deferring period to give 8630c3d79bcSJulian Anastasov * last chance for data or ACK to create established socket. 8640c3d79bcSJulian Anastasov */ 8650c3d79bcSJulian Anastasov *resend = !inet_rsk(req)->acked || 866e6c022a4SEric Dumazet req->num_timeout >= rskq_defer_accept - 1; 8670c3d79bcSJulian Anastasov } 8680c3d79bcSJulian Anastasov 8691b70e977SEric Dumazet int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) 870e6c022a4SEric Dumazet { 8711a2c6181SChristoph Paasch int err = req->rsk_ops->rtx_syn_ack(parent, req); 872e6c022a4SEric Dumazet 873e6c022a4SEric Dumazet if (!err) 874e6c022a4SEric Dumazet req->num_retrans++; 875e6c022a4SEric Dumazet return err; 876e6c022a4SEric Dumazet } 877e6c022a4SEric Dumazet EXPORT_SYMBOL(inet_rtx_syn_ack); 878e6c022a4SEric Dumazet 87954b92e84SKuniyuki Iwashima static struct request_sock *inet_reqsk_clone(struct request_sock *req, 88054b92e84SKuniyuki Iwashima struct sock *sk) 88154b92e84SKuniyuki Iwashima { 88254b92e84SKuniyuki Iwashima struct sock *req_sk, *nreq_sk; 88354b92e84SKuniyuki Iwashima struct request_sock *nreq; 88454b92e84SKuniyuki Iwashima 88554b92e84SKuniyuki Iwashima nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN); 88654b92e84SKuniyuki Iwashima if (!nreq) { 88755d444b3SKuniyuki Iwashima __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); 88855d444b3SKuniyuki Iwashima 88954b92e84SKuniyuki Iwashima /* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */ 89054b92e84SKuniyuki Iwashima sock_put(sk); 89154b92e84SKuniyuki Iwashima return NULL; 89254b92e84SKuniyuki Iwashima } 89354b92e84SKuniyuki Iwashima 89454b92e84SKuniyuki Iwashima req_sk = req_to_sk(req); 89554b92e84SKuniyuki Iwashima nreq_sk = req_to_sk(nreq); 89654b92e84SKuniyuki Iwashima 89754b92e84SKuniyuki Iwashima memcpy(nreq_sk, req_sk, 89854b92e84SKuniyuki Iwashima offsetof(struct sock, sk_dontcopy_begin)); 
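/* The copy is done in two halves so that the
 * sk_dontcopy_begin..sk_dontcopy_end window is skipped: that window
 * holds fields such as the hash node and the refcount that must not
 * be duplicated bit-for-bit into the clone, and is re-initialized
 * explicitly below (sk_node_init() etc.), mirroring what sock_copy()
 * does when a full socket is cloned.
 */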
89954b92e84SKuniyuki Iwashima memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end, 90054b92e84SKuniyuki Iwashima req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end)); 90154b92e84SKuniyuki Iwashima 90254b92e84SKuniyuki Iwashima sk_node_init(&nreq_sk->sk_node); 90354b92e84SKuniyuki Iwashima nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping; 904a9418924SEric Dumazet #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING 90554b92e84SKuniyuki Iwashima nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping; 90654b92e84SKuniyuki Iwashima #endif 90754b92e84SKuniyuki Iwashima nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu; 90854b92e84SKuniyuki Iwashima 90954b92e84SKuniyuki Iwashima nreq->rsk_listener = sk; 91054b92e84SKuniyuki Iwashima 91154b92e84SKuniyuki Iwashima /* We need not acquire fastopenq->lock 91254b92e84SKuniyuki Iwashima * because the child socket is locked in inet_csk_listen_stop(). 91354b92e84SKuniyuki Iwashima */ 91454b92e84SKuniyuki Iwashima if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener) 91554b92e84SKuniyuki Iwashima rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq); 91654b92e84SKuniyuki Iwashima 91754b92e84SKuniyuki Iwashima return nreq; 91854b92e84SKuniyuki Iwashima } 91954b92e84SKuniyuki Iwashima 920c905dee6SKuniyuki Iwashima static void reqsk_queue_migrated(struct request_sock_queue *queue, 921c905dee6SKuniyuki Iwashima const struct request_sock *req) 922c905dee6SKuniyuki Iwashima { 923c905dee6SKuniyuki Iwashima if (req->num_timeout == 0) 924c905dee6SKuniyuki Iwashima atomic_inc(&queue->young); 925c905dee6SKuniyuki Iwashima atomic_inc(&queue->qlen); 926c905dee6SKuniyuki Iwashima } 927c905dee6SKuniyuki Iwashima 92854b92e84SKuniyuki Iwashima static void reqsk_migrate_reset(struct request_sock *req) 92954b92e84SKuniyuki Iwashima { 930c905dee6SKuniyuki Iwashima req->saved_syn = NULL; 93154b92e84SKuniyuki Iwashima #if IS_ENABLED(CONFIG_IPV6) 93254b92e84SKuniyuki Iwashima inet_rsk(req)->ipv6_opt = NULL; 933c905dee6SKuniyuki Iwashima inet_rsk(req)->pktopts = NULL; 934c905dee6SKuniyuki Iwashima #else 935c905dee6SKuniyuki Iwashima inet_rsk(req)->ireq_opt = NULL; 93654b92e84SKuniyuki Iwashima #endif 93754b92e84SKuniyuki Iwashima } 93854b92e84SKuniyuki Iwashima 939079096f1SEric Dumazet /* return true if req was found in the ehash table */ 9408b5e07d7SZhiqiang Liu static bool reqsk_queue_unlink(struct request_sock *req) 941b357a364SEric Dumazet { 94208eaef90SKuniyuki Iwashima struct sock *sk = req_to_sk(req); 9435e0724d0SEric Dumazet bool found = false; 944b357a364SEric Dumazet 94508eaef90SKuniyuki Iwashima if (sk_hashed(sk)) { 946429e42c1SKuniyuki Iwashima struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); 947429e42c1SKuniyuki Iwashima spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash); 948b357a364SEric Dumazet 949079096f1SEric Dumazet spin_lock(lock); 95008eaef90SKuniyuki Iwashima found = __sk_nulls_del_node_init_rcu(sk); 951079096f1SEric Dumazet spin_unlock(lock); 9525e0724d0SEric Dumazet } 95383fccfc3SEric Dumazet if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) 954b357a364SEric Dumazet reqsk_put(req); 955b357a364SEric Dumazet return found; 956b357a364SEric Dumazet } 957b357a364SEric Dumazet 9587233da86SAlexander Ovechkin bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) 959b357a364SEric Dumazet { 9607233da86SAlexander Ovechkin bool unlinked = reqsk_queue_unlink(req); 9617233da86SAlexander Ovechkin 9627233da86SAlexander Ovechkin if (unlinked) { 963b357a364SEric Dumazet 
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); 964b357a364SEric Dumazet reqsk_put(req); 965b357a364SEric Dumazet } 9667233da86SAlexander Ovechkin return unlinked; 967b357a364SEric Dumazet } 968b357a364SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); 969b357a364SEric Dumazet 970f03f2e15SEric Dumazet void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) 971f03f2e15SEric Dumazet { 972f03f2e15SEric Dumazet inet_csk_reqsk_queue_drop(sk, req); 973f03f2e15SEric Dumazet reqsk_put(req); 974f03f2e15SEric Dumazet } 975f03f2e15SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); 976f03f2e15SEric Dumazet 97759f379f9SKees Cook static void reqsk_timer_handler(struct timer_list *t) 978a019d6feSArnaldo Carvalho de Melo { 97959f379f9SKees Cook struct request_sock *req = from_timer(req, t, rsk_timer); 980c905dee6SKuniyuki Iwashima struct request_sock *nreq = NULL, *oreq = req; 981fa76ce73SEric Dumazet struct sock *sk_listener = req->rsk_listener; 982c905dee6SKuniyuki Iwashima struct inet_connection_sock *icsk; 983c905dee6SKuniyuki Iwashima struct request_sock_queue *queue; 984c905dee6SKuniyuki Iwashima struct net *net; 985a594920fSKuniyuki Iwashima int max_syn_ack_retries, qlen, expire = 0, resend = 0; 986a019d6feSArnaldo Carvalho de Melo 987c905dee6SKuniyuki Iwashima if (inet_sk_state_load(sk_listener) != TCP_LISTEN) { 988c905dee6SKuniyuki Iwashima struct sock *nsk; 989c905dee6SKuniyuki Iwashima 990c905dee6SKuniyuki Iwashima nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL); 991c905dee6SKuniyuki Iwashima if (!nsk) 992079096f1SEric Dumazet goto drop; 993a019d6feSArnaldo Carvalho de Melo 994c905dee6SKuniyuki Iwashima nreq = inet_reqsk_clone(req, nsk); 995c905dee6SKuniyuki Iwashima if (!nreq) 996c905dee6SKuniyuki Iwashima goto drop; 997c905dee6SKuniyuki Iwashima 998c905dee6SKuniyuki Iwashima /* The new timer for the cloned req can decrease the 2 999c905dee6SKuniyuki Iwashima * by calling inet_csk_reqsk_queue_drop_and_put(), so 1000c905dee6SKuniyuki Iwashima * hold another count to prevent use-after-free and 1001c905dee6SKuniyuki Iwashima * call reqsk_put() just before return. 1002c905dee6SKuniyuki Iwashima */ 1003c905dee6SKuniyuki Iwashima refcount_set(&nreq->rsk_refcnt, 2 + 1); 1004c905dee6SKuniyuki Iwashima timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED); 1005c905dee6SKuniyuki Iwashima reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req); 1006c905dee6SKuniyuki Iwashima 1007c905dee6SKuniyuki Iwashima req = nreq; 1008c905dee6SKuniyuki Iwashima sk_listener = nsk; 1009c905dee6SKuniyuki Iwashima } 1010c905dee6SKuniyuki Iwashima 1011c905dee6SKuniyuki Iwashima icsk = inet_csk(sk_listener); 1012c905dee6SKuniyuki Iwashima net = sock_net(sk_listener); 10133a037f0fSEric Dumazet max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? : 101420a3b1c0SKuniyuki Iwashima READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); 1015a019d6feSArnaldo Carvalho de Melo /* Normally all the openreqs are young and become mature 1016a019d6feSArnaldo Carvalho de Melo * (i.e. converted to established socket) for first timeout. 1017fd4f2ceaSEric Dumazet * If synack was not acknowledged for 1 second, it means 1018a019d6feSArnaldo Carvalho de Melo * one of the following things: synack was lost, ack was lost, 1019a019d6feSArnaldo Carvalho de Melo * rtt is high or nobody planned to ack (i.e. synflood). 
1020a019d6feSArnaldo Carvalho de Melo * When server is a bit loaded, queue is populated with old 1021a019d6feSArnaldo Carvalho de Melo * open requests, reducing effective size of queue. 1022a019d6feSArnaldo Carvalho de Melo * When server is well loaded, queue size reduces to zero 1023a019d6feSArnaldo Carvalho de Melo * after several minutes of work. It is not synflood, 1024a019d6feSArnaldo Carvalho de Melo * it is normal operation. The solution is pruning 1025a019d6feSArnaldo Carvalho de Melo * too old entries overriding normal timeout, when 1026a019d6feSArnaldo Carvalho de Melo * situation becomes dangerous. 1027a019d6feSArnaldo Carvalho de Melo * 1028a019d6feSArnaldo Carvalho de Melo * Essentially, we reserve half of room for young 1029a019d6feSArnaldo Carvalho de Melo * embryos; and abort old ones without pity, if old 1030a019d6feSArnaldo Carvalho de Melo * ones are about to clog our table. 1031a019d6feSArnaldo Carvalho de Melo */ 1032c905dee6SKuniyuki Iwashima queue = &icsk->icsk_accept_queue; 1033aac065c5SEric Dumazet qlen = reqsk_queue_len(queue); 1034099ecf59SEric Dumazet if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) { 1035aac065c5SEric Dumazet int young = reqsk_queue_len_young(queue) << 1; 1036a019d6feSArnaldo Carvalho de Melo 1037a594920fSKuniyuki Iwashima while (max_syn_ack_retries > 2) { 10382b41fab7SEric Dumazet if (qlen < young) 1039a019d6feSArnaldo Carvalho de Melo break; 1040a594920fSKuniyuki Iwashima max_syn_ack_retries--; 1041a019d6feSArnaldo Carvalho de Melo young <<= 1; 1042a019d6feSArnaldo Carvalho de Melo } 1043a019d6feSArnaldo Carvalho de Melo } 1044a594920fSKuniyuki Iwashima syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept), 10450c3d79bcSJulian Anastasov &expire, &resend); 104642cb80a2SEric Dumazet req->rsk_ops->syn_ack_timeout(req); 10470c3d79bcSJulian Anastasov if (!expire && 10480c3d79bcSJulian Anastasov (!resend || 1049fa76ce73SEric Dumazet !inet_rtx_syn_ack(sk_listener, req) || 10500c3d79bcSJulian Anastasov inet_rsk(req)->acked)) { 1051e6c022a4SEric Dumazet if (req->num_timeout++ == 0) 1052aac065c5SEric Dumazet atomic_dec(&queue->young); 10535903123fSAkhmat Karakotov mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX)); 1054c905dee6SKuniyuki Iwashima 1055c905dee6SKuniyuki Iwashima if (!nreq) 1056c905dee6SKuniyuki Iwashima return; 1057c905dee6SKuniyuki Iwashima 1058c905dee6SKuniyuki Iwashima if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { 1059c905dee6SKuniyuki Iwashima /* delete timer */ 1060c905dee6SKuniyuki Iwashima inet_csk_reqsk_queue_drop(sk_listener, nreq); 106155d444b3SKuniyuki Iwashima goto no_ownership; 1062c905dee6SKuniyuki Iwashima } 1063c905dee6SKuniyuki Iwashima 106455d444b3SKuniyuki Iwashima __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS); 1065c905dee6SKuniyuki Iwashima reqsk_migrate_reset(oreq); 1066c905dee6SKuniyuki Iwashima reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq); 1067c905dee6SKuniyuki Iwashima reqsk_put(oreq); 1068c905dee6SKuniyuki Iwashima 1069c905dee6SKuniyuki Iwashima reqsk_put(nreq); 1070fa76ce73SEric Dumazet return; 1071a019d6feSArnaldo Carvalho de Melo } 1072c905dee6SKuniyuki Iwashima 1073c905dee6SKuniyuki Iwashima /* Even if we can clone the req, we may not need to retransmit any more 1074c905dee6SKuniyuki Iwashima * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another 1075c905dee6SKuniyuki Iwashima * CPU may win the "own_req" race so that inet_ehash_insert() fails.
1076c905dee6SKuniyuki Iwashima */ 1077c905dee6SKuniyuki Iwashima if (nreq) { 107855d444b3SKuniyuki Iwashima __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE); 107955d444b3SKuniyuki Iwashima no_ownership: 1080c905dee6SKuniyuki Iwashima reqsk_migrate_reset(nreq); 1081c905dee6SKuniyuki Iwashima reqsk_queue_removed(queue, nreq); 1082c905dee6SKuniyuki Iwashima __reqsk_free(nreq); 1083c905dee6SKuniyuki Iwashima } 1084c905dee6SKuniyuki Iwashima 108555d444b3SKuniyuki Iwashima drop: 1086c905dee6SKuniyuki Iwashima inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); 1087a019d6feSArnaldo Carvalho de Melo } 1088fa76ce73SEric Dumazet 1089079096f1SEric Dumazet static void reqsk_queue_hash_req(struct request_sock *req, 1090fa76ce73SEric Dumazet unsigned long timeout) 1091fa76ce73SEric Dumazet { 109259f379f9SKees Cook timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); 1093f3438bc7SThomas Gleixner mod_timer(&req->rsk_timer, jiffies + timeout); 109429c68526SEric Dumazet 109501770a16SRicardo Dias inet_ehash_insert(req_to_sk(req), NULL, NULL); 1096fa76ce73SEric Dumazet /* before letting lookups find us, make sure all req fields 1097fa76ce73SEric Dumazet * are committed to memory and refcnt initialized. 1098fa76ce73SEric Dumazet */ 1099fa76ce73SEric Dumazet smp_wmb(); 110041c6d650SReshetova, Elena refcount_set(&req->rsk_refcnt, 2 + 1); 1101a019d6feSArnaldo Carvalho de Melo } 1102079096f1SEric Dumazet 1103079096f1SEric Dumazet void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 1104079096f1SEric Dumazet unsigned long timeout) 1105079096f1SEric Dumazet { 1106079096f1SEric Dumazet reqsk_queue_hash_req(req, timeout); 1107079096f1SEric Dumazet inet_csk_reqsk_queue_added(sk); 1108079096f1SEric Dumazet } 1109079096f1SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 1110a019d6feSArnaldo Carvalho de Melo 111113230593SMat Martineau static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk, 111213230593SMat Martineau const gfp_t priority) 111313230593SMat Martineau { 111413230593SMat Martineau struct inet_connection_sock *icsk = inet_csk(newsk); 111513230593SMat Martineau 111613230593SMat Martineau if (!icsk->icsk_ulp_ops) 111713230593SMat Martineau return; 111813230593SMat Martineau 111913230593SMat Martineau icsk->icsk_ulp_ops->clone(req, newsk, priority); 112013230593SMat Martineau } 112113230593SMat Martineau 1122e56c57d0SEric Dumazet /** 1123e56c57d0SEric Dumazet * inet_csk_clone_lock - clone an inet socket, and lock its clone 1124e56c57d0SEric Dumazet * @sk: the socket to clone 1125e56c57d0SEric Dumazet * @req: request_sock 1126e56c57d0SEric Dumazet * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 1127e56c57d0SEric Dumazet * 1128e56c57d0SEric Dumazet * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) 1129e56c57d0SEric Dumazet */ 1130e56c57d0SEric Dumazet struct sock *inet_csk_clone_lock(const struct sock *sk, 1131e56c57d0SEric Dumazet const struct request_sock *req, 1132dd0fc66fSAl Viro const gfp_t priority) 11339f1d2604SArnaldo Carvalho de Melo { 1134e56c57d0SEric Dumazet struct sock *newsk = sk_clone_lock(sk, priority); 11359f1d2604SArnaldo Carvalho de Melo 113600db4124SIan Morris if (newsk) { 11379f1d2604SArnaldo Carvalho de Melo struct inet_connection_sock *newicsk = inet_csk(newsk); 11389f1d2604SArnaldo Carvalho de Melo 1139563e0bb0SYafang Shao inet_sk_set_state(newsk, TCP_SYN_RECV); 11409f1d2604SArnaldo Carvalho de Melo newicsk->icsk_bind_hash = NULL; 114128044fc1SJoanne Koong 
/**
 * inet_csk_clone_lock - clone an inet socket, and lock its clone
 * @sk: the socket to clone
 * @req: request_sock
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *inet_csk_clone_lock(const struct sock *sk,
				 const struct request_sock *req,
				 const gfp_t priority)
{
	struct sock *newsk = sk_clone_lock(sk, priority);

	if (newsk) {
		struct inet_connection_sock *newicsk = inet_csk(newsk);

		inet_sk_set_state(newsk, TCP_SYN_RECV);
		newicsk->icsk_bind_hash = NULL;
		newicsk->icsk_bind2_hash = NULL;

		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);

		/* listeners have SOCK_RCU_FREE, not the children */
		sock_reset_flag(newsk, SOCK_RCU_FREE);

		inet_sk(newsk)->mc_list = NULL;

		newsk->sk_mark = inet_rsk(req)->ir_mark;
		atomic64_set(&newsk->sk_cookie,
			     atomic64_read(&inet_rsk(req)->ir_cookie));

		newicsk->icsk_retransmits = 0;
		newicsk->icsk_backoff = 0;
		newicsk->icsk_probes_out = 0;
		newicsk->icsk_probes_tstamp = 0;

		/* Deinitialize accept_queue to trap illegal accesses. */
		memset(&newicsk->icsk_accept_queue, 0,
		       sizeof(newicsk->icsk_accept_queue));

		inet_clone_ulp(req, newsk, priority);

		security_inet_csk_clone(newsk, req);
	}
	return newsk;
}
EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
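/* Typical call path (for orientation): tcp_create_openreq_child() and its
 * DCCP counterpart invoke inet_csk_clone_lock() from softirq context with
 * GFP_ATOMIC while minting a child for a completed handshake, then drop
 * the lock with bh_unlock_sock() as the comment above requires.
 */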
/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all. Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 */
void inet_csk_destroy_sock(struct sock *sk)
{
	WARN_ON(sk->sk_state != TCP_CLOSE);
	WARN_ON(!sock_flag(sk, SOCK_DEAD));

	/* It cannot be in the hash table! */
	WARN_ON(!sk_unhashed(sk));

	/* If inet_sk(sk)->inet_num is nonzero, the socket must be bound */
	WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);

	sk->sk_prot->destroy(sk);

	sk_stream_kill_queues(sk);

	xfrm_sk_free_policy(sk);

	this_cpu_dec(*sk->sk_prot->orphan_count);

	sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);

/* This function forces closure of a socket after the call to
 * tcp/dccp_create_openreq_child().
 */
void inet_csk_prepare_forced_close(struct sock *sk)
	__releases(&sk->sk_lock.slock)
{
	/* sk_clone_lock locked the socket and set refcnt to 2 */
	bh_unlock_sock(sk);
	sock_put(sk);
	inet_csk_prepare_for_destroy_sock(sk);
	inet_sk(sk)->inet_num = 0;
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);

static int inet_ulp_can_listen(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
		return -EINVAL;

	return 0;
}
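/* The restriction exists because a child minted by inet_csk_clone_lock()
 * could not inherit un-clonable ULP state; kTLS, presumably the motivating
 * case, registers icsk_ulp_ops without a ->clone() hook, so such a socket
 * cannot enter LISTEN.
 */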
int inet_csk_listen_start(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err;

	err = inet_ulp_can_listen(sk);
	if (unlikely(err))
		return err;

	reqsk_queue_alloc(&icsk->icsk_accept_queue);

	sk->sk_ack_backlog = 0;
	inet_csk_delack_init(sk);

	/* There is a race window here: we announce ourselves listening,
	 * but this transition is still not validated by get_port().
	 * It is OK, because this socket enters the hash table only
	 * after validation is complete.
	 */
	inet_sk_state_store(sk, TCP_LISTEN);
	err = sk->sk_prot->get_port(sk, inet->inet_num);
	if (!err) {
		inet->inet_sport = htons(inet->inet_num);

		sk_dst_reset(sk);
		err = sk->sk_prot->hash(sk);

		if (likely(!err))
			return 0;
	}

	inet_sk_set_state(sk, TCP_CLOSE);
	return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
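/* Usage sketch (assuming the AF_INET socket layer): inet_listen() invokes
 * inet_csk_listen_start() when a bound TCP socket transitions to LISTEN;
 * on get_port()/hash() failure the state is rolled back to TCP_CLOSE and
 * the error propagates to the listen() system call.
 */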
static void inet_child_forget(struct sock *sk, struct request_sock *req,
			      struct sock *child)
{
	sk->sk_prot->disconnect(child, O_NONBLOCK);

	sock_orphan(child);

	this_cpu_inc(*sk->sk_prot->orphan_count);

	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
		BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
		BUG_ON(sk != req->rsk_listener);

		/* Paranoid, to prevent race condition if
		 * an inbound pkt destined for child is
		 * blocked by sock lock in tcp_v4_rcv().
		 * Also to satisfy an assertion in
		 * tcp_v4_destroy_sock().
		 */
		RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
	}
	inet_csk_destroy_sock(child);
}

struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
				      struct request_sock *req,
				      struct sock *child)
{
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;

	spin_lock(&queue->rskq_lock);
	if (unlikely(sk->sk_state != TCP_LISTEN)) {
		inet_child_forget(sk, req, child);
		child = NULL;
	} else {
		req->sk = child;
		req->dl_next = NULL;
		if (queue->rskq_accept_head == NULL)
			WRITE_ONCE(queue->rskq_accept_head, req);
		else
			queue->rskq_accept_tail->dl_next = req;
		queue->rskq_accept_tail = req;
		sk_acceptq_added(sk);
	}
	spin_unlock(&queue->rskq_lock);
	return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
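/* The accept queue is a simple singly linked FIFO: enqueueing at
 * rskq_accept_tail above is O(1), and inet_csk_accept() later pops from
 * rskq_accept_head via reqsk_queue_remove(). The WRITE_ONCE() pairs with
 * lockless READ_ONCE() checks of rskq_accept_head, e.g. in
 * reqsk_queue_empty().
 */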
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
					 struct request_sock *req, bool own_req)
{
	if (own_req) {
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);

		if (sk != req->rsk_listener) {
			/* another listening sk has been selected,
			 * migrate the req to it.
			 */
			struct request_sock *nreq;

			/* hold a refcnt for the nreq->rsk_listener
			 * which is assigned in inet_reqsk_clone()
			 */
			sock_hold(sk);
			nreq = inet_reqsk_clone(req, sk);
			if (!nreq) {
				inet_child_forget(sk, req, child);
				goto child_put;
			}

			refcount_set(&nreq->rsk_refcnt, 1);
			if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
				__NET_INC_STATS(sock_net(sk),
						LINUX_MIB_TCPMIGRATEREQSUCCESS);
				reqsk_migrate_reset(req);
				reqsk_put(req);
				return child;
			}

			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPMIGRATEREQFAILURE);
			reqsk_migrate_reset(nreq);
			__reqsk_free(nreq);
		} else if (inet_csk_reqsk_queue_add(sk, req, child)) {
			return child;
		}
	}
	/* Too bad, another child took ownership of the request, undo. */
child_put:
	bh_unlock_sock(child);
	sock_put(child);
	return NULL;
}
EXPORT_SYMBOL(inet_csk_complete_hashdance);
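/* Roughly: a non-NULL return hands back the locked child, already parked on
 * a listener's accept queue; NULL means this CPU lost the "own_req" race or
 * the enqueue failed, and the child has been unlocked and released above.
 */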
/*
 * This routine closes sockets which have been at least partially
 * opened, but not yet accepted.
 */
void inet_csk_listen_stop(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *next, *req;

	/* Per the specs, it would be better either to send a FIN
	 * (and enter FIN-WAIT-1; that is the normal close)
	 * or to send an active reset (abort).
	 * Certainly, that is pretty dangerous during a synflood, but it is
	 * a bad justification for our negligence 8)
	 * To be honest, we are not able to implement either
	 * of the variants now.			--ANK
	 */
	while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
		struct sock *child = req->sk, *nsk;
		struct request_sock *nreq;

		local_bh_disable();
		bh_lock_sock(child);
		WARN_ON(sock_owned_by_user(child));
		sock_hold(child);

		nsk = reuseport_migrate_sock(sk, child, NULL);
		if (nsk) {
			nreq = inet_reqsk_clone(req, nsk);
			if (nreq) {
				refcount_set(&nreq->rsk_refcnt, 1);

				if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
					__NET_INC_STATS(sock_net(nsk),
							LINUX_MIB_TCPMIGRATEREQSUCCESS);
					reqsk_migrate_reset(req);
				} else {
					__NET_INC_STATS(sock_net(nsk),
							LINUX_MIB_TCPMIGRATEREQFAILURE);
					reqsk_migrate_reset(nreq);
					__reqsk_free(nreq);
				}

				/* inet_csk_reqsk_queue_add() has already
				 * called inet_child_forget() on the failure case.
				 */
				goto skip_child_forget;
			}
		}

		inet_child_forget(sk, req, child);
skip_child_forget:
		reqsk_put(req);
		bh_unlock_sock(child);
		local_bh_enable();
		sock_put(child);

		cond_resched();
	}
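	/* Leftover from TCP Fast Open: requests whose TFO child was reset
	 * before being accepted are parked on fastopenq.rskq_rst_head (see
	 * reqsk_fastopen_remove()) rather than on the accept queue, so they
	 * are reaped separately below.
	 */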
	if (queue->fastopenq.rskq_rst_head) {
		/* Free all the reqs queued in rskq_rst_head. */
		spin_lock_bh(&queue->fastopenq.lock);
		req = queue->fastopenq.rskq_rst_head;
		queue->fastopenq.rskq_rst_head = NULL;
		spin_unlock_bh(&queue->fastopenq.lock);
		while (req != NULL) {
			next = req->dl_next;
			reqsk_put(req);
			req = next;
		}
	}
	WARN_ON_ONCE(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);

void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
	const struct inet_sock *inet = inet_sk(sk);

	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = inet->inet_daddr;
	sin->sin_port = inet->inet_dport;
}
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);

static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;
	struct flowi4 *fl4;
	struct rtable *rt;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	fl4 = &fl->u.ip4;
	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
				   inet->inet_saddr, inet->inet_dport,
				   inet->inet_sport, sk->sk_protocol,
				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
	if (IS_ERR(rt))
		rt = NULL;
	if (rt)
		sk_setup_caps(sk, &rt->dst);
	rcu_read_unlock();

	return &rt->dst;
}
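/* For orientation: tcp_v4_mtu_reduced() calls inet_csk_update_pmtu() when an
 * ICMP "fragmentation needed" indication arrives for a connected socket; a
 * NULL return tells the caller that no usable route could be re-established.
 */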
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);
	struct inet_sock *inet = inet_sk(sk);

	if (!dst) {
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
		if (!dst)
			goto out;
	}
	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);

	dst = __sk_dst_check(sk, 0);
	if (!dst)
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
out:
	return dst;
}
EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);