// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Support for INET connection oriented protocols.
 *
 * Authors:	See the TCP sources
 */

#include <linux/module.h>
#include <linux/jhash.h>

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
#include <net/addrconf.h>

#if IS_ENABLED(CONFIG_IPV6)
/* match_sk*_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses
 *				if IPv6 only, and any IPv4 addresses
 *				if not IPv6 only
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *				IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
 *				and 0.0.0.0 equals to 0.0.0.0 only
 */
static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
				 const struct in6_addr *sk2_rcv_saddr6,
				 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk1_ipv6only, bool sk2_ipv6only,
				 bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
	int addr_type2 = sk2_rcv_saddr6 ?
			 ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;

	/* if both are mapped, treat as IPv4 */
	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
		if (!sk2_ipv6only) {
			if (sk1_rcv_saddr == sk2_rcv_saddr)
				return true;
			return (match_sk1_wildcard && !sk1_rcv_saddr) ||
				(match_sk2_wildcard && !sk2_rcv_saddr);
		}
		return false;
	}

	if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
		return true;

	if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard &&
	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
		return true;

	if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard &&
	    !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
		return true;

	if (sk2_rcv_saddr6 &&
	    ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
		return true;

	return false;
}
#endif

/* match_sk*_wildcard == true:  0.0.0.0 equals to any IPv4 addresses
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *				0.0.0.0 only equals to 0.0.0.0
 */
static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk2_ipv6only, bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	if (!sk2_ipv6only) {
		if (sk1_rcv_saddr == sk2_rcv_saddr)
			return true;
		return (match_sk1_wildcard && !sk1_rcv_saddr) ||
			(match_sk2_wildcard && !sk2_rcv_saddr);
	}
	return false;
}

bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
			  bool match_wildcard)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr,
					    inet6_rcv_saddr(sk2),
					    sk->sk_rcv_saddr,
					    sk2->sk_rcv_saddr,
					    ipv6_only_sock(sk),
					    ipv6_only_sock(sk2),
					    match_wildcard,
					    match_wildcard);
#endif
	return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
				    ipv6_only_sock(sk2), match_wildcard,
				    match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);
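/* Worked example (editor's illustration, not from the original source):
 * with both match_sk*_wildcard flags true, a wildcard address matches any
 * concrete address of the same family, so for a non-zero __be32 ip:
 *
 *	ipv4_rcv_saddr_equal(htonl(INADDR_ANY), ip, false, true, true)
 *		returns true  (0.0.0.0 matches any IPv4 address)
 *	ipv4_rcv_saddr_equal(htonl(INADDR_ANY), ip, false, false, false)
 *		returns false (exact equality required)
 *
 * bind() conflict checks use the wildcard form; the fastreuseport cache
 * (see sk_reuseport_match() below) uses the asymmetric true/false form.
 */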
bool inet_rcv_saddr_any(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
#endif
	return !sk->sk_rcv_saddr;
}

void inet_get_local_port_range(struct net *net, int *low, int *high)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);

		*low = net->ipv4.ip_local_ports.range[0];
		*high = net->ipv4.ip_local_ports.range[1];
	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
}
EXPORT_SYMBOL(inet_get_local_port_range);

static int inet_csk_bind_conflict(const struct sock *sk,
				  const struct inet_bind_bucket *tb,
				  bool relax, bool reuseport_ok)
{
	struct sock *sk2;
	bool reuse = sk->sk_reuse;
	bool reuseport = !!sk->sk_reuseport;
	kuid_t uid = sock_i_uid((struct sock *)sk);

	/*
	 * Unlike other sk lookup places we do not check
	 * for sk_net here, since _all_ the socks listed
	 * in tb->owners list belong to the same net - the
	 * one this bucket belongs to.
	 */

	sk_for_each_bound(sk2, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
			if (reuse && sk2->sk_reuse &&
			    sk2->sk_state != TCP_LISTEN) {
				if ((!relax ||
				     (!reuseport_ok &&
				      reuseport && sk2->sk_reuseport &&
				      !rcu_access_pointer(sk->sk_reuseport_cb) &&
				      (sk2->sk_state == TCP_TIME_WAIT ||
				       uid_eq(uid, sock_i_uid(sk2))))) &&
				    inet_rcv_saddr_equal(sk, sk2, true))
					break;
			} else if (!reuseport_ok ||
				   !reuseport || !sk2->sk_reuseport ||
				   rcu_access_pointer(sk->sk_reuseport_cb) ||
				   (sk2->sk_state != TCP_TIME_WAIT &&
				    !uid_eq(uid, sock_i_uid(sk2)))) {
				if (inet_rcv_saddr_equal(sk, sk2, true))
					break;
			}
		}
	}
	return sk2 != NULL;
}
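/* Example (editor's sketch, not part of this file): the conflict rules
 * above are what let two processes of the same user share a listening
 * port, provided both set SO_REUSEPORT before bind():
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	int one = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 128);
 *
 * If either socket lacks SO_REUSEPORT (or the UIDs differ),
 * inet_csk_bind_conflict() finds a conflict and the second bind()
 * fails with EADDRINUSE.
 */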
/*
 * Find an open port number for the socket.  Returns with the
 * inet_bind_hashbucket lock held.
 */
static struct inet_bind_hashbucket *
inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	int port = 0;
	struct inet_bind_hashbucket *head;
	struct net *net = sock_net(sk);
	bool relax = false;
	int i, low, high, attempt_half;
	struct inet_bind_bucket *tb;
	u32 remaining, offset;
	int l3mdev;

	l3mdev = inet_sk_bound_l3mdev(sk);
ports_exhausted:
	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
	inet_get_local_port_range(net, &low, &high);
	high++; /* [32768, 60999] -> [32768, 61000[ */
	if (high - low < 4)
		attempt_half = 0;
	if (attempt_half) {
		int half = low + (((high - low) >> 2) << 1);

		if (attempt_half == 1)
			high = half;
		else
			low = half;
	}
	remaining = high - low;
	if (likely(remaining > 1))
		remaining &= ~1U;

	offset = prandom_u32() % remaining;
	/* __inet_hash_connect() favors ports having @low parity
	 * We do the opposite to not pollute connect() users.
	 */
	offset |= 1U;
other_parity_scan:
	port = low + offset;
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
			    tb->port == port) {
				if (!inet_csk_bind_conflict(sk, tb, relax, false))
					goto success;
				goto next_port;
			}
		tb = NULL;
		goto success;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}

	offset--;
	if (!(offset & 1))
		goto other_parity_scan;

	if (attempt_half == 1) {
		/* OK we now try the upper half of the range */
		attempt_half = 2;
		goto other_half_scan;
	}

	if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
		/* We still have a chance to connect to different destinations */
		relax = true;
		goto ports_exhausted;
	}
	return NULL;
success:
	*port_ret = port;
	*tb_ret = tb;
	return head;
}
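/* Worked example of the scan above (editor's illustration): with the
 * default range [32768, 60999], "remaining" is 28232 after rounding down
 * to an even count.  A random offset of, say, 1000 becomes 1001 after
 * "offset |= 1U", so the first pass probes the odd ports 33769, 33771,
 * ..., subtracting "remaining" to wrap once the candidate reaches "high".
 * Only when every odd port is taken does "offset--" flip the parity and
 * start the even-port scan, keeping the even ports free for
 * __inet_hash_connect().
 */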
static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
				     struct sock *sk)
{
	kuid_t uid = sock_i_uid(sk);

	if (tb->fastreuseport <= 0)
		return 0;
	if (!sk->sk_reuseport)
		return 0;
	if (rcu_access_pointer(sk->sk_reuseport_cb))
		return 0;
	if (!uid_eq(tb->fastuid, uid))
		return 0;
	/* We only need to check the rcv_saddr if this tb was once marked
	 * without fastreuseport and then was reset, as we can only know that
	 * the fast_*rcv_saddr doesn't have any conflicts with the socks on the
	 * owners list.
	 */
	if (tb->fastreuseport == FASTREUSEPORT_ANY)
		return 1;
#if IS_ENABLED(CONFIG_IPV6)
	if (tb->fast_sk_family == AF_INET6)
		return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
					    inet6_rcv_saddr(sk),
					    tb->fast_rcv_saddr,
					    sk->sk_rcv_saddr,
					    tb->fast_ipv6_only,
					    ipv6_only_sock(sk), true, false);
#endif
	return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
				    ipv6_only_sock(sk), true, false);
}

void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
			       struct sock *sk)
{
	kuid_t uid = sock_i_uid(sk);
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;

	if (hlist_empty(&tb->owners)) {
		tb->fastreuse = reuse;
		if (sk->sk_reuseport) {
			tb->fastreuseport = FASTREUSEPORT_ANY;
			tb->fastuid = uid;
			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
			tb->fast_ipv6_only = ipv6_only_sock(sk);
			tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
			tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
		} else {
			tb->fastreuseport = 0;
		}
	} else {
		if (!reuse)
			tb->fastreuse = 0;
		if (sk->sk_reuseport) {
			/* We didn't match or we don't have fastreuseport set on
			 * the tb, but we have sk_reuseport set on this socket
			 * and we know that there are no bind conflicts with
			 * this socket in this tb, so reset our tb's reuseport
			 * settings so that any subsequent sockets that match
			 * our current socket will be put on the fast path.
			 *
			 * If we reset we need to set FASTREUSEPORT_STRICT so we
			 * do extra checking for all subsequent sk_reuseport
			 * socks.
			 */
			if (!sk_reuseport_match(tb, sk)) {
				tb->fastreuseport = FASTREUSEPORT_STRICT;
				tb->fastuid = uid;
				tb->fast_rcv_saddr = sk->sk_rcv_saddr;
				tb->fast_ipv6_only = ipv6_only_sock(sk);
				tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
				tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
			}
		} else {
			tb->fastreuseport = 0;
		}
	}
}

/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 * We try to allocate an odd port (and leave even ports for connect())
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	int ret = 1, port = snum;
	struct inet_bind_hashbucket *head;
	struct net *net = sock_net(sk);
	struct inet_bind_bucket *tb = NULL;
	int l3mdev;

	l3mdev = inet_sk_bound_l3mdev(sk);

	if (!port) {
		head = inet_csk_find_open_port(sk, &tb, &port);
		if (!head)
			return ret;
		if (!tb)
			goto tb_not_found;
		goto success;
	}
	head = &hinfo->bhash[inet_bhashfn(net, port,
					  hinfo->bhash_size)];
	spin_lock_bh(&head->lock);
	inet_bind_bucket_for_each(tb, &head->chain)
		if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
		    tb->port == port)
			goto tb_found;
tb_not_found:
	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
				     net, head, port, l3mdev);
	if (!tb)
		goto fail_unlock;
tb_found:
	if (!hlist_empty(&tb->owners)) {
		if (sk->sk_reuse == SK_FORCE_REUSE)
			goto success;

		if ((tb->fastreuse > 0 && reuse) ||
		    sk_reuseport_match(tb, sk))
			goto success;
		if (inet_csk_bind_conflict(sk, tb, true, true))
			goto fail_unlock;
	}
success:
	inet_csk_update_fastreuse(tb, sk);

	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, port);
	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
	ret = 0;

fail_unlock:
	spin_unlock_bh(&head->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);
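/* Example (editor's sketch): binding to port 0 from userspace reaches the
 * !port branch above and ends up in inet_csk_find_open_port(); the port
 * the kernel picked can be read back with getsockname():
 *
 *	struct sockaddr_in a = { .sin_family = AF_INET };
 *	socklen_t len = sizeof(a);
 *
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *	getsockname(fd, (struct sockaddr *)&a, &len);
 *	printf("bound to port %d\n", ntohs(a.sin_port));
 */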
/*
 * Wait for an incoming connection, avoid race conditions. This must be called
 * with the socket locked.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	DEFINE_WAIT(wait);
	int err;

	/*
	 * True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	 *
	 * Subtle issue: "add_wait_queue_exclusive()" will be added
	 * after any current non-exclusive waiters, and we know that
	 * it will always _stay_ after any new non-exclusive waiters
	 * because all non-exclusive waiters are added at the
	 * beginning of the wait-queue. As such, it's ok to "drop"
	 * our exclusiveness temporarily when we get woken up without
	 * having to remove and re-insert us on the wait queue.
	 */
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		release_sock(sk);
		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
			timeo = schedule_timeout(timeo);
		sched_annotate_sleep();
		lock_sock(sk);
		err = 0;
		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
			break;
		err = -EINVAL;
		if (sk->sk_state != TCP_LISTEN)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/*
 * This will accept the next outstanding connection.
 */
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *req;
	struct sock *newsk;
	int error;

	lock_sock(sk);

	/* We need to make sure that this socket is listening,
	 * and that it has something pending.
	 */
	error = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out_err;

	/* Find already established connection */
	if (reqsk_queue_empty(queue)) {
		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

		/* If this is a non blocking socket don't sleep */
		error = -EAGAIN;
		if (!timeo)
			goto out_err;

		error = inet_csk_wait_for_connect(sk, timeo);
		if (error)
			goto out_err;
	}
	req = reqsk_queue_remove(queue, sk);
	newsk = req->sk;

	if (sk->sk_protocol == IPPROTO_TCP &&
	    tcp_rsk(req)->tfo_listener) {
		spin_lock_bh(&queue->fastopenq.lock);
		if (tcp_rsk(req)->tfo_listener) {
			/* We are still waiting for the final ACK from 3WHS
			 * so can't free req now. Instead, we set req->sk to
			 * NULL to signify that the child socket is taken
			 * so reqsk_fastopen_remove() will free the req
			 * when 3WHS finishes (or is aborted).
			 */
			req->sk = NULL;
			req = NULL;
		}
		spin_unlock_bh(&queue->fastopenq.lock);
	}

out:
	release_sock(sk);
	if (newsk && mem_cgroup_sockets_enabled) {
		int amt;

		/* atomically get the memory usage, set and charge the
		 * newsk->sk_memcg.
		 */
		lock_sock(newsk);

		/* The socket has not been accepted yet, no need to look at
		 * newsk->sk_wmem_queued.
		 */
		amt = sk_mem_pages(newsk->sk_forward_alloc +
				   atomic_read(&newsk->sk_rmem_alloc));
		mem_cgroup_sk_alloc(newsk);
		if (newsk->sk_memcg && amt)
			mem_cgroup_charge_skmem(newsk->sk_memcg, amt);

		release_sock(newsk);
	}
	if (req)
		reqsk_put(req);
	return newsk;
out_err:
	newsk = NULL;
	req = NULL;
	*err = error;
	goto out;
}
EXPORT_SYMBOL(inet_csk_accept);

/*
 * Using different timers for retransmit, delayed acks and probes
 * We may wish to use just one timer maintaining a list of expire jiffies
 * to optimize.
 */
void inet_csk_init_xmit_timers(struct sock *sk,
			       void (*retransmit_handler)(struct timer_list *t),
			       void (*delack_handler)(struct timer_list *t),
			       void (*keepalive_handler)(struct timer_list *t))
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
	timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
	timer_setup(&sk->sk_timer, keepalive_handler, 0);
	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);

void inet_csk_clear_xmit_timers(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_pending = icsk->icsk_ack.pending = 0;

	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
	sk_stop_timer(sk, &icsk->icsk_delack_timer);
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);

void inet_csk_delete_keepalive_timer(struct sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);

void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
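/* Example (editor's illustration): TCP arms the keepalive timer through
 * the helper above; tcp_set_keepalive() does, in essence,
 *
 *	inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
 *
 * where keepalive_time_when() picks the per-socket TCP_KEEPIDLE value or
 * the net.ipv4.tcp_keepalive_time sysctl default (2 hours).
 */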
struct dst_entry *inet_csk_route_req(const struct sock *sk,
				     struct flowi4 *fl4,
				     const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct ip_options_rcu *opt;
	struct rtable *rt;

	rcu_read_lock();
	opt = rcu_dereference(ireq->ireq_opt);

	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	rcu_read_unlock();
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	rcu_read_unlock();
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);

struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
					    struct sock *newsk,
					    const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct inet_sock *newinet = inet_sk(newsk);
	struct ip_options_rcu *opt;
	struct flowi4 *fl4;
	struct rtable *rt;

	opt = rcu_dereference(ireq->ireq_opt);
	fl4 = &newinet->cork.fl.u.ip4;

	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);

/* Decide when to expire the request and when to resend SYN-ACK */
static void syn_ack_recalc(struct request_sock *req,
			   const int max_syn_ack_retries,
			   const u8 rskq_defer_accept,
			   int *expire, int *resend)
{
	if (!rskq_defer_accept) {
		*expire = req->num_timeout >= max_syn_ack_retries;
		*resend = 1;
		return;
	}
	*expire = req->num_timeout >= max_syn_ack_retries &&
		  (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept);
	/* Do not resend while waiting for data after ACK,
	 * start to resend on end of deferring period to give
	 * last chance for data or ACK to create established socket.
	 */
	*resend = !inet_rsk(req)->acked ||
		  req->num_timeout >= rskq_defer_accept - 1;
}

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
	int err = req->rsk_ops->rtx_syn_ack(parent, req);

	if (!err)
		req->num_retrans++;
	return err;
}
EXPORT_SYMBOL(inet_rtx_syn_ack);
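/* Example (editor's sketch): the rskq_defer_accept handling above backs
 * the TCP_DEFER_ACCEPT socket option.  A listener that only wants
 * connections that actually send data can defer the accept:
 *
 *	int secs = 5;
 *
 *	setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &secs, sizeof(secs));
 *
 * The seconds value is converted to an equivalent retransmission count,
 * which is what syn_ack_recalc() compares req->num_timeout against.
 */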
/* return true if req was found in the ehash table */
static bool reqsk_queue_unlink(struct request_sock *req)
{
	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
	bool found = false;

	if (sk_hashed(req_to_sk(req))) {
		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);

		spin_lock(lock);
		found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
		spin_unlock(lock);
	}
	if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
		reqsk_put(req);
	return found;
}

void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
	if (reqsk_queue_unlink(req)) {
		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
		reqsk_put(req);
	}
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);

void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
{
	inet_csk_reqsk_queue_drop(sk, req);
	reqsk_put(req);
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);

static void reqsk_timer_handler(struct timer_list *t)
{
	struct request_sock *req = from_timer(req, t, rsk_timer);
	struct sock *sk_listener = req->rsk_listener;
	struct net *net = sock_net(sk_listener);
	struct inet_connection_sock *icsk = inet_csk(sk_listener);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	int max_syn_ack_retries, qlen, expire = 0, resend = 0;

	if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
		goto drop;

	max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
	/* Normally all the openreqs are young and become mature
	 * (i.e. converted to established sockets) by the first timeout.
	 * If the synack was not acknowledged for 1 second, it means
	 * one of the following: the synack was lost, the ack was lost,
	 * rtt is high, or nobody planned to ack (i.e. synflood).
	 * When the server is a bit loaded, the queue is populated with old
	 * open requests, reducing the effective size of the queue.
	 * When the server is well loaded, the queue size reduces to zero
	 * after several minutes of work. This is not a synflood,
	 * it is normal operation. The solution is pruning
	 * too-old entries, overriding the normal timeout, when
	 * the situation becomes dangerous.
	 *
	 * Essentially, we reserve half of the room for young
	 * embryos; and abort old ones without pity, if old
	 * ones are about to clog our table.
	 */
	qlen = reqsk_queue_len(queue);
	if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
		int young = reqsk_queue_len_young(queue) << 1;

		while (max_syn_ack_retries > 2) {
			if (qlen < young)
				break;
			max_syn_ack_retries--;
			young <<= 1;
		}
	}
	syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept),
		       &expire, &resend);
	req->rsk_ops->syn_ack_timeout(req);
	if (!expire &&
	    (!resend ||
	     !inet_rtx_syn_ack(sk_listener, req) ||
	     inet_rsk(req)->acked)) {
		unsigned long timeo;

		if (req->num_timeout++ == 0)
			atomic_dec(&queue->young);
		timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
		mod_timer(&req->rsk_timer, jiffies + timeo);
		return;
	}
drop:
	inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
}
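/* Worked example (editor's illustration): with TCP_TIMEOUT_INIT == 1s,
 * the rearm above yields the classic exponential SYN-ACK backoff - after
 * the initial 1s timeout, req->num_timeout of 1, 2, 3, ... gives 2s, 4s,
 * 8s, ... capped at TCP_RTO_MAX.  With the default
 * net.ipv4.tcp_synack_retries of 5, a handshake that never completes is
 * dropped after 1 + 2 + 4 + 8 + 16 + 32 = 63 seconds.
 */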
static void reqsk_queue_hash_req(struct request_sock *req,
				 unsigned long timeout)
{
	timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
	mod_timer(&req->rsk_timer, jiffies + timeout);

	inet_ehash_insert(req_to_sk(req), NULL, NULL);
	/* before letting lookups find us, make sure all req fields
	 * are committed to memory and refcnt initialized.
	 */
	smp_wmb();
	refcount_set(&req->rsk_refcnt, 2 + 1);
}

void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
				   unsigned long timeout)
{
	reqsk_queue_hash_req(req, timeout);
	inet_csk_reqsk_queue_added(sk);
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);

static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
			   const gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(newsk);

	if (!icsk->icsk_ulp_ops)
		return;

	if (icsk->icsk_ulp_ops->clone)
		icsk->icsk_ulp_ops->clone(req, newsk, priority);
}

/**
 * inet_csk_clone_lock - clone an inet socket, and lock its clone
 * @sk: the socket to clone
 * @req: request_sock
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *inet_csk_clone_lock(const struct sock *sk,
				 const struct request_sock *req,
				 const gfp_t priority)
{
	struct sock *newsk = sk_clone_lock(sk, priority);

	if (newsk) {
		struct inet_connection_sock *newicsk = inet_csk(newsk);

		inet_sk_set_state(newsk, TCP_SYN_RECV);
		newicsk->icsk_bind_hash = NULL;

		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);

		/* listeners have SOCK_RCU_FREE, not the children */
		sock_reset_flag(newsk, SOCK_RCU_FREE);

		inet_sk(newsk)->mc_list = NULL;

		newsk->sk_mark = inet_rsk(req)->ir_mark;
		atomic64_set(&newsk->sk_cookie,
			     atomic64_read(&inet_rsk(req)->ir_cookie));

		newicsk->icsk_retransmits = 0;
		newicsk->icsk_backoff = 0;
		newicsk->icsk_probes_out = 0;

		/* Deinitialize accept_queue to trap illegal accesses. */
		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));

		inet_clone_ulp(req, newsk, priority);

		security_inet_csk_clone(newsk, req);
	}
	return newsk;
}
EXPORT_SYMBOL_GPL(inet_csk_clone_lock);

/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all.  Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 */
void inet_csk_destroy_sock(struct sock *sk)
{
	WARN_ON(sk->sk_state != TCP_CLOSE);
	WARN_ON(!sock_flag(sk, SOCK_DEAD));

	/* It cannot be in hash table! */
	WARN_ON(!sk_unhashed(sk));

	/* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
	WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);

	sk->sk_prot->destroy(sk);

	sk_stream_kill_queues(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	percpu_counter_dec(sk->sk_prot->orphan_count);

	sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);

/* This function allows us to force a closure of a socket after the call to
 * tcp/dccp_create_openreq_child().
 */
void inet_csk_prepare_forced_close(struct sock *sk)
	__releases(&sk->sk_lock.slock)
{
	/* sk_clone_lock locked the socket and set refcnt to 2 */
	bh_unlock_sock(sk);
	sock_put(sk);
	inet_csk_prepare_for_destroy_sock(sk);
	inet_sk(sk)->inet_num = 0;
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);

int inet_csk_listen_start(struct sock *sk, int backlog)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = -EADDRINUSE;

	reqsk_queue_alloc(&icsk->icsk_accept_queue);

	sk->sk_ack_backlog = 0;
	inet_csk_delack_init(sk);

	/* There is a race window here: we announce ourselves listening,
	 * but this transition is still not validated by get_port().
	 * It is OK, because this socket enters the hash table only
	 * after validation is complete.
	 */
	inet_sk_state_store(sk, TCP_LISTEN);
	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
		inet->inet_sport = htons(inet->inet_num);

		sk_dst_reset(sk);
		err = sk->sk_prot->hash(sk);

		if (likely(!err))
			return 0;
	}

	inet_sk_set_state(sk, TCP_CLOSE);
	return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
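/* Example (editor's sketch): inet_csk_listen_start() is the protocol-side
 * half of the listen() system call.  The familiar userspace sequence
 *
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, SOMAXCONN);
 *
 * reaches this function via inet_listen(), which validates the socket
 * state and backlog before the TCP_LISTEN transition above.
 */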
static void inet_child_forget(struct sock *sk, struct request_sock *req,
			      struct sock *child)
{
	sk->sk_prot->disconnect(child, O_NONBLOCK);

	sock_orphan(child);

	percpu_counter_inc(sk->sk_prot->orphan_count);

	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
		BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
		BUG_ON(sk != req->rsk_listener);

		/* Paranoid, to prevent race condition if
		 * an inbound pkt destined for child is
		 * blocked by sock lock in tcp_v4_rcv().
		 * Also to satisfy an assertion in
		 * tcp_v4_destroy_sock().
		 */
		RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
	}
	inet_csk_destroy_sock(child);
}

struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
				      struct request_sock *req,
				      struct sock *child)
{
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;

	spin_lock(&queue->rskq_lock);
	if (unlikely(sk->sk_state != TCP_LISTEN)) {
		inet_child_forget(sk, req, child);
		child = NULL;
	} else {
		req->sk = child;
		req->dl_next = NULL;
		if (queue->rskq_accept_head == NULL)
			WRITE_ONCE(queue->rskq_accept_head, req);
		else
			queue->rskq_accept_tail->dl_next = req;
		queue->rskq_accept_tail = req;
		sk_acceptq_added(sk);
	}
	spin_unlock(&queue->rskq_lock);
	return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);

struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
					 struct request_sock *req, bool own_req)
{
	if (own_req) {
		inet_csk_reqsk_queue_drop(sk, req);
		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
		if (inet_csk_reqsk_queue_add(sk, req, child))
			return child;
	}
	/* Too bad, another child took ownership of the request, undo. */
	bh_unlock_sock(child);
	sock_put(child);
	return NULL;
}
EXPORT_SYMBOL(inet_csk_complete_hashdance);

/*
 * This routine closes sockets which have been at least partially
 * opened, but not yet accepted.
 */
void inet_csk_listen_stop(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *next, *req;

	/* Following specs, it would be better either to send FIN
	 * (and enter FIN-WAIT-1, it is normal close)
	 * or to send active reset (abort).
	 * Certainly, it is pretty dangerous while synflood, but it is
	 * bad justification for our negligence 8)
	 * To be honest, we are not able to make either
	 * of the variants now.			--ANK
	 */
	while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
		struct sock *child = req->sk;

		local_bh_disable();
		bh_lock_sock(child);
		WARN_ON(sock_owned_by_user(child));
		sock_hold(child);

		inet_child_forget(sk, req, child);
		reqsk_put(req);
		bh_unlock_sock(child);
		local_bh_enable();
		sock_put(child);

		cond_resched();
	}
	if (queue->fastopenq.rskq_rst_head) {
		/* Free all the reqs queued in rskq_rst_head. */
		spin_lock_bh(&queue->fastopenq.lock);
		req = queue->fastopenq.rskq_rst_head;
		queue->fastopenq.rskq_rst_head = NULL;
		spin_unlock_bh(&queue->fastopenq.lock);
		while (req != NULL) {
			next = req->dl_next;
			reqsk_put(req);
			req = next;
		}
	}
	WARN_ON_ONCE(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);

void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
	const struct inet_sock *inet = inet_sk(sk);

	sin->sin_family		= AF_INET;
	sin->sin_addr.s_addr	= inet->inet_daddr;
	sin->sin_port		= inet->inet_dport;
}
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);

static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;
	struct flowi4 *fl4;
	struct rtable *rt;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	fl4 = &fl->u.ip4;
	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
				   inet->inet_saddr, inet->inet_dport,
				   inet->inet_sport, sk->sk_protocol,
				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
	if (IS_ERR(rt))
		rt = NULL;
	if (rt)
		sk_setup_caps(sk, &rt->dst);
	rcu_read_unlock();

	return &rt->dst;
}

struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);
	struct inet_sock *inet = inet_sk(sk);

	if (!dst) {
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
		if (!dst)
			goto out;
	}
	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);

	dst = __sk_dst_check(sk, 0);
	if (!dst)
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
out:
	return dst;
}
EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);