13f421baaSArnaldo Carvalho de Melo /* 23f421baaSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 33f421baaSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 43f421baaSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 53f421baaSArnaldo Carvalho de Melo * 63f421baaSArnaldo Carvalho de Melo * Support for INET connection oriented protocols. 73f421baaSArnaldo Carvalho de Melo * 83f421baaSArnaldo Carvalho de Melo * Authors: See the TCP sources 93f421baaSArnaldo Carvalho de Melo * 103f421baaSArnaldo Carvalho de Melo * This program is free software; you can redistribute it and/or 113f421baaSArnaldo Carvalho de Melo * modify it under the terms of the GNU General Public License 123f421baaSArnaldo Carvalho de Melo * as published by the Free Software Foundation; either version 133f421baaSArnaldo Carvalho de Melo * 2 of the License, or(at your option) any later version. 143f421baaSArnaldo Carvalho de Melo */ 153f421baaSArnaldo Carvalho de Melo 163f421baaSArnaldo Carvalho de Melo #include <linux/module.h> 173f421baaSArnaldo Carvalho de Melo #include <linux/jhash.h> 183f421baaSArnaldo Carvalho de Melo 193f421baaSArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 203f421baaSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 213f421baaSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 223f421baaSArnaldo Carvalho de Melo #include <net/ip.h> 233f421baaSArnaldo Carvalho de Melo #include <net/route.h> 243f421baaSArnaldo Carvalho de Melo #include <net/tcp_states.h> 25a019d6feSArnaldo Carvalho de Melo #include <net/xfrm.h> 26fa76ce73SEric Dumazet #include <net/tcp.h> 27c125e80bSCraig Gallek #include <net/sock_reuseport.h> 289691724eSstephen hemminger #include <net/addrconf.h> 293f421baaSArnaldo Carvalho de Melo 303f421baaSArnaldo Carvalho de Melo #ifdef INET_CSK_DEBUG 313f421baaSArnaldo Carvalho de Melo const char inet_csk_timer_bug_msg[] = 
"inet_csk BUG: unknown timer value\n"; 323f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_timer_bug_msg); 333f421baaSArnaldo Carvalho de Melo #endif 343f421baaSArnaldo Carvalho de Melo 35fe38d2a1SJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 36fe38d2a1SJosef Bacik /* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 37fe38d2a1SJosef Bacik * only, and any IPv4 addresses if not IPv6 only 38fe38d2a1SJosef Bacik * match_wildcard == false: addresses must be exactly the same, i.e. 39fe38d2a1SJosef Bacik * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, 40fe38d2a1SJosef Bacik * and 0.0.0.0 equals to 0.0.0.0 only 41fe38d2a1SJosef Bacik */ 42637bc8bbSJosef Bacik static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, 43637bc8bbSJosef Bacik const struct in6_addr *sk2_rcv_saddr6, 44637bc8bbSJosef Bacik __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 45637bc8bbSJosef Bacik bool sk1_ipv6only, bool sk2_ipv6only, 46fe38d2a1SJosef Bacik bool match_wildcard) 47fe38d2a1SJosef Bacik { 48637bc8bbSJosef Bacik int addr_type = ipv6_addr_type(sk1_rcv_saddr6); 49fe38d2a1SJosef Bacik int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; 50fe38d2a1SJosef Bacik 51fe38d2a1SJosef Bacik /* if both are mapped, treat as IPv4 */ 52fe38d2a1SJosef Bacik if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { 53fe38d2a1SJosef Bacik if (!sk2_ipv6only) { 54637bc8bbSJosef Bacik if (sk1_rcv_saddr == sk2_rcv_saddr) 55fe38d2a1SJosef Bacik return 1; 56637bc8bbSJosef Bacik if (!sk1_rcv_saddr || !sk2_rcv_saddr) 57fe38d2a1SJosef Bacik return match_wildcard; 58fe38d2a1SJosef Bacik } 59fe38d2a1SJosef Bacik return 0; 60fe38d2a1SJosef Bacik } 61fe38d2a1SJosef Bacik 62fe38d2a1SJosef Bacik if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) 63fe38d2a1SJosef Bacik return 1; 64fe38d2a1SJosef Bacik 65fe38d2a1SJosef Bacik if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && 66fe38d2a1SJosef Bacik !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) 67fe38d2a1SJosef Bacik return 1; 68fe38d2a1SJosef Bacik 69fe38d2a1SJosef Bacik if (addr_type == IPV6_ADDR_ANY && match_wildcard && 70637bc8bbSJosef Bacik !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) 71fe38d2a1SJosef Bacik return 1; 72fe38d2a1SJosef Bacik 73fe38d2a1SJosef Bacik if (sk2_rcv_saddr6 && 74637bc8bbSJosef Bacik ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) 75fe38d2a1SJosef Bacik return 1; 76fe38d2a1SJosef Bacik 77fe38d2a1SJosef Bacik return 0; 78fe38d2a1SJosef Bacik } 79fe38d2a1SJosef Bacik #endif 80fe38d2a1SJosef Bacik 81fe38d2a1SJosef Bacik /* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses 82fe38d2a1SJosef Bacik * match_wildcard == false: addresses must be exactly the same, i.e. 
83fe38d2a1SJosef Bacik * 0.0.0.0 only equals to 0.0.0.0 84fe38d2a1SJosef Bacik */ 85637bc8bbSJosef Bacik static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 86637bc8bbSJosef Bacik bool sk2_ipv6only, bool match_wildcard) 87fe38d2a1SJosef Bacik { 88637bc8bbSJosef Bacik if (!sk2_ipv6only) { 89637bc8bbSJosef Bacik if (sk1_rcv_saddr == sk2_rcv_saddr) 90fe38d2a1SJosef Bacik return 1; 91637bc8bbSJosef Bacik if (!sk1_rcv_saddr || !sk2_rcv_saddr) 92fe38d2a1SJosef Bacik return match_wildcard; 93fe38d2a1SJosef Bacik } 94fe38d2a1SJosef Bacik return 0; 95fe38d2a1SJosef Bacik } 96fe38d2a1SJosef Bacik 97fe38d2a1SJosef Bacik int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, 98fe38d2a1SJosef Bacik bool match_wildcard) 99fe38d2a1SJosef Bacik { 100fe38d2a1SJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 101fe38d2a1SJosef Bacik if (sk->sk_family == AF_INET6) 102637bc8bbSJosef Bacik return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr, 103319554f2SJosef Bacik inet6_rcv_saddr(sk2), 104637bc8bbSJosef Bacik sk->sk_rcv_saddr, 105637bc8bbSJosef Bacik sk2->sk_rcv_saddr, 106637bc8bbSJosef Bacik ipv6_only_sock(sk), 107637bc8bbSJosef Bacik ipv6_only_sock(sk2), 108637bc8bbSJosef Bacik match_wildcard); 109fe38d2a1SJosef Bacik #endif 110637bc8bbSJosef Bacik return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr, 111637bc8bbSJosef Bacik ipv6_only_sock(sk2), match_wildcard); 112fe38d2a1SJosef Bacik } 113fe38d2a1SJosef Bacik EXPORT_SYMBOL(inet_rcv_saddr_equal); 114fe38d2a1SJosef Bacik 1150bbf87d8SEric W. 
Biederman void inet_get_local_port_range(struct net *net, int *low, int *high) 116227b60f5SStephen Hemminger { 11795c96174SEric Dumazet unsigned int seq; 11895c96174SEric Dumazet 119227b60f5SStephen Hemminger do { 120c9d8f1a6SCong Wang seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); 121227b60f5SStephen Hemminger 122c9d8f1a6SCong Wang *low = net->ipv4.ip_local_ports.range[0]; 123c9d8f1a6SCong Wang *high = net->ipv4.ip_local_ports.range[1]; 124c9d8f1a6SCong Wang } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); 125227b60f5SStephen Hemminger } 126227b60f5SStephen Hemminger EXPORT_SYMBOL(inet_get_local_port_range); 1273f421baaSArnaldo Carvalho de Melo 128aa078842SJosef Bacik static int inet_csk_bind_conflict(const struct sock *sk, 129aa078842SJosef Bacik const struct inet_bind_bucket *tb, 130aa078842SJosef Bacik bool relax, bool reuseport_ok) 1313f421baaSArnaldo Carvalho de Melo { 1323f421baaSArnaldo Carvalho de Melo struct sock *sk2; 1330643ee4fSTom Herbert bool reuse = sk->sk_reuse; 1340643ee4fSTom Herbert bool reuseport = !!sk->sk_reuseport && reuseport_ok; 135da5e3630STom Herbert kuid_t uid = sock_i_uid((struct sock *)sk); 1363f421baaSArnaldo Carvalho de Melo 1377477fd2eSPavel Emelyanov /* 1387477fd2eSPavel Emelyanov * Unlike other sk lookup places we do not check 1397477fd2eSPavel Emelyanov * for sk_net here, since _all_ the socks listed 1407477fd2eSPavel Emelyanov * in tb->owners list belong to the same net - the 1417477fd2eSPavel Emelyanov * one this bucket belongs to. 
1427477fd2eSPavel Emelyanov */ 1437477fd2eSPavel Emelyanov 144b67bfe0dSSasha Levin sk_for_each_bound(sk2, &tb->owners) { 1453f421baaSArnaldo Carvalho de Melo if (sk != sk2 && 1463f421baaSArnaldo Carvalho de Melo (!sk->sk_bound_dev_if || 1473f421baaSArnaldo Carvalho de Melo !sk2->sk_bound_dev_if || 1483f421baaSArnaldo Carvalho de Melo sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { 149da5e3630STom Herbert if ((!reuse || !sk2->sk_reuse || 150da5e3630STom Herbert sk2->sk_state == TCP_LISTEN) && 151da5e3630STom Herbert (!reuseport || !sk2->sk_reuseport || 152c125e80bSCraig Gallek rcu_access_pointer(sk->sk_reuseport_cb) || 153da5e3630STom Herbert (sk2->sk_state != TCP_TIME_WAIT && 154da5e3630STom Herbert !uid_eq(uid, sock_i_uid(sk2))))) { 155aa078842SJosef Bacik if (inet_rcv_saddr_equal(sk, sk2, true)) 1563f421baaSArnaldo Carvalho de Melo break; 1578d238b25SDavid S. Miller } 158aacd9289SAlex Copot if (!relax && reuse && sk2->sk_reuse && 159aacd9289SAlex Copot sk2->sk_state != TCP_LISTEN) { 160aa078842SJosef Bacik if (inet_rcv_saddr_equal(sk, sk2, true)) 161aacd9289SAlex Copot break; 162aacd9289SAlex Copot } 1633f421baaSArnaldo Carvalho de Melo } 1643f421baaSArnaldo Carvalho de Melo } 165b67bfe0dSSasha Levin return sk2 != NULL; 1663f421baaSArnaldo Carvalho de Melo } 167971af18bSArnaldo Carvalho de Melo 168289141b7SJosef Bacik /* 169289141b7SJosef Bacik * Find an open port number for the socket. Returns with the 170289141b7SJosef Bacik * inet_bind_hashbucket lock held. 
 */
/* @sk:       socket looking for a local port
 * @tb_ret:   on success, set to the bind bucket already present for the
 *            chosen port, or to NULL when the hash chain has none for it
 * @port_ret: on success, set to the chosen port number
 *
 * Returns the hash bucket head for the chosen port with head->lock still
 * held (it is taken with spin_lock_bh() below and not released on the
 * success path), or NULL when every candidate port was rejected; in the
 * NULL case no lock is held and @tb_ret / @port_ret are not written.
 */
static struct inet_bind_hashbucket *
inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	int port = 0;
	struct inet_bind_hashbucket *head;
	struct net *net = sock_net(sk);
	int i, low, high, attempt_half;
	struct inet_bind_bucket *tb;
	u32 remaining, offset;

	/* attempt_half: 0 = scan the whole range, 1 = scan [low, half) first,
	 * 2 = scan [half, high) after the first half failed.
	 */
	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
	inet_get_local_port_range(net, &low, &high);
	high++; /* [32768, 60999] -> [32768, 61000[ */
	/* A range of fewer than 4 ports is not split into halves. */
	if (high - low < 4)
		attempt_half = 0;
	if (attempt_half) {
		int half = low + (((high - low) >> 2) << 1);

		if (attempt_half == 1)
			high = half;
		else
			low = half;
	}
	remaining = high - low;
	/* Round the span down to an even count so stepping by 2 wraps cleanly. */
	if (likely(remaining > 1))
		remaining &= ~1U;

	offset = prandom_u32() % remaining;
	/* __inet_hash_connect() favors ports having @low parity
	 * We do the opposite to not pollute connect() users.
	 */
	offset |= 1U;

other_parity_scan:
	port = low + offset;
	/* Visit every second port starting at low + offset, wrapping at high. */
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (net_eq(ib_net(tb), net) && tb->port == port) {
				/* Existing bucket: usable only without conflict
				 * (checked with relax and reuseport_ok false).
				 */
				if (!inet_csk_bind_conflict(sk, tb, false, false))
					goto success;
				goto next_port;
			}
		/* No bucket for this port yet: report it with tb == NULL. */
		tb = NULL;
		goto success;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}

	/* The first pass used an odd offset; after this decrement it is even
	 * and the other parity is scanned.  After that second pass the
	 * decrement yields an odd value again and we fall through.
	 */
	offset--;
	if (!(offset & 1))
		goto other_parity_scan;

	if (attempt_half == 1) {
		/* OK we now try the upper half of the range */
		attempt_half = 2;
		goto other_half_scan;
	}
	return NULL;
success:
	/* head->lock is intentionally still held here. */
	*port_ret = port;
	*tb_ret = tb;
	return head;
}

/* Decide whether @sk may join bucket @tb using the cached fastreuseport
 * state instead of a full conflict check.
 *
 * Returns 0 when the cache cannot be used (tb->fastreuseport <= 0, @sk does
 * not have sk_reuseport set, @sk has an sk_reuseport_cb attached, or the
 * cached uid differs from the uid of @sk).  Returns 1 when the bucket is in
 * FASTREUSEPORT_ANY mode.  Otherwise the result is the comparison of the
 * cached fast_* receive address with the receive address of @sk, in
 * wildcard-matching mode.
 */
static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
				     struct sock *sk)
{
	kuid_t uid = sock_i_uid(sk);

	if (tb->fastreuseport <= 0)
		return 0;
	if (!sk->sk_reuseport)
		return 0;
	if (rcu_access_pointer(sk->sk_reuseport_cb))
		return 0;
	if (!uid_eq(tb->fastuid, uid))
		return 0;
	/* We only need to check the rcv_saddr if this tb was once marked
	 * without fastreuseport and then was reset, as we can only know that
	 * the fast_*rcv_saddr doesn't have any conflicts with the socks on the
	 * owners list.
	 */
	if (tb->fastreuseport == FASTREUSEPORT_ANY)
		return 1;
#if IS_ENABLED(CONFIG_IPV6)
	/* The cached address plays the "sk1" role in the comparison. */
	if (tb->fast_sk_family == AF_INET6)
		return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
					    inet6_rcv_saddr(sk),
					    tb->fast_rcv_saddr,
					    sk->sk_rcv_saddr,
					    tb->fast_ipv6_only,
					    ipv6_only_sock(sk), true);
#endif
	return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
				    ipv6_only_sock(sk), true);
}

/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 * We try to allocate an odd port (and leave even ports for connect())
 */
/* Returns 0 when @sk ends up bound to the port, 1 on failure (no free port,
 * bucket allocation failure, or a bind conflict).
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
	/* "reuse" is only honoured while the socket is not listening. */
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	int ret = 1, port = snum;
	struct inet_bind_hashbucket *head;
	struct net *net = sock_net(sk);
	struct inet_bind_bucket *tb = NULL;
	kuid_t uid = sock_i_uid(sk);

	if (!port) {
		/* Automatic selection: on success the helper hands back the
		 * bucket head already locked, plus the chosen port and, if one
		 * exists, its bind bucket.
		 */
		head = inet_csk_find_open_port(sk, &tb, &port);
		if (!head)
			return ret;
		if (!tb)
			goto tb_not_found;
		/* The helper already ran the conflict check for this bucket. */
		goto success;
	}
	/* Explicit port: lock its hash chain and look for an existing bucket. */
	head = &hinfo->bhash[inet_bhashfn(net, port,
					  hinfo->bhash_size)];
	spin_lock_bh(&head->lock);
	inet_bind_bucket_for_each(tb, &head->chain)
		if (net_eq(ib_net(tb), net) && tb->port == port)
			goto tb_found;
tb_not_found:
	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
				     net, head, port);
	if (!tb)
		goto fail_unlock;
tb_found:
	if (!hlist_empty(&tb->owners)) {
		/* SK_FORCE_REUSE skips every conflict check. */
		if (sk->sk_reuse == SK_FORCE_REUSE)
			goto success;

		/* Fast paths: cached reuse / reuseport state allows the bind. */
		if ((tb->fastreuse > 0 && reuse) ||
		    sk_reuseport_match(tb, sk))
			goto success;
		/* Slow path: walk the owners (relax and reuseport_ok true). */
		if (inet_csk_bind_conflict(sk, tb, true, true))
			goto fail_unlock;
	}
success:
	if (hlist_empty(&tb->owners)) {
		/* First owner: seed the bucket's fast-path cache from @sk. */
		tb->fastreuse = reuse;
		if (sk->sk_reuseport) {
			tb->fastreuseport = FASTREUSEPORT_ANY;
			tb->fastuid = uid;
			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
			tb->fast_ipv6_only = ipv6_only_sock(sk);
			tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
			tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
		} else {
			tb->fastreuseport = 0;
		}
	} else {
		/* Additional owner: only ever narrow the cached reuse state. */
		if (!reuse)
			tb->fastreuse = 0;
		if (sk->sk_reuseport) {
			/* We didn't match or we don't have fastreuseport set on
			 * the tb, but we have sk_reuseport set on this socket
			 * and we know that there are no bind conflicts with
			 * this socket in this tb, so reset our tb's reuseport
			 * settings so that any subsequent sockets that match
			 * our current socket will be put on the fast path.
			 *
			 * If we reset we need to set FASTREUSEPORT_STRICT so we
			 * do extra checking for all subsequent sk_reuseport
			 * socks.
			 */
			if (!sk_reuseport_match(tb, sk)) {
				tb->fastreuseport = FASTREUSEPORT_STRICT;
				tb->fastuid = uid;
				tb->fast_rcv_saddr = sk->sk_rcv_saddr;
				tb->fast_ipv6_only = ipv6_only_sock(sk);
				tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
				tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
			}
		} else {
			tb->fastreuseport = 0;
		}
	}
	/* Attach the socket to the bucket unless it is already bound. */
	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, port);
	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
	ret = 0;

fail_unlock:
	/* Reached on both success and failure: drop the chain lock. */
	spin_unlock_bh(&head->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);

/*
 * Wait for an incoming connection, avoid race conditions. This must be called
 * with the socket locked.
3813f421baaSArnaldo Carvalho de Melo */ 3823f421baaSArnaldo Carvalho de Melo static int inet_csk_wait_for_connect(struct sock *sk, long timeo) 3833f421baaSArnaldo Carvalho de Melo { 3843f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3853f421baaSArnaldo Carvalho de Melo DEFINE_WAIT(wait); 3863f421baaSArnaldo Carvalho de Melo int err; 3873f421baaSArnaldo Carvalho de Melo 3883f421baaSArnaldo Carvalho de Melo /* 3893f421baaSArnaldo Carvalho de Melo * True wake-one mechanism for incoming connections: only 3903f421baaSArnaldo Carvalho de Melo * one process gets woken up, not the 'whole herd'. 3913f421baaSArnaldo Carvalho de Melo * Since we do not 'race & poll' for established sockets 3923f421baaSArnaldo Carvalho de Melo * anymore, the common case will execute the loop only once. 3933f421baaSArnaldo Carvalho de Melo * 3943f421baaSArnaldo Carvalho de Melo * Subtle issue: "add_wait_queue_exclusive()" will be added 3953f421baaSArnaldo Carvalho de Melo * after any current non-exclusive waiters, and we know that 3963f421baaSArnaldo Carvalho de Melo * it will always _stay_ after any new non-exclusive waiters 3973f421baaSArnaldo Carvalho de Melo * because all non-exclusive waiters are added at the 3983f421baaSArnaldo Carvalho de Melo * beginning of the wait-queue. As such, it's ok to "drop" 3993f421baaSArnaldo Carvalho de Melo * our exclusiveness temporarily when we get woken up without 4003f421baaSArnaldo Carvalho de Melo * having to remove and re-insert us on the wait queue. 
4013f421baaSArnaldo Carvalho de Melo */ 4023f421baaSArnaldo Carvalho de Melo for (;;) { 403aa395145SEric Dumazet prepare_to_wait_exclusive(sk_sleep(sk), &wait, 4043f421baaSArnaldo Carvalho de Melo TASK_INTERRUPTIBLE); 4053f421baaSArnaldo Carvalho de Melo release_sock(sk); 4063f421baaSArnaldo Carvalho de Melo if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 4073f421baaSArnaldo Carvalho de Melo timeo = schedule_timeout(timeo); 408cb7cf8a3SEric Dumazet sched_annotate_sleep(); 4093f421baaSArnaldo Carvalho de Melo lock_sock(sk); 4103f421baaSArnaldo Carvalho de Melo err = 0; 4113f421baaSArnaldo Carvalho de Melo if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) 4123f421baaSArnaldo Carvalho de Melo break; 4133f421baaSArnaldo Carvalho de Melo err = -EINVAL; 4143f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 4153f421baaSArnaldo Carvalho de Melo break; 4163f421baaSArnaldo Carvalho de Melo err = sock_intr_errno(timeo); 4173f421baaSArnaldo Carvalho de Melo if (signal_pending(current)) 4183f421baaSArnaldo Carvalho de Melo break; 4193f421baaSArnaldo Carvalho de Melo err = -EAGAIN; 4203f421baaSArnaldo Carvalho de Melo if (!timeo) 4213f421baaSArnaldo Carvalho de Melo break; 4223f421baaSArnaldo Carvalho de Melo } 423aa395145SEric Dumazet finish_wait(sk_sleep(sk), &wait); 4243f421baaSArnaldo Carvalho de Melo return err; 4253f421baaSArnaldo Carvalho de Melo } 4263f421baaSArnaldo Carvalho de Melo 4273f421baaSArnaldo Carvalho de Melo /* 4283f421baaSArnaldo Carvalho de Melo * This will accept the next outstanding connection. 
4293f421baaSArnaldo Carvalho de Melo */ 430cdfbabfbSDavid Howells struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) 4313f421baaSArnaldo Carvalho de Melo { 4323f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 4338336886fSJerry Chu struct request_sock_queue *queue = &icsk->icsk_accept_queue; 4348336886fSJerry Chu struct request_sock *req; 435e3d95ad7SEric Dumazet struct sock *newsk; 4363f421baaSArnaldo Carvalho de Melo int error; 4373f421baaSArnaldo Carvalho de Melo 4383f421baaSArnaldo Carvalho de Melo lock_sock(sk); 4393f421baaSArnaldo Carvalho de Melo 4403f421baaSArnaldo Carvalho de Melo /* We need to make sure that this socket is listening, 4413f421baaSArnaldo Carvalho de Melo * and that it has something pending. 4423f421baaSArnaldo Carvalho de Melo */ 4433f421baaSArnaldo Carvalho de Melo error = -EINVAL; 4443f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 4453f421baaSArnaldo Carvalho de Melo goto out_err; 4463f421baaSArnaldo Carvalho de Melo 4473f421baaSArnaldo Carvalho de Melo /* Find already established connection */ 4488336886fSJerry Chu if (reqsk_queue_empty(queue)) { 4493f421baaSArnaldo Carvalho de Melo long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 4503f421baaSArnaldo Carvalho de Melo 4513f421baaSArnaldo Carvalho de Melo /* If this is a non blocking socket don't sleep */ 4523f421baaSArnaldo Carvalho de Melo error = -EAGAIN; 4533f421baaSArnaldo Carvalho de Melo if (!timeo) 4543f421baaSArnaldo Carvalho de Melo goto out_err; 4553f421baaSArnaldo Carvalho de Melo 4563f421baaSArnaldo Carvalho de Melo error = inet_csk_wait_for_connect(sk, timeo); 4573f421baaSArnaldo Carvalho de Melo if (error) 4583f421baaSArnaldo Carvalho de Melo goto out_err; 4593f421baaSArnaldo Carvalho de Melo } 460fff1f300SEric Dumazet req = reqsk_queue_remove(queue, sk); 4618336886fSJerry Chu newsk = req->sk; 4623f421baaSArnaldo Carvalho de Melo 463e3d95ad7SEric Dumazet if (sk->sk_protocol == IPPROTO_TCP && 
4640536fcc0SEric Dumazet tcp_rsk(req)->tfo_listener) { 4650536fcc0SEric Dumazet spin_lock_bh(&queue->fastopenq.lock); 4669439ce00SEric Dumazet if (tcp_rsk(req)->tfo_listener) { 4678336886fSJerry Chu /* We are still waiting for the final ACK from 3WHS 4688336886fSJerry Chu * so can't free req now. Instead, we set req->sk to 4698336886fSJerry Chu * NULL to signify that the child socket is taken 4708336886fSJerry Chu * so reqsk_fastopen_remove() will free the req 4718336886fSJerry Chu * when 3WHS finishes (or is aborted). 4728336886fSJerry Chu */ 4738336886fSJerry Chu req->sk = NULL; 4748336886fSJerry Chu req = NULL; 4758336886fSJerry Chu } 4760536fcc0SEric Dumazet spin_unlock_bh(&queue->fastopenq.lock); 4778336886fSJerry Chu } 4783f421baaSArnaldo Carvalho de Melo out: 4793f421baaSArnaldo Carvalho de Melo release_sock(sk); 4808336886fSJerry Chu if (req) 48113854e5aSEric Dumazet reqsk_put(req); 4823f421baaSArnaldo Carvalho de Melo return newsk; 4833f421baaSArnaldo Carvalho de Melo out_err: 4843f421baaSArnaldo Carvalho de Melo newsk = NULL; 4858336886fSJerry Chu req = NULL; 4863f421baaSArnaldo Carvalho de Melo *err = error; 4873f421baaSArnaldo Carvalho de Melo goto out; 4883f421baaSArnaldo Carvalho de Melo } 4893f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_accept); 4903f421baaSArnaldo Carvalho de Melo 4913f421baaSArnaldo Carvalho de Melo /* 4923f421baaSArnaldo Carvalho de Melo * Using different timers for retransmit, delayed acks and probes 4933f421baaSArnaldo Carvalho de Melo * We may wish use just one timer maintaining a list of expire jiffies 4943f421baaSArnaldo Carvalho de Melo * to optimize. 
4953f421baaSArnaldo Carvalho de Melo */ 4963f421baaSArnaldo Carvalho de Melo void inet_csk_init_xmit_timers(struct sock *sk, 4973f421baaSArnaldo Carvalho de Melo void (*retransmit_handler)(unsigned long), 4983f421baaSArnaldo Carvalho de Melo void (*delack_handler)(unsigned long), 4993f421baaSArnaldo Carvalho de Melo void (*keepalive_handler)(unsigned long)) 5003f421baaSArnaldo Carvalho de Melo { 5013f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 5023f421baaSArnaldo Carvalho de Melo 503b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, 504b24b8a24SPavel Emelyanov (unsigned long)sk); 505b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_delack_timer, delack_handler, 506b24b8a24SPavel Emelyanov (unsigned long)sk); 507b24b8a24SPavel Emelyanov setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); 5083f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = 0; 5093f421baaSArnaldo Carvalho de Melo } 5103f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_init_xmit_timers); 5113f421baaSArnaldo Carvalho de Melo 5123f421baaSArnaldo Carvalho de Melo void inet_csk_clear_xmit_timers(struct sock *sk) 5133f421baaSArnaldo Carvalho de Melo { 5143f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 5153f421baaSArnaldo Carvalho de Melo 5163f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; 5173f421baaSArnaldo Carvalho de Melo 5183f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 5193f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_delack_timer); 5203f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 5213f421baaSArnaldo Carvalho de Melo } 5223f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 5233f421baaSArnaldo Carvalho de Melo 5243f421baaSArnaldo Carvalho de Melo void inet_csk_delete_keepalive_timer(struct 
sock *sk) 5253f421baaSArnaldo Carvalho de Melo { 5263f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 5273f421baaSArnaldo Carvalho de Melo } 5283f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 5293f421baaSArnaldo Carvalho de Melo 5303f421baaSArnaldo Carvalho de Melo void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 5313f421baaSArnaldo Carvalho de Melo { 5323f421baaSArnaldo Carvalho de Melo sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 5333f421baaSArnaldo Carvalho de Melo } 5343f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 5353f421baaSArnaldo Carvalho de Melo 536e5895bc6SEric Dumazet struct dst_entry *inet_csk_route_req(const struct sock *sk, 5376bd023f3SDavid S. Miller struct flowi4 *fl4, 538ba3f7f04SDavid S. Miller const struct request_sock *req) 5393f421baaSArnaldo Carvalho de Melo { 5403f421baaSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 5418b929ab1SEric Dumazet struct net *net = read_pnet(&ireq->ireq_net); 5428b929ab1SEric Dumazet struct ip_options_rcu *opt = ireq->opt; 5438b929ab1SEric Dumazet struct rtable *rt; 5443f421baaSArnaldo Carvalho de Melo 5458b929ab1SEric Dumazet flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 546e79d9bc7SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 5478b929ab1SEric Dumazet sk->sk_protocol, inet_sk_flowi_flags(sk), 548634fb979SEric Dumazet (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 5498b929ab1SEric Dumazet ireq->ir_loc_addr, ireq->ir_rmt_port, 550e2d118a1SLorenzo Colitti htons(ireq->ir_num), sk->sk_uid); 5516bd023f3SDavid S. Miller security_req_classify_flow(req, flowi4_to_flowi(fl4)); 5526bd023f3SDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 553b23dd4feSDavid S. 
Miller if (IS_ERR(rt)) 554857a6e0aSIlpo Järvinen goto no_route; 555155e8336SJulian Anastasov if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 556857a6e0aSIlpo Järvinen goto route_err; 557d8d1f30bSChangli Gao return &rt->dst; 558857a6e0aSIlpo Järvinen 559857a6e0aSIlpo Järvinen route_err: 560857a6e0aSIlpo Järvinen ip_rt_put(rt); 561857a6e0aSIlpo Järvinen no_route: 562b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 563857a6e0aSIlpo Järvinen return NULL; 5643f421baaSArnaldo Carvalho de Melo } 5653f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_route_req); 5663f421baaSArnaldo Carvalho de Melo 567a2432c4fSEric Dumazet struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, 56877357a95SDavid S. Miller struct sock *newsk, 56977357a95SDavid S. Miller const struct request_sock *req) 57077357a95SDavid S. Miller { 57177357a95SDavid S. Miller const struct inet_request_sock *ireq = inet_rsk(req); 5728b929ab1SEric Dumazet struct net *net = read_pnet(&ireq->ireq_net); 57377357a95SDavid S. Miller struct inet_sock *newinet = inet_sk(newsk); 5741a7b27c9SChristoph Paasch struct ip_options_rcu *opt; 57577357a95SDavid S. Miller struct flowi4 *fl4; 57677357a95SDavid S. Miller struct rtable *rt; 57777357a95SDavid S. Miller 57877357a95SDavid S. Miller fl4 = &newinet->cork.fl.u.ip4; 5791a7b27c9SChristoph Paasch 5801a7b27c9SChristoph Paasch rcu_read_lock(); 5811a7b27c9SChristoph Paasch opt = rcu_dereference(newinet->inet_opt); 5828b929ab1SEric Dumazet flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 58377357a95SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 58477357a95SDavid S. Miller sk->sk_protocol, inet_sk_flowi_flags(sk), 585634fb979SEric Dumazet (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 5868b929ab1SEric Dumazet ireq->ir_loc_addr, ireq->ir_rmt_port, 587e2d118a1SLorenzo Colitti htons(ireq->ir_num), sk->sk_uid); 58877357a95SDavid S. 
Miller security_req_classify_flow(req, flowi4_to_flowi(fl4)); 58977357a95SDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 59077357a95SDavid S. Miller if (IS_ERR(rt)) 59177357a95SDavid S. Miller goto no_route; 592155e8336SJulian Anastasov if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 59377357a95SDavid S. Miller goto route_err; 5941a7b27c9SChristoph Paasch rcu_read_unlock(); 59577357a95SDavid S. Miller return &rt->dst; 59677357a95SDavid S. Miller 59777357a95SDavid S. Miller route_err: 59877357a95SDavid S. Miller ip_rt_put(rt); 59977357a95SDavid S. Miller no_route: 6001a7b27c9SChristoph Paasch rcu_read_unlock(); 601b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 60277357a95SDavid S. Miller return NULL; 60377357a95SDavid S. Miller } 60477357a95SDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); 60577357a95SDavid S. Miller 606dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 6073f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) ((fam) == AF_INET) 6083f421baaSArnaldo Carvalho de Melo #else 609fa76ce73SEric Dumazet #define AF_INET_FAMILY(fam) true 6103f421baaSArnaldo Carvalho de Melo #endif 6113f421baaSArnaldo Carvalho de Melo 6120c3d79bcSJulian Anastasov /* Decide when to expire the request and when to resend SYN-ACK */ 6130c3d79bcSJulian Anastasov static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 6140c3d79bcSJulian Anastasov const int max_retries, 6150c3d79bcSJulian Anastasov const u8 rskq_defer_accept, 6160c3d79bcSJulian Anastasov int *expire, int *resend) 6170c3d79bcSJulian Anastasov { 6180c3d79bcSJulian Anastasov if (!rskq_defer_accept) { 619e6c022a4SEric Dumazet *expire = req->num_timeout >= thresh; 6200c3d79bcSJulian Anastasov *resend = 1; 6210c3d79bcSJulian Anastasov return; 6220c3d79bcSJulian Anastasov } 623e6c022a4SEric Dumazet *expire = req->num_timeout >= thresh && 624e6c022a4SEric Dumazet (!inet_rsk(req)->acked || req->num_timeout >= max_retries); 6250c3d79bcSJulian 
Anastasov /* 6260c3d79bcSJulian Anastasov * Do not resend while waiting for data after ACK, 6270c3d79bcSJulian Anastasov * start to resend on end of deferring period to give 6280c3d79bcSJulian Anastasov * last chance for data or ACK to create established socket. 6290c3d79bcSJulian Anastasov */ 6300c3d79bcSJulian Anastasov *resend = !inet_rsk(req)->acked || 631e6c022a4SEric Dumazet req->num_timeout >= rskq_defer_accept - 1; 6320c3d79bcSJulian Anastasov } 6330c3d79bcSJulian Anastasov 6341b70e977SEric Dumazet int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) 635e6c022a4SEric Dumazet { 6361a2c6181SChristoph Paasch int err = req->rsk_ops->rtx_syn_ack(parent, req); 637e6c022a4SEric Dumazet 638e6c022a4SEric Dumazet if (!err) 639e6c022a4SEric Dumazet req->num_retrans++; 640e6c022a4SEric Dumazet return err; 641e6c022a4SEric Dumazet } 642e6c022a4SEric Dumazet EXPORT_SYMBOL(inet_rtx_syn_ack); 643e6c022a4SEric Dumazet 644079096f1SEric Dumazet /* return true if req was found in the ehash table */ 645b357a364SEric Dumazet static bool reqsk_queue_unlink(struct request_sock_queue *queue, 646b357a364SEric Dumazet struct request_sock *req) 647b357a364SEric Dumazet { 648079096f1SEric Dumazet struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo; 6495e0724d0SEric Dumazet bool found = false; 650b357a364SEric Dumazet 6515e0724d0SEric Dumazet if (sk_hashed(req_to_sk(req))) { 6525e0724d0SEric Dumazet spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash); 653b357a364SEric Dumazet 654079096f1SEric Dumazet spin_lock(lock); 655079096f1SEric Dumazet found = __sk_nulls_del_node_init_rcu(req_to_sk(req)); 656079096f1SEric Dumazet spin_unlock(lock); 6575e0724d0SEric Dumazet } 65883fccfc3SEric Dumazet if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) 659b357a364SEric Dumazet reqsk_put(req); 660b357a364SEric Dumazet return found; 661b357a364SEric Dumazet } 662b357a364SEric Dumazet 663b357a364SEric Dumazet void 
inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) 664b357a364SEric Dumazet { 665b357a364SEric Dumazet if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) { 666b357a364SEric Dumazet reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); 667b357a364SEric Dumazet reqsk_put(req); 668b357a364SEric Dumazet } 669b357a364SEric Dumazet } 670b357a364SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); 671b357a364SEric Dumazet 672f03f2e15SEric Dumazet void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) 673f03f2e15SEric Dumazet { 674f03f2e15SEric Dumazet inet_csk_reqsk_queue_drop(sk, req); 675f03f2e15SEric Dumazet reqsk_put(req); 676f03f2e15SEric Dumazet } 677f03f2e15SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); 678f03f2e15SEric Dumazet 679fa76ce73SEric Dumazet static void reqsk_timer_handler(unsigned long data) 680a019d6feSArnaldo Carvalho de Melo { 681fa76ce73SEric Dumazet struct request_sock *req = (struct request_sock *)data; 682fa76ce73SEric Dumazet struct sock *sk_listener = req->rsk_listener; 6837c083ecbSNikolay Borisov struct net *net = sock_net(sk_listener); 684fa76ce73SEric Dumazet struct inet_connection_sock *icsk = inet_csk(sk_listener); 685a019d6feSArnaldo Carvalho de Melo struct request_sock_queue *queue = &icsk->icsk_accept_queue; 6862b41fab7SEric Dumazet int qlen, expire = 0, resend = 0; 687fa76ce73SEric Dumazet int max_retries, thresh; 6882b41fab7SEric Dumazet u8 defer_accept; 689a019d6feSArnaldo Carvalho de Melo 69000fd38d9SEric Dumazet if (sk_state_load(sk_listener) != TCP_LISTEN) 691079096f1SEric Dumazet goto drop; 692a019d6feSArnaldo Carvalho de Melo 6937c083ecbSNikolay Borisov max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; 694fa76ce73SEric Dumazet thresh = max_retries; 695a019d6feSArnaldo Carvalho de Melo /* Normally all the openreqs are young and become mature 696a019d6feSArnaldo Carvalho de Melo * (i.e. 
converted to established socket) for first timeout. 697fd4f2ceaSEric Dumazet * If synack was not acknowledged for 1 second, it means 698a019d6feSArnaldo Carvalho de Melo * one of the following things: synack was lost, ack was lost, 699a019d6feSArnaldo Carvalho de Melo * rtt is high or nobody planned to ack (i.e. synflood). 700a019d6feSArnaldo Carvalho de Melo * When server is a bit loaded, queue is populated with old 701a019d6feSArnaldo Carvalho de Melo * open requests, reducing effective size of queue. 702a019d6feSArnaldo Carvalho de Melo * When server is well loaded, queue size reduces to zero 703a019d6feSArnaldo Carvalho de Melo * after several minutes of work. It is not synflood, 704a019d6feSArnaldo Carvalho de Melo * it is normal operation. The solution is pruning 705a019d6feSArnaldo Carvalho de Melo * too old entries overriding normal timeout, when 706a019d6feSArnaldo Carvalho de Melo * situation becomes dangerous. 707a019d6feSArnaldo Carvalho de Melo * 708a019d6feSArnaldo Carvalho de Melo * Essentially, we reserve half of room for young 709a019d6feSArnaldo Carvalho de Melo * embrions; and abort old ones without pity, if old 710a019d6feSArnaldo Carvalho de Melo * ones are about to clog our table. 
711a019d6feSArnaldo Carvalho de Melo */ 712aac065c5SEric Dumazet qlen = reqsk_queue_len(queue); 713acb4a6bfSEric Dumazet if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) { 714aac065c5SEric Dumazet int young = reqsk_queue_len_young(queue) << 1; 715a019d6feSArnaldo Carvalho de Melo 716a019d6feSArnaldo Carvalho de Melo while (thresh > 2) { 7172b41fab7SEric Dumazet if (qlen < young) 718a019d6feSArnaldo Carvalho de Melo break; 719a019d6feSArnaldo Carvalho de Melo thresh--; 720a019d6feSArnaldo Carvalho de Melo young <<= 1; 721a019d6feSArnaldo Carvalho de Melo } 722a019d6feSArnaldo Carvalho de Melo } 7232b41fab7SEric Dumazet defer_accept = READ_ONCE(queue->rskq_defer_accept); 7242b41fab7SEric Dumazet if (defer_accept) 7252b41fab7SEric Dumazet max_retries = defer_accept; 7262b41fab7SEric Dumazet syn_ack_recalc(req, thresh, max_retries, defer_accept, 7270c3d79bcSJulian Anastasov &expire, &resend); 72842cb80a2SEric Dumazet req->rsk_ops->syn_ack_timeout(req); 7290c3d79bcSJulian Anastasov if (!expire && 7300c3d79bcSJulian Anastasov (!resend || 731fa76ce73SEric Dumazet !inet_rtx_syn_ack(sk_listener, req) || 7320c3d79bcSJulian Anastasov inet_rsk(req)->acked)) { 733a019d6feSArnaldo Carvalho de Melo unsigned long timeo; 734a019d6feSArnaldo Carvalho de Melo 735e6c022a4SEric Dumazet if (req->num_timeout++ == 0) 736aac065c5SEric Dumazet atomic_dec(&queue->young); 737fa76ce73SEric Dumazet timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); 738f3438bc7SThomas Gleixner mod_timer(&req->rsk_timer, jiffies + timeo); 739fa76ce73SEric Dumazet return; 740a019d6feSArnaldo Carvalho de Melo } 741079096f1SEric Dumazet drop: 742f03f2e15SEric Dumazet inet_csk_reqsk_queue_drop_and_put(sk_listener, req); 743a019d6feSArnaldo Carvalho de Melo } 744fa76ce73SEric Dumazet 745079096f1SEric Dumazet static void reqsk_queue_hash_req(struct request_sock *req, 746fa76ce73SEric Dumazet unsigned long timeout) 747fa76ce73SEric Dumazet { 748fa76ce73SEric Dumazet req->num_retrans = 0; 
749fa76ce73SEric Dumazet req->num_timeout = 0; 750fa76ce73SEric Dumazet req->sk = NULL; 751fa76ce73SEric Dumazet 752f3438bc7SThomas Gleixner setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler, 753f3438bc7SThomas Gleixner (unsigned long)req); 754f3438bc7SThomas Gleixner mod_timer(&req->rsk_timer, jiffies + timeout); 75529c68526SEric Dumazet 756079096f1SEric Dumazet inet_ehash_insert(req_to_sk(req), NULL); 757fa76ce73SEric Dumazet /* before letting lookups find us, make sure all req fields 758fa76ce73SEric Dumazet * are committed to memory and refcnt initialized. 759fa76ce73SEric Dumazet */ 760fa76ce73SEric Dumazet smp_wmb(); 76141c6d650SReshetova, Elena refcount_set(&req->rsk_refcnt, 2 + 1); 762a019d6feSArnaldo Carvalho de Melo } 763079096f1SEric Dumazet 764079096f1SEric Dumazet void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 765079096f1SEric Dumazet unsigned long timeout) 766079096f1SEric Dumazet { 767079096f1SEric Dumazet reqsk_queue_hash_req(req, timeout); 768079096f1SEric Dumazet inet_csk_reqsk_queue_added(sk); 769079096f1SEric Dumazet } 770079096f1SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 771a019d6feSArnaldo Carvalho de Melo 772e56c57d0SEric Dumazet /** 773e56c57d0SEric Dumazet * inet_csk_clone_lock - clone an inet socket, and lock its clone 774e56c57d0SEric Dumazet * @sk: the socket to clone 775e56c57d0SEric Dumazet * @req: request_sock 776e56c57d0SEric Dumazet * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 777e56c57d0SEric Dumazet * 778e56c57d0SEric Dumazet * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) 779e56c57d0SEric Dumazet */ 780e56c57d0SEric Dumazet struct sock *inet_csk_clone_lock(const struct sock *sk, 781e56c57d0SEric Dumazet const struct request_sock *req, 782dd0fc66fSAl Viro const gfp_t priority) 7839f1d2604SArnaldo Carvalho de Melo { 784e56c57d0SEric Dumazet struct sock *newsk = sk_clone_lock(sk, priority); 7859f1d2604SArnaldo Carvalho de Melo 
78600db4124SIan Morris if (newsk) { 7879f1d2604SArnaldo Carvalho de Melo struct inet_connection_sock *newicsk = inet_csk(newsk); 7889f1d2604SArnaldo Carvalho de Melo 7899f1d2604SArnaldo Carvalho de Melo newsk->sk_state = TCP_SYN_RECV; 7909f1d2604SArnaldo Carvalho de Melo newicsk->icsk_bind_hash = NULL; 7919f1d2604SArnaldo Carvalho de Melo 792634fb979SEric Dumazet inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; 793b44084c2SEric Dumazet inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; 794b44084c2SEric Dumazet inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); 7959f1d2604SArnaldo Carvalho de Melo 79685017869SEric Dumazet /* listeners have SOCK_RCU_FREE, not the children */ 79785017869SEric Dumazet sock_reset_flag(newsk, SOCK_RCU_FREE); 79885017869SEric Dumazet 799657831ffSEric Dumazet inet_sk(newsk)->mc_list = NULL; 800657831ffSEric Dumazet 80184f39b08SLorenzo Colitti newsk->sk_mark = inet_rsk(req)->ir_mark; 80233cf7c90SEric Dumazet atomic64_set(&newsk->sk_cookie, 80333cf7c90SEric Dumazet atomic64_read(&inet_rsk(req)->ir_cookie)); 80484f39b08SLorenzo Colitti 8059f1d2604SArnaldo Carvalho de Melo newicsk->icsk_retransmits = 0; 8069f1d2604SArnaldo Carvalho de Melo newicsk->icsk_backoff = 0; 8076687e988SArnaldo Carvalho de Melo newicsk->icsk_probes_out = 0; 8089f1d2604SArnaldo Carvalho de Melo 8099f1d2604SArnaldo Carvalho de Melo /* Deinitialize accept_queue to trap illegal accesses. 
*/ 8109f1d2604SArnaldo Carvalho de Melo memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); 8114237c75cSVenkat Yekkirala 8124237c75cSVenkat Yekkirala security_inet_csk_clone(newsk, req); 8139f1d2604SArnaldo Carvalho de Melo } 8149f1d2604SArnaldo Carvalho de Melo return newsk; 8159f1d2604SArnaldo Carvalho de Melo } 816e56c57d0SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_clone_lock); 817a019d6feSArnaldo Carvalho de Melo 818a019d6feSArnaldo Carvalho de Melo /* 819a019d6feSArnaldo Carvalho de Melo * At this point, there should be no process reference to this 820a019d6feSArnaldo Carvalho de Melo * socket, and thus no user references at all. Therefore we 821a019d6feSArnaldo Carvalho de Melo * can assume the socket waitqueue is inactive and nobody will 822a019d6feSArnaldo Carvalho de Melo * try to jump onto it. 823a019d6feSArnaldo Carvalho de Melo */ 824a019d6feSArnaldo Carvalho de Melo void inet_csk_destroy_sock(struct sock *sk) 825a019d6feSArnaldo Carvalho de Melo { 826547b792cSIlpo Järvinen WARN_ON(sk->sk_state != TCP_CLOSE); 827547b792cSIlpo Järvinen WARN_ON(!sock_flag(sk, SOCK_DEAD)); 828a019d6feSArnaldo Carvalho de Melo 829a019d6feSArnaldo Carvalho de Melo /* It cannot be in hash table! 
*/ 830547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk)); 831a019d6feSArnaldo Carvalho de Melo 832c720c7e8SEric Dumazet /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ 833c720c7e8SEric Dumazet WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); 834a019d6feSArnaldo Carvalho de Melo 835a019d6feSArnaldo Carvalho de Melo sk->sk_prot->destroy(sk); 836a019d6feSArnaldo Carvalho de Melo 837a019d6feSArnaldo Carvalho de Melo sk_stream_kill_queues(sk); 838a019d6feSArnaldo Carvalho de Melo 839a019d6feSArnaldo Carvalho de Melo xfrm_sk_free_policy(sk); 840a019d6feSArnaldo Carvalho de Melo 841a019d6feSArnaldo Carvalho de Melo sk_refcnt_debug_release(sk); 842a019d6feSArnaldo Carvalho de Melo 843dd24c001SEric Dumazet percpu_counter_dec(sk->sk_prot->orphan_count); 844c2a2efbbSEric Dumazet 845a019d6feSArnaldo Carvalho de Melo sock_put(sk); 846a019d6feSArnaldo Carvalho de Melo } 847a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_destroy_sock); 848a019d6feSArnaldo Carvalho de Melo 849e337e24dSChristoph Paasch /* This function allows to force a closure of a socket after the call to 850e337e24dSChristoph Paasch * tcp/dccp_create_openreq_child(). 
851e337e24dSChristoph Paasch */ 852e337e24dSChristoph Paasch void inet_csk_prepare_forced_close(struct sock *sk) 853c10cb5fcSChristoph Paasch __releases(&sk->sk_lock.slock) 854e337e24dSChristoph Paasch { 855e337e24dSChristoph Paasch /* sk_clone_lock locked the socket and set refcnt to 2 */ 856e337e24dSChristoph Paasch bh_unlock_sock(sk); 857e337e24dSChristoph Paasch sock_put(sk); 858e337e24dSChristoph Paasch 859e337e24dSChristoph Paasch /* The below has to be done to allow calling inet_csk_destroy_sock */ 860e337e24dSChristoph Paasch sock_set_flag(sk, SOCK_DEAD); 861e337e24dSChristoph Paasch percpu_counter_inc(sk->sk_prot->orphan_count); 862e337e24dSChristoph Paasch inet_sk(sk)->inet_num = 0; 863e337e24dSChristoph Paasch } 864e337e24dSChristoph Paasch EXPORT_SYMBOL(inet_csk_prepare_forced_close); 865e337e24dSChristoph Paasch 866f985c65cSEric Dumazet int inet_csk_listen_start(struct sock *sk, int backlog) 867a019d6feSArnaldo Carvalho de Melo { 868a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 86910cbc8f1SEric Dumazet struct inet_sock *inet = inet_sk(sk); 870086c653fSCraig Gallek int err = -EADDRINUSE; 871a019d6feSArnaldo Carvalho de Melo 872ef547f2aSEric Dumazet reqsk_queue_alloc(&icsk->icsk_accept_queue); 873a019d6feSArnaldo Carvalho de Melo 874f985c65cSEric Dumazet sk->sk_max_ack_backlog = backlog; 875a019d6feSArnaldo Carvalho de Melo sk->sk_ack_backlog = 0; 876a019d6feSArnaldo Carvalho de Melo inet_csk_delack_init(sk); 877a019d6feSArnaldo Carvalho de Melo 878a019d6feSArnaldo Carvalho de Melo /* There is race window here: we announce ourselves listening, 879a019d6feSArnaldo Carvalho de Melo * but this transition is still not validated by get_port(). 880a019d6feSArnaldo Carvalho de Melo * It is OK, because this socket enters to hash table only 881a019d6feSArnaldo Carvalho de Melo * after validation is complete. 
882a019d6feSArnaldo Carvalho de Melo */ 88300fd38d9SEric Dumazet sk_state_store(sk, TCP_LISTEN); 884c720c7e8SEric Dumazet if (!sk->sk_prot->get_port(sk, inet->inet_num)) { 885c720c7e8SEric Dumazet inet->inet_sport = htons(inet->inet_num); 886a019d6feSArnaldo Carvalho de Melo 887a019d6feSArnaldo Carvalho de Melo sk_dst_reset(sk); 888086c653fSCraig Gallek err = sk->sk_prot->hash(sk); 889a019d6feSArnaldo Carvalho de Melo 890086c653fSCraig Gallek if (likely(!err)) 891a019d6feSArnaldo Carvalho de Melo return 0; 892a019d6feSArnaldo Carvalho de Melo } 893a019d6feSArnaldo Carvalho de Melo 894a019d6feSArnaldo Carvalho de Melo sk->sk_state = TCP_CLOSE; 895086c653fSCraig Gallek return err; 896a019d6feSArnaldo Carvalho de Melo } 897a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_start); 898a019d6feSArnaldo Carvalho de Melo 899ebb516afSEric Dumazet static void inet_child_forget(struct sock *sk, struct request_sock *req, 900ebb516afSEric Dumazet struct sock *child) 901ebb516afSEric Dumazet { 902ebb516afSEric Dumazet sk->sk_prot->disconnect(child, O_NONBLOCK); 903ebb516afSEric Dumazet 904ebb516afSEric Dumazet sock_orphan(child); 905ebb516afSEric Dumazet 906ebb516afSEric Dumazet percpu_counter_inc(sk->sk_prot->orphan_count); 907ebb516afSEric Dumazet 908ebb516afSEric Dumazet if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { 909ebb516afSEric Dumazet BUG_ON(tcp_sk(child)->fastopen_rsk != req); 910ebb516afSEric Dumazet BUG_ON(sk != req->rsk_listener); 911ebb516afSEric Dumazet 912ebb516afSEric Dumazet /* Paranoid, to prevent race condition if 913ebb516afSEric Dumazet * an inbound pkt destined for child is 914ebb516afSEric Dumazet * blocked by sock lock in tcp_v4_rcv(). 915ebb516afSEric Dumazet * Also to satisfy an assertion in 916ebb516afSEric Dumazet * tcp_v4_destroy_sock(). 
917ebb516afSEric Dumazet */ 918ebb516afSEric Dumazet tcp_sk(child)->fastopen_rsk = NULL; 919ebb516afSEric Dumazet } 920ebb516afSEric Dumazet inet_csk_destroy_sock(child); 921ebb516afSEric Dumazet } 922ebb516afSEric Dumazet 9237716682cSEric Dumazet struct sock *inet_csk_reqsk_queue_add(struct sock *sk, 9247716682cSEric Dumazet struct request_sock *req, 925ebb516afSEric Dumazet struct sock *child) 926ebb516afSEric Dumazet { 927ebb516afSEric Dumazet struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 928ebb516afSEric Dumazet 929ebb516afSEric Dumazet spin_lock(&queue->rskq_lock); 930ebb516afSEric Dumazet if (unlikely(sk->sk_state != TCP_LISTEN)) { 931ebb516afSEric Dumazet inet_child_forget(sk, req, child); 9327716682cSEric Dumazet child = NULL; 933ebb516afSEric Dumazet } else { 934ebb516afSEric Dumazet req->sk = child; 935ebb516afSEric Dumazet req->dl_next = NULL; 936ebb516afSEric Dumazet if (queue->rskq_accept_head == NULL) 937ebb516afSEric Dumazet queue->rskq_accept_head = req; 938ebb516afSEric Dumazet else 939ebb516afSEric Dumazet queue->rskq_accept_tail->dl_next = req; 940ebb516afSEric Dumazet queue->rskq_accept_tail = req; 941ebb516afSEric Dumazet sk_acceptq_added(sk); 942ebb516afSEric Dumazet } 943ebb516afSEric Dumazet spin_unlock(&queue->rskq_lock); 9447716682cSEric Dumazet return child; 945ebb516afSEric Dumazet } 946ebb516afSEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_add); 947ebb516afSEric Dumazet 9485e0724d0SEric Dumazet struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, 9495e0724d0SEric Dumazet struct request_sock *req, bool own_req) 9505e0724d0SEric Dumazet { 9515e0724d0SEric Dumazet if (own_req) { 9525e0724d0SEric Dumazet inet_csk_reqsk_queue_drop(sk, req); 9535e0724d0SEric Dumazet reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); 9547716682cSEric Dumazet if (inet_csk_reqsk_queue_add(sk, req, child)) 9555e0724d0SEric Dumazet return child; 9565e0724d0SEric Dumazet } 9575e0724d0SEric Dumazet /* Too 
bad, another child took ownership of the request, undo. */ 9585e0724d0SEric Dumazet bh_unlock_sock(child); 9595e0724d0SEric Dumazet sock_put(child); 9605e0724d0SEric Dumazet return NULL; 9615e0724d0SEric Dumazet } 9625e0724d0SEric Dumazet EXPORT_SYMBOL(inet_csk_complete_hashdance); 9635e0724d0SEric Dumazet 964a019d6feSArnaldo Carvalho de Melo /* 965a019d6feSArnaldo Carvalho de Melo * This routine closes sockets which have been at least partially 966a019d6feSArnaldo Carvalho de Melo * opened, but not yet accepted. 967a019d6feSArnaldo Carvalho de Melo */ 968a019d6feSArnaldo Carvalho de Melo void inet_csk_listen_stop(struct sock *sk) 969a019d6feSArnaldo Carvalho de Melo { 970a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 9718336886fSJerry Chu struct request_sock_queue *queue = &icsk->icsk_accept_queue; 972fff1f300SEric Dumazet struct request_sock *next, *req; 973a019d6feSArnaldo Carvalho de Melo 974a019d6feSArnaldo Carvalho de Melo /* Following specs, it would be better either to send FIN 975a019d6feSArnaldo Carvalho de Melo * (and enter FIN-WAIT-1, it is normal close) 976a019d6feSArnaldo Carvalho de Melo * or to send active reset (abort). 977a019d6feSArnaldo Carvalho de Melo * Certainly, it is pretty dangerous while synflood, but it is 978a019d6feSArnaldo Carvalho de Melo * bad justification for our negligence 8) 979a019d6feSArnaldo Carvalho de Melo * To be honest, we are not able to make either 980a019d6feSArnaldo Carvalho de Melo * of the variants now. 
--ANK 981a019d6feSArnaldo Carvalho de Melo */ 982fff1f300SEric Dumazet while ((req = reqsk_queue_remove(queue, sk)) != NULL) { 983a019d6feSArnaldo Carvalho de Melo struct sock *child = req->sk; 984a019d6feSArnaldo Carvalho de Melo 985a019d6feSArnaldo Carvalho de Melo local_bh_disable(); 986a019d6feSArnaldo Carvalho de Melo bh_lock_sock(child); 987547b792cSIlpo Järvinen WARN_ON(sock_owned_by_user(child)); 988a019d6feSArnaldo Carvalho de Melo sock_hold(child); 989a019d6feSArnaldo Carvalho de Melo 990ebb516afSEric Dumazet inet_child_forget(sk, req, child); 991da8ab578SEric Dumazet reqsk_put(req); 992a019d6feSArnaldo Carvalho de Melo bh_unlock_sock(child); 993a019d6feSArnaldo Carvalho de Melo local_bh_enable(); 994a019d6feSArnaldo Carvalho de Melo sock_put(child); 995a019d6feSArnaldo Carvalho de Melo 99692d6f176SEric Dumazet cond_resched(); 997a019d6feSArnaldo Carvalho de Melo } 9980536fcc0SEric Dumazet if (queue->fastopenq.rskq_rst_head) { 9998336886fSJerry Chu /* Free all the reqs queued in rskq_rst_head. 
*/ 10000536fcc0SEric Dumazet spin_lock_bh(&queue->fastopenq.lock); 1001fff1f300SEric Dumazet req = queue->fastopenq.rskq_rst_head; 10020536fcc0SEric Dumazet queue->fastopenq.rskq_rst_head = NULL; 10030536fcc0SEric Dumazet spin_unlock_bh(&queue->fastopenq.lock); 1004fff1f300SEric Dumazet while (req != NULL) { 1005fff1f300SEric Dumazet next = req->dl_next; 100613854e5aSEric Dumazet reqsk_put(req); 1007fff1f300SEric Dumazet req = next; 10088336886fSJerry Chu } 10098336886fSJerry Chu } 1010ebb516afSEric Dumazet WARN_ON_ONCE(sk->sk_ack_backlog); 1011a019d6feSArnaldo Carvalho de Melo } 1012a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 1013af05dc93SArnaldo Carvalho de Melo 1014af05dc93SArnaldo Carvalho de Melo void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 1015af05dc93SArnaldo Carvalho de Melo { 1016af05dc93SArnaldo Carvalho de Melo struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; 1017af05dc93SArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 1018af05dc93SArnaldo Carvalho de Melo 1019af05dc93SArnaldo Carvalho de Melo sin->sin_family = AF_INET; 1020c720c7e8SEric Dumazet sin->sin_addr.s_addr = inet->inet_daddr; 1021c720c7e8SEric Dumazet sin->sin_port = inet->inet_dport; 1022af05dc93SArnaldo Carvalho de Melo } 1023af05dc93SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 1024c4d93909SArnaldo Carvalho de Melo 1025dec73ff0SArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 1026dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, 1027dec73ff0SArnaldo Carvalho de Melo char __user *optval, int __user *optlen) 1028dec73ff0SArnaldo Carvalho de Melo { 1029dbeff12bSDavid S. 
Miller const struct inet_connection_sock *icsk = inet_csk(sk); 1030dec73ff0SArnaldo Carvalho de Melo 103100db4124SIan Morris if (icsk->icsk_af_ops->compat_getsockopt) 1032dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname, 1033dec73ff0SArnaldo Carvalho de Melo optval, optlen); 1034dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->getsockopt(sk, level, optname, 1035dec73ff0SArnaldo Carvalho de Melo optval, optlen); 1036dec73ff0SArnaldo Carvalho de Melo } 1037dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); 1038dec73ff0SArnaldo Carvalho de Melo 1039dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, 1040b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 1041dec73ff0SArnaldo Carvalho de Melo { 1042dbeff12bSDavid S. Miller const struct inet_connection_sock *icsk = inet_csk(sk); 1043dec73ff0SArnaldo Carvalho de Melo 104400db4124SIan Morris if (icsk->icsk_af_ops->compat_setsockopt) 1045dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname, 1046dec73ff0SArnaldo Carvalho de Melo optval, optlen); 1047dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->setsockopt(sk, level, optname, 1048dec73ff0SArnaldo Carvalho de Melo optval, optlen); 1049dec73ff0SArnaldo Carvalho de Melo } 1050dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); 1051dec73ff0SArnaldo Carvalho de Melo #endif 105280d0a69fSDavid S. Miller 105380d0a69fSDavid S. Miller static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) 105480d0a69fSDavid S. Miller { 10555abf7f7eSEric Dumazet const struct inet_sock *inet = inet_sk(sk); 10565abf7f7eSEric Dumazet const struct ip_options_rcu *inet_opt; 105780d0a69fSDavid S. Miller __be32 daddr = inet->inet_daddr; 105880d0a69fSDavid S. Miller struct flowi4 *fl4; 105980d0a69fSDavid S. 
Miller struct rtable *rt; 106080d0a69fSDavid S. Miller 106180d0a69fSDavid S. Miller rcu_read_lock(); 106280d0a69fSDavid S. Miller inet_opt = rcu_dereference(inet->inet_opt); 106380d0a69fSDavid S. Miller if (inet_opt && inet_opt->opt.srr) 106480d0a69fSDavid S. Miller daddr = inet_opt->opt.faddr; 106580d0a69fSDavid S. Miller fl4 = &fl->u.ip4; 106680d0a69fSDavid S. Miller rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, 106780d0a69fSDavid S. Miller inet->inet_saddr, inet->inet_dport, 106880d0a69fSDavid S. Miller inet->inet_sport, sk->sk_protocol, 106980d0a69fSDavid S. Miller RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); 107080d0a69fSDavid S. Miller if (IS_ERR(rt)) 107180d0a69fSDavid S. Miller rt = NULL; 107280d0a69fSDavid S. Miller if (rt) 107380d0a69fSDavid S. Miller sk_setup_caps(sk, &rt->dst); 107480d0a69fSDavid S. Miller rcu_read_unlock(); 107580d0a69fSDavid S. Miller 107680d0a69fSDavid S. Miller return &rt->dst; 107780d0a69fSDavid S. Miller } 107880d0a69fSDavid S. Miller 107980d0a69fSDavid S. Miller struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) 108080d0a69fSDavid S. Miller { 108180d0a69fSDavid S. Miller struct dst_entry *dst = __sk_dst_check(sk, 0); 108280d0a69fSDavid S. Miller struct inet_sock *inet = inet_sk(sk); 108380d0a69fSDavid S. Miller 108480d0a69fSDavid S. Miller if (!dst) { 108580d0a69fSDavid S. Miller dst = inet_csk_rebuild_route(sk, &inet->cork.fl); 108680d0a69fSDavid S. Miller if (!dst) 108780d0a69fSDavid S. Miller goto out; 108880d0a69fSDavid S. Miller } 10896700c270SDavid S. Miller dst->ops->update_pmtu(dst, sk, NULL, mtu); 109080d0a69fSDavid S. Miller 109180d0a69fSDavid S. Miller dst = __sk_dst_check(sk, 0); 109280d0a69fSDavid S. Miller if (!dst) 109380d0a69fSDavid S. Miller dst = inet_csk_rebuild_route(sk, &inet->cork.fl); 109480d0a69fSDavid S. Miller out: 109580d0a69fSDavid S. Miller return dst; 109680d0a69fSDavid S. Miller } 109780d0a69fSDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_update_pmtu); 1098