13f421baaSArnaldo Carvalho de Melo /* 23f421baaSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 33f421baaSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 43f421baaSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 53f421baaSArnaldo Carvalho de Melo * 63f421baaSArnaldo Carvalho de Melo * Support for INET connection oriented protocols. 73f421baaSArnaldo Carvalho de Melo * 83f421baaSArnaldo Carvalho de Melo * Authors: See the TCP sources 93f421baaSArnaldo Carvalho de Melo * 103f421baaSArnaldo Carvalho de Melo * This program is free software; you can redistribute it and/or 113f421baaSArnaldo Carvalho de Melo * modify it under the terms of the GNU General Public License 123f421baaSArnaldo Carvalho de Melo * as published by the Free Software Foundation; either version 133f421baaSArnaldo Carvalho de Melo * 2 of the License, or(at your option) any later version. 143f421baaSArnaldo Carvalho de Melo */ 153f421baaSArnaldo Carvalho de Melo 163f421baaSArnaldo Carvalho de Melo #include <linux/module.h> 173f421baaSArnaldo Carvalho de Melo #include <linux/jhash.h> 183f421baaSArnaldo Carvalho de Melo 193f421baaSArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 203f421baaSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 213f421baaSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 223f421baaSArnaldo Carvalho de Melo #include <net/ip.h> 233f421baaSArnaldo Carvalho de Melo #include <net/route.h> 243f421baaSArnaldo Carvalho de Melo #include <net/tcp_states.h> 25a019d6feSArnaldo Carvalho de Melo #include <net/xfrm.h> 26fa76ce73SEric Dumazet #include <net/tcp.h> 27c125e80bSCraig Gallek #include <net/sock_reuseport.h> 289691724eSstephen hemminger #include <net/addrconf.h> 293f421baaSArnaldo Carvalho de Melo 303f421baaSArnaldo Carvalho de Melo #ifdef INET_CSK_DEBUG 313f421baaSArnaldo Carvalho de Melo const char inet_csk_timer_bug_msg[] = 
"inet_csk BUG: unknown timer value\n"; 323f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_timer_bug_msg); 333f421baaSArnaldo Carvalho de Melo #endif 343f421baaSArnaldo Carvalho de Melo 35fe38d2a1SJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 36fe38d2a1SJosef Bacik /* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 37fe38d2a1SJosef Bacik * only, and any IPv4 addresses if not IPv6 only 38fe38d2a1SJosef Bacik * match_wildcard == false: addresses must be exactly the same, i.e. 39fe38d2a1SJosef Bacik * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, 40fe38d2a1SJosef Bacik * and 0.0.0.0 equals to 0.0.0.0 only 41fe38d2a1SJosef Bacik */ 42637bc8bbSJosef Bacik static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, 43637bc8bbSJosef Bacik const struct in6_addr *sk2_rcv_saddr6, 44637bc8bbSJosef Bacik __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 45637bc8bbSJosef Bacik bool sk1_ipv6only, bool sk2_ipv6only, 46fe38d2a1SJosef Bacik bool match_wildcard) 47fe38d2a1SJosef Bacik { 48637bc8bbSJosef Bacik int addr_type = ipv6_addr_type(sk1_rcv_saddr6); 49fe38d2a1SJosef Bacik int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; 50fe38d2a1SJosef Bacik 51fe38d2a1SJosef Bacik /* if both are mapped, treat as IPv4 */ 52fe38d2a1SJosef Bacik if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { 53fe38d2a1SJosef Bacik if (!sk2_ipv6only) { 54637bc8bbSJosef Bacik if (sk1_rcv_saddr == sk2_rcv_saddr) 55fe38d2a1SJosef Bacik return 1; 56637bc8bbSJosef Bacik if (!sk1_rcv_saddr || !sk2_rcv_saddr) 57fe38d2a1SJosef Bacik return match_wildcard; 58fe38d2a1SJosef Bacik } 59fe38d2a1SJosef Bacik return 0; 60fe38d2a1SJosef Bacik } 61fe38d2a1SJosef Bacik 62fe38d2a1SJosef Bacik if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) 63fe38d2a1SJosef Bacik return 1; 64fe38d2a1SJosef Bacik 65fe38d2a1SJosef Bacik if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && 66fe38d2a1SJosef Bacik !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) 67fe38d2a1SJosef Bacik return 1; 68fe38d2a1SJosef Bacik 69fe38d2a1SJosef Bacik if (addr_type == IPV6_ADDR_ANY && match_wildcard && 70637bc8bbSJosef Bacik !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) 71fe38d2a1SJosef Bacik return 1; 72fe38d2a1SJosef Bacik 73fe38d2a1SJosef Bacik if (sk2_rcv_saddr6 && 74637bc8bbSJosef Bacik ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) 75fe38d2a1SJosef Bacik return 1; 76fe38d2a1SJosef Bacik 77fe38d2a1SJosef Bacik return 0; 78fe38d2a1SJosef Bacik } 79fe38d2a1SJosef Bacik #endif 80fe38d2a1SJosef Bacik 81fe38d2a1SJosef Bacik /* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses 82fe38d2a1SJosef Bacik * match_wildcard == false: addresses must be exactly the same, i.e. 
83fe38d2a1SJosef Bacik * 0.0.0.0 only equals to 0.0.0.0 84fe38d2a1SJosef Bacik */ 85637bc8bbSJosef Bacik static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 86637bc8bbSJosef Bacik bool sk2_ipv6only, bool match_wildcard) 87fe38d2a1SJosef Bacik { 88637bc8bbSJosef Bacik if (!sk2_ipv6only) { 89637bc8bbSJosef Bacik if (sk1_rcv_saddr == sk2_rcv_saddr) 90fe38d2a1SJosef Bacik return 1; 91637bc8bbSJosef Bacik if (!sk1_rcv_saddr || !sk2_rcv_saddr) 92fe38d2a1SJosef Bacik return match_wildcard; 93fe38d2a1SJosef Bacik } 94fe38d2a1SJosef Bacik return 0; 95fe38d2a1SJosef Bacik } 96fe38d2a1SJosef Bacik 97fe38d2a1SJosef Bacik int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, 98fe38d2a1SJosef Bacik bool match_wildcard) 99fe38d2a1SJosef Bacik { 100fe38d2a1SJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 101fe38d2a1SJosef Bacik if (sk->sk_family == AF_INET6) 102637bc8bbSJosef Bacik return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr, 103319554f2SJosef Bacik inet6_rcv_saddr(sk2), 104637bc8bbSJosef Bacik sk->sk_rcv_saddr, 105637bc8bbSJosef Bacik sk2->sk_rcv_saddr, 106637bc8bbSJosef Bacik ipv6_only_sock(sk), 107637bc8bbSJosef Bacik ipv6_only_sock(sk2), 108637bc8bbSJosef Bacik match_wildcard); 109fe38d2a1SJosef Bacik #endif 110637bc8bbSJosef Bacik return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr, 111637bc8bbSJosef Bacik ipv6_only_sock(sk2), match_wildcard); 112fe38d2a1SJosef Bacik } 113fe38d2a1SJosef Bacik EXPORT_SYMBOL(inet_rcv_saddr_equal); 114fe38d2a1SJosef Bacik 1150bbf87d8SEric W. 
Biederman void inet_get_local_port_range(struct net *net, int *low, int *high) 116227b60f5SStephen Hemminger { 11795c96174SEric Dumazet unsigned int seq; 11895c96174SEric Dumazet 119227b60f5SStephen Hemminger do { 120c9d8f1a6SCong Wang seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); 121227b60f5SStephen Hemminger 122c9d8f1a6SCong Wang *low = net->ipv4.ip_local_ports.range[0]; 123c9d8f1a6SCong Wang *high = net->ipv4.ip_local_ports.range[1]; 124c9d8f1a6SCong Wang } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); 125227b60f5SStephen Hemminger } 126227b60f5SStephen Hemminger EXPORT_SYMBOL(inet_get_local_port_range); 1273f421baaSArnaldo Carvalho de Melo 128aa078842SJosef Bacik static int inet_csk_bind_conflict(const struct sock *sk, 129aa078842SJosef Bacik const struct inet_bind_bucket *tb, 130aa078842SJosef Bacik bool relax, bool reuseport_ok) 1313f421baaSArnaldo Carvalho de Melo { 1323f421baaSArnaldo Carvalho de Melo struct sock *sk2; 1330643ee4fSTom Herbert bool reuse = sk->sk_reuse; 1340643ee4fSTom Herbert bool reuseport = !!sk->sk_reuseport && reuseport_ok; 135da5e3630STom Herbert kuid_t uid = sock_i_uid((struct sock *)sk); 1363f421baaSArnaldo Carvalho de Melo 1377477fd2eSPavel Emelyanov /* 1387477fd2eSPavel Emelyanov * Unlike other sk lookup places we do not check 1397477fd2eSPavel Emelyanov * for sk_net here, since _all_ the socks listed 1407477fd2eSPavel Emelyanov * in tb->owners list belong to the same net - the 1417477fd2eSPavel Emelyanov * one this bucket belongs to. 
1427477fd2eSPavel Emelyanov */ 1437477fd2eSPavel Emelyanov 144b67bfe0dSSasha Levin sk_for_each_bound(sk2, &tb->owners) { 1453f421baaSArnaldo Carvalho de Melo if (sk != sk2 && 1463f421baaSArnaldo Carvalho de Melo (!sk->sk_bound_dev_if || 1473f421baaSArnaldo Carvalho de Melo !sk2->sk_bound_dev_if || 1483f421baaSArnaldo Carvalho de Melo sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { 149da5e3630STom Herbert if ((!reuse || !sk2->sk_reuse || 150da5e3630STom Herbert sk2->sk_state == TCP_LISTEN) && 151da5e3630STom Herbert (!reuseport || !sk2->sk_reuseport || 152c125e80bSCraig Gallek rcu_access_pointer(sk->sk_reuseport_cb) || 153da5e3630STom Herbert (sk2->sk_state != TCP_TIME_WAIT && 154da5e3630STom Herbert !uid_eq(uid, sock_i_uid(sk2))))) { 155aa078842SJosef Bacik if (inet_rcv_saddr_equal(sk, sk2, true)) 1563f421baaSArnaldo Carvalho de Melo break; 1578d238b25SDavid S. Miller } 158aacd9289SAlex Copot if (!relax && reuse && sk2->sk_reuse && 159aacd9289SAlex Copot sk2->sk_state != TCP_LISTEN) { 160aa078842SJosef Bacik if (inet_rcv_saddr_equal(sk, sk2, true)) 161aacd9289SAlex Copot break; 162aacd9289SAlex Copot } 1633f421baaSArnaldo Carvalho de Melo } 1643f421baaSArnaldo Carvalho de Melo } 165b67bfe0dSSasha Levin return sk2 != NULL; 1663f421baaSArnaldo Carvalho de Melo } 167971af18bSArnaldo Carvalho de Melo 168289141b7SJosef Bacik /* 169289141b7SJosef Bacik * Find an open port number for the socket. Returns with the 170289141b7SJosef Bacik * inet_bind_hashbucket lock held. 
1713f421baaSArnaldo Carvalho de Melo */ 172289141b7SJosef Bacik static struct inet_bind_hashbucket * 173289141b7SJosef Bacik inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret) 1743f421baaSArnaldo Carvalho de Melo { 175ea8add2bSEric Dumazet struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; 176289141b7SJosef Bacik int port = 0; 1773f421baaSArnaldo Carvalho de Melo struct inet_bind_hashbucket *head; 1783b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(sk); 179ea8add2bSEric Dumazet int i, low, high, attempt_half; 180ea8add2bSEric Dumazet struct inet_bind_bucket *tb; 181ea8add2bSEric Dumazet u32 remaining, offset; 1823f421baaSArnaldo Carvalho de Melo 183ea8add2bSEric Dumazet attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; 184ea8add2bSEric Dumazet other_half_scan: 1850bbf87d8SEric W. Biederman inet_get_local_port_range(net, &low, &high); 186ea8add2bSEric Dumazet high++; /* [32768, 60999] -> [32768, 61000[ */ 187ea8add2bSEric Dumazet if (high - low < 4) 188ea8add2bSEric Dumazet attempt_half = 0; 189946f9eb2SEric Dumazet if (attempt_half) { 190ea8add2bSEric Dumazet int half = low + (((high - low) >> 2) << 1); 191946f9eb2SEric Dumazet 192946f9eb2SEric Dumazet if (attempt_half == 1) 193946f9eb2SEric Dumazet high = half; 194946f9eb2SEric Dumazet else 195946f9eb2SEric Dumazet low = half; 196946f9eb2SEric Dumazet } 197ea8add2bSEric Dumazet remaining = high - low; 198ea8add2bSEric Dumazet if (likely(remaining > 1)) 199ea8add2bSEric Dumazet remaining &= ~1U; 2003f421baaSArnaldo Carvalho de Melo 201ea8add2bSEric Dumazet offset = prandom_u32() % remaining; 202ea8add2bSEric Dumazet /* __inet_hash_connect() favors ports having @low parity 203ea8add2bSEric Dumazet * We do the opposite to not pollute connect() users. 
204ea8add2bSEric Dumazet */ 205ea8add2bSEric Dumazet offset |= 1U; 206ea8add2bSEric Dumazet 207ea8add2bSEric Dumazet other_parity_scan: 208ea8add2bSEric Dumazet port = low + offset; 209ea8add2bSEric Dumazet for (i = 0; i < remaining; i += 2, port += 2) { 210ea8add2bSEric Dumazet if (unlikely(port >= high)) 211ea8add2bSEric Dumazet port -= remaining; 212ea8add2bSEric Dumazet if (inet_is_local_reserved_port(net, port)) 213ea8add2bSEric Dumazet continue; 214ea8add2bSEric Dumazet head = &hinfo->bhash[inet_bhashfn(net, port, 215ea8add2bSEric Dumazet hinfo->bhash_size)]; 216ea8add2bSEric Dumazet spin_lock_bh(&head->lock); 217b67bfe0dSSasha Levin inet_bind_bucket_for_each(tb, &head->chain) 218ea8add2bSEric Dumazet if (net_eq(ib_net(tb), net) && tb->port == port) { 219289141b7SJosef Bacik if (!inet_csk_bind_conflict(sk, tb, false, false)) 2206cd66616SJosef Bacik goto success; 221ea8add2bSEric Dumazet goto next_port; 2222b05ad33SFlavio Leitner } 223289141b7SJosef Bacik tb = NULL; 224289141b7SJosef Bacik goto success; 225ea8add2bSEric Dumazet next_port: 226ea8add2bSEric Dumazet spin_unlock_bh(&head->lock); 227ea8add2bSEric Dumazet cond_resched(); 228a9d8f911SEvgeniy Polyakov } 2293f421baaSArnaldo Carvalho de Melo 230ea8add2bSEric Dumazet offset--; 231ea8add2bSEric Dumazet if (!(offset & 1)) 232ea8add2bSEric Dumazet goto other_parity_scan; 233ea8add2bSEric Dumazet 234946f9eb2SEric Dumazet if (attempt_half == 1) { 235946f9eb2SEric Dumazet /* OK we now try the upper half of the range */ 236946f9eb2SEric Dumazet attempt_half = 2; 237ea8add2bSEric Dumazet goto other_half_scan; 238946f9eb2SEric Dumazet } 239289141b7SJosef Bacik return NULL; 240289141b7SJosef Bacik success: 241289141b7SJosef Bacik *port_ret = port; 242289141b7SJosef Bacik *tb_ret = tb; 243289141b7SJosef Bacik return head; 244289141b7SJosef Bacik } 245ea8add2bSEric Dumazet 246637bc8bbSJosef Bacik static inline int sk_reuseport_match(struct inet_bind_bucket *tb, 247637bc8bbSJosef Bacik struct sock *sk) 
248637bc8bbSJosef Bacik { 249637bc8bbSJosef Bacik kuid_t uid = sock_i_uid(sk); 250637bc8bbSJosef Bacik 251637bc8bbSJosef Bacik if (tb->fastreuseport <= 0) 252637bc8bbSJosef Bacik return 0; 253637bc8bbSJosef Bacik if (!sk->sk_reuseport) 254637bc8bbSJosef Bacik return 0; 255637bc8bbSJosef Bacik if (rcu_access_pointer(sk->sk_reuseport_cb)) 256637bc8bbSJosef Bacik return 0; 257637bc8bbSJosef Bacik if (!uid_eq(tb->fastuid, uid)) 258637bc8bbSJosef Bacik return 0; 259637bc8bbSJosef Bacik /* We only need to check the rcv_saddr if this tb was once marked 260637bc8bbSJosef Bacik * without fastreuseport and then was reset, as we can only know that 261637bc8bbSJosef Bacik * the fast_*rcv_saddr doesn't have any conflicts with the socks on the 262637bc8bbSJosef Bacik * owners list. 263637bc8bbSJosef Bacik */ 264637bc8bbSJosef Bacik if (tb->fastreuseport == FASTREUSEPORT_ANY) 265637bc8bbSJosef Bacik return 1; 266637bc8bbSJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 267637bc8bbSJosef Bacik if (tb->fast_sk_family == AF_INET6) 268637bc8bbSJosef Bacik return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr, 269637bc8bbSJosef Bacik &sk->sk_v6_rcv_saddr, 270637bc8bbSJosef Bacik tb->fast_rcv_saddr, 271637bc8bbSJosef Bacik sk->sk_rcv_saddr, 272637bc8bbSJosef Bacik tb->fast_ipv6_only, 273637bc8bbSJosef Bacik ipv6_only_sock(sk), true); 274637bc8bbSJosef Bacik #endif 275637bc8bbSJosef Bacik return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr, 276637bc8bbSJosef Bacik ipv6_only_sock(sk), true); 277637bc8bbSJosef Bacik } 278637bc8bbSJosef Bacik 279289141b7SJosef Bacik /* Obtain a reference to a local port for the given sock, 280289141b7SJosef Bacik * if snum is zero it means select any available local port. 
281289141b7SJosef Bacik * We try to allocate an odd port (and leave even ports for connect()) 282289141b7SJosef Bacik */ 283289141b7SJosef Bacik int inet_csk_get_port(struct sock *sk, unsigned short snum) 284289141b7SJosef Bacik { 285289141b7SJosef Bacik bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; 286289141b7SJosef Bacik struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; 287289141b7SJosef Bacik int ret = 1, port = snum; 288289141b7SJosef Bacik struct inet_bind_hashbucket *head; 289289141b7SJosef Bacik struct net *net = sock_net(sk); 290289141b7SJosef Bacik struct inet_bind_bucket *tb = NULL; 291289141b7SJosef Bacik kuid_t uid = sock_i_uid(sk); 292289141b7SJosef Bacik 293289141b7SJosef Bacik if (!port) { 294289141b7SJosef Bacik head = inet_csk_find_open_port(sk, &tb, &port); 295289141b7SJosef Bacik if (!head) 296289141b7SJosef Bacik return ret; 297289141b7SJosef Bacik if (!tb) 298289141b7SJosef Bacik goto tb_not_found; 299289141b7SJosef Bacik goto success; 300289141b7SJosef Bacik } 301289141b7SJosef Bacik head = &hinfo->bhash[inet_bhashfn(net, port, 302289141b7SJosef Bacik hinfo->bhash_size)]; 303289141b7SJosef Bacik spin_lock_bh(&head->lock); 304289141b7SJosef Bacik inet_bind_bucket_for_each(tb, &head->chain) 305289141b7SJosef Bacik if (net_eq(ib_net(tb), net) && tb->port == port) 306289141b7SJosef Bacik goto tb_found; 307ea8add2bSEric Dumazet tb_not_found: 308ea8add2bSEric Dumazet tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, 309ea8add2bSEric Dumazet net, head, port); 310ea8add2bSEric Dumazet if (!tb) 311ea8add2bSEric Dumazet goto fail_unlock; 3123f421baaSArnaldo Carvalho de Melo tb_found: 3133f421baaSArnaldo Carvalho de Melo if (!hlist_empty(&tb->owners)) { 3144a17fd52SPavel Emelyanov if (sk->sk_reuse == SK_FORCE_REUSE) 3154a17fd52SPavel Emelyanov goto success; 3164a17fd52SPavel Emelyanov 317b9470c27SJosef Bacik if ((tb->fastreuse > 0 && reuse) || 318637bc8bbSJosef Bacik sk_reuseport_match(tb, sk)) 3193f421baaSArnaldo Carvalho de Melo 
goto success; 320289141b7SJosef Bacik if (inet_csk_bind_conflict(sk, tb, true, true)) 3213f421baaSArnaldo Carvalho de Melo goto fail_unlock; 3223f421baaSArnaldo Carvalho de Melo } 3236cd66616SJosef Bacik success: 3246cd66616SJosef Bacik if (!hlist_empty(&tb->owners)) { 325ea8add2bSEric Dumazet tb->fastreuse = reuse; 326da5e3630STom Herbert if (sk->sk_reuseport) { 327637bc8bbSJosef Bacik tb->fastreuseport = FASTREUSEPORT_ANY; 328da5e3630STom Herbert tb->fastuid = uid; 329637bc8bbSJosef Bacik tb->fast_rcv_saddr = sk->sk_rcv_saddr; 330637bc8bbSJosef Bacik tb->fast_ipv6_only = ipv6_only_sock(sk); 331637bc8bbSJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 332637bc8bbSJosef Bacik tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 333637bc8bbSJosef Bacik #endif 334da5e3630STom Herbert } else { 335da5e3630STom Herbert tb->fastreuseport = 0; 336da5e3630STom Herbert } 3376cd66616SJosef Bacik } else { 3386cd66616SJosef Bacik if (!reuse) 3396cd66616SJosef Bacik tb->fastreuse = 0; 340637bc8bbSJosef Bacik if (sk->sk_reuseport) { 341637bc8bbSJosef Bacik /* We didn't match or we don't have fastreuseport set on 342637bc8bbSJosef Bacik * the tb, but we have sk_reuseport set on this socket 343637bc8bbSJosef Bacik * and we know that there are no bind conflicts with 344637bc8bbSJosef Bacik * this socket in this tb, so reset our tb's reuseport 345637bc8bbSJosef Bacik * settings so that any subsequent sockets that match 346637bc8bbSJosef Bacik * our current socket will be put on the fast path. 347637bc8bbSJosef Bacik * 348637bc8bbSJosef Bacik * If we reset we need to set FASTREUSEPORT_STRICT so we 349637bc8bbSJosef Bacik * do extra checking for all subsequent sk_reuseport 350637bc8bbSJosef Bacik * socks. 
351637bc8bbSJosef Bacik */ 352637bc8bbSJosef Bacik if (!sk_reuseport_match(tb, sk)) { 353637bc8bbSJosef Bacik tb->fastreuseport = FASTREUSEPORT_STRICT; 354637bc8bbSJosef Bacik tb->fastuid = uid; 355637bc8bbSJosef Bacik tb->fast_rcv_saddr = sk->sk_rcv_saddr; 356637bc8bbSJosef Bacik tb->fast_ipv6_only = ipv6_only_sock(sk); 357637bc8bbSJosef Bacik #if IS_ENABLED(CONFIG_IPV6) 358637bc8bbSJosef Bacik tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 359637bc8bbSJosef Bacik #endif 360637bc8bbSJosef Bacik } 361637bc8bbSJosef Bacik } else { 3626cd66616SJosef Bacik tb->fastreuseport = 0; 363ea8add2bSEric Dumazet } 364637bc8bbSJosef Bacik } 3653f421baaSArnaldo Carvalho de Melo if (!inet_csk(sk)->icsk_bind_hash) 366ea8add2bSEric Dumazet inet_bind_hash(sk, tb, port); 367547b792cSIlpo Järvinen WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 3683f421baaSArnaldo Carvalho de Melo ret = 0; 3693f421baaSArnaldo Carvalho de Melo 3703f421baaSArnaldo Carvalho de Melo fail_unlock: 371ea8add2bSEric Dumazet spin_unlock_bh(&head->lock); 3723f421baaSArnaldo Carvalho de Melo return ret; 3733f421baaSArnaldo Carvalho de Melo } 3743f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_get_port); 3753f421baaSArnaldo Carvalho de Melo 3763f421baaSArnaldo Carvalho de Melo /* 3773f421baaSArnaldo Carvalho de Melo * Wait for an incoming connection, avoid race conditions. This must be called 3783f421baaSArnaldo Carvalho de Melo * with the socket locked. 
3793f421baaSArnaldo Carvalho de Melo */ 3803f421baaSArnaldo Carvalho de Melo static int inet_csk_wait_for_connect(struct sock *sk, long timeo) 3813f421baaSArnaldo Carvalho de Melo { 3823f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3833f421baaSArnaldo Carvalho de Melo DEFINE_WAIT(wait); 3843f421baaSArnaldo Carvalho de Melo int err; 3853f421baaSArnaldo Carvalho de Melo 3863f421baaSArnaldo Carvalho de Melo /* 3873f421baaSArnaldo Carvalho de Melo * True wake-one mechanism for incoming connections: only 3883f421baaSArnaldo Carvalho de Melo * one process gets woken up, not the 'whole herd'. 3893f421baaSArnaldo Carvalho de Melo * Since we do not 'race & poll' for established sockets 3903f421baaSArnaldo Carvalho de Melo * anymore, the common case will execute the loop only once. 3913f421baaSArnaldo Carvalho de Melo * 3923f421baaSArnaldo Carvalho de Melo * Subtle issue: "add_wait_queue_exclusive()" will be added 3933f421baaSArnaldo Carvalho de Melo * after any current non-exclusive waiters, and we know that 3943f421baaSArnaldo Carvalho de Melo * it will always _stay_ after any new non-exclusive waiters 3953f421baaSArnaldo Carvalho de Melo * because all non-exclusive waiters are added at the 3963f421baaSArnaldo Carvalho de Melo * beginning of the wait-queue. As such, it's ok to "drop" 3973f421baaSArnaldo Carvalho de Melo * our exclusiveness temporarily when we get woken up without 3983f421baaSArnaldo Carvalho de Melo * having to remove and re-insert us on the wait queue. 
3993f421baaSArnaldo Carvalho de Melo */ 4003f421baaSArnaldo Carvalho de Melo for (;;) { 401aa395145SEric Dumazet prepare_to_wait_exclusive(sk_sleep(sk), &wait, 4023f421baaSArnaldo Carvalho de Melo TASK_INTERRUPTIBLE); 4033f421baaSArnaldo Carvalho de Melo release_sock(sk); 4043f421baaSArnaldo Carvalho de Melo if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 4053f421baaSArnaldo Carvalho de Melo timeo = schedule_timeout(timeo); 406cb7cf8a3SEric Dumazet sched_annotate_sleep(); 4073f421baaSArnaldo Carvalho de Melo lock_sock(sk); 4083f421baaSArnaldo Carvalho de Melo err = 0; 4093f421baaSArnaldo Carvalho de Melo if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) 4103f421baaSArnaldo Carvalho de Melo break; 4113f421baaSArnaldo Carvalho de Melo err = -EINVAL; 4123f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 4133f421baaSArnaldo Carvalho de Melo break; 4143f421baaSArnaldo Carvalho de Melo err = sock_intr_errno(timeo); 4153f421baaSArnaldo Carvalho de Melo if (signal_pending(current)) 4163f421baaSArnaldo Carvalho de Melo break; 4173f421baaSArnaldo Carvalho de Melo err = -EAGAIN; 4183f421baaSArnaldo Carvalho de Melo if (!timeo) 4193f421baaSArnaldo Carvalho de Melo break; 4203f421baaSArnaldo Carvalho de Melo } 421aa395145SEric Dumazet finish_wait(sk_sleep(sk), &wait); 4223f421baaSArnaldo Carvalho de Melo return err; 4233f421baaSArnaldo Carvalho de Melo } 4243f421baaSArnaldo Carvalho de Melo 4253f421baaSArnaldo Carvalho de Melo /* 4263f421baaSArnaldo Carvalho de Melo * This will accept the next outstanding connection. 
4273f421baaSArnaldo Carvalho de Melo */ 428cdfbabfbSDavid Howells struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) 4293f421baaSArnaldo Carvalho de Melo { 4303f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 4318336886fSJerry Chu struct request_sock_queue *queue = &icsk->icsk_accept_queue; 4328336886fSJerry Chu struct request_sock *req; 433e3d95ad7SEric Dumazet struct sock *newsk; 4343f421baaSArnaldo Carvalho de Melo int error; 4353f421baaSArnaldo Carvalho de Melo 4363f421baaSArnaldo Carvalho de Melo lock_sock(sk); 4373f421baaSArnaldo Carvalho de Melo 4383f421baaSArnaldo Carvalho de Melo /* We need to make sure that this socket is listening, 4393f421baaSArnaldo Carvalho de Melo * and that it has something pending. 4403f421baaSArnaldo Carvalho de Melo */ 4413f421baaSArnaldo Carvalho de Melo error = -EINVAL; 4423f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 4433f421baaSArnaldo Carvalho de Melo goto out_err; 4443f421baaSArnaldo Carvalho de Melo 4453f421baaSArnaldo Carvalho de Melo /* Find already established connection */ 4468336886fSJerry Chu if (reqsk_queue_empty(queue)) { 4473f421baaSArnaldo Carvalho de Melo long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 4483f421baaSArnaldo Carvalho de Melo 4493f421baaSArnaldo Carvalho de Melo /* If this is a non blocking socket don't sleep */ 4503f421baaSArnaldo Carvalho de Melo error = -EAGAIN; 4513f421baaSArnaldo Carvalho de Melo if (!timeo) 4523f421baaSArnaldo Carvalho de Melo goto out_err; 4533f421baaSArnaldo Carvalho de Melo 4543f421baaSArnaldo Carvalho de Melo error = inet_csk_wait_for_connect(sk, timeo); 4553f421baaSArnaldo Carvalho de Melo if (error) 4563f421baaSArnaldo Carvalho de Melo goto out_err; 4573f421baaSArnaldo Carvalho de Melo } 458fff1f300SEric Dumazet req = reqsk_queue_remove(queue, sk); 4598336886fSJerry Chu newsk = req->sk; 4603f421baaSArnaldo Carvalho de Melo 461e3d95ad7SEric Dumazet if (sk->sk_protocol == IPPROTO_TCP && 
4620536fcc0SEric Dumazet tcp_rsk(req)->tfo_listener) { 4630536fcc0SEric Dumazet spin_lock_bh(&queue->fastopenq.lock); 4649439ce00SEric Dumazet if (tcp_rsk(req)->tfo_listener) { 4658336886fSJerry Chu /* We are still waiting for the final ACK from 3WHS 4668336886fSJerry Chu * so can't free req now. Instead, we set req->sk to 4678336886fSJerry Chu * NULL to signify that the child socket is taken 4688336886fSJerry Chu * so reqsk_fastopen_remove() will free the req 4698336886fSJerry Chu * when 3WHS finishes (or is aborted). 4708336886fSJerry Chu */ 4718336886fSJerry Chu req->sk = NULL; 4728336886fSJerry Chu req = NULL; 4738336886fSJerry Chu } 4740536fcc0SEric Dumazet spin_unlock_bh(&queue->fastopenq.lock); 4758336886fSJerry Chu } 4763f421baaSArnaldo Carvalho de Melo out: 4773f421baaSArnaldo Carvalho de Melo release_sock(sk); 4788336886fSJerry Chu if (req) 47913854e5aSEric Dumazet reqsk_put(req); 4803f421baaSArnaldo Carvalho de Melo return newsk; 4813f421baaSArnaldo Carvalho de Melo out_err: 4823f421baaSArnaldo Carvalho de Melo newsk = NULL; 4838336886fSJerry Chu req = NULL; 4843f421baaSArnaldo Carvalho de Melo *err = error; 4853f421baaSArnaldo Carvalho de Melo goto out; 4863f421baaSArnaldo Carvalho de Melo } 4873f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_accept); 4883f421baaSArnaldo Carvalho de Melo 4893f421baaSArnaldo Carvalho de Melo /* 4903f421baaSArnaldo Carvalho de Melo * Using different timers for retransmit, delayed acks and probes 4913f421baaSArnaldo Carvalho de Melo * We may wish use just one timer maintaining a list of expire jiffies 4923f421baaSArnaldo Carvalho de Melo * to optimize. 
4933f421baaSArnaldo Carvalho de Melo */ 4943f421baaSArnaldo Carvalho de Melo void inet_csk_init_xmit_timers(struct sock *sk, 4953f421baaSArnaldo Carvalho de Melo void (*retransmit_handler)(unsigned long), 4963f421baaSArnaldo Carvalho de Melo void (*delack_handler)(unsigned long), 4973f421baaSArnaldo Carvalho de Melo void (*keepalive_handler)(unsigned long)) 4983f421baaSArnaldo Carvalho de Melo { 4993f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 5003f421baaSArnaldo Carvalho de Melo 501b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, 502b24b8a24SPavel Emelyanov (unsigned long)sk); 503b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_delack_timer, delack_handler, 504b24b8a24SPavel Emelyanov (unsigned long)sk); 505b24b8a24SPavel Emelyanov setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); 5063f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = 0; 5073f421baaSArnaldo Carvalho de Melo } 5083f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_init_xmit_timers); 5093f421baaSArnaldo Carvalho de Melo 5103f421baaSArnaldo Carvalho de Melo void inet_csk_clear_xmit_timers(struct sock *sk) 5113f421baaSArnaldo Carvalho de Melo { 5123f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 5133f421baaSArnaldo Carvalho de Melo 5143f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; 5153f421baaSArnaldo Carvalho de Melo 5163f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 5173f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_delack_timer); 5183f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 5193f421baaSArnaldo Carvalho de Melo } 5203f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 5213f421baaSArnaldo Carvalho de Melo 5223f421baaSArnaldo Carvalho de Melo void inet_csk_delete_keepalive_timer(struct 
sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);

/* (Re)arm the per-socket keepalive timer to fire @len jiffies from now. */
void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);

/* Build an output route for replying to a connection request (SYN-ACK).
 * On success returns the route's dst_entry and fills in *fl4; on failure
 * bumps IPSTATS_MIB_OUTNOROUTES and returns NULL.
 * Strict source routing with a route that still goes via a gateway is
 * rejected (route_err path).
 */
struct dst_entry *inet_csk_route_req(const struct sock *sk,
				     struct flowi4 *fl4,
				     const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct ip_options_rcu *opt = ireq->opt;
	struct rtable *rt;

	/* If the request carried a source-route option, route towards the
	 * first hop (faddr) instead of the final remote address.
	 */
	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);

/* Like inet_csk_route_req(), but for the newly created child socket:
 * the flow is built into the child's inet cork and the IP options are
 * read from the child under RCU (newinet->inet_opt), since the child's
 * options may have been copied/modified independently of the request.
 */
struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
					    struct sock *newsk,
					    const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct inet_sock *newinet = inet_sk(newsk);
	struct ip_options_rcu *opt;
	struct flowi4 *fl4;
	struct rtable *rt;

	fl4 = &newinet->cork.fl.u.ip4;

	rcu_read_lock();
	opt = rcu_dereference(newinet->inet_opt);
	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	rcu_read_unlock();
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	rcu_read_unlock();
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);

#if IS_ENABLED(CONFIG_IPV6)
#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
#else
#define AF_INET_FAMILY(fam) true
#endif

/* Decide when to expire the request and when to resend SYN-ACK */
static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
				  const int max_retries,
				  const u8 rskq_defer_accept,
				  int *expire, int *resend)
{
	/* Without TCP_DEFER_ACCEPT: expire purely on retransmit count,
	 * always resend.
	 */
	if (!rskq_defer_accept) {
		*expire = req->num_timeout >= thresh;
		*resend = 1;
		return;
	}
	/* With deferred accept, an ACKed request is kept alive past
	 * @thresh (waiting for data) up to @max_retries timeouts.
	 */
	*expire = req->num_timeout >= thresh &&
		  (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
	/*
	 * Do not resend while waiting for data after ACK,
	 * start to resend on end of deferring period to give
	 * last chance for data or ACK to create established socket.
	 */
	*resend = !inet_rsk(req)->acked ||
		  req->num_timeout >= rskq_defer_accept - 1;
}

/* Retransmit the SYN-ACK for @req; counts the attempt on success. */
int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
	int err = req->rsk_ops->rtx_syn_ack(parent, req);

	if (!err)
		req->num_retrans++;
	return err;
}
EXPORT_SYMBOL(inet_rtx_syn_ack);

/* return true if req was found in the ehash table */
static bool reqsk_queue_unlink(struct request_sock_queue *queue,
			       struct request_sock *req)
{
	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
	bool found = false;

	if (sk_hashed(req_to_sk(req))) {
		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);

		spin_lock(lock);
		found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
		spin_unlock(lock);
	}
	/* Cancelling a pending rsk_timer drops the timer's reference. */
	if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
		reqsk_put(req);
	return found;
}

/* Remove @req from the listener's accept queue and drop the queue's
 * reference if it was actually unlinked from the ehash.
 */
void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
	if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
		reqsk_put(req);
	}
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);

/* Drop @req from the queue and release the caller's reference as well. */
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
{
	inet_csk_reqsk_queue_drop(sk, req);
	reqsk_put(req);
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);

/* Per-request SYN-ACK retransmit timer.  Either rearms itself with
 * exponential backoff, or expires the request and drops it.
 */
static void reqsk_timer_handler(unsigned long data)
{
	struct request_sock *req = (struct request_sock *)data;
	struct sock *sk_listener = req->rsk_listener;
	struct net *net = sock_net(sk_listener);
	struct inet_connection_sock *icsk = inet_csk(sk_listener);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	int qlen, expire = 0, resend = 0;
	int max_retries, thresh;
	u8 defer_accept;

	if (sk_state_load(sk_listener) != TCP_LISTEN)
		goto drop;

	/* Per-socket TCP_SYNCNT overrides the sysctl default. */
	max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
	thresh = max_retries;
	/* Normally all the openreqs are young and become mature
	 * (i.e. converted to established socket) for first timeout.
	 * If synack was not acknowledged for 1 second, it means
	 * one of the following things: synack was lost, ack was lost,
	 * rtt is high or nobody planned to ack (i.e. synflood).
	 * When server is a bit loaded, queue is populated with old
	 * open requests, reducing effective size of queue.
	 * When server is well loaded, queue size reduces to zero
	 * after several minutes of work. It is not synflood,
	 * it is normal operation. The solution is pruning
	 * too old entries overriding normal timeout, when
	 * situation becomes dangerous.
	 *
	 * Essentially, we reserve half of room for young
	 * embrions; and abort old ones without pity, if old
	 * ones are about to clog our table.
	 */
	qlen = reqsk_queue_len(queue);
	if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
		int young = reqsk_queue_len_young(queue) << 1;

		while (thresh > 2) {
			if (qlen < young)
				break;
			thresh--;
			young <<= 1;
		}
	}
	defer_accept = READ_ONCE(queue->rskq_defer_accept);
	if (defer_accept)
		max_retries = defer_accept;
	syn_ack_recalc(req, thresh, max_retries, defer_accept,
		       &expire, &resend);
	req->rsk_ops->syn_ack_timeout(req);
	if (!expire &&
	    (!resend ||
	     !inet_rtx_syn_ack(sk_listener, req) ||
	     inet_rsk(req)->acked)) {
		unsigned long timeo;

		/* First timeout: the request is no longer "young". */
		if (req->num_timeout++ == 0)
			atomic_dec(&queue->young);
		timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
		mod_timer(&req->rsk_timer, jiffies + timeo);
		return;
	}
drop:
	inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
}

/* Initialize @req, arm its retransmit timer and publish it in the ehash
 * so incoming packets can find it.
 */
static void reqsk_queue_hash_req(struct request_sock *req,
				 unsigned long timeout)
{
	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;

	setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler,
			   (unsigned long)req);
	mod_timer(&req->rsk_timer, jiffies + timeout);

	inet_ehash_insert(req_to_sk(req), NULL);
	/* before letting lookups find us, make sure all req fields
	 * are committed to memory and refcnt initialized.
	 */
	smp_wmb();
	refcount_set(&req->rsk_refcnt, 2 + 1);
}

/* Hash @req and account it on the listener's accept queue. */
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
				   unsigned long timeout)
{
	reqsk_queue_hash_req(req, timeout);
	inet_csk_reqsk_queue_added(sk);
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);

/**
 * inet_csk_clone_lock - clone an inet socket, and lock its clone
 * @sk: the socket to clone
 * @req: request_sock
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *inet_csk_clone_lock(const struct sock *sk,
				 const struct request_sock *req,
				 const gfp_t priority)
{
	struct sock *newsk = sk_clone_lock(sk, priority);

	if (newsk) {
		struct inet_connection_sock *newicsk = inet_csk(newsk);

		newsk->sk_state = TCP_SYN_RECV;
		newicsk->icsk_bind_hash = NULL;

		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);

		/* listeners have SOCK_RCU_FREE, not the children */
		sock_reset_flag(newsk, SOCK_RCU_FREE);

		inet_sk(newsk)->mc_list = NULL;

		newsk->sk_mark = inet_rsk(req)->ir_mark;
		atomic64_set(&newsk->sk_cookie,
			     atomic64_read(&inet_rsk(req)->ir_cookie));

		newicsk->icsk_retransmits = 0;
		newicsk->icsk_backoff = 0;
		newicsk->icsk_probes_out = 0;

		/* Deinitialize accept_queue to trap illegal accesses. */
		memset(&newicsk->icsk_accept_queue, 0,
		       sizeof(newicsk->icsk_accept_queue));

		security_inet_csk_clone(newsk, req);
	}
	return newsk;
}
EXPORT_SYMBOL_GPL(inet_csk_clone_lock);

/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all. Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 */
void inet_csk_destroy_sock(struct sock *sk)
{
	WARN_ON(sk->sk_state != TCP_CLOSE);
	WARN_ON(!sock_flag(sk, SOCK_DEAD));

	/* It cannot be in hash table! */
	WARN_ON(!sk_unhashed(sk));

	/* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
	WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);

	sk->sk_prot->destroy(sk);

	sk_stream_kill_queues(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	percpu_counter_dec(sk->sk_prot->orphan_count);

	sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);

/* This function allows to force a closure of a socket after the call to
 * tcp/dccp_create_openreq_child().
 */
void inet_csk_prepare_forced_close(struct sock *sk)
	__releases(&sk->sk_lock.slock)
{
	/* sk_clone_lock locked the socket and set refcnt to 2 */
	bh_unlock_sock(sk);
	sock_put(sk);

	/* The below has to be done to allow calling inet_csk_destroy_sock */
	sock_set_flag(sk, SOCK_DEAD);
	percpu_counter_inc(sk->sk_prot->orphan_count);
	inet_sk(sk)->inet_num = 0;
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);

/* Move @sk into LISTEN state: allocate the accept queue, validate the
 * local port and hash the socket.  Returns 0 on success or a negative
 * errno (the socket is returned to TCP_CLOSE on failure).
 */
int inet_csk_listen_start(struct sock *sk, int backlog)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = -EADDRINUSE;

	reqsk_queue_alloc(&icsk->icsk_accept_queue);

	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	inet_csk_delack_init(sk);

	/* There is race window here: we announce ourselves listening,
	 * but this transition is still not validated by get_port().
	 * It is OK, because this socket enters to hash table only
	 * after validation is complete.
	 */
	sk_state_store(sk, TCP_LISTEN);
	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
		inet->inet_sport = htons(inet->inet_num);

		sk_dst_reset(sk);
		err = sk->sk_prot->hash(sk);

		if (likely(!err))
			return 0;
	}

	sk->sk_state = TCP_CLOSE;
	return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);

/* Disconnect and orphan a child socket that will never be accepted,
 * then destroy it.  Also unhooks a TCP fastopen child from its request.
 */
static void inet_child_forget(struct sock *sk, struct request_sock *req,
			      struct sock *child)
{
	sk->sk_prot->disconnect(child, O_NONBLOCK);

	sock_orphan(child);

	percpu_counter_inc(sk->sk_prot->orphan_count);

	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
		BUG_ON(tcp_sk(child)->fastopen_rsk != req);
		BUG_ON(sk != req->rsk_listener);

		/* Paranoid, to prevent race condition if
		 * an inbound pkt destined for child is
		 * blocked by sock lock in tcp_v4_rcv().
		 * Also to satisfy an assertion in
		 * tcp_v4_destroy_sock().
		 */
		tcp_sk(child)->fastopen_rsk = NULL;
	}
	inet_csk_destroy_sock(child);
}

/* Append (@req, @child) to the listener's accept queue under rskq_lock.
 * Returns @child on success, or NULL (after forgetting the child) if the
 * listener is no longer in TCP_LISTEN.
 */
struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
				      struct request_sock *req,
				      struct sock *child)
{
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;

	spin_lock(&queue->rskq_lock);
	if (unlikely(sk->sk_state != TCP_LISTEN)) {
		inet_child_forget(sk, req, child);
		child = NULL;
	} else {
		req->sk = child;
		req->dl_next = NULL;
		if (queue->rskq_accept_head == NULL)
			queue->rskq_accept_head = req;
		else
			queue->rskq_accept_tail->dl_next = req;
		queue->rskq_accept_tail = req;
		sk_acceptq_added(sk);
	}
	spin_unlock(&queue->rskq_lock);
	return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);

/* Final step of the 3WHS: if we own @req, move the child onto the accept
 * queue; otherwise (another CPU won the race) undo the child creation.
 */
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
					 struct request_sock *req, bool own_req)
{
	if (own_req) {
		inet_csk_reqsk_queue_drop(sk, req);
		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
		if (inet_csk_reqsk_queue_add(sk, req, child))
			return child;
	}
	/* Too bad, another child took ownership of the request, undo. */
	bh_unlock_sock(child);
	sock_put(child);
	return NULL;
}
EXPORT_SYMBOL(inet_csk_complete_hashdance);

/*
 * This routine closes sockets which have been at least partially
 * opened, but not yet accepted.
 */
void inet_csk_listen_stop(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *next, *req;

	/* Following specs, it would be better either to send FIN
	 * (and enter FIN-WAIT-1, it is normal close)
	 * or to send active reset (abort).
	 * Certainly, it is pretty dangerous while synflood, but it is
	 * bad justification for our negligence 8)
	 * To be honest, we are not able to make either
	 * of the variants now.			--ANK
	 */
	while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
		struct sock *child = req->sk;

		local_bh_disable();
		bh_lock_sock(child);
		WARN_ON(sock_owned_by_user(child));
		sock_hold(child);

		inet_child_forget(sk, req, child);
		reqsk_put(req);
		bh_unlock_sock(child);
		local_bh_enable();
		sock_put(child);

		cond_resched();
	}
	if (queue->fastopenq.rskq_rst_head) {
		/* Free all the reqs queued in rskq_rst_head. */
		spin_lock_bh(&queue->fastopenq.lock);
		req = queue->fastopenq.rskq_rst_head;
		queue->fastopenq.rskq_rst_head = NULL;
		spin_unlock_bh(&queue->fastopenq.lock);
		while (req != NULL) {
			next = req->dl_next;
			reqsk_put(req);
			req = next;
		}
	}
	WARN_ON_ONCE(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);

/* Fill @uaddr with the connected peer's IPv4 address and port. */
void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
	const struct inet_sock *inet = inet_sk(sk);

	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = inet->inet_daddr;
	sin->sin_port = inet->inet_dport;
}
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);

#ifdef CONFIG_COMPAT
/* Dispatch a compat getsockopt to the AF-specific compat handler if one
 * exists, otherwise fall back to the native handler.
 */
int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
			       char __user *optval, int __user *optlen)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_af_ops->compat_getsockopt)
		return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
							    optval, optlen);
	return icsk->icsk_af_ops->getsockopt(sk, level, optname,
					     optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);

/* Compat setsockopt counterpart of inet_csk_compat_getsockopt(). */
int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
			       char __user *optval, unsigned int optlen)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_af_ops->compat_setsockopt)
		return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
							    optval, optlen);
	return icsk->icsk_af_ops->setsockopt(sk, level, optname,
					     optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
#endif

/* Re-resolve the socket's output route after the cached dst was
 * invalidated.  NOTE(review): when routing fails, rt is set to NULL and
 * &rt->dst is returned — this yields NULL only because dst is presumably
 * the first member of struct rtable; confirm against the struct layout.
 */
static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;
	struct flowi4 *fl4;
	struct rtable *rt;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	/* Source routing: route to the first hop, not the final dest. */
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	fl4 = &fl->u.ip4;
	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
				   inet->inet_saddr, inet->inet_dport,
				   inet->inet_sport, sk->sk_protocol,
				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
	if (IS_ERR(rt))
		rt = NULL;
	if (rt)
		sk_setup_caps(sk, &rt->dst);
	rcu_read_unlock();

	return &rt->dst;
}

/* Propagate a new path MTU to the socket's route, rebuilding the route
 * if the cached dst is stale.  Returns the (possibly rebuilt) dst, or
 * NULL if no route could be found.
 */
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);
	struct inet_sock *inet = inet_sk(sk);

	if (!dst) {
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
		if (!dst)
			goto out;
	}
	dst->ops->update_pmtu(dst, sk, NULL, mtu);

	/* update_pmtu() may have invalidated the dst; re-check and
	 * rebuild once more if necessary.
	 */
	dst = __sk_dst_check(sk, 0);
	if (!dst)
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
out:
	return dst;
}
EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);