13f421baaSArnaldo Carvalho de Melo /* 23f421baaSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 33f421baaSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 43f421baaSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 53f421baaSArnaldo Carvalho de Melo * 63f421baaSArnaldo Carvalho de Melo * Support for INET connection oriented protocols. 73f421baaSArnaldo Carvalho de Melo * 83f421baaSArnaldo Carvalho de Melo * Authors: See the TCP sources 93f421baaSArnaldo Carvalho de Melo * 103f421baaSArnaldo Carvalho de Melo * This program is free software; you can redistribute it and/or 113f421baaSArnaldo Carvalho de Melo * modify it under the terms of the GNU General Public License 123f421baaSArnaldo Carvalho de Melo * as published by the Free Software Foundation; either version 133f421baaSArnaldo Carvalho de Melo * 2 of the License, or(at your option) any later version. 143f421baaSArnaldo Carvalho de Melo */ 153f421baaSArnaldo Carvalho de Melo 163f421baaSArnaldo Carvalho de Melo #include <linux/module.h> 173f421baaSArnaldo Carvalho de Melo #include <linux/jhash.h> 183f421baaSArnaldo Carvalho de Melo 193f421baaSArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 203f421baaSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 213f421baaSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 223f421baaSArnaldo Carvalho de Melo #include <net/ip.h> 233f421baaSArnaldo Carvalho de Melo #include <net/route.h> 243f421baaSArnaldo Carvalho de Melo #include <net/tcp_states.h> 25a019d6feSArnaldo Carvalho de Melo #include <net/xfrm.h> 26fa76ce73SEric Dumazet #include <net/tcp.h> 27c125e80bSCraig Gallek #include <net/sock_reuseport.h> 283f421baaSArnaldo Carvalho de Melo 293f421baaSArnaldo Carvalho de Melo #ifdef INET_CSK_DEBUG 303f421baaSArnaldo Carvalho de Melo const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; 
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
#endif

/* Snapshot the per-netns ephemeral port range into [*low, *high].
 * The seqlock retry loop guarantees we never observe a half-updated
 * range while a concurrent sysctl write is in progress.
 */
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);

		*low = net->ipv4.ip_local_ports.range[0];
		*high = net->ipv4.ip_local_ports.range[1];
	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
}
EXPORT_SYMBOL(inet_get_local_port_range);

/* Walk all sockets bound to @tb's port and decide whether @sk may share
 * the port.  Returns non-zero when a conflicting socket was found (the
 * loop broke early, leaving sk2 != NULL), zero otherwise.
 *
 * @relax:        when false, a stricter check is applied: two reusable,
 *                non-listening sockets with the same local address still
 *                conflict (second if-block below).
 * @reuseport_ok: gates the SO_REUSEPORT exemption; when false, reuseport
 *                is ignored even if both sockets requested it.
 */
int inet_csk_bind_conflict(const struct sock *sk,
			   const struct inet_bind_bucket *tb, bool relax,
			   bool reuseport_ok)
{
	struct sock *sk2;
	bool reuse = sk->sk_reuse;
	bool reuseport = !!sk->sk_reuseport && reuseport_ok;
	kuid_t uid = sock_i_uid((struct sock *)sk);

	/*
	 * Unlike other sk lookup places we do not check
	 * for sk_net here, since _all_ the socks listed
	 * in tb->owners list belong to the same net - the
	 * one this bucket belongs to.
	 */

	sk_for_each_bound(sk2, &tb->owners) {
		/* Only sockets on the same (or wildcard) device can clash. */
		if (sk != sk2 &&
		    !inet_v6_ipv6only(sk2) &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
			/* Conflict unless both sides agreed to SO_REUSEADDR
			 * (and sk2 is not listening), or both use
			 * SO_REUSEPORT with the same owning uid.
			 */
			if ((!reuse || !sk2->sk_reuse ||
			    sk2->sk_state == TCP_LISTEN) &&
			    (!reuseport || !sk2->sk_reuseport ||
			    rcu_access_pointer(sk->sk_reuseport_cb) ||
			    (sk2->sk_state != TCP_TIME_WAIT &&
			     !uid_eq(uid, sock_i_uid(sk2))))) {

				if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
				    sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
					break;
			}
			/* Strict mode: even mutually-reusable sockets on the
			 * same address count as a conflict.
			 */
			if (!relax && reuse && sk2->sk_reuse &&
			    sk2->sk_state != TCP_LISTEN) {

				if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
				    sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
					break;
			}
		}
	}
	return sk2 != NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);

/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 * We try to allocate an odd port (and leave even ports for connect())
 *
 * Returns 0 on success (sk bound into a bind bucket), 1 on failure.
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	int ret = 1, attempts = 5, port = snum;
	/* smallest_size/smallest_port track the reuse-candidate bucket
	 * with the fewest owners seen during the scan.
	 */
	int smallest_size = -1, smallest_port;
	struct inet_bind_hashbucket *head;
	struct net *net = sock_net(sk);
	int i, low, high, attempt_half;
	struct inet_bind_bucket *tb;
	kuid_t uid = sock_i_uid(sk);
	u32 remaining, offset;
	/* reuseport sharing is only honoured for an explicit bind(port). */
	bool reuseport_ok = !!snum;

	if (port) {
have_port:
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (net_eq(ib_net(tb), net) && tb->port == port)
				goto tb_found;

		goto tb_not_found;
	}
again:
	/* First pass for SK_CAN_REUSE sockets scans only the lower half
	 * of the range; attempt_half == 2 retries the upper half.
	 */
	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
	inet_get_local_port_range(net, &low, &high);
	high++;	/* [32768, 60999] -> [32768, 61000[ */
	if (high - low < 4)
		attempt_half = 0;
	if (attempt_half) {
		int half = low + (((high - low) >> 2) << 1);

		if (attempt_half == 1)
			high = half;
		else
			low = half;
	}
	remaining = high - low;
	if (likely(remaining > 1))
		remaining &= ~1U;

	offset = prandom_u32() % remaining;
	/* __inet_hash_connect() favors ports having @low parity
	 * We do the opposite to not pollute connect() users.
	 */
	offset |= 1U;
	smallest_size = -1;
	smallest_port = low; /* avoid compiler warning */

other_parity_scan:
	port = low + offset;
	/* Scan every other port, wrapping around at @high. */
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (net_eq(ib_net(tb), net) && tb->port == port) {
				/* Remember the least-crowded bucket we are
				 * allowed to share, as a fallback.
				 */
				if (((tb->fastreuse > 0 && reuse) ||
				     (tb->fastreuseport > 0 &&
				      sk->sk_reuseport &&
				      !rcu_access_pointer(sk->sk_reuseport_cb) &&
				      uid_eq(tb->fastuid, uid))) &&
				    (tb->num_owners < smallest_size || smallest_size == -1)) {
					smallest_size = tb->num_owners;
					smallest_port = port;
				}
				if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false,
									      reuseport_ok))
					goto tb_found;
				goto next_port;
			}
		/* Empty bucket: this port is free, take it (head->lock held). */
		goto tb_not_found;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}

	if (smallest_size != -1) {
		port = smallest_port;
		goto have_port;
	}
	/* Flip parity and rescan before giving up on this half. */
	offset--;
	if (!(offset & 1))
		goto other_parity_scan;

	if (attempt_half == 1) {
		/* OK we now try the upper half of the range */
		attempt_half = 2;
		goto other_half_scan;
	}
	return ret;

tb_not_found:
	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
				     net, head, port);
	if (!tb)
		goto fail_unlock;
tb_found:
	if (!hlist_empty(&tb->owners)) {
		if (sk->sk_reuse == SK_FORCE_REUSE)
			goto success;

		/* Fast path: bucket-wide reuse flags prove no conflict. */
		if (((tb->fastreuse > 0 && reuse) ||
		     (tb->fastreuseport > 0 &&
		      !rcu_access_pointer(sk->sk_reuseport_cb) &&
		      sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
		    smallest_size == -1)
			goto success;
		if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true,
							     reuseport_ok)) {
			/* The fallback bucket turned out to conflict under the
			 * strict check; retry the whole scan a bounded number
			 * of times.
			 */
			if ((reuse ||
			     (tb->fastreuseport > 0 &&
			      sk->sk_reuseport &&
			      !rcu_access_pointer(sk->sk_reuseport_cb) &&
			      uid_eq(tb->fastuid, uid))) &&
			    !snum && smallest_size != -1 && --attempts >= 0) {
				spin_unlock_bh(&head->lock);
				goto again;
			}
			goto fail_unlock;
		}
		/* Joining the bucket may invalidate its fast flags. */
		if (!reuse)
			tb->fastreuse = 0;
		if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
			tb->fastreuseport = 0;
	} else {
		tb->fastreuse = reuse;
		if (sk->sk_reuseport) {
			tb->fastreuseport = 1;
			tb->fastuid = uid;
		} else {
			tb->fastreuseport = 0;
		}
	}
success:
	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, port);
	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
	ret = 0;

fail_unlock:
	spin_unlock_bh(&head->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);

/*
 * Wait for an incoming connection, avoid race conditions. This must be called
 * with the socket locked.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	DEFINE_WAIT(wait);
	int err;

	/*
	 * True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	 *
	 * Subtle issue: "add_wait_queue_exclusive()" will be added
	 * after any current non-exclusive waiters, and we know that
	 * it will always _stay_ after any new non-exclusive waiters
	 * because all non-exclusive waiters are added at the
	 * beginning of the wait-queue. As such, it's ok to "drop"
	 * our exclusiveness temporarily when we get woken up without
	 * having to remove and re-insert us on the wait queue.
	 */
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		/* Drop the socket lock while sleeping so the softirq path
		 * can queue new children; re-check state after waking.
		 */
		release_sock(sk);
		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
			timeo = schedule_timeout(timeo);
		sched_annotate_sleep();
		lock_sock(sk);
		err = 0;
		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
			break;
		err = -EINVAL;
		if (sk->sk_state != TCP_LISTEN)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/*
 * This will accept the next outstanding connection.
 *
 * Returns the established child socket, or NULL with *err set
 * (-EINVAL, -EAGAIN, or the sleep's error code).
 */
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *req;
	struct sock *newsk;
	int error;

	lock_sock(sk);

	/* We need to make sure that this socket is listening,
	 * and that it has something pending.
	 */
	error = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out_err;

	/* Find already established connection */
	if (reqsk_queue_empty(queue)) {
		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

		/* If this is a non blocking socket don't sleep */
		error = -EAGAIN;
		if (!timeo)
			goto out_err;

		error = inet_csk_wait_for_connect(sk, timeo);
		if (error)
			goto out_err;
	}
	req = reqsk_queue_remove(queue, sk);
	newsk = req->sk;

	/* TCP Fast Open child: re-check tfo_listener under the fastopenq
	 * lock because the 3WHS completion path may race with us.
	 */
	if (sk->sk_protocol == IPPROTO_TCP &&
	    tcp_rsk(req)->tfo_listener) {
		spin_lock_bh(&queue->fastopenq.lock);
		if (tcp_rsk(req)->tfo_listener) {
			/* We are still waiting for the final ACK from 3WHS
			 * so can't free req now. Instead, we set req->sk to
			 * NULL to signify that the child socket is taken
			 * so reqsk_fastopen_remove() will free the req
			 * when 3WHS finishes (or is aborted).
			 */
			req->sk = NULL;
			req = NULL;
		}
		spin_unlock_bh(&queue->fastopenq.lock);
	}
out:
	release_sock(sk);
	if (req)
		reqsk_put(req);
	return newsk;
out_err:
	newsk = NULL;
	req = NULL;
	*err = error;
	goto out;
}
EXPORT_SYMBOL(inet_csk_accept);

/*
 * Using different timers for retransmit, delayed acks and probes
 * We may wish use just one timer maintaining a list of expire jiffies
 * to optimize.
 */
void inet_csk_init_xmit_timers(struct sock *sk,
			       void (*retransmit_handler)(unsigned long),
			       void (*delack_handler)(unsigned long),
			       void (*keepalive_handler)(unsigned long))
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* All three timers receive the socket pointer as their argument. */
	setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
		    (unsigned long)sk);
	setup_timer(&icsk->icsk_delack_timer, delack_handler,
		    (unsigned long)sk);
	setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);

/* Clear pending-event flags and stop all three transmit-side timers. */
void inet_csk_clear_xmit_timers(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;

	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
	sk_stop_timer(sk, &icsk->icsk_delack_timer);
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);

void inet_csk_delete_keepalive_timer(struct sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);

void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);

/* Build an IPv4 route for answering @req on listener @sk.
 * Returns the dst entry, or NULL (and bumps OUTNOROUTES) on failure.
 *
 * NOTE(review): ireq->opt is read without rcu_dereference here, unlike
 * inet_csk_route_child_sock() which reads newinet->inet_opt under RCU —
 * presumably the request_sock's options cannot change while the caller
 * holds the req; confirm against callers.
 */
struct dst_entry *inet_csk_route_req(const struct sock *sk,
				     struct flowi4 *fl4,
				     const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct ip_options_rcu *opt = ireq->opt;
	struct rtable *rt;

	/* Source-route (SRR) options override the destination address. */
	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	/* Strict source routing requires the next hop to be the gateway. */
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);

/* Build an IPv4 route for the newly created child socket @newsk,
 * caching the flow in the child's cork.  IP options are read from the
 * child under rcu_read_lock().  Returns the dst, or NULL on failure
 * (OUTNOROUTES incremented).
 */
struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
					    struct sock *newsk,
					    const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = read_pnet(&ireq->ireq_net);
	struct inet_sock *newinet = inet_sk(newsk);
	struct ip_options_rcu *opt;
	struct flowi4 *fl4;
	struct rtable *rt;

	fl4 = &newinet->cork.fl.u.ip4;

	rcu_read_lock();
	opt = rcu_dereference(newinet->inet_opt);
	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   sk->sk_protocol, inet_sk_flowi_flags(sk),
			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, ireq->ir_rmt_port,
			   htons(ireq->ir_num), sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt))
		goto no_route;
	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
		goto route_err;
	rcu_read_unlock();
	return &rt->dst;

route_err:
	ip_rt_put(rt);
no_route:
	rcu_read_unlock();
	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
	return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);

#if IS_ENABLED(CONFIG_IPV6)
#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
#else
#define AF_INET_FAMILY(fam) true
#endif

/* Decide when to expire the request and when to resend SYN-ACK */
static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
				  const int max_retries,
				  const u8 rskq_defer_accept,
				  int *expire, int *resend)
{
	if (!rskq_defer_accept) {
		*expire = req->num_timeout >= thresh;
		*resend = 1;
		return;
	}
	/* With TCP_DEFER_ACCEPT, an acked request is kept alive until
	 * max_retries timeouts, waiting for data.
	 */
	*expire = req->num_timeout >= thresh &&
		  (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
	/*
	 * Do not resend while waiting for data after ACK,
	 * start to resend on end of deferring period to give
	 * last chance for data or ACK to create established socket.
	 */
	*resend = !inet_rsk(req)->acked ||
		  req->num_timeout >= rskq_defer_accept - 1;
}

/* Retransmit the SYN-ACK for @req and count the retransmission. */
int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
	int err = req->rsk_ops->rtx_syn_ack(parent, req);

	if (!err)
		req->num_retrans++;
	return err;
}
EXPORT_SYMBOL(inet_rtx_syn_ack);

/* return true if req was found in the ehash table */
static bool reqsk_queue_unlink(struct request_sock_queue *queue,
			       struct request_sock *req)
{
	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
	bool found = false;

	if (sk_hashed(req_to_sk(req))) {
		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);

		spin_lock(lock);
		found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
		spin_unlock(lock);
	}
	/* A successfully cancelled rsk_timer held a reference; drop it. */
	if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
		reqsk_put(req);
	return found;
}

/* Remove @req from the listener's accept queue and ehash; drops the
 * queue's reference only if the request was actually unlinked.
 */
void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
	if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
		reqsk_put(req);
	}
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);

/* As above, but also drops the caller's reference on @req. */
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
{
	inet_csk_reqsk_queue_drop(sk, req);
	reqsk_put(req);
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);

/* Per-request SYN-ACK retransmission timer.  Rearms itself with
 * exponential backoff until the request expires or completes.
 */
static void reqsk_timer_handler(unsigned long data)
{
	struct request_sock *req = (struct request_sock *)data;
	struct sock *sk_listener = req->rsk_listener;
	struct net *net = sock_net(sk_listener);
	struct inet_connection_sock *icsk = inet_csk(sk_listener);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	int qlen, expire = 0, resend = 0;
	int max_retries, thresh;
	u8 defer_accept;

	if (sk_state_load(sk_listener) != TCP_LISTEN)
		goto drop;

	max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
	thresh = max_retries;
	/* Normally all the openreqs are young and become mature
	 * (i.e.
 converted to established socket) for first timeout.
	 * If synack was not acknowledged for 1 second, it means
	 * one of the following things: synack was lost, ack was lost,
	 * rtt is high or nobody planned to ack (i.e. synflood).
	 * When server is a bit loaded, queue is populated with old
	 * open requests, reducing effective size of queue.
	 * When server is well loaded, queue size reduces to zero
	 * after several minutes of work. It is not synflood,
	 * it is normal operation. The solution is pruning
	 * too old entries overriding normal timeout, when
	 * situation becomes dangerous.
	 *
	 * Essentially, we reserve half of room for young
	 * embrions; and abort old ones without pity, if old
	 * ones are about to clog our table.
	 */
	qlen = reqsk_queue_len(queue);
	if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
		int young = reqsk_queue_len_young(queue) << 1;

		/* Lower the expiry threshold while old entries dominate. */
		while (thresh > 2) {
			if (qlen < young)
				break;
			thresh--;
			young <<= 1;
		}
	}
	defer_accept = READ_ONCE(queue->rskq_defer_accept);
	if (defer_accept)
		max_retries = defer_accept;
	syn_ack_recalc(req, thresh, max_retries, defer_accept,
		       &expire, &resend);
	req->rsk_ops->syn_ack_timeout(req);
	if (!expire &&
	    (!resend ||
	     !inet_rtx_syn_ack(sk_listener, req) ||
	     inet_rsk(req)->acked)) {
		unsigned long timeo;

		/* First timeout graduates the request out of "young". */
		if (req->num_timeout++ == 0)
			atomic_dec(&queue->young);
		timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
		mod_timer(&req->rsk_timer, jiffies + timeo);
		return;
	}
drop:
	inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
}

/* Arm @req's retransmit timer and publish it in the ehash table. */
static void reqsk_queue_hash_req(struct request_sock *req,
				 unsigned long timeout)
{
	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;

	setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler,
			   (unsigned long)req);
	mod_timer(&req->rsk_timer, jiffies + timeout);

	inet_ehash_insert(req_to_sk(req), NULL);
	/* before letting lookups find us, make sure all req fields
	 * are committed to memory and refcnt initialized.
	 */
	smp_wmb();
	/* 2 refs for the hash/queue + timer, 1 for the caller. */
	atomic_set(&req->rsk_refcnt, 2 + 1);
}

void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
				   unsigned long timeout)
{
	reqsk_queue_hash_req(req, timeout);
	inet_csk_reqsk_queue_added(sk);
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);

/**
 * inet_csk_clone_lock - clone an inet socket, and lock its clone
 * @sk: the socket to clone
 * @req: request_sock
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *inet_csk_clone_lock(const struct sock *sk,
				 const struct request_sock *req,
				 const gfp_t priority)
{
	struct sock *newsk = sk_clone_lock(sk, priority);

Morris if (newsk) { 6599f1d2604SArnaldo Carvalho de Melo struct inet_connection_sock *newicsk = inet_csk(newsk); 6609f1d2604SArnaldo Carvalho de Melo 6619f1d2604SArnaldo Carvalho de Melo newsk->sk_state = TCP_SYN_RECV; 6629f1d2604SArnaldo Carvalho de Melo newicsk->icsk_bind_hash = NULL; 6639f1d2604SArnaldo Carvalho de Melo 664634fb979SEric Dumazet inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; 665b44084c2SEric Dumazet inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; 666b44084c2SEric Dumazet inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); 6679f1d2604SArnaldo Carvalho de Melo newsk->sk_write_space = sk_stream_write_space; 6689f1d2604SArnaldo Carvalho de Melo 66985017869SEric Dumazet /* listeners have SOCK_RCU_FREE, not the children */ 67085017869SEric Dumazet sock_reset_flag(newsk, SOCK_RCU_FREE); 67185017869SEric Dumazet 67284f39b08SLorenzo Colitti newsk->sk_mark = inet_rsk(req)->ir_mark; 67333cf7c90SEric Dumazet atomic64_set(&newsk->sk_cookie, 67433cf7c90SEric Dumazet atomic64_read(&inet_rsk(req)->ir_cookie)); 67584f39b08SLorenzo Colitti 6769f1d2604SArnaldo Carvalho de Melo newicsk->icsk_retransmits = 0; 6779f1d2604SArnaldo Carvalho de Melo newicsk->icsk_backoff = 0; 6786687e988SArnaldo Carvalho de Melo newicsk->icsk_probes_out = 0; 6799f1d2604SArnaldo Carvalho de Melo 6809f1d2604SArnaldo Carvalho de Melo /* Deinitialize accept_queue to trap illegal accesses. 
*/ 6819f1d2604SArnaldo Carvalho de Melo memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); 6824237c75cSVenkat Yekkirala 6834237c75cSVenkat Yekkirala security_inet_csk_clone(newsk, req); 6849f1d2604SArnaldo Carvalho de Melo } 6859f1d2604SArnaldo Carvalho de Melo return newsk; 6869f1d2604SArnaldo Carvalho de Melo } 687e56c57d0SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_clone_lock); 688a019d6feSArnaldo Carvalho de Melo 689a019d6feSArnaldo Carvalho de Melo /* 690a019d6feSArnaldo Carvalho de Melo * At this point, there should be no process reference to this 691a019d6feSArnaldo Carvalho de Melo * socket, and thus no user references at all. Therefore we 692a019d6feSArnaldo Carvalho de Melo * can assume the socket waitqueue is inactive and nobody will 693a019d6feSArnaldo Carvalho de Melo * try to jump onto it. 694a019d6feSArnaldo Carvalho de Melo */ 695a019d6feSArnaldo Carvalho de Melo void inet_csk_destroy_sock(struct sock *sk) 696a019d6feSArnaldo Carvalho de Melo { 697547b792cSIlpo Järvinen WARN_ON(sk->sk_state != TCP_CLOSE); 698547b792cSIlpo Järvinen WARN_ON(!sock_flag(sk, SOCK_DEAD)); 699a019d6feSArnaldo Carvalho de Melo 700a019d6feSArnaldo Carvalho de Melo /* It cannot be in hash table! 
*/ 701547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk)); 702a019d6feSArnaldo Carvalho de Melo 703c720c7e8SEric Dumazet /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ 704c720c7e8SEric Dumazet WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); 705a019d6feSArnaldo Carvalho de Melo 706a019d6feSArnaldo Carvalho de Melo sk->sk_prot->destroy(sk); 707a019d6feSArnaldo Carvalho de Melo 708a019d6feSArnaldo Carvalho de Melo sk_stream_kill_queues(sk); 709a019d6feSArnaldo Carvalho de Melo 710a019d6feSArnaldo Carvalho de Melo xfrm_sk_free_policy(sk); 711a019d6feSArnaldo Carvalho de Melo 712a019d6feSArnaldo Carvalho de Melo sk_refcnt_debug_release(sk); 713a019d6feSArnaldo Carvalho de Melo 714777c6ae5SEric Dumazet local_bh_disable(); 715dd24c001SEric Dumazet percpu_counter_dec(sk->sk_prot->orphan_count); 716777c6ae5SEric Dumazet local_bh_enable(); 717a019d6feSArnaldo Carvalho de Melo sock_put(sk); 718a019d6feSArnaldo Carvalho de Melo } 719a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_destroy_sock); 720a019d6feSArnaldo Carvalho de Melo 721e337e24dSChristoph Paasch /* This function allows to force a closure of a socket after the call to 722e337e24dSChristoph Paasch * tcp/dccp_create_openreq_child(). 
723e337e24dSChristoph Paasch */ 724e337e24dSChristoph Paasch void inet_csk_prepare_forced_close(struct sock *sk) 725c10cb5fcSChristoph Paasch __releases(&sk->sk_lock.slock) 726e337e24dSChristoph Paasch { 727e337e24dSChristoph Paasch /* sk_clone_lock locked the socket and set refcnt to 2 */ 728e337e24dSChristoph Paasch bh_unlock_sock(sk); 729e337e24dSChristoph Paasch sock_put(sk); 730e337e24dSChristoph Paasch 731e337e24dSChristoph Paasch /* The below has to be done to allow calling inet_csk_destroy_sock */ 732e337e24dSChristoph Paasch sock_set_flag(sk, SOCK_DEAD); 733e337e24dSChristoph Paasch percpu_counter_inc(sk->sk_prot->orphan_count); 734e337e24dSChristoph Paasch inet_sk(sk)->inet_num = 0; 735e337e24dSChristoph Paasch } 736e337e24dSChristoph Paasch EXPORT_SYMBOL(inet_csk_prepare_forced_close); 737e337e24dSChristoph Paasch 738f985c65cSEric Dumazet int inet_csk_listen_start(struct sock *sk, int backlog) 739a019d6feSArnaldo Carvalho de Melo { 740a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 74110cbc8f1SEric Dumazet struct inet_sock *inet = inet_sk(sk); 742086c653fSCraig Gallek int err = -EADDRINUSE; 743a019d6feSArnaldo Carvalho de Melo 744ef547f2aSEric Dumazet reqsk_queue_alloc(&icsk->icsk_accept_queue); 745a019d6feSArnaldo Carvalho de Melo 746f985c65cSEric Dumazet sk->sk_max_ack_backlog = backlog; 747a019d6feSArnaldo Carvalho de Melo sk->sk_ack_backlog = 0; 748a019d6feSArnaldo Carvalho de Melo inet_csk_delack_init(sk); 749a019d6feSArnaldo Carvalho de Melo 750a019d6feSArnaldo Carvalho de Melo /* There is race window here: we announce ourselves listening, 751a019d6feSArnaldo Carvalho de Melo * but this transition is still not validated by get_port(). 752a019d6feSArnaldo Carvalho de Melo * It is OK, because this socket enters to hash table only 753a019d6feSArnaldo Carvalho de Melo * after validation is complete. 
754a019d6feSArnaldo Carvalho de Melo */ 75500fd38d9SEric Dumazet sk_state_store(sk, TCP_LISTEN); 756c720c7e8SEric Dumazet if (!sk->sk_prot->get_port(sk, inet->inet_num)) { 757c720c7e8SEric Dumazet inet->inet_sport = htons(inet->inet_num); 758a019d6feSArnaldo Carvalho de Melo 759a019d6feSArnaldo Carvalho de Melo sk_dst_reset(sk); 760086c653fSCraig Gallek err = sk->sk_prot->hash(sk); 761a019d6feSArnaldo Carvalho de Melo 762086c653fSCraig Gallek if (likely(!err)) 763a019d6feSArnaldo Carvalho de Melo return 0; 764a019d6feSArnaldo Carvalho de Melo } 765a019d6feSArnaldo Carvalho de Melo 766a019d6feSArnaldo Carvalho de Melo sk->sk_state = TCP_CLOSE; 767086c653fSCraig Gallek return err; 768a019d6feSArnaldo Carvalho de Melo } 769a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_start); 770a019d6feSArnaldo Carvalho de Melo 771ebb516afSEric Dumazet static void inet_child_forget(struct sock *sk, struct request_sock *req, 772ebb516afSEric Dumazet struct sock *child) 773ebb516afSEric Dumazet { 774ebb516afSEric Dumazet sk->sk_prot->disconnect(child, O_NONBLOCK); 775ebb516afSEric Dumazet 776ebb516afSEric Dumazet sock_orphan(child); 777ebb516afSEric Dumazet 778ebb516afSEric Dumazet percpu_counter_inc(sk->sk_prot->orphan_count); 779ebb516afSEric Dumazet 780ebb516afSEric Dumazet if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { 781ebb516afSEric Dumazet BUG_ON(tcp_sk(child)->fastopen_rsk != req); 782ebb516afSEric Dumazet BUG_ON(sk != req->rsk_listener); 783ebb516afSEric Dumazet 784ebb516afSEric Dumazet /* Paranoid, to prevent race condition if 785ebb516afSEric Dumazet * an inbound pkt destined for child is 786ebb516afSEric Dumazet * blocked by sock lock in tcp_v4_rcv(). 787ebb516afSEric Dumazet * Also to satisfy an assertion in 788ebb516afSEric Dumazet * tcp_v4_destroy_sock(). 
789ebb516afSEric Dumazet */ 790ebb516afSEric Dumazet tcp_sk(child)->fastopen_rsk = NULL; 791ebb516afSEric Dumazet } 792ebb516afSEric Dumazet inet_csk_destroy_sock(child); 793ebb516afSEric Dumazet reqsk_put(req); 794ebb516afSEric Dumazet } 795ebb516afSEric Dumazet 7967716682cSEric Dumazet struct sock *inet_csk_reqsk_queue_add(struct sock *sk, 7977716682cSEric Dumazet struct request_sock *req, 798ebb516afSEric Dumazet struct sock *child) 799ebb516afSEric Dumazet { 800ebb516afSEric Dumazet struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 801ebb516afSEric Dumazet 802ebb516afSEric Dumazet spin_lock(&queue->rskq_lock); 803ebb516afSEric Dumazet if (unlikely(sk->sk_state != TCP_LISTEN)) { 804ebb516afSEric Dumazet inet_child_forget(sk, req, child); 8057716682cSEric Dumazet child = NULL; 806ebb516afSEric Dumazet } else { 807ebb516afSEric Dumazet req->sk = child; 808ebb516afSEric Dumazet req->dl_next = NULL; 809ebb516afSEric Dumazet if (queue->rskq_accept_head == NULL) 810ebb516afSEric Dumazet queue->rskq_accept_head = req; 811ebb516afSEric Dumazet else 812ebb516afSEric Dumazet queue->rskq_accept_tail->dl_next = req; 813ebb516afSEric Dumazet queue->rskq_accept_tail = req; 814ebb516afSEric Dumazet sk_acceptq_added(sk); 815ebb516afSEric Dumazet } 816ebb516afSEric Dumazet spin_unlock(&queue->rskq_lock); 8177716682cSEric Dumazet return child; 818ebb516afSEric Dumazet } 819ebb516afSEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_add); 820ebb516afSEric Dumazet 8215e0724d0SEric Dumazet struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, 8225e0724d0SEric Dumazet struct request_sock *req, bool own_req) 8235e0724d0SEric Dumazet { 8245e0724d0SEric Dumazet if (own_req) { 8255e0724d0SEric Dumazet inet_csk_reqsk_queue_drop(sk, req); 8265e0724d0SEric Dumazet reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); 8277716682cSEric Dumazet if (inet_csk_reqsk_queue_add(sk, req, child)) 8285e0724d0SEric Dumazet return child; 8295e0724d0SEric 
Dumazet } 8305e0724d0SEric Dumazet /* Too bad, another child took ownership of the request, undo. */ 8315e0724d0SEric Dumazet bh_unlock_sock(child); 8325e0724d0SEric Dumazet sock_put(child); 8335e0724d0SEric Dumazet return NULL; 8345e0724d0SEric Dumazet } 8355e0724d0SEric Dumazet EXPORT_SYMBOL(inet_csk_complete_hashdance); 8365e0724d0SEric Dumazet 837a019d6feSArnaldo Carvalho de Melo /* 838a019d6feSArnaldo Carvalho de Melo * This routine closes sockets which have been at least partially 839a019d6feSArnaldo Carvalho de Melo * opened, but not yet accepted. 840a019d6feSArnaldo Carvalho de Melo */ 841a019d6feSArnaldo Carvalho de Melo void inet_csk_listen_stop(struct sock *sk) 842a019d6feSArnaldo Carvalho de Melo { 843a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 8448336886fSJerry Chu struct request_sock_queue *queue = &icsk->icsk_accept_queue; 845fff1f300SEric Dumazet struct request_sock *next, *req; 846a019d6feSArnaldo Carvalho de Melo 847a019d6feSArnaldo Carvalho de Melo /* Following specs, it would be better either to send FIN 848a019d6feSArnaldo Carvalho de Melo * (and enter FIN-WAIT-1, it is normal close) 849a019d6feSArnaldo Carvalho de Melo * or to send active reset (abort). 850a019d6feSArnaldo Carvalho de Melo * Certainly, it is pretty dangerous while synflood, but it is 851a019d6feSArnaldo Carvalho de Melo * bad justification for our negligence 8) 852a019d6feSArnaldo Carvalho de Melo * To be honest, we are not able to make either 853a019d6feSArnaldo Carvalho de Melo * of the variants now. 
--ANK 854a019d6feSArnaldo Carvalho de Melo */ 855fff1f300SEric Dumazet while ((req = reqsk_queue_remove(queue, sk)) != NULL) { 856a019d6feSArnaldo Carvalho de Melo struct sock *child = req->sk; 857a019d6feSArnaldo Carvalho de Melo 858a019d6feSArnaldo Carvalho de Melo local_bh_disable(); 859a019d6feSArnaldo Carvalho de Melo bh_lock_sock(child); 860547b792cSIlpo Järvinen WARN_ON(sock_owned_by_user(child)); 861a019d6feSArnaldo Carvalho de Melo sock_hold(child); 862a019d6feSArnaldo Carvalho de Melo 863ebb516afSEric Dumazet inet_child_forget(sk, req, child); 864a019d6feSArnaldo Carvalho de Melo bh_unlock_sock(child); 865a019d6feSArnaldo Carvalho de Melo local_bh_enable(); 866a019d6feSArnaldo Carvalho de Melo sock_put(child); 867a019d6feSArnaldo Carvalho de Melo 86892d6f176SEric Dumazet cond_resched(); 869a019d6feSArnaldo Carvalho de Melo } 8700536fcc0SEric Dumazet if (queue->fastopenq.rskq_rst_head) { 8718336886fSJerry Chu /* Free all the reqs queued in rskq_rst_head. */ 8720536fcc0SEric Dumazet spin_lock_bh(&queue->fastopenq.lock); 873fff1f300SEric Dumazet req = queue->fastopenq.rskq_rst_head; 8740536fcc0SEric Dumazet queue->fastopenq.rskq_rst_head = NULL; 8750536fcc0SEric Dumazet spin_unlock_bh(&queue->fastopenq.lock); 876fff1f300SEric Dumazet while (req != NULL) { 877fff1f300SEric Dumazet next = req->dl_next; 87813854e5aSEric Dumazet reqsk_put(req); 879fff1f300SEric Dumazet req = next; 8808336886fSJerry Chu } 8818336886fSJerry Chu } 882ebb516afSEric Dumazet WARN_ON_ONCE(sk->sk_ack_backlog); 883a019d6feSArnaldo Carvalho de Melo } 884a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 885af05dc93SArnaldo Carvalho de Melo 886af05dc93SArnaldo Carvalho de Melo void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 887af05dc93SArnaldo Carvalho de Melo { 888af05dc93SArnaldo Carvalho de Melo struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; 889af05dc93SArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 
890af05dc93SArnaldo Carvalho de Melo 891af05dc93SArnaldo Carvalho de Melo sin->sin_family = AF_INET; 892c720c7e8SEric Dumazet sin->sin_addr.s_addr = inet->inet_daddr; 893c720c7e8SEric Dumazet sin->sin_port = inet->inet_dport; 894af05dc93SArnaldo Carvalho de Melo } 895af05dc93SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 896c4d93909SArnaldo Carvalho de Melo 897dec73ff0SArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 898dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, 899dec73ff0SArnaldo Carvalho de Melo char __user *optval, int __user *optlen) 900dec73ff0SArnaldo Carvalho de Melo { 901dbeff12bSDavid S. Miller const struct inet_connection_sock *icsk = inet_csk(sk); 902dec73ff0SArnaldo Carvalho de Melo 90300db4124SIan Morris if (icsk->icsk_af_ops->compat_getsockopt) 904dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname, 905dec73ff0SArnaldo Carvalho de Melo optval, optlen); 906dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->getsockopt(sk, level, optname, 907dec73ff0SArnaldo Carvalho de Melo optval, optlen); 908dec73ff0SArnaldo Carvalho de Melo } 909dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); 910dec73ff0SArnaldo Carvalho de Melo 911dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, 912b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 913dec73ff0SArnaldo Carvalho de Melo { 914dbeff12bSDavid S. 
Miller const struct inet_connection_sock *icsk = inet_csk(sk); 915dec73ff0SArnaldo Carvalho de Melo 91600db4124SIan Morris if (icsk->icsk_af_ops->compat_setsockopt) 917dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname, 918dec73ff0SArnaldo Carvalho de Melo optval, optlen); 919dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->setsockopt(sk, level, optname, 920dec73ff0SArnaldo Carvalho de Melo optval, optlen); 921dec73ff0SArnaldo Carvalho de Melo } 922dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); 923dec73ff0SArnaldo Carvalho de Melo #endif 92480d0a69fSDavid S. Miller 92580d0a69fSDavid S. Miller static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) 92680d0a69fSDavid S. Miller { 9275abf7f7eSEric Dumazet const struct inet_sock *inet = inet_sk(sk); 9285abf7f7eSEric Dumazet const struct ip_options_rcu *inet_opt; 92980d0a69fSDavid S. Miller __be32 daddr = inet->inet_daddr; 93080d0a69fSDavid S. Miller struct flowi4 *fl4; 93180d0a69fSDavid S. Miller struct rtable *rt; 93280d0a69fSDavid S. Miller 93380d0a69fSDavid S. Miller rcu_read_lock(); 93480d0a69fSDavid S. Miller inet_opt = rcu_dereference(inet->inet_opt); 93580d0a69fSDavid S. Miller if (inet_opt && inet_opt->opt.srr) 93680d0a69fSDavid S. Miller daddr = inet_opt->opt.faddr; 93780d0a69fSDavid S. Miller fl4 = &fl->u.ip4; 93880d0a69fSDavid S. Miller rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, 93980d0a69fSDavid S. Miller inet->inet_saddr, inet->inet_dport, 94080d0a69fSDavid S. Miller inet->inet_sport, sk->sk_protocol, 94180d0a69fSDavid S. Miller RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); 94280d0a69fSDavid S. Miller if (IS_ERR(rt)) 94380d0a69fSDavid S. Miller rt = NULL; 94480d0a69fSDavid S. Miller if (rt) 94580d0a69fSDavid S. Miller sk_setup_caps(sk, &rt->dst); 94680d0a69fSDavid S. Miller rcu_read_unlock(); 94780d0a69fSDavid S. Miller 94880d0a69fSDavid S. 
Miller return &rt->dst; 94980d0a69fSDavid S. Miller } 95080d0a69fSDavid S. Miller 95180d0a69fSDavid S. Miller struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) 95280d0a69fSDavid S. Miller { 95380d0a69fSDavid S. Miller struct dst_entry *dst = __sk_dst_check(sk, 0); 95480d0a69fSDavid S. Miller struct inet_sock *inet = inet_sk(sk); 95580d0a69fSDavid S. Miller 95680d0a69fSDavid S. Miller if (!dst) { 95780d0a69fSDavid S. Miller dst = inet_csk_rebuild_route(sk, &inet->cork.fl); 95880d0a69fSDavid S. Miller if (!dst) 95980d0a69fSDavid S. Miller goto out; 96080d0a69fSDavid S. Miller } 9616700c270SDavid S. Miller dst->ops->update_pmtu(dst, sk, NULL, mtu); 96280d0a69fSDavid S. Miller 96380d0a69fSDavid S. Miller dst = __sk_dst_check(sk, 0); 96480d0a69fSDavid S. Miller if (!dst) 96580d0a69fSDavid S. Miller dst = inet_csk_rebuild_route(sk, &inet->cork.fl); 96680d0a69fSDavid S. Miller out: 96780d0a69fSDavid S. Miller return dst; 96880d0a69fSDavid S. Miller } 96980d0a69fSDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_update_pmtu); 970