13f421baaSArnaldo Carvalho de Melo /* 23f421baaSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 33f421baaSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 43f421baaSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 53f421baaSArnaldo Carvalho de Melo * 63f421baaSArnaldo Carvalho de Melo * Support for INET connection oriented protocols. 73f421baaSArnaldo Carvalho de Melo * 83f421baaSArnaldo Carvalho de Melo * Authors: See the TCP sources 93f421baaSArnaldo Carvalho de Melo * 103f421baaSArnaldo Carvalho de Melo * This program is free software; you can redistribute it and/or 113f421baaSArnaldo Carvalho de Melo * modify it under the terms of the GNU General Public License 123f421baaSArnaldo Carvalho de Melo * as published by the Free Software Foundation; either version 133f421baaSArnaldo Carvalho de Melo * 2 of the License, or(at your option) any later version. 143f421baaSArnaldo Carvalho de Melo */ 153f421baaSArnaldo Carvalho de Melo 163f421baaSArnaldo Carvalho de Melo #include <linux/module.h> 173f421baaSArnaldo Carvalho de Melo #include <linux/jhash.h> 183f421baaSArnaldo Carvalho de Melo 193f421baaSArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 203f421baaSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 213f421baaSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 223f421baaSArnaldo Carvalho de Melo #include <net/ip.h> 233f421baaSArnaldo Carvalho de Melo #include <net/route.h> 243f421baaSArnaldo Carvalho de Melo #include <net/tcp_states.h> 25a019d6feSArnaldo Carvalho de Melo #include <net/xfrm.h> 26fa76ce73SEric Dumazet #include <net/tcp.h> 273f421baaSArnaldo Carvalho de Melo 283f421baaSArnaldo Carvalho de Melo #ifdef INET_CSK_DEBUG 293f421baaSArnaldo Carvalho de Melo const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; 303f421baaSArnaldo Carvalho de Melo 
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
#endif

/* Read the per-netns ephemeral ("local") port range into [*low, *high].
 * The range is protected by a seqlock; retry the read until both bounds
 * come from a single consistent update.
 */
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);

		*low = net->ipv4.ip_local_ports.range[0];
		*high = net->ipv4.ip_local_ports.range[1];
	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
}
EXPORT_SYMBOL(inet_get_local_port_range);

/* Decide whether binding @sk would conflict with any socket already
 * hanging off bind bucket @tb (i.e. already bound to the same port).
 *
 * Sockets bound to different devices never conflict.  SO_REUSEADDR
 * (sk_reuse) suppresses a conflict unless the other socket is in
 * TCP_LISTEN; SO_REUSEPORT (sk_reuseport) suppresses it only when the
 * other socket is also reuseport and owned by the same uid (or is in
 * TCP_TIME_WAIT).  When @relax is false an extra, stricter check is
 * applied: two SO_REUSEADDR sockets on the same address still count
 * as a conflict, so the caller can keep looking for a better port.
 *
 * Returns nonzero if a conflicting socket was found.
 */
int inet_csk_bind_conflict(const struct sock *sk,
			   const struct inet_bind_bucket *tb, bool relax)
{
	struct sock *sk2;
	int reuse = sk->sk_reuse;
	int reuseport = sk->sk_reuseport;
	kuid_t uid = sock_i_uid((struct sock *)sk);

	/*
	 * Unlike other sk lookup places we do not check
	 * for sk_net here, since _all_ the socks listed
	 * in tb->owners list belong to the same net - the
	 * one this bucket belongs to.
	 */

	sk_for_each_bound(sk2, &tb->owners) {
		if (sk != sk2 &&
		    !inet_v6_ipv6only(sk2) &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
			if ((!reuse || !sk2->sk_reuse ||
			    sk2->sk_state == TCP_LISTEN) &&
			    (!reuseport || !sk2->sk_reuseport ||
			    (sk2->sk_state != TCP_TIME_WAIT &&
			     !uid_eq(uid, sock_i_uid(sk2))))) {
				/* Reuse flags do not excuse the clash:
				 * conflict if either side is wildcard or
				 * the addresses match exactly.
				 */
				if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
				    sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
					break;
			}
			if (!relax && reuse && sk2->sk_reuse &&
			    sk2->sk_state != TCP_LISTEN) {
				/* Strict mode: even mutually reusable
				 * sockets on the same address conflict.
				 */
				if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
				    sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
					break;
			}
		}
	}
	/* Loop terminated by 'break' => sk2 != NULL => conflict found. */
	return sk2 != NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);

/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 */
/* Returns 0 on success, with inet_csk(sk)->icsk_bind_hash pointing at the
 * bind bucket that was claimed; returns 1 on failure.  Runs with BHs
 * disabled; each bind hash bucket is protected by its own spinlock.
 *
 * When SO_REUSEADDR is set (SK_CAN_REUSE) the random search first tries
 * only the lower half of the port range (attempt_half), so that sockets
 * without reuse keep the upper half less contended.
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret, attempts = 5;
	struct net *net = sock_net(sk);
	int smallest_size = -1, smallest_rover;
	kuid_t uid = sock_i_uid(sk);
	int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;

	local_bh_disable();
	if (!snum) {
		int remaining, rover, low, high;

again:
		inet_get_local_port_range(net, &low, &high);
		if (attempt_half) {
			int half = low + ((high - low) >> 1);

			if (attempt_half == 1)
				high = half;
			else
				low = half;
		}
		remaining = (high - low) + 1;
		/* Start the scan at a random port to spread allocations. */
		smallest_rover = rover = prandom_u32() % remaining + low;

		smallest_size = -1;
		do {
			if (inet_is_local_reserved_port(net, rover))
				goto next_nolock;
			head = &hashinfo->bhash[inet_bhashfn(net, rover,
					hashinfo->bhash_size)];
			spin_lock(&head->lock);
			inet_bind_bucket_for_each(tb, &head->chain)
				if (net_eq(ib_net(tb), net) && tb->port == rover) {
					/* Remember the reusable bucket with the
					 * fewest owners as a fallback in case the
					 * whole range is exhausted.
					 */
					if (((tb->fastreuse > 0 &&
					      sk->sk_reuse &&
					      sk->sk_state != TCP_LISTEN) ||
					     (tb->fastreuseport > 0 &&
					      sk->sk_reuseport &&
					      uid_eq(tb->fastuid, uid))) &&
					    (tb->num_owners < smallest_size || smallest_size == -1)) {
						smallest_size = tb->num_owners;
						smallest_rover = rover;
					}
					if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
						snum = rover;
						goto tb_found;
					}
					goto next;
				}
			break;
		next:
			spin_unlock(&head->lock);
		next_nolock:
			if (++rover > high)
				rover = low;
		} while (--remaining > 0);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		ret = 1;
		if (remaining <= 0) {
			if (smallest_size != -1) {
				/* Fall back on the least-crowded reusable
				 * bucket recorded during the scan.
				 */
				snum = smallest_rover;
				goto have_snum;
			}
			if (attempt_half == 1) {
				/* OK we now try the upper half of the range */
				attempt_half = 2;
				goto again;
			}
			goto fail;
		}
		/* OK, here is the one we will use.  HEAD is
		 * non-NULL and we hold its lock.
		 */
		snum = rover;
	} else {
have_snum:
		head = &hashinfo->bhash[inet_bhashfn(net, snum,
				hashinfo->bhash_size)];
		spin_lock(&head->lock);
		inet_bind_bucket_for_each(tb, &head->chain)
			if (net_eq(ib_net(tb), net) && tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (!hlist_empty(&tb->owners)) {
		if (sk->sk_reuse == SK_FORCE_REUSE)
			goto success;

		/* Fast path: the bucket is marked uniformly reusable
		 * for our flavour of reuse, no per-socket walk needed.
		 */
		if (((tb->fastreuse > 0 &&
		      sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
		     (tb->fastreuseport > 0 &&
		      sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
		    smallest_size == -1) {
			goto success;
		} else {
			ret = 1;
			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
				/* Fallback bucket turned out to conflict under
				 * the strict check: retry the whole search a
				 * bounded number of times.
				 */
				if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
				     (tb->fastreuseport > 0 &&
				      sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
				    smallest_size != -1 && --attempts >= 0) {
					spin_unlock(&head->lock);
					goto again;
				}

				goto fail_unlock;
			}
		}
	}
tb_not_found:
	ret = 1;
	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
					net, head, snum)) == NULL)
		goto fail_unlock;
	if (hlist_empty(&tb->owners)) {
		/* First owner decides the bucket's fast-reuse markers. */
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
		if (sk->sk_reuseport) {
			tb->fastreuseport = 1;
			tb->fastuid = uid;
		} else
			tb->fastreuseport = 0;
	} else {
		/* A non-matching joiner invalidates the fast-path markers. */
		if (tb->fastreuse &&
		    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
			tb->fastreuse = 0;
		if (tb->fastreuseport &&
		    (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))
			tb->fastreuseport = 0;
	}
success:
	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, snum);
	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);

/*
 * Wait for an incoming connection, avoid race conditions. This must be called
 * with the socket locked.
 *
 * Returns 0 once the accept queue is non-empty, -EINVAL if the socket
 * left TCP_LISTEN while we slept, sock_intr_errno(timeo) if a signal is
 * pending, or -EAGAIN when @timeo expires.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	DEFINE_WAIT(wait);
	int err;

	/*
	 * True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	 *
	 * Subtle issue: "add_wait_queue_exclusive()" will be added
	 * after any current non-exclusive waiters, and we know that
	 * it will always _stay_ after any new non-exclusive waiters
	 * because all non-exclusive waiters are added at the
	 * beginning of the wait-queue. As such, it's ok to "drop"
	 * our exclusiveness temporarily when we get woken up without
	 * having to remove and re-insert us on the wait queue.
	 */
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		/* Drop the socket lock while sleeping so the softirq
		 * path can queue incoming connections.
		 */
		release_sock(sk);
		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
			timeo = schedule_timeout(timeo);
		sched_annotate_sleep();
		lock_sock(sk);
		err = 0;
		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
			break;
		err = -EINVAL;
		if (sk->sk_state != TCP_LISTEN)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/*
 * This will accept the next outstanding connection.
 */
/* Dequeue the next established child socket from the listener's accept
 * queue, blocking (subject to SO_RCVTIMEO / O_NONBLOCK) when the queue
 * is empty.  On failure returns NULL and stores a negative errno in
 * *err.  The returned child is the caller's reference.
 */
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *req;
	struct sock *newsk;
	int error;

	lock_sock(sk);

	/* We need to make sure that this socket is listening,
	 * and that it has something pending.
	 */
	error = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out_err;

	/* Find already established connection */
	if (reqsk_queue_empty(queue)) {
		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

		/* If this is a non blocking socket don't sleep */
		error = -EAGAIN;
		if (!timeo)
			goto out_err;

		error = inet_csk_wait_for_connect(sk, timeo);
		if (error)
			goto out_err;
	}
	req = reqsk_queue_remove(queue, sk);
	newsk = req->sk;

	if (sk->sk_protocol == IPPROTO_TCP &&
	    tcp_rsk(req)->tfo_listener) {
		/* TCP Fast Open child: re-check tfo_listener under the
		 * fastopenq lock to synchronize with the 3WHS completion.
		 */
		spin_lock_bh(&queue->fastopenq.lock);
		if (tcp_rsk(req)->tfo_listener) {
			/* We are still waiting for the final ACK from 3WHS
			 * so can't free req now. Instead, we set req->sk to
			 * NULL to signify that the child socket is taken
			 * so reqsk_fastopen_remove() will free the req
			 * when 3WHS finishes (or is aborted).
			 */
			req->sk = NULL;
			req = NULL;
		}
		spin_unlock_bh(&queue->fastopenq.lock);
	}
out:
	release_sock(sk);
	if (req)
		reqsk_put(req);
	return newsk;
out_err:
	newsk = NULL;
	req = NULL;
	*err = error;
	goto out;
}
EXPORT_SYMBOL(inet_csk_accept);

/*
 * Using different timers for retransmit, delayed acks and probes
 * We may wish use just one timer maintaining a list of expire jiffies
 * to optimize.
3683f421baaSArnaldo Carvalho de Melo */ 3693f421baaSArnaldo Carvalho de Melo void inet_csk_init_xmit_timers(struct sock *sk, 3703f421baaSArnaldo Carvalho de Melo void (*retransmit_handler)(unsigned long), 3713f421baaSArnaldo Carvalho de Melo void (*delack_handler)(unsigned long), 3723f421baaSArnaldo Carvalho de Melo void (*keepalive_handler)(unsigned long)) 3733f421baaSArnaldo Carvalho de Melo { 3743f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3753f421baaSArnaldo Carvalho de Melo 376b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, 377b24b8a24SPavel Emelyanov (unsigned long)sk); 378b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_delack_timer, delack_handler, 379b24b8a24SPavel Emelyanov (unsigned long)sk); 380b24b8a24SPavel Emelyanov setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); 3813f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = 0; 3823f421baaSArnaldo Carvalho de Melo } 3833f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_init_xmit_timers); 3843f421baaSArnaldo Carvalho de Melo 3853f421baaSArnaldo Carvalho de Melo void inet_csk_clear_xmit_timers(struct sock *sk) 3863f421baaSArnaldo Carvalho de Melo { 3873f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3883f421baaSArnaldo Carvalho de Melo 3893f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; 3903f421baaSArnaldo Carvalho de Melo 3913f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 3923f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_delack_timer); 3933f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 3943f421baaSArnaldo Carvalho de Melo } 3953f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 3963f421baaSArnaldo Carvalho de Melo 3973f421baaSArnaldo Carvalho de Melo void inet_csk_delete_keepalive_timer(struct 
sock *sk) 3983f421baaSArnaldo Carvalho de Melo { 3993f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 4003f421baaSArnaldo Carvalho de Melo } 4013f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 4023f421baaSArnaldo Carvalho de Melo 4033f421baaSArnaldo Carvalho de Melo void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 4043f421baaSArnaldo Carvalho de Melo { 4053f421baaSArnaldo Carvalho de Melo sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 4063f421baaSArnaldo Carvalho de Melo } 4073f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 4083f421baaSArnaldo Carvalho de Melo 409e5895bc6SEric Dumazet struct dst_entry *inet_csk_route_req(const struct sock *sk, 4106bd023f3SDavid S. Miller struct flowi4 *fl4, 411ba3f7f04SDavid S. Miller const struct request_sock *req) 4123f421baaSArnaldo Carvalho de Melo { 4133f421baaSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 4148b929ab1SEric Dumazet struct net *net = read_pnet(&ireq->ireq_net); 4158b929ab1SEric Dumazet struct ip_options_rcu *opt = ireq->opt; 4168b929ab1SEric Dumazet struct rtable *rt; 4173f421baaSArnaldo Carvalho de Melo 4188b929ab1SEric Dumazet flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 419e79d9bc7SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 4208b929ab1SEric Dumazet sk->sk_protocol, inet_sk_flowi_flags(sk), 421634fb979SEric Dumazet (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 4228b929ab1SEric Dumazet ireq->ir_loc_addr, ireq->ir_rmt_port, 4238b929ab1SEric Dumazet htons(ireq->ir_num)); 4246bd023f3SDavid S. Miller security_req_classify_flow(req, flowi4_to_flowi(fl4)); 4256bd023f3SDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 426b23dd4feSDavid S. 
Miller if (IS_ERR(rt)) 427857a6e0aSIlpo Järvinen goto no_route; 428155e8336SJulian Anastasov if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 429857a6e0aSIlpo Järvinen goto route_err; 430d8d1f30bSChangli Gao return &rt->dst; 431857a6e0aSIlpo Järvinen 432857a6e0aSIlpo Järvinen route_err: 433857a6e0aSIlpo Järvinen ip_rt_put(rt); 434857a6e0aSIlpo Järvinen no_route: 435857a6e0aSIlpo Järvinen IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 436857a6e0aSIlpo Järvinen return NULL; 4373f421baaSArnaldo Carvalho de Melo } 4383f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_route_req); 4393f421baaSArnaldo Carvalho de Melo 440a2432c4fSEric Dumazet struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, 44177357a95SDavid S. Miller struct sock *newsk, 44277357a95SDavid S. Miller const struct request_sock *req) 44377357a95SDavid S. Miller { 44477357a95SDavid S. Miller const struct inet_request_sock *ireq = inet_rsk(req); 4458b929ab1SEric Dumazet struct net *net = read_pnet(&ireq->ireq_net); 44677357a95SDavid S. Miller struct inet_sock *newinet = inet_sk(newsk); 4471a7b27c9SChristoph Paasch struct ip_options_rcu *opt; 44877357a95SDavid S. Miller struct flowi4 *fl4; 44977357a95SDavid S. Miller struct rtable *rt; 45077357a95SDavid S. Miller 45177357a95SDavid S. Miller fl4 = &newinet->cork.fl.u.ip4; 4521a7b27c9SChristoph Paasch 4531a7b27c9SChristoph Paasch rcu_read_lock(); 4541a7b27c9SChristoph Paasch opt = rcu_dereference(newinet->inet_opt); 4558b929ab1SEric Dumazet flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 45677357a95SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 45777357a95SDavid S. Miller sk->sk_protocol, inet_sk_flowi_flags(sk), 458634fb979SEric Dumazet (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 4598b929ab1SEric Dumazet ireq->ir_loc_addr, ireq->ir_rmt_port, 4608b929ab1SEric Dumazet htons(ireq->ir_num)); 46177357a95SDavid S. Miller security_req_classify_flow(req, flowi4_to_flowi(fl4)); 46277357a95SDavid S. 
Miller rt = ip_route_output_flow(net, fl4, sk); 46377357a95SDavid S. Miller if (IS_ERR(rt)) 46477357a95SDavid S. Miller goto no_route; 465155e8336SJulian Anastasov if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 46677357a95SDavid S. Miller goto route_err; 4671a7b27c9SChristoph Paasch rcu_read_unlock(); 46877357a95SDavid S. Miller return &rt->dst; 46977357a95SDavid S. Miller 47077357a95SDavid S. Miller route_err: 47177357a95SDavid S. Miller ip_rt_put(rt); 47277357a95SDavid S. Miller no_route: 4731a7b27c9SChristoph Paasch rcu_read_unlock(); 47477357a95SDavid S. Miller IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 47577357a95SDavid S. Miller return NULL; 47677357a95SDavid S. Miller } 47777357a95SDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); 47877357a95SDavid S. Miller 479dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 4803f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) ((fam) == AF_INET) 4813f421baaSArnaldo Carvalho de Melo #else 482fa76ce73SEric Dumazet #define AF_INET_FAMILY(fam) true 4833f421baaSArnaldo Carvalho de Melo #endif 4843f421baaSArnaldo Carvalho de Melo 4850c3d79bcSJulian Anastasov /* Decide when to expire the request and when to resend SYN-ACK */ 4860c3d79bcSJulian Anastasov static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 4870c3d79bcSJulian Anastasov const int max_retries, 4880c3d79bcSJulian Anastasov const u8 rskq_defer_accept, 4890c3d79bcSJulian Anastasov int *expire, int *resend) 4900c3d79bcSJulian Anastasov { 4910c3d79bcSJulian Anastasov if (!rskq_defer_accept) { 492e6c022a4SEric Dumazet *expire = req->num_timeout >= thresh; 4930c3d79bcSJulian Anastasov *resend = 1; 4940c3d79bcSJulian Anastasov return; 4950c3d79bcSJulian Anastasov } 496e6c022a4SEric Dumazet *expire = req->num_timeout >= thresh && 497e6c022a4SEric Dumazet (!inet_rsk(req)->acked || req->num_timeout >= max_retries); 4980c3d79bcSJulian Anastasov /* 4990c3d79bcSJulian Anastasov * Do not resend while waiting for data 
after ACK, 5000c3d79bcSJulian Anastasov * start to resend on end of deferring period to give 5010c3d79bcSJulian Anastasov * last chance for data or ACK to create established socket. 5020c3d79bcSJulian Anastasov */ 5030c3d79bcSJulian Anastasov *resend = !inet_rsk(req)->acked || 504e6c022a4SEric Dumazet req->num_timeout >= rskq_defer_accept - 1; 5050c3d79bcSJulian Anastasov } 5060c3d79bcSJulian Anastasov 5071b70e977SEric Dumazet int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) 508e6c022a4SEric Dumazet { 5091a2c6181SChristoph Paasch int err = req->rsk_ops->rtx_syn_ack(parent, req); 510e6c022a4SEric Dumazet 511e6c022a4SEric Dumazet if (!err) 512e6c022a4SEric Dumazet req->num_retrans++; 513e6c022a4SEric Dumazet return err; 514e6c022a4SEric Dumazet } 515e6c022a4SEric Dumazet EXPORT_SYMBOL(inet_rtx_syn_ack); 516e6c022a4SEric Dumazet 517079096f1SEric Dumazet /* return true if req was found in the ehash table */ 518b357a364SEric Dumazet static bool reqsk_queue_unlink(struct request_sock_queue *queue, 519b357a364SEric Dumazet struct request_sock *req) 520b357a364SEric Dumazet { 521079096f1SEric Dumazet struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo; 5225e0724d0SEric Dumazet bool found = false; 523b357a364SEric Dumazet 5245e0724d0SEric Dumazet if (sk_hashed(req_to_sk(req))) { 5255e0724d0SEric Dumazet spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash); 526b357a364SEric Dumazet 527079096f1SEric Dumazet spin_lock(lock); 528079096f1SEric Dumazet found = __sk_nulls_del_node_init_rcu(req_to_sk(req)); 529079096f1SEric Dumazet spin_unlock(lock); 5305e0724d0SEric Dumazet } 53183fccfc3SEric Dumazet if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) 532b357a364SEric Dumazet reqsk_put(req); 533b357a364SEric Dumazet return found; 534b357a364SEric Dumazet } 535b357a364SEric Dumazet 536b357a364SEric Dumazet void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) 537b357a364SEric Dumazet { 
538b357a364SEric Dumazet if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) { 539b357a364SEric Dumazet reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); 540b357a364SEric Dumazet reqsk_put(req); 541b357a364SEric Dumazet } 542b357a364SEric Dumazet } 543b357a364SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); 544b357a364SEric Dumazet 545f03f2e15SEric Dumazet void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) 546f03f2e15SEric Dumazet { 547f03f2e15SEric Dumazet inet_csk_reqsk_queue_drop(sk, req); 548f03f2e15SEric Dumazet reqsk_put(req); 549f03f2e15SEric Dumazet } 550f03f2e15SEric Dumazet EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); 551f03f2e15SEric Dumazet 552fa76ce73SEric Dumazet static void reqsk_timer_handler(unsigned long data) 553a019d6feSArnaldo Carvalho de Melo { 554fa76ce73SEric Dumazet struct request_sock *req = (struct request_sock *)data; 555fa76ce73SEric Dumazet struct sock *sk_listener = req->rsk_listener; 556*7c083ecbSNikolay Borisov struct net *net = sock_net(sk_listener); 557fa76ce73SEric Dumazet struct inet_connection_sock *icsk = inet_csk(sk_listener); 558a019d6feSArnaldo Carvalho de Melo struct request_sock_queue *queue = &icsk->icsk_accept_queue; 5592b41fab7SEric Dumazet int qlen, expire = 0, resend = 0; 560fa76ce73SEric Dumazet int max_retries, thresh; 5612b41fab7SEric Dumazet u8 defer_accept; 562a019d6feSArnaldo Carvalho de Melo 56300fd38d9SEric Dumazet if (sk_state_load(sk_listener) != TCP_LISTEN) 564079096f1SEric Dumazet goto drop; 565a019d6feSArnaldo Carvalho de Melo 566*7c083ecbSNikolay Borisov max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; 567fa76ce73SEric Dumazet thresh = max_retries; 568a019d6feSArnaldo Carvalho de Melo /* Normally all the openreqs are young and become mature 569a019d6feSArnaldo Carvalho de Melo * (i.e. converted to established socket) for first timeout. 
	 * If synack was not acknowledged for 1 second, it means
	 * one of the following things: synack was lost, ack was lost,
	 * rtt is high or nobody planned to ack (i.e. synflood).
	 * When the server is a bit loaded, the queue is populated with old
	 * open requests, reducing the effective size of the queue.
	 * When the server is well loaded, queue size reduces to zero
	 * after several minutes of work. It is not synflood,
	 * it is normal operation. The solution is pruning
	 * too old entries, overriding the normal timeout, when the
	 * situation becomes dangerous.
	 *
	 * Essentially, we reserve half of the room for young
	 * embryos; and abort old ones without pity, if old
	 * ones are about to clog our table.
	 */
	qlen = reqsk_queue_len(queue);
	/* Queue more than half full: lower the pruning threshold.  Each
	 * step halves the weight given to "young" (never retransmitted)
	 * requests, so old entries are aborted more aggressively.
	 */
	if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
		int young = reqsk_queue_len_young(queue) << 1;

		while (thresh > 2) {
			if (qlen < young)
				break;
			thresh--;
			young <<= 1;
		}
	}
	defer_accept = READ_ONCE(queue->rskq_defer_accept);
	if (defer_accept)
		max_retries = defer_accept;
	syn_ack_recalc(req, thresh, max_retries, defer_accept,
		       &expire, &resend);
	req->rsk_ops->syn_ack_timeout(req);
	/* Keep the request alive if it has not expired and either no
	 * resend was needed, the retransmit succeeded, or the ACK
	 * already arrived (defer_accept case).
	 */
	if (!expire &&
	    (!resend ||
	     !inet_rtx_syn_ack(sk_listener, req) ||
	     inet_rsk(req)->acked)) {
		unsigned long timeo;

		/* First timeout: the request stops counting as "young". */
		if (req->num_timeout++ == 0)
			atomic_dec(&queue->young);
		/* Exponential backoff, capped at TCP_RTO_MAX. */
		timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
		mod_timer_pinned(&req->rsk_timer, jiffies + timeo);
		return;
	}
drop:
	inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
}

/* Insert a freshly created request sock into the ehash table and arm
 * its SYN-ACK retransmit timer to fire in @timeout jiffies.
 */
static void reqsk_queue_hash_req(struct request_sock *req,
				 unsigned long timeout)
{
	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;

	setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
	mod_timer_pinned(&req->rsk_timer, jiffies + timeout);

	inet_ehash_insert(req_to_sk(req), NULL);
	/* before letting lookups find us, make sure all req fields
	 * are committed to memory and refcnt initialized.
	 */
	smp_wmb();
	/* NOTE(review): 2 + 1 presumably covers the hash table, the timer
	 * and the caller's reference — confirm against reqsk_put() sites.
	 */
	atomic_set(&req->rsk_refcnt, 2 + 1);
}

/* Hash @req and account it in the listener's accept queue. */
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
				   unsigned long timeout)
{
	reqsk_queue_hash_req(req, timeout);
	inet_csk_reqsk_queue_added(sk);
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);

/**
 * inet_csk_clone_lock - clone an inet socket, and lock its clone
 * @sk: the socket to clone
 * @req: request_sock
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *inet_csk_clone_lock(const struct sock *sk,
				 const struct request_sock *req,
				 const gfp_t priority)
{
	struct sock *newsk = sk_clone_lock(sk, priority);

	if (newsk) {
		struct inet_connection_sock *newicsk = inet_csk(newsk);

		newsk->sk_state = TCP_SYN_RECV;
		newicsk->icsk_bind_hash = NULL;

		/* Copy the addressing info the request sock carries. */
		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
		newsk->sk_write_space = sk_stream_write_space;

		newsk->sk_mark = inet_rsk(req)->ir_mark;
		atomic64_set(&newsk->sk_cookie,
			     atomic64_read(&inet_rsk(req)->ir_cookie));

		/* The child starts with fresh retransmit/probe state. */
		newicsk->icsk_retransmits = 0;
		newicsk->icsk_backoff = 0;
		newicsk->icsk_probes_out = 0;

		/* Deinitialize accept_queue to trap illegal accesses. */
		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));

		security_inet_csk_clone(newsk, req);
	}
	return newsk;
}
EXPORT_SYMBOL_GPL(inet_csk_clone_lock);

/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all.
 * Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 */
void inet_csk_destroy_sock(struct sock *sk)
{
	/* Only closed, dead sockets may be destroyed. */
	WARN_ON(sk->sk_state != TCP_CLOSE);
	WARN_ON(!sock_flag(sk, SOCK_DEAD));

	/* It cannot be in hash table! */
	WARN_ON(!sk_unhashed(sk));

	/* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
	WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);

	sk->sk_prot->destroy(sk);

	sk_stream_kill_queues(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	percpu_counter_dec(sk->sk_prot->orphan_count);
	/* Drop the reference this function consumes. */
	sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);

/* This function allows to force a closure of a socket after the call to
 * tcp/dccp_create_openreq_child().
 */
void inet_csk_prepare_forced_close(struct sock *sk)
	__releases(&sk->sk_lock.slock)
{
	/* sk_clone_lock locked the socket and set refcnt to 2 */
	bh_unlock_sock(sk);
	sock_put(sk);

	/* The below has to be done to allow calling inet_csk_destroy_sock */
	sock_set_flag(sk, SOCK_DEAD);
	percpu_counter_inc(sk->sk_prot->orphan_count);
	inet_sk(sk)->inet_num = 0;
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);

/* Move @sk into the LISTEN state with a backlog of @backlog pending
 * connections.  Returns 0 on success or -EADDRINUSE if get_port()
 * fails to (re)validate the local port.
 */
int inet_csk_listen_start(struct sock *sk, int backlog)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);

	reqsk_queue_alloc(&icsk->icsk_accept_queue);

	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	inet_csk_delack_init(sk);

	/* There is race window here: we announce ourselves listening,
	 * but this transition is still not validated by get_port().
	 * It is OK, because this socket enters to hash table only
	 * after validation is complete.
	 */
	sk_state_store(sk, TCP_LISTEN);
	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
		inet->inet_sport = htons(inet->inet_num);

		sk_dst_reset(sk);
		/* Publish the socket in the listening hash table. */
		sk->sk_prot->hash(sk);

		return 0;
	}

	/* Port validation failed: roll back to CLOSE. */
	sk->sk_state = TCP_CLOSE;
	return -EADDRINUSE;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);

/* Abort a child socket that will never be accepted: disconnect and
 * orphan it, detach any TCP Fast Open linkage, then destroy it and
 * release the request sock.
 */
static void inet_child_forget(struct sock *sk, struct request_sock *req,
			      struct sock *child)
{
	sk->sk_prot->disconnect(child, O_NONBLOCK);

	sock_orphan(child);

	percpu_counter_inc(sk->sk_prot->orphan_count);

	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
		BUG_ON(tcp_sk(child)->fastopen_rsk != req);
		BUG_ON(sk != req->rsk_listener);

		/* Paranoid, to prevent race condition if
		 * an inbound pkt destined for child is
		 * blocked by sock lock in tcp_v4_rcv().
		 * Also to satisfy an assertion in
		 * tcp_v4_destroy_sock().
		 */
		tcp_sk(child)->fastopen_rsk = NULL;
	}
	inet_csk_destroy_sock(child);
	reqsk_put(req);
}

/* Append (@req, @child) to the listener's accept queue, or forget the
 * child straight away if the listener already left the LISTEN state.
 */
void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
			      struct sock *child)
{
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;

	spin_lock(&queue->rskq_lock);
	if (unlikely(sk->sk_state != TCP_LISTEN)) {
		inet_child_forget(sk, req, child);
	} else {
		req->sk = child;
		req->dl_next = NULL;
		/* FIFO append to the accept queue. */
		if (queue->rskq_accept_head == NULL)
			queue->rskq_accept_head = req;
		else
			queue->rskq_accept_tail->dl_next = req;
		queue->rskq_accept_tail = req;
		sk_acceptq_added(sk);
	}
	spin_unlock(&queue->rskq_lock);
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);

/* Finish the 3WHS dance: if we own @req, move it off the hash table
 * and hand @child to the accept queue; otherwise undo the clone.
 * Returns @child on success, NULL if another child won the race.
 */
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
					 struct request_sock *req, bool own_req)
{
	if (own_req) {
		inet_csk_reqsk_queue_drop(sk, req);
		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
		inet_csk_reqsk_queue_add(sk, req, child);
		/* Warning: caller must not call reqsk_put(req);
		 * child stole last reference on it.
		 */
		return child;
	}
	/* Too bad, another child took ownership of the request, undo. */
	bh_unlock_sock(child);
	sock_put(child);
	return NULL;
}
EXPORT_SYMBOL(inet_csk_complete_hashdance);

/*
 * This routine closes sockets which have been at least partially
 * opened, but not yet accepted.
 */
void inet_csk_listen_stop(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
	struct request_sock *next, *req;

	/* Following specs, it would be better either to send FIN
	 * (and enter FIN-WAIT-1, it is normal close)
	 * or to send active reset (abort).
	 * Certainly, it is pretty dangerous while synflood, but it is
	 * bad justification for our negligence 8)
	 * To be honest, we are not able to make either
	 * of the variants now.
	 * --ANK
	 */
	while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
		struct sock *child = req->sk;

		local_bh_disable();
		bh_lock_sock(child);
		WARN_ON(sock_owned_by_user(child));
		sock_hold(child);

		inet_child_forget(sk, req, child);
		bh_unlock_sock(child);
		local_bh_enable();
		sock_put(child);

		/* Long accept queues: give the scheduler a chance. */
		cond_resched();
	}
	if (queue->fastopenq.rskq_rst_head) {
		/* Free all the reqs queued in rskq_rst_head. */
		spin_lock_bh(&queue->fastopenq.lock);
		req = queue->fastopenq.rskq_rst_head;
		queue->fastopenq.rskq_rst_head = NULL;
		spin_unlock_bh(&queue->fastopenq.lock);
		while (req != NULL) {
			next = req->dl_next;
			reqsk_put(req);
			req = next;
		}
	}
	WARN_ON_ONCE(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);

/* Fill @uaddr with the IPv4 peer address/port of @sk. */
void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
	const struct inet_sock *inet = inet_sk(sk);

	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = inet->inet_daddr;
	sin->sin_port = inet->inet_dport;
}
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);

#ifdef CONFIG_COMPAT
/* getsockopt() entry for 32-bit tasks on 64-bit kernels: prefer the
 * af_ops compat handler when one is provided, otherwise fall back to
 * the native handler.
 */
int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
			       char __user *optval, int __user *optlen)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_af_ops->compat_getsockopt)
		return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
							    optval, optlen);
	return icsk->icsk_af_ops->getsockopt(sk, level, optname,
					     optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);

/* setsockopt() counterpart of inet_csk_compat_getsockopt(). */
int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
			       char __user *optval, unsigned int optlen)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_af_ops->compat_setsockopt)
		return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
							    optval, optlen);
	return icsk->icsk_af_ops->setsockopt(sk, level, optname,
					     optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
#endif

/* Re-resolve the IPv4 route for @sk into @fl and install it on the
 * socket.  Honours a source-route (SRR) option's first-hop address.
 */
static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;
	struct flowi4 *fl4;
	struct rtable *rt;

	/* RCU protects the inet_opt dereference. */
	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	fl4 = &fl->u.ip4;
	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
				   inet->inet_saddr, inet->inet_dport,
				   inet->inet_sport, sk->sk_protocol,
				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
	if (IS_ERR(rt))
		rt = NULL;
	if (rt)
		sk_setup_caps(sk, &rt->dst);
	rcu_read_unlock();

	return &rt->dst;
}

/* Propagate a new path MTU to the socket's cached route, rebuilding
 * the route if the cache is stale.  Returns the (possibly new) dst,
 * or NULL if no route could be obtained.
 */
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);
	struct inet_sock *inet = inet_sk(sk);

	if (!dst) {
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
		if (!dst)
			goto out;
	}
	dst->ops->update_pmtu(dst, sk, NULL, mtu);

	/* update_pmtu() may have invalidated the cached dst: re-check. */
	dst = __sk_dst_check(sk, 0);
	if (!dst)
		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
out:
	return dst;
}
EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);