13f421baaSArnaldo Carvalho de Melo /* 23f421baaSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 33f421baaSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 43f421baaSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 53f421baaSArnaldo Carvalho de Melo * 63f421baaSArnaldo Carvalho de Melo * Support for INET connection oriented protocols. 73f421baaSArnaldo Carvalho de Melo * 83f421baaSArnaldo Carvalho de Melo * Authors: See the TCP sources 93f421baaSArnaldo Carvalho de Melo * 103f421baaSArnaldo Carvalho de Melo * This program is free software; you can redistribute it and/or 113f421baaSArnaldo Carvalho de Melo * modify it under the terms of the GNU General Public License 123f421baaSArnaldo Carvalho de Melo * as published by the Free Software Foundation; either version 133f421baaSArnaldo Carvalho de Melo * 2 of the License, or(at your option) any later version. 143f421baaSArnaldo Carvalho de Melo */ 153f421baaSArnaldo Carvalho de Melo 163f421baaSArnaldo Carvalho de Melo #include <linux/module.h> 173f421baaSArnaldo Carvalho de Melo #include <linux/jhash.h> 183f421baaSArnaldo Carvalho de Melo 193f421baaSArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 203f421baaSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 213f421baaSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 223f421baaSArnaldo Carvalho de Melo #include <net/ip.h> 233f421baaSArnaldo Carvalho de Melo #include <net/route.h> 243f421baaSArnaldo Carvalho de Melo #include <net/tcp_states.h> 25a019d6feSArnaldo Carvalho de Melo #include <net/xfrm.h> 263f421baaSArnaldo Carvalho de Melo 273f421baaSArnaldo Carvalho de Melo #ifdef INET_CSK_DEBUG 283f421baaSArnaldo Carvalho de Melo const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; 293f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_timer_bug_msg); 303f421baaSArnaldo Carvalho de Melo #endif 
313f421baaSArnaldo Carvalho de Melo 323f421baaSArnaldo Carvalho de Melo /* 333c689b73SEric Dumazet * This struct holds the first and last local port number. 343f421baaSArnaldo Carvalho de Melo */ 353c689b73SEric Dumazet struct local_ports sysctl_local_ports __read_mostly = { 36c4dbe54eSEric Dumazet .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock), 373c689b73SEric Dumazet .range = { 32768, 61000 }, 383c689b73SEric Dumazet }; 39227b60f5SStephen Hemminger 40e3826f1eSAmerigo Wang unsigned long *sysctl_local_reserved_ports; 41e3826f1eSAmerigo Wang EXPORT_SYMBOL(sysctl_local_reserved_ports); 42e3826f1eSAmerigo Wang 43227b60f5SStephen Hemminger void inet_get_local_port_range(int *low, int *high) 44227b60f5SStephen Hemminger { 4595c96174SEric Dumazet unsigned int seq; 4695c96174SEric Dumazet 47227b60f5SStephen Hemminger do { 483c689b73SEric Dumazet seq = read_seqbegin(&sysctl_local_ports.lock); 49227b60f5SStephen Hemminger 503c689b73SEric Dumazet *low = sysctl_local_ports.range[0]; 513c689b73SEric Dumazet *high = sysctl_local_ports.range[1]; 523c689b73SEric Dumazet } while (read_seqretry(&sysctl_local_ports.lock, seq)); 53227b60f5SStephen Hemminger } 54227b60f5SStephen Hemminger EXPORT_SYMBOL(inet_get_local_port_range); 553f421baaSArnaldo Carvalho de Melo 56971af18bSArnaldo Carvalho de Melo int inet_csk_bind_conflict(const struct sock *sk, 57aacd9289SAlex Copot const struct inet_bind_bucket *tb, bool relax) 583f421baaSArnaldo Carvalho de Melo { 593f421baaSArnaldo Carvalho de Melo struct sock *sk2; 603f421baaSArnaldo Carvalho de Melo struct hlist_node *node; 613f421baaSArnaldo Carvalho de Melo int reuse = sk->sk_reuse; 623f421baaSArnaldo Carvalho de Melo 637477fd2eSPavel Emelyanov /* 647477fd2eSPavel Emelyanov * Unlike other sk lookup places we do not check 657477fd2eSPavel Emelyanov * for sk_net here, since _all_ the socks listed 667477fd2eSPavel Emelyanov * in tb->owners list belong to the same net - the 677477fd2eSPavel Emelyanov * one this bucket belongs to. 
687477fd2eSPavel Emelyanov */ 697477fd2eSPavel Emelyanov 703f421baaSArnaldo Carvalho de Melo sk_for_each_bound(sk2, node, &tb->owners) { 713f421baaSArnaldo Carvalho de Melo if (sk != sk2 && 723f421baaSArnaldo Carvalho de Melo !inet_v6_ipv6only(sk2) && 733f421baaSArnaldo Carvalho de Melo (!sk->sk_bound_dev_if || 743f421baaSArnaldo Carvalho de Melo !sk2->sk_bound_dev_if || 753f421baaSArnaldo Carvalho de Melo sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { 763f421baaSArnaldo Carvalho de Melo if (!reuse || !sk2->sk_reuse || 773e8c806aSDavid S. Miller sk2->sk_state == TCP_LISTEN) { 7868835abaSEric Dumazet const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); 7968835abaSEric Dumazet if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || 8068835abaSEric Dumazet sk2_rcv_saddr == sk_rcv_saddr(sk)) 813f421baaSArnaldo Carvalho de Melo break; 828d238b25SDavid S. Miller } 83aacd9289SAlex Copot if (!relax && reuse && sk2->sk_reuse && 84aacd9289SAlex Copot sk2->sk_state != TCP_LISTEN) { 85aacd9289SAlex Copot const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); 86aacd9289SAlex Copot 87aacd9289SAlex Copot if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || 88aacd9289SAlex Copot sk2_rcv_saddr == sk_rcv_saddr(sk)) 89aacd9289SAlex Copot break; 90aacd9289SAlex Copot } 913f421baaSArnaldo Carvalho de Melo } 923f421baaSArnaldo Carvalho de Melo } 933f421baaSArnaldo Carvalho de Melo return node != NULL; 943f421baaSArnaldo Carvalho de Melo } 95971af18bSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); 96971af18bSArnaldo Carvalho de Melo 973f421baaSArnaldo Carvalho de Melo /* Obtain a reference to a local port for the given sock, 983f421baaSArnaldo Carvalho de Melo * if snum is zero it means select any available local port. 
993f421baaSArnaldo Carvalho de Melo */ 100ab1e0a13SArnaldo Carvalho de Melo int inet_csk_get_port(struct sock *sk, unsigned short snum) 1013f421baaSArnaldo Carvalho de Melo { 10239d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 1033f421baaSArnaldo Carvalho de Melo struct inet_bind_hashbucket *head; 1043f421baaSArnaldo Carvalho de Melo struct hlist_node *node; 1053f421baaSArnaldo Carvalho de Melo struct inet_bind_bucket *tb; 106a9d8f911SEvgeniy Polyakov int ret, attempts = 5; 1073b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(sk); 108a9d8f911SEvgeniy Polyakov int smallest_size = -1, smallest_rover; 1093f421baaSArnaldo Carvalho de Melo 1103f421baaSArnaldo Carvalho de Melo local_bh_disable(); 1113f421baaSArnaldo Carvalho de Melo if (!snum) { 112227b60f5SStephen Hemminger int remaining, rover, low, high; 113227b60f5SStephen Hemminger 114a9d8f911SEvgeniy Polyakov again: 115227b60f5SStephen Hemminger inet_get_local_port_range(&low, &high); 116a25de534SAnton Arapov remaining = (high - low) + 1; 117a9d8f911SEvgeniy Polyakov smallest_rover = rover = net_random() % remaining + low; 1183f421baaSArnaldo Carvalho de Melo 119a9d8f911SEvgeniy Polyakov smallest_size = -1; 1203f421baaSArnaldo Carvalho de Melo do { 121e3826f1eSAmerigo Wang if (inet_is_reserved_local_port(rover)) 122e3826f1eSAmerigo Wang goto next_nolock; 1237f635ab7SPavel Emelyanov head = &hashinfo->bhash[inet_bhashfn(net, rover, 1247f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 1253f421baaSArnaldo Carvalho de Melo spin_lock(&head->lock); 1263f421baaSArnaldo Carvalho de Melo inet_bind_bucket_for_each(tb, node, &head->chain) 12709ad9bc7SOctavian Purdila if (net_eq(ib_net(tb), net) && tb->port == rover) { 128a9d8f911SEvgeniy Polyakov if (tb->fastreuse > 0 && 129a9d8f911SEvgeniy Polyakov sk->sk_reuse && 130a9d8f911SEvgeniy Polyakov sk->sk_state != TCP_LISTEN && 131a9d8f911SEvgeniy Polyakov (tb->num_owners < smallest_size || smallest_size == -1)) { 132a9d8f911SEvgeniy Polyakov 
smallest_size = tb->num_owners; 133a9d8f911SEvgeniy Polyakov smallest_rover = rover; 134aacd9289SAlex Copot if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && 135aacd9289SAlex Copot !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { 136a9d8f911SEvgeniy Polyakov snum = smallest_rover; 137fddb7b57SFlavio Leitner goto tb_found; 138a9d8f911SEvgeniy Polyakov } 139a9d8f911SEvgeniy Polyakov } 140aacd9289SAlex Copot if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { 1412b05ad33SFlavio Leitner snum = rover; 142fddb7b57SFlavio Leitner goto tb_found; 1432b05ad33SFlavio Leitner } 1443f421baaSArnaldo Carvalho de Melo goto next; 145a9d8f911SEvgeniy Polyakov } 1463f421baaSArnaldo Carvalho de Melo break; 1473f421baaSArnaldo Carvalho de Melo next: 1483f421baaSArnaldo Carvalho de Melo spin_unlock(&head->lock); 149e3826f1eSAmerigo Wang next_nolock: 1506df71634SStephen Hemminger if (++rover > high) 1516df71634SStephen Hemminger rover = low; 1523f421baaSArnaldo Carvalho de Melo } while (--remaining > 0); 1533f421baaSArnaldo Carvalho de Melo 1543f421baaSArnaldo Carvalho de Melo /* Exhausted local port range during search? It is not 1553f421baaSArnaldo Carvalho de Melo * possible for us to be holding one of the bind hash 1563f421baaSArnaldo Carvalho de Melo * locks if this test triggers, because if 'remaining' 1573f421baaSArnaldo Carvalho de Melo * drops to zero, we broke out of the do/while loop at 1583f421baaSArnaldo Carvalho de Melo * the top level, not from the 'break;' statement. 1593f421baaSArnaldo Carvalho de Melo */ 1603f421baaSArnaldo Carvalho de Melo ret = 1; 161a9d8f911SEvgeniy Polyakov if (remaining <= 0) { 162a9d8f911SEvgeniy Polyakov if (smallest_size != -1) { 163a9d8f911SEvgeniy Polyakov snum = smallest_rover; 164a9d8f911SEvgeniy Polyakov goto have_snum; 165a9d8f911SEvgeniy Polyakov } 1663f421baaSArnaldo Carvalho de Melo goto fail; 167a9d8f911SEvgeniy Polyakov } 1683f421baaSArnaldo Carvalho de Melo /* OK, here is the one we will use. 
HEAD is 1693f421baaSArnaldo Carvalho de Melo * non-NULL and we hold it's mutex. 1703f421baaSArnaldo Carvalho de Melo */ 1713f421baaSArnaldo Carvalho de Melo snum = rover; 1723f421baaSArnaldo Carvalho de Melo } else { 173a9d8f911SEvgeniy Polyakov have_snum: 1747f635ab7SPavel Emelyanov head = &hashinfo->bhash[inet_bhashfn(net, snum, 1757f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 1763f421baaSArnaldo Carvalho de Melo spin_lock(&head->lock); 1773f421baaSArnaldo Carvalho de Melo inet_bind_bucket_for_each(tb, node, &head->chain) 17809ad9bc7SOctavian Purdila if (net_eq(ib_net(tb), net) && tb->port == snum) 1793f421baaSArnaldo Carvalho de Melo goto tb_found; 1803f421baaSArnaldo Carvalho de Melo } 1813f421baaSArnaldo Carvalho de Melo tb = NULL; 1823f421baaSArnaldo Carvalho de Melo goto tb_not_found; 1833f421baaSArnaldo Carvalho de Melo tb_found: 1843f421baaSArnaldo Carvalho de Melo if (!hlist_empty(&tb->owners)) { 1854a17fd52SPavel Emelyanov if (sk->sk_reuse == SK_FORCE_REUSE) 1864a17fd52SPavel Emelyanov goto success; 1874a17fd52SPavel Emelyanov 1883f421baaSArnaldo Carvalho de Melo if (tb->fastreuse > 0 && 189a9d8f911SEvgeniy Polyakov sk->sk_reuse && sk->sk_state != TCP_LISTEN && 190a9d8f911SEvgeniy Polyakov smallest_size == -1) { 1913f421baaSArnaldo Carvalho de Melo goto success; 1923f421baaSArnaldo Carvalho de Melo } else { 1933f421baaSArnaldo Carvalho de Melo ret = 1; 194aacd9289SAlex Copot if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { 1955add3009SStephen Hemminger if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && 1965add3009SStephen Hemminger smallest_size != -1 && --attempts >= 0) { 197a9d8f911SEvgeniy Polyakov spin_unlock(&head->lock); 198a9d8f911SEvgeniy Polyakov goto again; 199a9d8f911SEvgeniy Polyakov } 200aacd9289SAlex Copot 2013f421baaSArnaldo Carvalho de Melo goto fail_unlock; 2023f421baaSArnaldo Carvalho de Melo } 2033f421baaSArnaldo Carvalho de Melo } 204a9d8f911SEvgeniy Polyakov } 2053f421baaSArnaldo Carvalho de Melo tb_not_found: 
2063f421baaSArnaldo Carvalho de Melo ret = 1; 207941b1d22SPavel Emelyanov if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, 208941b1d22SPavel Emelyanov net, head, snum)) == NULL) 2093f421baaSArnaldo Carvalho de Melo goto fail_unlock; 2103f421baaSArnaldo Carvalho de Melo if (hlist_empty(&tb->owners)) { 2113f421baaSArnaldo Carvalho de Melo if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) 2123f421baaSArnaldo Carvalho de Melo tb->fastreuse = 1; 2133f421baaSArnaldo Carvalho de Melo else 2143f421baaSArnaldo Carvalho de Melo tb->fastreuse = 0; 2153f421baaSArnaldo Carvalho de Melo } else if (tb->fastreuse && 2163f421baaSArnaldo Carvalho de Melo (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) 2173f421baaSArnaldo Carvalho de Melo tb->fastreuse = 0; 2183f421baaSArnaldo Carvalho de Melo success: 2193f421baaSArnaldo Carvalho de Melo if (!inet_csk(sk)->icsk_bind_hash) 2203f421baaSArnaldo Carvalho de Melo inet_bind_hash(sk, tb, snum); 221547b792cSIlpo Järvinen WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 2223f421baaSArnaldo Carvalho de Melo ret = 0; 2233f421baaSArnaldo Carvalho de Melo 2243f421baaSArnaldo Carvalho de Melo fail_unlock: 2253f421baaSArnaldo Carvalho de Melo spin_unlock(&head->lock); 2263f421baaSArnaldo Carvalho de Melo fail: 2273f421baaSArnaldo Carvalho de Melo local_bh_enable(); 2283f421baaSArnaldo Carvalho de Melo return ret; 2293f421baaSArnaldo Carvalho de Melo } 2303f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_get_port); 2313f421baaSArnaldo Carvalho de Melo 2323f421baaSArnaldo Carvalho de Melo /* 2333f421baaSArnaldo Carvalho de Melo * Wait for an incoming connection, avoid race conditions. This must be called 2343f421baaSArnaldo Carvalho de Melo * with the socket locked. 
2353f421baaSArnaldo Carvalho de Melo */ 2363f421baaSArnaldo Carvalho de Melo static int inet_csk_wait_for_connect(struct sock *sk, long timeo) 2373f421baaSArnaldo Carvalho de Melo { 2383f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 2393f421baaSArnaldo Carvalho de Melo DEFINE_WAIT(wait); 2403f421baaSArnaldo Carvalho de Melo int err; 2413f421baaSArnaldo Carvalho de Melo 2423f421baaSArnaldo Carvalho de Melo /* 2433f421baaSArnaldo Carvalho de Melo * True wake-one mechanism for incoming connections: only 2443f421baaSArnaldo Carvalho de Melo * one process gets woken up, not the 'whole herd'. 2453f421baaSArnaldo Carvalho de Melo * Since we do not 'race & poll' for established sockets 2463f421baaSArnaldo Carvalho de Melo * anymore, the common case will execute the loop only once. 2473f421baaSArnaldo Carvalho de Melo * 2483f421baaSArnaldo Carvalho de Melo * Subtle issue: "add_wait_queue_exclusive()" will be added 2493f421baaSArnaldo Carvalho de Melo * after any current non-exclusive waiters, and we know that 2503f421baaSArnaldo Carvalho de Melo * it will always _stay_ after any new non-exclusive waiters 2513f421baaSArnaldo Carvalho de Melo * because all non-exclusive waiters are added at the 2523f421baaSArnaldo Carvalho de Melo * beginning of the wait-queue. As such, it's ok to "drop" 2533f421baaSArnaldo Carvalho de Melo * our exclusiveness temporarily when we get woken up without 2543f421baaSArnaldo Carvalho de Melo * having to remove and re-insert us on the wait queue. 
2553f421baaSArnaldo Carvalho de Melo */ 2563f421baaSArnaldo Carvalho de Melo for (;;) { 257aa395145SEric Dumazet prepare_to_wait_exclusive(sk_sleep(sk), &wait, 2583f421baaSArnaldo Carvalho de Melo TASK_INTERRUPTIBLE); 2593f421baaSArnaldo Carvalho de Melo release_sock(sk); 2603f421baaSArnaldo Carvalho de Melo if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 2613f421baaSArnaldo Carvalho de Melo timeo = schedule_timeout(timeo); 2623f421baaSArnaldo Carvalho de Melo lock_sock(sk); 2633f421baaSArnaldo Carvalho de Melo err = 0; 2643f421baaSArnaldo Carvalho de Melo if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) 2653f421baaSArnaldo Carvalho de Melo break; 2663f421baaSArnaldo Carvalho de Melo err = -EINVAL; 2673f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 2683f421baaSArnaldo Carvalho de Melo break; 2693f421baaSArnaldo Carvalho de Melo err = sock_intr_errno(timeo); 2703f421baaSArnaldo Carvalho de Melo if (signal_pending(current)) 2713f421baaSArnaldo Carvalho de Melo break; 2723f421baaSArnaldo Carvalho de Melo err = -EAGAIN; 2733f421baaSArnaldo Carvalho de Melo if (!timeo) 2743f421baaSArnaldo Carvalho de Melo break; 2753f421baaSArnaldo Carvalho de Melo } 276aa395145SEric Dumazet finish_wait(sk_sleep(sk), &wait); 2773f421baaSArnaldo Carvalho de Melo return err; 2783f421baaSArnaldo Carvalho de Melo } 2793f421baaSArnaldo Carvalho de Melo 2803f421baaSArnaldo Carvalho de Melo /* 2813f421baaSArnaldo Carvalho de Melo * This will accept the next outstanding connection. 
2823f421baaSArnaldo Carvalho de Melo */ 2833f421baaSArnaldo Carvalho de Melo struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) 2843f421baaSArnaldo Carvalho de Melo { 2853f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 2868336886fSJerry Chu struct request_sock_queue *queue = &icsk->icsk_accept_queue; 2873f421baaSArnaldo Carvalho de Melo struct sock *newsk; 2888336886fSJerry Chu struct request_sock *req; 2893f421baaSArnaldo Carvalho de Melo int error; 2903f421baaSArnaldo Carvalho de Melo 2913f421baaSArnaldo Carvalho de Melo lock_sock(sk); 2923f421baaSArnaldo Carvalho de Melo 2933f421baaSArnaldo Carvalho de Melo /* We need to make sure that this socket is listening, 2943f421baaSArnaldo Carvalho de Melo * and that it has something pending. 2953f421baaSArnaldo Carvalho de Melo */ 2963f421baaSArnaldo Carvalho de Melo error = -EINVAL; 2973f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 2983f421baaSArnaldo Carvalho de Melo goto out_err; 2993f421baaSArnaldo Carvalho de Melo 3003f421baaSArnaldo Carvalho de Melo /* Find already established connection */ 3018336886fSJerry Chu if (reqsk_queue_empty(queue)) { 3023f421baaSArnaldo Carvalho de Melo long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 3033f421baaSArnaldo Carvalho de Melo 3043f421baaSArnaldo Carvalho de Melo /* If this is a non blocking socket don't sleep */ 3053f421baaSArnaldo Carvalho de Melo error = -EAGAIN; 3063f421baaSArnaldo Carvalho de Melo if (!timeo) 3073f421baaSArnaldo Carvalho de Melo goto out_err; 3083f421baaSArnaldo Carvalho de Melo 3093f421baaSArnaldo Carvalho de Melo error = inet_csk_wait_for_connect(sk, timeo); 3103f421baaSArnaldo Carvalho de Melo if (error) 3113f421baaSArnaldo Carvalho de Melo goto out_err; 3123f421baaSArnaldo Carvalho de Melo } 3138336886fSJerry Chu req = reqsk_queue_remove(queue); 3148336886fSJerry Chu newsk = req->sk; 3153f421baaSArnaldo Carvalho de Melo 3168336886fSJerry Chu sk_acceptq_removed(sk); 
3177ab4551fSEric Dumazet if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) { 3188336886fSJerry Chu spin_lock_bh(&queue->fastopenq->lock); 3198336886fSJerry Chu if (tcp_rsk(req)->listener) { 3208336886fSJerry Chu /* We are still waiting for the final ACK from 3WHS 3218336886fSJerry Chu * so can't free req now. Instead, we set req->sk to 3228336886fSJerry Chu * NULL to signify that the child socket is taken 3238336886fSJerry Chu * so reqsk_fastopen_remove() will free the req 3248336886fSJerry Chu * when 3WHS finishes (or is aborted). 3258336886fSJerry Chu */ 3268336886fSJerry Chu req->sk = NULL; 3278336886fSJerry Chu req = NULL; 3288336886fSJerry Chu } 3298336886fSJerry Chu spin_unlock_bh(&queue->fastopenq->lock); 3308336886fSJerry Chu } 3313f421baaSArnaldo Carvalho de Melo out: 3323f421baaSArnaldo Carvalho de Melo release_sock(sk); 3338336886fSJerry Chu if (req) 3348336886fSJerry Chu __reqsk_free(req); 3353f421baaSArnaldo Carvalho de Melo return newsk; 3363f421baaSArnaldo Carvalho de Melo out_err: 3373f421baaSArnaldo Carvalho de Melo newsk = NULL; 3388336886fSJerry Chu req = NULL; 3393f421baaSArnaldo Carvalho de Melo *err = error; 3403f421baaSArnaldo Carvalho de Melo goto out; 3413f421baaSArnaldo Carvalho de Melo } 3423f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_accept); 3433f421baaSArnaldo Carvalho de Melo 3443f421baaSArnaldo Carvalho de Melo /* 3453f421baaSArnaldo Carvalho de Melo * Using different timers for retransmit, delayed acks and probes 3463f421baaSArnaldo Carvalho de Melo * We may wish use just one timer maintaining a list of expire jiffies 3473f421baaSArnaldo Carvalho de Melo * to optimize. 
3483f421baaSArnaldo Carvalho de Melo */ 3493f421baaSArnaldo Carvalho de Melo void inet_csk_init_xmit_timers(struct sock *sk, 3503f421baaSArnaldo Carvalho de Melo void (*retransmit_handler)(unsigned long), 3513f421baaSArnaldo Carvalho de Melo void (*delack_handler)(unsigned long), 3523f421baaSArnaldo Carvalho de Melo void (*keepalive_handler)(unsigned long)) 3533f421baaSArnaldo Carvalho de Melo { 3543f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3553f421baaSArnaldo Carvalho de Melo 356b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, 357b24b8a24SPavel Emelyanov (unsigned long)sk); 358b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_delack_timer, delack_handler, 359b24b8a24SPavel Emelyanov (unsigned long)sk); 360b24b8a24SPavel Emelyanov setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); 3613f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = 0; 3623f421baaSArnaldo Carvalho de Melo } 3633f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_init_xmit_timers); 3643f421baaSArnaldo Carvalho de Melo 3653f421baaSArnaldo Carvalho de Melo void inet_csk_clear_xmit_timers(struct sock *sk) 3663f421baaSArnaldo Carvalho de Melo { 3673f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3683f421baaSArnaldo Carvalho de Melo 3693f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; 3703f421baaSArnaldo Carvalho de Melo 3713f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 3723f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_delack_timer); 3733f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 3743f421baaSArnaldo Carvalho de Melo } 3753f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 3763f421baaSArnaldo Carvalho de Melo 3773f421baaSArnaldo Carvalho de Melo void inet_csk_delete_keepalive_timer(struct 
sock *sk) 3783f421baaSArnaldo Carvalho de Melo { 3793f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 3803f421baaSArnaldo Carvalho de Melo } 3813f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 3823f421baaSArnaldo Carvalho de Melo 3833f421baaSArnaldo Carvalho de Melo void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 3843f421baaSArnaldo Carvalho de Melo { 3853f421baaSArnaldo Carvalho de Melo sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 3863f421baaSArnaldo Carvalho de Melo } 3873f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 3883f421baaSArnaldo Carvalho de Melo 3893f421baaSArnaldo Carvalho de Melo struct dst_entry *inet_csk_route_req(struct sock *sk, 3906bd023f3SDavid S. Miller struct flowi4 *fl4, 391ba3f7f04SDavid S. Miller const struct request_sock *req) 3923f421baaSArnaldo Carvalho de Melo { 3933f421baaSArnaldo Carvalho de Melo struct rtable *rt; 3943f421baaSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 395f6d8bd05SEric Dumazet struct ip_options_rcu *opt = inet_rsk(req)->opt; 39684a3aa00SPavel Emelyanov struct net *net = sock_net(sk); 3973e12939aSDavid S. Miller int flags = inet_sk_flowi_flags(sk); 3983f421baaSArnaldo Carvalho de Melo 3996bd023f3SDavid S. Miller flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 400e79d9bc7SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 4017433819aSEric Dumazet sk->sk_protocol, 4027586ecebSEric Dumazet flags, 403f6d8bd05SEric Dumazet (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, 404e79d9bc7SDavid S. Miller ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); 4056bd023f3SDavid S. Miller security_req_classify_flow(req, flowi4_to_flowi(fl4)); 4066bd023f3SDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 407b23dd4feSDavid S. 
Miller if (IS_ERR(rt)) 408857a6e0aSIlpo Järvinen goto no_route; 409155e8336SJulian Anastasov if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 410857a6e0aSIlpo Järvinen goto route_err; 411d8d1f30bSChangli Gao return &rt->dst; 412857a6e0aSIlpo Järvinen 413857a6e0aSIlpo Järvinen route_err: 414857a6e0aSIlpo Järvinen ip_rt_put(rt); 415857a6e0aSIlpo Järvinen no_route: 416857a6e0aSIlpo Järvinen IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 417857a6e0aSIlpo Järvinen return NULL; 4183f421baaSArnaldo Carvalho de Melo } 4193f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_route_req); 4203f421baaSArnaldo Carvalho de Melo 42177357a95SDavid S. Miller struct dst_entry *inet_csk_route_child_sock(struct sock *sk, 42277357a95SDavid S. Miller struct sock *newsk, 42377357a95SDavid S. Miller const struct request_sock *req) 42477357a95SDavid S. Miller { 42577357a95SDavid S. Miller const struct inet_request_sock *ireq = inet_rsk(req); 42677357a95SDavid S. Miller struct inet_sock *newinet = inet_sk(newsk); 4271a7b27c9SChristoph Paasch struct ip_options_rcu *opt; 42877357a95SDavid S. Miller struct net *net = sock_net(sk); 42977357a95SDavid S. Miller struct flowi4 *fl4; 43077357a95SDavid S. Miller struct rtable *rt; 43177357a95SDavid S. Miller 43277357a95SDavid S. Miller fl4 = &newinet->cork.fl.u.ip4; 4331a7b27c9SChristoph Paasch 4341a7b27c9SChristoph Paasch rcu_read_lock(); 4351a7b27c9SChristoph Paasch opt = rcu_dereference(newinet->inet_opt); 43677357a95SDavid S. Miller flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 43777357a95SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 43877357a95SDavid S. Miller sk->sk_protocol, inet_sk_flowi_flags(sk), 43977357a95SDavid S. Miller (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, 44077357a95SDavid S. Miller ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); 44177357a95SDavid S. Miller security_req_classify_flow(req, flowi4_to_flowi(fl4)); 44277357a95SDavid S. 
Miller rt = ip_route_output_flow(net, fl4, sk); 44377357a95SDavid S. Miller if (IS_ERR(rt)) 44477357a95SDavid S. Miller goto no_route; 445155e8336SJulian Anastasov if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 44677357a95SDavid S. Miller goto route_err; 4471a7b27c9SChristoph Paasch rcu_read_unlock(); 44877357a95SDavid S. Miller return &rt->dst; 44977357a95SDavid S. Miller 45077357a95SDavid S. Miller route_err: 45177357a95SDavid S. Miller ip_rt_put(rt); 45277357a95SDavid S. Miller no_route: 4531a7b27c9SChristoph Paasch rcu_read_unlock(); 45477357a95SDavid S. Miller IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 45577357a95SDavid S. Miller return NULL; 45677357a95SDavid S. Miller } 45777357a95SDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); 45877357a95SDavid S. Miller 4596b72977bSAl Viro static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 46072a3effaSEric Dumazet const u32 rnd, const u32 synq_hsize) 4613f421baaSArnaldo Carvalho de Melo { 4626b72977bSAl Viro return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); 4633f421baaSArnaldo Carvalho de Melo } 4643f421baaSArnaldo Carvalho de Melo 465dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 4663f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) ((fam) == AF_INET) 4673f421baaSArnaldo Carvalho de Melo #else 4683f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) 1 4693f421baaSArnaldo Carvalho de Melo #endif 4703f421baaSArnaldo Carvalho de Melo 4713f421baaSArnaldo Carvalho de Melo struct request_sock *inet_csk_search_req(const struct sock *sk, 4723f421baaSArnaldo Carvalho de Melo struct request_sock ***prevp, 4736b72977bSAl Viro const __be16 rport, const __be32 raddr, 4747f25afbbSAl Viro const __be32 laddr) 4753f421baaSArnaldo Carvalho de Melo { 4763f421baaSArnaldo Carvalho de Melo const struct inet_connection_sock *icsk = inet_csk(sk); 4773f421baaSArnaldo Carvalho de Melo struct listen_sock *lopt = 
icsk->icsk_accept_queue.listen_opt; 4783f421baaSArnaldo Carvalho de Melo struct request_sock *req, **prev; 4793f421baaSArnaldo Carvalho de Melo 4803f421baaSArnaldo Carvalho de Melo for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, 4813f421baaSArnaldo Carvalho de Melo lopt->nr_table_entries)]; 4823f421baaSArnaldo Carvalho de Melo (req = *prev) != NULL; 4833f421baaSArnaldo Carvalho de Melo prev = &req->dl_next) { 4843f421baaSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 4853f421baaSArnaldo Carvalho de Melo 4863f421baaSArnaldo Carvalho de Melo if (ireq->rmt_port == rport && 4873f421baaSArnaldo Carvalho de Melo ireq->rmt_addr == raddr && 4883f421baaSArnaldo Carvalho de Melo ireq->loc_addr == laddr && 4893f421baaSArnaldo Carvalho de Melo AF_INET_FAMILY(req->rsk_ops->family)) { 490547b792cSIlpo Järvinen WARN_ON(req->sk); 4913f421baaSArnaldo Carvalho de Melo *prevp = prev; 4923f421baaSArnaldo Carvalho de Melo break; 4933f421baaSArnaldo Carvalho de Melo } 4943f421baaSArnaldo Carvalho de Melo } 4953f421baaSArnaldo Carvalho de Melo 4963f421baaSArnaldo Carvalho de Melo return req; 4973f421baaSArnaldo Carvalho de Melo } 4983f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_search_req); 4993f421baaSArnaldo Carvalho de Melo 5003f421baaSArnaldo Carvalho de Melo void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 501c2977c22SArnaldo Carvalho de Melo unsigned long timeout) 5023f421baaSArnaldo Carvalho de Melo { 5033f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 5043f421baaSArnaldo Carvalho de Melo struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 5053f421baaSArnaldo Carvalho de Melo const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, 5063f421baaSArnaldo Carvalho de Melo lopt->hash_rnd, lopt->nr_table_entries); 5073f421baaSArnaldo Carvalho de Melo 5083f421baaSArnaldo Carvalho de Melo 
reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 5093f421baaSArnaldo Carvalho de Melo inet_csk_reqsk_queue_added(sk, timeout); 5103f421baaSArnaldo Carvalho de Melo } 5114bc2f18bSEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 5123f421baaSArnaldo Carvalho de Melo 513a019d6feSArnaldo Carvalho de Melo /* Only thing we need from tcp.h */ 514a019d6feSArnaldo Carvalho de Melo extern int sysctl_tcp_synack_retries; 515a019d6feSArnaldo Carvalho de Melo 5169f1d2604SArnaldo Carvalho de Melo 5170c3d79bcSJulian Anastasov /* Decide when to expire the request and when to resend SYN-ACK */ 5180c3d79bcSJulian Anastasov static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 5190c3d79bcSJulian Anastasov const int max_retries, 5200c3d79bcSJulian Anastasov const u8 rskq_defer_accept, 5210c3d79bcSJulian Anastasov int *expire, int *resend) 5220c3d79bcSJulian Anastasov { 5230c3d79bcSJulian Anastasov if (!rskq_defer_accept) { 524*e6c022a4SEric Dumazet *expire = req->num_timeout >= thresh; 5250c3d79bcSJulian Anastasov *resend = 1; 5260c3d79bcSJulian Anastasov return; 5270c3d79bcSJulian Anastasov } 528*e6c022a4SEric Dumazet *expire = req->num_timeout >= thresh && 529*e6c022a4SEric Dumazet (!inet_rsk(req)->acked || req->num_timeout >= max_retries); 5300c3d79bcSJulian Anastasov /* 5310c3d79bcSJulian Anastasov * Do not resend while waiting for data after ACK, 5320c3d79bcSJulian Anastasov * start to resend on end of deferring period to give 5330c3d79bcSJulian Anastasov * last chance for data or ACK to create established socket. 
5340c3d79bcSJulian Anastasov */ 5350c3d79bcSJulian Anastasov *resend = !inet_rsk(req)->acked || 536*e6c022a4SEric Dumazet req->num_timeout >= rskq_defer_accept - 1; 5370c3d79bcSJulian Anastasov } 5380c3d79bcSJulian Anastasov 539*e6c022a4SEric Dumazet int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) 540*e6c022a4SEric Dumazet { 541*e6c022a4SEric Dumazet int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); 542*e6c022a4SEric Dumazet 543*e6c022a4SEric Dumazet if (!err) 544*e6c022a4SEric Dumazet req->num_retrans++; 545*e6c022a4SEric Dumazet return err; 546*e6c022a4SEric Dumazet } 547*e6c022a4SEric Dumazet EXPORT_SYMBOL(inet_rtx_syn_ack); 548*e6c022a4SEric Dumazet 549a019d6feSArnaldo Carvalho de Melo void inet_csk_reqsk_queue_prune(struct sock *parent, 550a019d6feSArnaldo Carvalho de Melo const unsigned long interval, 551a019d6feSArnaldo Carvalho de Melo const unsigned long timeout, 552a019d6feSArnaldo Carvalho de Melo const unsigned long max_rto) 553a019d6feSArnaldo Carvalho de Melo { 554a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(parent); 555a019d6feSArnaldo Carvalho de Melo struct request_sock_queue *queue = &icsk->icsk_accept_queue; 556a019d6feSArnaldo Carvalho de Melo struct listen_sock *lopt = queue->listen_opt; 557ec0a1966SDavid S. Miller int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 558ec0a1966SDavid S. Miller int thresh = max_retries; 559a019d6feSArnaldo Carvalho de Melo unsigned long now = jiffies; 560a019d6feSArnaldo Carvalho de Melo struct request_sock **reqp, *req; 561a019d6feSArnaldo Carvalho de Melo int i, budget; 562a019d6feSArnaldo Carvalho de Melo 563a019d6feSArnaldo Carvalho de Melo if (lopt == NULL || lopt->qlen == 0) 564a019d6feSArnaldo Carvalho de Melo return; 565a019d6feSArnaldo Carvalho de Melo 566a019d6feSArnaldo Carvalho de Melo /* Normally all the openreqs are young and become mature 567a019d6feSArnaldo Carvalho de Melo * (i.e. 
converted to established socket) for first timeout. 568fd4f2ceaSEric Dumazet * If synack was not acknowledged for 1 second, it means 569a019d6feSArnaldo Carvalho de Melo * one of the following things: synack was lost, ack was lost, 570a019d6feSArnaldo Carvalho de Melo * rtt is high or nobody planned to ack (i.e. synflood). 571a019d6feSArnaldo Carvalho de Melo * When server is a bit loaded, queue is populated with old 572a019d6feSArnaldo Carvalho de Melo * open requests, reducing effective size of queue. 573a019d6feSArnaldo Carvalho de Melo * When server is well loaded, queue size reduces to zero 574a019d6feSArnaldo Carvalho de Melo * after several minutes of work. It is not synflood, 575a019d6feSArnaldo Carvalho de Melo * it is normal operation. The solution is pruning 576a019d6feSArnaldo Carvalho de Melo * too old entries overriding normal timeout, when 577a019d6feSArnaldo Carvalho de Melo * situation becomes dangerous. 578a019d6feSArnaldo Carvalho de Melo * 579a019d6feSArnaldo Carvalho de Melo * Essentially, we reserve half of room for young 580a019d6feSArnaldo Carvalho de Melo * embrions; and abort old ones without pity, if old 581a019d6feSArnaldo Carvalho de Melo * ones are about to clog our table. 582a019d6feSArnaldo Carvalho de Melo */ 583a019d6feSArnaldo Carvalho de Melo if (lopt->qlen>>(lopt->max_qlen_log-1)) { 584a019d6feSArnaldo Carvalho de Melo int young = (lopt->qlen_young<<1); 585a019d6feSArnaldo Carvalho de Melo 586a019d6feSArnaldo Carvalho de Melo while (thresh > 2) { 587a019d6feSArnaldo Carvalho de Melo if (lopt->qlen < young) 588a019d6feSArnaldo Carvalho de Melo break; 589a019d6feSArnaldo Carvalho de Melo thresh--; 590a019d6feSArnaldo Carvalho de Melo young <<= 1; 591a019d6feSArnaldo Carvalho de Melo } 592a019d6feSArnaldo Carvalho de Melo } 593a019d6feSArnaldo Carvalho de Melo 594ec0a1966SDavid S. Miller if (queue->rskq_defer_accept) 595ec0a1966SDavid S. Miller max_retries = queue->rskq_defer_accept; 596ec0a1966SDavid S. 
Miller 597a019d6feSArnaldo Carvalho de Melo budget = 2 * (lopt->nr_table_entries / (timeout / interval)); 598a019d6feSArnaldo Carvalho de Melo i = lopt->clock_hand; 599a019d6feSArnaldo Carvalho de Melo 600a019d6feSArnaldo Carvalho de Melo do { 601a019d6feSArnaldo Carvalho de Melo reqp=&lopt->syn_table[i]; 602a019d6feSArnaldo Carvalho de Melo while ((req = *reqp) != NULL) { 603a019d6feSArnaldo Carvalho de Melo if (time_after_eq(now, req->expires)) { 6040c3d79bcSJulian Anastasov int expire = 0, resend = 0; 6050c3d79bcSJulian Anastasov 6060c3d79bcSJulian Anastasov syn_ack_recalc(req, thresh, max_retries, 6070c3d79bcSJulian Anastasov queue->rskq_defer_accept, 6080c3d79bcSJulian Anastasov &expire, &resend); 60972659eccSOctavian Purdila req->rsk_ops->syn_ack_timeout(parent, req); 6100c3d79bcSJulian Anastasov if (!expire && 6110c3d79bcSJulian Anastasov (!resend || 612*e6c022a4SEric Dumazet !inet_rtx_syn_ack(parent, req) || 6130c3d79bcSJulian Anastasov inet_rsk(req)->acked)) { 614a019d6feSArnaldo Carvalho de Melo unsigned long timeo; 615a019d6feSArnaldo Carvalho de Melo 616*e6c022a4SEric Dumazet if (req->num_timeout++ == 0) 617a019d6feSArnaldo Carvalho de Melo lopt->qlen_young--; 618*e6c022a4SEric Dumazet timeo = min(timeout << req->num_timeout, 619*e6c022a4SEric Dumazet max_rto); 620a019d6feSArnaldo Carvalho de Melo req->expires = now + timeo; 621a019d6feSArnaldo Carvalho de Melo reqp = &req->dl_next; 622a019d6feSArnaldo Carvalho de Melo continue; 623a019d6feSArnaldo Carvalho de Melo } 624a019d6feSArnaldo Carvalho de Melo 625a019d6feSArnaldo Carvalho de Melo /* Drop this request */ 626a019d6feSArnaldo Carvalho de Melo inet_csk_reqsk_queue_unlink(parent, req, reqp); 627a019d6feSArnaldo Carvalho de Melo reqsk_queue_removed(queue, req); 628a019d6feSArnaldo Carvalho de Melo reqsk_free(req); 629a019d6feSArnaldo Carvalho de Melo continue; 630a019d6feSArnaldo Carvalho de Melo } 631a019d6feSArnaldo Carvalho de Melo reqp = &req->dl_next; 632a019d6feSArnaldo Carvalho de Melo } 
633a019d6feSArnaldo Carvalho de Melo 634a019d6feSArnaldo Carvalho de Melo i = (i + 1) & (lopt->nr_table_entries - 1); 635a019d6feSArnaldo Carvalho de Melo 636a019d6feSArnaldo Carvalho de Melo } while (--budget > 0); 637a019d6feSArnaldo Carvalho de Melo 638a019d6feSArnaldo Carvalho de Melo lopt->clock_hand = i; 639a019d6feSArnaldo Carvalho de Melo 640a019d6feSArnaldo Carvalho de Melo if (lopt->qlen) 641a019d6feSArnaldo Carvalho de Melo inet_csk_reset_keepalive_timer(parent, interval); 642a019d6feSArnaldo Carvalho de Melo } 643a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); 644a019d6feSArnaldo Carvalho de Melo 645e56c57d0SEric Dumazet /** 646e56c57d0SEric Dumazet * inet_csk_clone_lock - clone an inet socket, and lock its clone 647e56c57d0SEric Dumazet * @sk: the socket to clone 648e56c57d0SEric Dumazet * @req: request_sock 649e56c57d0SEric Dumazet * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 650e56c57d0SEric Dumazet * 651e56c57d0SEric Dumazet * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) 652e56c57d0SEric Dumazet */ 653e56c57d0SEric Dumazet struct sock *inet_csk_clone_lock(const struct sock *sk, 654e56c57d0SEric Dumazet const struct request_sock *req, 655dd0fc66fSAl Viro const gfp_t priority) 6569f1d2604SArnaldo Carvalho de Melo { 657e56c57d0SEric Dumazet struct sock *newsk = sk_clone_lock(sk, priority); 6589f1d2604SArnaldo Carvalho de Melo 6599f1d2604SArnaldo Carvalho de Melo if (newsk != NULL) { 6609f1d2604SArnaldo Carvalho de Melo struct inet_connection_sock *newicsk = inet_csk(newsk); 6619f1d2604SArnaldo Carvalho de Melo 6629f1d2604SArnaldo Carvalho de Melo newsk->sk_state = TCP_SYN_RECV; 6639f1d2604SArnaldo Carvalho de Melo newicsk->icsk_bind_hash = NULL; 6649f1d2604SArnaldo Carvalho de Melo 665c720c7e8SEric Dumazet inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port; 666c720c7e8SEric Dumazet inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port); 667c720c7e8SEric Dumazet 
inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; 6689f1d2604SArnaldo Carvalho de Melo newsk->sk_write_space = sk_stream_write_space; 6699f1d2604SArnaldo Carvalho de Melo 6709f1d2604SArnaldo Carvalho de Melo newicsk->icsk_retransmits = 0; 6719f1d2604SArnaldo Carvalho de Melo newicsk->icsk_backoff = 0; 6726687e988SArnaldo Carvalho de Melo newicsk->icsk_probes_out = 0; 6739f1d2604SArnaldo Carvalho de Melo 6749f1d2604SArnaldo Carvalho de Melo /* Deinitialize accept_queue to trap illegal accesses. */ 6759f1d2604SArnaldo Carvalho de Melo memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); 6764237c75cSVenkat Yekkirala 6774237c75cSVenkat Yekkirala security_inet_csk_clone(newsk, req); 6789f1d2604SArnaldo Carvalho de Melo } 6799f1d2604SArnaldo Carvalho de Melo return newsk; 6809f1d2604SArnaldo Carvalho de Melo } 681e56c57d0SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_clone_lock); 682a019d6feSArnaldo Carvalho de Melo 683a019d6feSArnaldo Carvalho de Melo /* 684a019d6feSArnaldo Carvalho de Melo * At this point, there should be no process reference to this 685a019d6feSArnaldo Carvalho de Melo * socket, and thus no user references at all. Therefore we 686a019d6feSArnaldo Carvalho de Melo * can assume the socket waitqueue is inactive and nobody will 687a019d6feSArnaldo Carvalho de Melo * try to jump onto it. 688a019d6feSArnaldo Carvalho de Melo */ 689a019d6feSArnaldo Carvalho de Melo void inet_csk_destroy_sock(struct sock *sk) 690a019d6feSArnaldo Carvalho de Melo { 691547b792cSIlpo Järvinen WARN_ON(sk->sk_state != TCP_CLOSE); 692547b792cSIlpo Järvinen WARN_ON(!sock_flag(sk, SOCK_DEAD)); 693a019d6feSArnaldo Carvalho de Melo 694a019d6feSArnaldo Carvalho de Melo /* It cannot be in hash table! 
*/ 695547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk)); 696a019d6feSArnaldo Carvalho de Melo 697c720c7e8SEric Dumazet /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ 698c720c7e8SEric Dumazet WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); 699a019d6feSArnaldo Carvalho de Melo 700a019d6feSArnaldo Carvalho de Melo sk->sk_prot->destroy(sk); 701a019d6feSArnaldo Carvalho de Melo 702a019d6feSArnaldo Carvalho de Melo sk_stream_kill_queues(sk); 703a019d6feSArnaldo Carvalho de Melo 704a019d6feSArnaldo Carvalho de Melo xfrm_sk_free_policy(sk); 705a019d6feSArnaldo Carvalho de Melo 706a019d6feSArnaldo Carvalho de Melo sk_refcnt_debug_release(sk); 707a019d6feSArnaldo Carvalho de Melo 708dd24c001SEric Dumazet percpu_counter_dec(sk->sk_prot->orphan_count); 709a019d6feSArnaldo Carvalho de Melo sock_put(sk); 710a019d6feSArnaldo Carvalho de Melo } 711a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_destroy_sock); 712a019d6feSArnaldo Carvalho de Melo 713a019d6feSArnaldo Carvalho de Melo int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) 714a019d6feSArnaldo Carvalho de Melo { 715a019d6feSArnaldo Carvalho de Melo struct inet_sock *inet = inet_sk(sk); 716a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 717a019d6feSArnaldo Carvalho de Melo int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); 718a019d6feSArnaldo Carvalho de Melo 719a019d6feSArnaldo Carvalho de Melo if (rc != 0) 720a019d6feSArnaldo Carvalho de Melo return rc; 721a019d6feSArnaldo Carvalho de Melo 722a019d6feSArnaldo Carvalho de Melo sk->sk_max_ack_backlog = 0; 723a019d6feSArnaldo Carvalho de Melo sk->sk_ack_backlog = 0; 724a019d6feSArnaldo Carvalho de Melo inet_csk_delack_init(sk); 725a019d6feSArnaldo Carvalho de Melo 726a019d6feSArnaldo Carvalho de Melo /* There is race window here: we announce ourselves listening, 727a019d6feSArnaldo Carvalho de Melo * but this transition is still not validated by get_port(). 
728a019d6feSArnaldo Carvalho de Melo * It is OK, because this socket enters to hash table only 729a019d6feSArnaldo Carvalho de Melo * after validation is complete. 730a019d6feSArnaldo Carvalho de Melo */ 731a019d6feSArnaldo Carvalho de Melo sk->sk_state = TCP_LISTEN; 732c720c7e8SEric Dumazet if (!sk->sk_prot->get_port(sk, inet->inet_num)) { 733c720c7e8SEric Dumazet inet->inet_sport = htons(inet->inet_num); 734a019d6feSArnaldo Carvalho de Melo 735a019d6feSArnaldo Carvalho de Melo sk_dst_reset(sk); 736a019d6feSArnaldo Carvalho de Melo sk->sk_prot->hash(sk); 737a019d6feSArnaldo Carvalho de Melo 738a019d6feSArnaldo Carvalho de Melo return 0; 739a019d6feSArnaldo Carvalho de Melo } 740a019d6feSArnaldo Carvalho de Melo 741a019d6feSArnaldo Carvalho de Melo sk->sk_state = TCP_CLOSE; 742a019d6feSArnaldo Carvalho de Melo __reqsk_queue_destroy(&icsk->icsk_accept_queue); 743a019d6feSArnaldo Carvalho de Melo return -EADDRINUSE; 744a019d6feSArnaldo Carvalho de Melo } 745a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_start); 746a019d6feSArnaldo Carvalho de Melo 747a019d6feSArnaldo Carvalho de Melo /* 748a019d6feSArnaldo Carvalho de Melo * This routine closes sockets which have been at least partially 749a019d6feSArnaldo Carvalho de Melo * opened, but not yet accepted. 
750a019d6feSArnaldo Carvalho de Melo */ 751a019d6feSArnaldo Carvalho de Melo void inet_csk_listen_stop(struct sock *sk) 752a019d6feSArnaldo Carvalho de Melo { 753a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 7548336886fSJerry Chu struct request_sock_queue *queue = &icsk->icsk_accept_queue; 755a019d6feSArnaldo Carvalho de Melo struct request_sock *acc_req; 756a019d6feSArnaldo Carvalho de Melo struct request_sock *req; 757a019d6feSArnaldo Carvalho de Melo 758a019d6feSArnaldo Carvalho de Melo inet_csk_delete_keepalive_timer(sk); 759a019d6feSArnaldo Carvalho de Melo 760a019d6feSArnaldo Carvalho de Melo /* make all the listen_opt local to us */ 7618336886fSJerry Chu acc_req = reqsk_queue_yank_acceptq(queue); 762a019d6feSArnaldo Carvalho de Melo 763a019d6feSArnaldo Carvalho de Melo /* Following specs, it would be better either to send FIN 764a019d6feSArnaldo Carvalho de Melo * (and enter FIN-WAIT-1, it is normal close) 765a019d6feSArnaldo Carvalho de Melo * or to send active reset (abort). 766a019d6feSArnaldo Carvalho de Melo * Certainly, it is pretty dangerous while synflood, but it is 767a019d6feSArnaldo Carvalho de Melo * bad justification for our negligence 8) 768a019d6feSArnaldo Carvalho de Melo * To be honest, we are not able to make either 769a019d6feSArnaldo Carvalho de Melo * of the variants now. 
--ANK 770a019d6feSArnaldo Carvalho de Melo */ 7718336886fSJerry Chu reqsk_queue_destroy(queue); 772a019d6feSArnaldo Carvalho de Melo 773a019d6feSArnaldo Carvalho de Melo while ((req = acc_req) != NULL) { 774a019d6feSArnaldo Carvalho de Melo struct sock *child = req->sk; 775a019d6feSArnaldo Carvalho de Melo 776a019d6feSArnaldo Carvalho de Melo acc_req = req->dl_next; 777a019d6feSArnaldo Carvalho de Melo 778a019d6feSArnaldo Carvalho de Melo local_bh_disable(); 779a019d6feSArnaldo Carvalho de Melo bh_lock_sock(child); 780547b792cSIlpo Järvinen WARN_ON(sock_owned_by_user(child)); 781a019d6feSArnaldo Carvalho de Melo sock_hold(child); 782a019d6feSArnaldo Carvalho de Melo 783a019d6feSArnaldo Carvalho de Melo sk->sk_prot->disconnect(child, O_NONBLOCK); 784a019d6feSArnaldo Carvalho de Melo 785a019d6feSArnaldo Carvalho de Melo sock_orphan(child); 786a019d6feSArnaldo Carvalho de Melo 787eb4dea58SHerbert Xu percpu_counter_inc(sk->sk_prot->orphan_count); 788eb4dea58SHerbert Xu 7897ab4551fSEric Dumazet if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) { 7908336886fSJerry Chu BUG_ON(tcp_sk(child)->fastopen_rsk != req); 7918336886fSJerry Chu BUG_ON(sk != tcp_rsk(req)->listener); 7928336886fSJerry Chu 7938336886fSJerry Chu /* Paranoid, to prevent race condition if 7948336886fSJerry Chu * an inbound pkt destined for child is 7958336886fSJerry Chu * blocked by sock lock in tcp_v4_rcv(). 7968336886fSJerry Chu * Also to satisfy an assertion in 7978336886fSJerry Chu * tcp_v4_destroy_sock(). 
7988336886fSJerry Chu */ 7998336886fSJerry Chu tcp_sk(child)->fastopen_rsk = NULL; 8008336886fSJerry Chu sock_put(sk); 8018336886fSJerry Chu } 802a019d6feSArnaldo Carvalho de Melo inet_csk_destroy_sock(child); 803a019d6feSArnaldo Carvalho de Melo 804a019d6feSArnaldo Carvalho de Melo bh_unlock_sock(child); 805a019d6feSArnaldo Carvalho de Melo local_bh_enable(); 806a019d6feSArnaldo Carvalho de Melo sock_put(child); 807a019d6feSArnaldo Carvalho de Melo 808a019d6feSArnaldo Carvalho de Melo sk_acceptq_removed(sk); 809a019d6feSArnaldo Carvalho de Melo __reqsk_free(req); 810a019d6feSArnaldo Carvalho de Melo } 8118336886fSJerry Chu if (queue->fastopenq != NULL) { 8128336886fSJerry Chu /* Free all the reqs queued in rskq_rst_head. */ 8138336886fSJerry Chu spin_lock_bh(&queue->fastopenq->lock); 8148336886fSJerry Chu acc_req = queue->fastopenq->rskq_rst_head; 8158336886fSJerry Chu queue->fastopenq->rskq_rst_head = NULL; 8168336886fSJerry Chu spin_unlock_bh(&queue->fastopenq->lock); 8178336886fSJerry Chu while ((req = acc_req) != NULL) { 8188336886fSJerry Chu acc_req = req->dl_next; 8198336886fSJerry Chu __reqsk_free(req); 8208336886fSJerry Chu } 8218336886fSJerry Chu } 822547b792cSIlpo Järvinen WARN_ON(sk->sk_ack_backlog); 823a019d6feSArnaldo Carvalho de Melo } 824a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 825af05dc93SArnaldo Carvalho de Melo 826af05dc93SArnaldo Carvalho de Melo void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 827af05dc93SArnaldo Carvalho de Melo { 828af05dc93SArnaldo Carvalho de Melo struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; 829af05dc93SArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 830af05dc93SArnaldo Carvalho de Melo 831af05dc93SArnaldo Carvalho de Melo sin->sin_family = AF_INET; 832c720c7e8SEric Dumazet sin->sin_addr.s_addr = inet->inet_daddr; 833c720c7e8SEric Dumazet sin->sin_port = inet->inet_dport; 834af05dc93SArnaldo Carvalho de Melo } 835af05dc93SArnaldo Carvalho 
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);

#ifdef CONFIG_COMPAT
/*
 * Compat getsockopt entry point: forward to the address family's
 * compat_getsockopt hook when it is set, otherwise to its regular
 * getsockopt hook.  Returns the hook's result.
 */
int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
			       char __user *optval, int __user *optlen)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_af_ops->compat_getsockopt == NULL)
		return icsk->icsk_af_ops->getsockopt(sk, level, optname,
						     optval, optlen);

	return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
						    optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);

/*
 * Compat setsockopt entry point: forward to the address family's
 * compat_setsockopt hook when it is set, otherwise to its regular
 * setsockopt hook.  Returns the hook's result.
 */
int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
			       char __user *optval, unsigned int optlen)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_af_ops->compat_setsockopt == NULL)
		return icsk->icsk_af_ops->setsockopt(sk, level, optname,
						     optval, optlen);

	return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
						    optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
#endif

Miller static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) 86680d0a69fSDavid S. Miller { 8675abf7f7eSEric Dumazet const struct inet_sock *inet = inet_sk(sk); 8685abf7f7eSEric Dumazet const struct ip_options_rcu *inet_opt; 86980d0a69fSDavid S. Miller __be32 daddr = inet->inet_daddr; 87080d0a69fSDavid S. Miller struct flowi4 *fl4; 87180d0a69fSDavid S. Miller struct rtable *rt; 87280d0a69fSDavid S. Miller 87380d0a69fSDavid S. Miller rcu_read_lock(); 87480d0a69fSDavid S. Miller inet_opt = rcu_dereference(inet->inet_opt); 87580d0a69fSDavid S. Miller if (inet_opt && inet_opt->opt.srr) 87680d0a69fSDavid S. Miller daddr = inet_opt->opt.faddr; 87780d0a69fSDavid S. Miller fl4 = &fl->u.ip4; 87880d0a69fSDavid S. Miller rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, 87980d0a69fSDavid S. Miller inet->inet_saddr, inet->inet_dport, 88080d0a69fSDavid S. Miller inet->inet_sport, sk->sk_protocol, 88180d0a69fSDavid S. Miller RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); 88280d0a69fSDavid S. Miller if (IS_ERR(rt)) 88380d0a69fSDavid S. Miller rt = NULL; 88480d0a69fSDavid S. Miller if (rt) 88580d0a69fSDavid S. Miller sk_setup_caps(sk, &rt->dst); 88680d0a69fSDavid S. Miller rcu_read_unlock(); 88780d0a69fSDavid S. Miller 88880d0a69fSDavid S. Miller return &rt->dst; 88980d0a69fSDavid S. Miller } 89080d0a69fSDavid S. Miller 89180d0a69fSDavid S. Miller struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) 89280d0a69fSDavid S. Miller { 89380d0a69fSDavid S. Miller struct dst_entry *dst = __sk_dst_check(sk, 0); 89480d0a69fSDavid S. Miller struct inet_sock *inet = inet_sk(sk); 89580d0a69fSDavid S. Miller 89680d0a69fSDavid S. Miller if (!dst) { 89780d0a69fSDavid S. Miller dst = inet_csk_rebuild_route(sk, &inet->cork.fl); 89880d0a69fSDavid S. Miller if (!dst) 89980d0a69fSDavid S. Miller goto out; 90080d0a69fSDavid S. Miller } 9016700c270SDavid S. Miller dst->ops->update_pmtu(dst, sk, NULL, mtu); 90280d0a69fSDavid S. Miller 90380d0a69fSDavid S. 
Miller dst = __sk_dst_check(sk, 0); 90480d0a69fSDavid S. Miller if (!dst) 90580d0a69fSDavid S. Miller dst = inet_csk_rebuild_route(sk, &inet->cork.fl); 90680d0a69fSDavid S. Miller out: 90780d0a69fSDavid S. Miller return dst; 90880d0a69fSDavid S. Miller } 90980d0a69fSDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_update_pmtu); 910