13f421baaSArnaldo Carvalho de Melo /* 23f421baaSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 33f421baaSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 43f421baaSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 53f421baaSArnaldo Carvalho de Melo * 63f421baaSArnaldo Carvalho de Melo * Support for INET connection oriented protocols. 73f421baaSArnaldo Carvalho de Melo * 83f421baaSArnaldo Carvalho de Melo * Authors: See the TCP sources 93f421baaSArnaldo Carvalho de Melo * 103f421baaSArnaldo Carvalho de Melo * This program is free software; you can redistribute it and/or 113f421baaSArnaldo Carvalho de Melo * modify it under the terms of the GNU General Public License 123f421baaSArnaldo Carvalho de Melo * as published by the Free Software Foundation; either version 133f421baaSArnaldo Carvalho de Melo * 2 of the License, or(at your option) any later version. 143f421baaSArnaldo Carvalho de Melo */ 153f421baaSArnaldo Carvalho de Melo 163f421baaSArnaldo Carvalho de Melo #include <linux/module.h> 173f421baaSArnaldo Carvalho de Melo #include <linux/jhash.h> 183f421baaSArnaldo Carvalho de Melo 193f421baaSArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 203f421baaSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 213f421baaSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 223f421baaSArnaldo Carvalho de Melo #include <net/ip.h> 233f421baaSArnaldo Carvalho de Melo #include <net/route.h> 243f421baaSArnaldo Carvalho de Melo #include <net/tcp_states.h> 25a019d6feSArnaldo Carvalho de Melo #include <net/xfrm.h> 263f421baaSArnaldo Carvalho de Melo 273f421baaSArnaldo Carvalho de Melo #ifdef INET_CSK_DEBUG 283f421baaSArnaldo Carvalho de Melo const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; 293f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_timer_bug_msg); 303f421baaSArnaldo Carvalho de Melo #endif 313f421baaSArnaldo Carvalho de Melo 323f421baaSArnaldo Carvalho de Melo /* 333c689b73SEric Dumazet * This struct holds the first and last local port number. 343f421baaSArnaldo Carvalho de Melo */ 353c689b73SEric Dumazet struct local_ports sysctl_local_ports __read_mostly = { 36c4dbe54eSEric Dumazet .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock), 373c689b73SEric Dumazet .range = { 32768, 61000 }, 383c689b73SEric Dumazet }; 39227b60f5SStephen Hemminger 40e3826f1eSAmerigo Wang unsigned long *sysctl_local_reserved_ports; 41e3826f1eSAmerigo Wang EXPORT_SYMBOL(sysctl_local_reserved_ports); 42e3826f1eSAmerigo Wang 43227b60f5SStephen Hemminger void inet_get_local_port_range(int *low, int *high) 44227b60f5SStephen Hemminger { 4595c96174SEric Dumazet unsigned int seq; 4695c96174SEric Dumazet 47227b60f5SStephen Hemminger do { 483c689b73SEric Dumazet seq = read_seqbegin(&sysctl_local_ports.lock); 49227b60f5SStephen Hemminger 503c689b73SEric Dumazet *low = sysctl_local_ports.range[0]; 513c689b73SEric Dumazet *high = sysctl_local_ports.range[1]; 523c689b73SEric Dumazet } while (read_seqretry(&sysctl_local_ports.lock, seq)); 53227b60f5SStephen Hemminger } 54227b60f5SStephen Hemminger EXPORT_SYMBOL(inet_get_local_port_range); 553f421baaSArnaldo Carvalho de Melo 56971af18bSArnaldo Carvalho de Melo int inet_csk_bind_conflict(const struct sock *sk, 57aacd9289SAlex Copot const struct inet_bind_bucket *tb, bool relax) 583f421baaSArnaldo Carvalho de Melo { 593f421baaSArnaldo Carvalho de Melo struct sock *sk2; 603f421baaSArnaldo Carvalho de Melo struct hlist_node *node; 613f421baaSArnaldo Carvalho de Melo int reuse = sk->sk_reuse; 623f421baaSArnaldo Carvalho de Melo 637477fd2eSPavel Emelyanov /* 647477fd2eSPavel Emelyanov * Unlike other sk lookup places we do not check 657477fd2eSPavel Emelyanov * for sk_net here, since _all_ the socks listed 667477fd2eSPavel Emelyanov * in tb->owners list belong to the same net - the 677477fd2eSPavel Emelyanov * one this bucket belongs to. 687477fd2eSPavel Emelyanov */ 697477fd2eSPavel Emelyanov 703f421baaSArnaldo Carvalho de Melo sk_for_each_bound(sk2, node, &tb->owners) { 713f421baaSArnaldo Carvalho de Melo if (sk != sk2 && 723f421baaSArnaldo Carvalho de Melo !inet_v6_ipv6only(sk2) && 733f421baaSArnaldo Carvalho de Melo (!sk->sk_bound_dev_if || 743f421baaSArnaldo Carvalho de Melo !sk2->sk_bound_dev_if || 753f421baaSArnaldo Carvalho de Melo sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { 763f421baaSArnaldo Carvalho de Melo if (!reuse || !sk2->sk_reuse || 773e8c806aSDavid S. Miller sk2->sk_state == TCP_LISTEN) { 7868835abaSEric Dumazet const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); 7968835abaSEric Dumazet if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || 8068835abaSEric Dumazet sk2_rcv_saddr == sk_rcv_saddr(sk)) 813f421baaSArnaldo Carvalho de Melo break; 828d238b25SDavid S. Miller } 83aacd9289SAlex Copot if (!relax && reuse && sk2->sk_reuse && 84aacd9289SAlex Copot sk2->sk_state != TCP_LISTEN) { 85aacd9289SAlex Copot const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); 86aacd9289SAlex Copot 87aacd9289SAlex Copot if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || 88aacd9289SAlex Copot sk2_rcv_saddr == sk_rcv_saddr(sk)) 89aacd9289SAlex Copot break; 90aacd9289SAlex Copot } 913f421baaSArnaldo Carvalho de Melo } 923f421baaSArnaldo Carvalho de Melo } 933f421baaSArnaldo Carvalho de Melo return node != NULL; 943f421baaSArnaldo Carvalho de Melo } 95971af18bSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); 96971af18bSArnaldo Carvalho de Melo 973f421baaSArnaldo Carvalho de Melo /* Obtain a reference to a local port for the given sock, 983f421baaSArnaldo Carvalho de Melo * if snum is zero it means select any available local port. 993f421baaSArnaldo Carvalho de Melo */ 100ab1e0a13SArnaldo Carvalho de Melo int inet_csk_get_port(struct sock *sk, unsigned short snum) 1013f421baaSArnaldo Carvalho de Melo { 10239d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 1033f421baaSArnaldo Carvalho de Melo struct inet_bind_hashbucket *head; 1043f421baaSArnaldo Carvalho de Melo struct hlist_node *node; 1053f421baaSArnaldo Carvalho de Melo struct inet_bind_bucket *tb; 106a9d8f911SEvgeniy Polyakov int ret, attempts = 5; 1073b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(sk); 108a9d8f911SEvgeniy Polyakov int smallest_size = -1, smallest_rover; 1093f421baaSArnaldo Carvalho de Melo 1103f421baaSArnaldo Carvalho de Melo local_bh_disable(); 1113f421baaSArnaldo Carvalho de Melo if (!snum) { 112227b60f5SStephen Hemminger int remaining, rover, low, high; 113227b60f5SStephen Hemminger 114a9d8f911SEvgeniy Polyakov again: 115227b60f5SStephen Hemminger inet_get_local_port_range(&low, &high); 116a25de534SAnton Arapov remaining = (high - low) + 1; 117a9d8f911SEvgeniy Polyakov smallest_rover = rover = net_random() % remaining + low; 1183f421baaSArnaldo Carvalho de Melo 119a9d8f911SEvgeniy Polyakov smallest_size = -1; 1203f421baaSArnaldo Carvalho de Melo do { 121e3826f1eSAmerigo Wang if (inet_is_reserved_local_port(rover)) 122e3826f1eSAmerigo Wang goto next_nolock; 1237f635ab7SPavel Emelyanov head = &hashinfo->bhash[inet_bhashfn(net, rover, 1247f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 1253f421baaSArnaldo Carvalho de Melo spin_lock(&head->lock); 1263f421baaSArnaldo Carvalho de Melo inet_bind_bucket_for_each(tb, node, &head->chain) 12709ad9bc7SOctavian Purdila if (net_eq(ib_net(tb), net) && tb->port == rover) { 128a9d8f911SEvgeniy Polyakov if (tb->fastreuse > 0 && 129a9d8f911SEvgeniy Polyakov sk->sk_reuse && 130a9d8f911SEvgeniy Polyakov sk->sk_state != TCP_LISTEN && 131a9d8f911SEvgeniy Polyakov (tb->num_owners < smallest_size || smallest_size == -1)) { 132a9d8f911SEvgeniy Polyakov smallest_size = tb->num_owners; 133a9d8f911SEvgeniy Polyakov smallest_rover = rover; 134aacd9289SAlex Copot if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && 135aacd9289SAlex Copot !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { 136a9d8f911SEvgeniy Polyakov snum = smallest_rover; 137fddb7b57SFlavio Leitner goto tb_found; 138a9d8f911SEvgeniy Polyakov } 139a9d8f911SEvgeniy Polyakov } 140aacd9289SAlex Copot if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { 1412b05ad33SFlavio Leitner snum = rover; 142fddb7b57SFlavio Leitner goto tb_found; 1432b05ad33SFlavio Leitner } 1443f421baaSArnaldo Carvalho de Melo goto next; 145a9d8f911SEvgeniy Polyakov } 1463f421baaSArnaldo Carvalho de Melo break; 1473f421baaSArnaldo Carvalho de Melo next: 1483f421baaSArnaldo Carvalho de Melo spin_unlock(&head->lock); 149e3826f1eSAmerigo Wang next_nolock: 1506df71634SStephen Hemminger if (++rover > high) 1516df71634SStephen Hemminger rover = low; 1523f421baaSArnaldo Carvalho de Melo } while (--remaining > 0); 1533f421baaSArnaldo Carvalho de Melo 1543f421baaSArnaldo Carvalho de Melo /* Exhausted local port range during search? It is not 1553f421baaSArnaldo Carvalho de Melo * possible for us to be holding one of the bind hash 1563f421baaSArnaldo Carvalho de Melo * locks if this test triggers, because if 'remaining' 1573f421baaSArnaldo Carvalho de Melo * drops to zero, we broke out of the do/while loop at 1583f421baaSArnaldo Carvalho de Melo * the top level, not from the 'break;' statement. 1593f421baaSArnaldo Carvalho de Melo */ 1603f421baaSArnaldo Carvalho de Melo ret = 1; 161a9d8f911SEvgeniy Polyakov if (remaining <= 0) { 162a9d8f911SEvgeniy Polyakov if (smallest_size != -1) { 163a9d8f911SEvgeniy Polyakov snum = smallest_rover; 164a9d8f911SEvgeniy Polyakov goto have_snum; 165a9d8f911SEvgeniy Polyakov } 1663f421baaSArnaldo Carvalho de Melo goto fail; 167a9d8f911SEvgeniy Polyakov } 1683f421baaSArnaldo Carvalho de Melo /* OK, here is the one we will use. HEAD is 1693f421baaSArnaldo Carvalho de Melo * non-NULL and we hold it's mutex. 1703f421baaSArnaldo Carvalho de Melo */ 1713f421baaSArnaldo Carvalho de Melo snum = rover; 1723f421baaSArnaldo Carvalho de Melo } else { 173a9d8f911SEvgeniy Polyakov have_snum: 1747f635ab7SPavel Emelyanov head = &hashinfo->bhash[inet_bhashfn(net, snum, 1757f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 1763f421baaSArnaldo Carvalho de Melo spin_lock(&head->lock); 1773f421baaSArnaldo Carvalho de Melo inet_bind_bucket_for_each(tb, node, &head->chain) 17809ad9bc7SOctavian Purdila if (net_eq(ib_net(tb), net) && tb->port == snum) 1793f421baaSArnaldo Carvalho de Melo goto tb_found; 1803f421baaSArnaldo Carvalho de Melo } 1813f421baaSArnaldo Carvalho de Melo tb = NULL; 1823f421baaSArnaldo Carvalho de Melo goto tb_not_found; 1833f421baaSArnaldo Carvalho de Melo tb_found: 1843f421baaSArnaldo Carvalho de Melo if (!hlist_empty(&tb->owners)) { 1854a17fd52SPavel Emelyanov if (sk->sk_reuse == SK_FORCE_REUSE) 1864a17fd52SPavel Emelyanov goto success; 1874a17fd52SPavel Emelyanov 1883f421baaSArnaldo Carvalho de Melo if (tb->fastreuse > 0 && 189a9d8f911SEvgeniy Polyakov sk->sk_reuse && sk->sk_state != TCP_LISTEN && 190a9d8f911SEvgeniy Polyakov smallest_size == -1) { 1913f421baaSArnaldo Carvalho de Melo goto success; 1923f421baaSArnaldo Carvalho de Melo } else { 1933f421baaSArnaldo Carvalho de Melo ret = 1; 194aacd9289SAlex Copot if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { 1955add3009SStephen Hemminger if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && 1965add3009SStephen Hemminger smallest_size != -1 && --attempts >= 0) { 197a9d8f911SEvgeniy Polyakov spin_unlock(&head->lock); 198a9d8f911SEvgeniy Polyakov goto again; 199a9d8f911SEvgeniy Polyakov } 200aacd9289SAlex Copot 2013f421baaSArnaldo Carvalho de Melo goto fail_unlock; 2023f421baaSArnaldo Carvalho de Melo } 2033f421baaSArnaldo Carvalho de Melo } 204a9d8f911SEvgeniy Polyakov } 2053f421baaSArnaldo Carvalho de Melo tb_not_found: 2063f421baaSArnaldo Carvalho de Melo ret = 1; 207941b1d22SPavel Emelyanov if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, 208941b1d22SPavel Emelyanov net, head, snum)) == NULL) 2093f421baaSArnaldo Carvalho de Melo goto fail_unlock; 2103f421baaSArnaldo Carvalho de Melo if (hlist_empty(&tb->owners)) { 2113f421baaSArnaldo Carvalho de Melo if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) 2123f421baaSArnaldo Carvalho de Melo tb->fastreuse = 1; 2133f421baaSArnaldo Carvalho de Melo else 2143f421baaSArnaldo Carvalho de Melo tb->fastreuse = 0; 2153f421baaSArnaldo Carvalho de Melo } else if (tb->fastreuse && 2163f421baaSArnaldo Carvalho de Melo (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) 2173f421baaSArnaldo Carvalho de Melo tb->fastreuse = 0; 2183f421baaSArnaldo Carvalho de Melo success: 2193f421baaSArnaldo Carvalho de Melo if (!inet_csk(sk)->icsk_bind_hash) 2203f421baaSArnaldo Carvalho de Melo inet_bind_hash(sk, tb, snum); 221547b792cSIlpo Järvinen WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 2223f421baaSArnaldo Carvalho de Melo ret = 0; 2233f421baaSArnaldo Carvalho de Melo 2243f421baaSArnaldo Carvalho de Melo fail_unlock: 2253f421baaSArnaldo Carvalho de Melo spin_unlock(&head->lock); 2263f421baaSArnaldo Carvalho de Melo fail: 2273f421baaSArnaldo Carvalho de Melo local_bh_enable(); 2283f421baaSArnaldo Carvalho de Melo return ret; 2293f421baaSArnaldo Carvalho de Melo } 2303f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_get_port); 2313f421baaSArnaldo Carvalho de Melo 2323f421baaSArnaldo Carvalho de Melo /* 2333f421baaSArnaldo Carvalho de Melo * Wait for an incoming connection, avoid race conditions. This must be called 2343f421baaSArnaldo Carvalho de Melo * with the socket locked. 2353f421baaSArnaldo Carvalho de Melo */ 2363f421baaSArnaldo Carvalho de Melo static int inet_csk_wait_for_connect(struct sock *sk, long timeo) 2373f421baaSArnaldo Carvalho de Melo { 2383f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 2393f421baaSArnaldo Carvalho de Melo DEFINE_WAIT(wait); 2403f421baaSArnaldo Carvalho de Melo int err; 2413f421baaSArnaldo Carvalho de Melo 2423f421baaSArnaldo Carvalho de Melo /* 2433f421baaSArnaldo Carvalho de Melo * True wake-one mechanism for incoming connections: only 2443f421baaSArnaldo Carvalho de Melo * one process gets woken up, not the 'whole herd'. 2453f421baaSArnaldo Carvalho de Melo * Since we do not 'race & poll' for established sockets 2463f421baaSArnaldo Carvalho de Melo * anymore, the common case will execute the loop only once. 2473f421baaSArnaldo Carvalho de Melo * 2483f421baaSArnaldo Carvalho de Melo * Subtle issue: "add_wait_queue_exclusive()" will be added 2493f421baaSArnaldo Carvalho de Melo * after any current non-exclusive waiters, and we know that 2503f421baaSArnaldo Carvalho de Melo * it will always _stay_ after any new non-exclusive waiters 2513f421baaSArnaldo Carvalho de Melo * because all non-exclusive waiters are added at the 2523f421baaSArnaldo Carvalho de Melo * beginning of the wait-queue. As such, it's ok to "drop" 2533f421baaSArnaldo Carvalho de Melo * our exclusiveness temporarily when we get woken up without 2543f421baaSArnaldo Carvalho de Melo * having to remove and re-insert us on the wait queue. 2553f421baaSArnaldo Carvalho de Melo */ 2563f421baaSArnaldo Carvalho de Melo for (;;) { 257aa395145SEric Dumazet prepare_to_wait_exclusive(sk_sleep(sk), &wait, 2583f421baaSArnaldo Carvalho de Melo TASK_INTERRUPTIBLE); 2593f421baaSArnaldo Carvalho de Melo release_sock(sk); 2603f421baaSArnaldo Carvalho de Melo if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 2613f421baaSArnaldo Carvalho de Melo timeo = schedule_timeout(timeo); 2623f421baaSArnaldo Carvalho de Melo lock_sock(sk); 2633f421baaSArnaldo Carvalho de Melo err = 0; 2643f421baaSArnaldo Carvalho de Melo if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) 2653f421baaSArnaldo Carvalho de Melo break; 2663f421baaSArnaldo Carvalho de Melo err = -EINVAL; 2673f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 2683f421baaSArnaldo Carvalho de Melo break; 2693f421baaSArnaldo Carvalho de Melo err = sock_intr_errno(timeo); 2703f421baaSArnaldo Carvalho de Melo if (signal_pending(current)) 2713f421baaSArnaldo Carvalho de Melo break; 2723f421baaSArnaldo Carvalho de Melo err = -EAGAIN; 2733f421baaSArnaldo Carvalho de Melo if (!timeo) 2743f421baaSArnaldo Carvalho de Melo break; 2753f421baaSArnaldo Carvalho de Melo } 276aa395145SEric Dumazet finish_wait(sk_sleep(sk), &wait); 2773f421baaSArnaldo Carvalho de Melo return err; 2783f421baaSArnaldo Carvalho de Melo } 2793f421baaSArnaldo Carvalho de Melo 2803f421baaSArnaldo Carvalho de Melo /* 2813f421baaSArnaldo Carvalho de Melo * This will accept the next outstanding connection. 2823f421baaSArnaldo Carvalho de Melo */ 2833f421baaSArnaldo Carvalho de Melo struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) 2843f421baaSArnaldo Carvalho de Melo { 2853f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 2863f421baaSArnaldo Carvalho de Melo struct sock *newsk; 2873f421baaSArnaldo Carvalho de Melo int error; 2883f421baaSArnaldo Carvalho de Melo 2893f421baaSArnaldo Carvalho de Melo lock_sock(sk); 2903f421baaSArnaldo Carvalho de Melo 2913f421baaSArnaldo Carvalho de Melo /* We need to make sure that this socket is listening, 2923f421baaSArnaldo Carvalho de Melo * and that it has something pending. 2933f421baaSArnaldo Carvalho de Melo */ 2943f421baaSArnaldo Carvalho de Melo error = -EINVAL; 2953f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 2963f421baaSArnaldo Carvalho de Melo goto out_err; 2973f421baaSArnaldo Carvalho de Melo 2983f421baaSArnaldo Carvalho de Melo /* Find already established connection */ 2993f421baaSArnaldo Carvalho de Melo if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { 3003f421baaSArnaldo Carvalho de Melo long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 3013f421baaSArnaldo Carvalho de Melo 3023f421baaSArnaldo Carvalho de Melo /* If this is a non blocking socket don't sleep */ 3033f421baaSArnaldo Carvalho de Melo error = -EAGAIN; 3043f421baaSArnaldo Carvalho de Melo if (!timeo) 3053f421baaSArnaldo Carvalho de Melo goto out_err; 3063f421baaSArnaldo Carvalho de Melo 3073f421baaSArnaldo Carvalho de Melo error = inet_csk_wait_for_connect(sk, timeo); 3083f421baaSArnaldo Carvalho de Melo if (error) 3093f421baaSArnaldo Carvalho de Melo goto out_err; 3103f421baaSArnaldo Carvalho de Melo } 3113f421baaSArnaldo Carvalho de Melo 3123f421baaSArnaldo Carvalho de Melo newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); 313547b792cSIlpo Järvinen WARN_ON(newsk->sk_state == TCP_SYN_RECV); 3143f421baaSArnaldo Carvalho de Melo out: 3153f421baaSArnaldo Carvalho de Melo release_sock(sk); 3163f421baaSArnaldo Carvalho de Melo return newsk; 3173f421baaSArnaldo Carvalho de Melo out_err: 3183f421baaSArnaldo Carvalho de Melo newsk = NULL; 3193f421baaSArnaldo Carvalho de Melo *err = error; 3203f421baaSArnaldo Carvalho de Melo goto out; 3213f421baaSArnaldo Carvalho de Melo } 3223f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_accept); 3233f421baaSArnaldo Carvalho de Melo 3243f421baaSArnaldo Carvalho de Melo /* 3253f421baaSArnaldo Carvalho de Melo * Using different timers for retransmit, delayed acks and probes 3263f421baaSArnaldo Carvalho de Melo * We may wish use just one timer maintaining a list of expire jiffies 3273f421baaSArnaldo Carvalho de Melo * to optimize. 3283f421baaSArnaldo Carvalho de Melo */ 3293f421baaSArnaldo Carvalho de Melo void inet_csk_init_xmit_timers(struct sock *sk, 3303f421baaSArnaldo Carvalho de Melo void (*retransmit_handler)(unsigned long), 3313f421baaSArnaldo Carvalho de Melo void (*delack_handler)(unsigned long), 3323f421baaSArnaldo Carvalho de Melo void (*keepalive_handler)(unsigned long)) 3333f421baaSArnaldo Carvalho de Melo { 3343f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3353f421baaSArnaldo Carvalho de Melo 336b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, 337b24b8a24SPavel Emelyanov (unsigned long)sk); 338b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_delack_timer, delack_handler, 339b24b8a24SPavel Emelyanov (unsigned long)sk); 340b24b8a24SPavel Emelyanov setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); 3413f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = 0; 3423f421baaSArnaldo Carvalho de Melo } 3433f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_init_xmit_timers); 3443f421baaSArnaldo Carvalho de Melo 3453f421baaSArnaldo Carvalho de Melo void inet_csk_clear_xmit_timers(struct sock *sk) 3463f421baaSArnaldo Carvalho de Melo { 3473f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3483f421baaSArnaldo Carvalho de Melo 3493f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; 3503f421baaSArnaldo Carvalho de Melo 3513f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 3523f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_delack_timer); 3533f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 3543f421baaSArnaldo Carvalho de Melo } 3553f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 3563f421baaSArnaldo Carvalho de Melo 3573f421baaSArnaldo Carvalho de Melo void inet_csk_delete_keepalive_timer(struct sock *sk) 3583f421baaSArnaldo Carvalho de Melo { 3593f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 3603f421baaSArnaldo Carvalho de Melo } 3613f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 3623f421baaSArnaldo Carvalho de Melo 3633f421baaSArnaldo Carvalho de Melo void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 3643f421baaSArnaldo Carvalho de Melo { 3653f421baaSArnaldo Carvalho de Melo sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 3663f421baaSArnaldo Carvalho de Melo } 3673f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 3683f421baaSArnaldo Carvalho de Melo 3693f421baaSArnaldo Carvalho de Melo struct dst_entry *inet_csk_route_req(struct sock *sk, 3706bd023f3SDavid S. Miller struct flowi4 *fl4, 3717586ecebSEric Dumazet const struct request_sock *req, 3727586ecebSEric Dumazet bool nocache) 3733f421baaSArnaldo Carvalho de Melo { 3743f421baaSArnaldo Carvalho de Melo struct rtable *rt; 3753f421baaSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 376f6d8bd05SEric Dumazet struct ip_options_rcu *opt = inet_rsk(req)->opt; 37784a3aa00SPavel Emelyanov struct net *net = sock_net(sk); 378*3e12939aSDavid S. Miller int flags = inet_sk_flowi_flags(sk); 3793f421baaSArnaldo Carvalho de Melo 3807586ecebSEric Dumazet if (nocache) 3817586ecebSEric Dumazet flags |= FLOWI_FLAG_RT_NOCACHE; 3826bd023f3SDavid S. Miller flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 383e79d9bc7SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 3847433819aSEric Dumazet sk->sk_protocol, 3857586ecebSEric Dumazet flags, 386f6d8bd05SEric Dumazet (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, 387e79d9bc7SDavid S. Miller ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); 3886bd023f3SDavid S. Miller security_req_classify_flow(req, flowi4_to_flowi(fl4)); 3896bd023f3SDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 390b23dd4feSDavid S. Miller if (IS_ERR(rt)) 391857a6e0aSIlpo Järvinen goto no_route; 3926bd023f3SDavid S. Miller if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) 393857a6e0aSIlpo Järvinen goto route_err; 394d8d1f30bSChangli Gao return &rt->dst; 395857a6e0aSIlpo Järvinen 396857a6e0aSIlpo Järvinen route_err: 397857a6e0aSIlpo Järvinen ip_rt_put(rt); 398857a6e0aSIlpo Järvinen no_route: 399857a6e0aSIlpo Järvinen IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 400857a6e0aSIlpo Järvinen return NULL; 4013f421baaSArnaldo Carvalho de Melo } 4023f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_route_req); 4033f421baaSArnaldo Carvalho de Melo 40477357a95SDavid S. Miller struct dst_entry *inet_csk_route_child_sock(struct sock *sk, 40577357a95SDavid S. Miller struct sock *newsk, 40677357a95SDavid S. Miller const struct request_sock *req) 40777357a95SDavid S. Miller { 40877357a95SDavid S. Miller const struct inet_request_sock *ireq = inet_rsk(req); 40977357a95SDavid S. Miller struct inet_sock *newinet = inet_sk(newsk); 41077357a95SDavid S. Miller struct ip_options_rcu *opt = ireq->opt; 41177357a95SDavid S. Miller struct net *net = sock_net(sk); 41277357a95SDavid S. Miller struct flowi4 *fl4; 41377357a95SDavid S. Miller struct rtable *rt; 41477357a95SDavid S. Miller 41577357a95SDavid S. Miller fl4 = &newinet->cork.fl.u.ip4; 41677357a95SDavid S. Miller flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 41777357a95SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 41877357a95SDavid S. Miller sk->sk_protocol, inet_sk_flowi_flags(sk), 41977357a95SDavid S. Miller (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, 42077357a95SDavid S. Miller ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); 42177357a95SDavid S. Miller security_req_classify_flow(req, flowi4_to_flowi(fl4)); 42277357a95SDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 42377357a95SDavid S. Miller if (IS_ERR(rt)) 42477357a95SDavid S. Miller goto no_route; 42577357a95SDavid S. Miller if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) 42677357a95SDavid S. Miller goto route_err; 42777357a95SDavid S. Miller return &rt->dst; 42877357a95SDavid S. Miller 42977357a95SDavid S. Miller route_err: 43077357a95SDavid S. Miller ip_rt_put(rt); 43177357a95SDavid S. Miller no_route: 43277357a95SDavid S. Miller IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 43377357a95SDavid S. Miller return NULL; 43477357a95SDavid S. Miller } 43577357a95SDavid S. Miller EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); 43677357a95SDavid S. Miller 4376b72977bSAl Viro static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 43872a3effaSEric Dumazet const u32 rnd, const u32 synq_hsize) 4393f421baaSArnaldo Carvalho de Melo { 4406b72977bSAl Viro return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); 4413f421baaSArnaldo Carvalho de Melo } 4423f421baaSArnaldo Carvalho de Melo 443dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 4443f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) ((fam) == AF_INET) 4453f421baaSArnaldo Carvalho de Melo #else 4463f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) 1 4473f421baaSArnaldo Carvalho de Melo #endif 4483f421baaSArnaldo Carvalho de Melo 4493f421baaSArnaldo Carvalho de Melo struct request_sock *inet_csk_search_req(const struct sock *sk, 4503f421baaSArnaldo Carvalho de Melo struct request_sock ***prevp, 4516b72977bSAl Viro const __be16 rport, const __be32 raddr, 4527f25afbbSAl Viro const __be32 laddr) 4533f421baaSArnaldo Carvalho de Melo { 4543f421baaSArnaldo Carvalho de Melo const struct inet_connection_sock *icsk = inet_csk(sk); 4553f421baaSArnaldo Carvalho de Melo struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 4563f421baaSArnaldo Carvalho de Melo struct request_sock *req, **prev; 4573f421baaSArnaldo Carvalho de Melo 4583f421baaSArnaldo Carvalho de Melo for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, 4593f421baaSArnaldo Carvalho de Melo lopt->nr_table_entries)]; 4603f421baaSArnaldo Carvalho de Melo (req = *prev) != NULL; 4613f421baaSArnaldo Carvalho de Melo prev = &req->dl_next) { 4623f421baaSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 4633f421baaSArnaldo Carvalho de Melo 4643f421baaSArnaldo Carvalho de Melo if (ireq->rmt_port == rport && 4653f421baaSArnaldo Carvalho de Melo ireq->rmt_addr == raddr && 4663f421baaSArnaldo Carvalho de Melo ireq->loc_addr == laddr && 4673f421baaSArnaldo Carvalho de Melo AF_INET_FAMILY(req->rsk_ops->family)) { 468547b792cSIlpo Järvinen WARN_ON(req->sk); 4693f421baaSArnaldo Carvalho de Melo *prevp = prev; 4703f421baaSArnaldo Carvalho de Melo break; 4713f421baaSArnaldo Carvalho de Melo } 4723f421baaSArnaldo Carvalho de Melo } 4733f421baaSArnaldo Carvalho de Melo 4743f421baaSArnaldo Carvalho de Melo return req; 4753f421baaSArnaldo Carvalho de Melo } 4763f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_search_req); 4773f421baaSArnaldo Carvalho de Melo 4783f421baaSArnaldo Carvalho de Melo void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 479c2977c22SArnaldo Carvalho de Melo unsigned long timeout) 4803f421baaSArnaldo Carvalho de Melo { 4813f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 4823f421baaSArnaldo Carvalho de Melo struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 4833f421baaSArnaldo Carvalho de Melo const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, 4843f421baaSArnaldo Carvalho de Melo lopt->hash_rnd, lopt->nr_table_entries); 4853f421baaSArnaldo Carvalho de Melo 4863f421baaSArnaldo Carvalho de Melo reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 4873f421baaSArnaldo Carvalho de Melo inet_csk_reqsk_queue_added(sk, timeout); 4883f421baaSArnaldo Carvalho de Melo } 4894bc2f18bSEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 4903f421baaSArnaldo Carvalho de Melo 491a019d6feSArnaldo Carvalho de Melo /* Only thing we need from tcp.h */ 492a019d6feSArnaldo Carvalho de Melo extern int sysctl_tcp_synack_retries; 493a019d6feSArnaldo Carvalho de Melo 4949f1d2604SArnaldo Carvalho de Melo 4950c3d79bcSJulian Anastasov /* Decide when to expire the request and when to resend SYN-ACK */ 4960c3d79bcSJulian Anastasov static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 4970c3d79bcSJulian Anastasov const int max_retries, 4980c3d79bcSJulian Anastasov const u8 rskq_defer_accept, 4990c3d79bcSJulian Anastasov int *expire, int *resend) 5000c3d79bcSJulian Anastasov { 5010c3d79bcSJulian Anastasov if (!rskq_defer_accept) { 5020c3d79bcSJulian Anastasov *expire = req->retrans >= thresh; 5030c3d79bcSJulian Anastasov *resend = 1; 5040c3d79bcSJulian Anastasov return; 5050c3d79bcSJulian Anastasov } 5060c3d79bcSJulian Anastasov *expire = req->retrans >= thresh && 5070c3d79bcSJulian Anastasov (!inet_rsk(req)->acked || req->retrans >= max_retries); 5080c3d79bcSJulian Anastasov /* 5090c3d79bcSJulian Anastasov * Do not resend while waiting for data after ACK, 5100c3d79bcSJulian Anastasov * start to resend on end of deferring period to give 5110c3d79bcSJulian Anastasov * last chance for data or ACK to create established socket. 5120c3d79bcSJulian Anastasov */ 5130c3d79bcSJulian Anastasov *resend = !inet_rsk(req)->acked || 5140c3d79bcSJulian Anastasov req->retrans >= rskq_defer_accept - 1; 5150c3d79bcSJulian Anastasov } 5160c3d79bcSJulian Anastasov 517a019d6feSArnaldo Carvalho de Melo void inet_csk_reqsk_queue_prune(struct sock *parent, 518a019d6feSArnaldo Carvalho de Melo const unsigned long interval, 519a019d6feSArnaldo Carvalho de Melo const unsigned long timeout, 520a019d6feSArnaldo Carvalho de Melo const unsigned long max_rto) 521a019d6feSArnaldo Carvalho de Melo { 522a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(parent); 523a019d6feSArnaldo Carvalho de Melo struct request_sock_queue *queue = &icsk->icsk_accept_queue; 524a019d6feSArnaldo Carvalho de Melo struct listen_sock *lopt = queue->listen_opt; 525ec0a1966SDavid S. Miller int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 526ec0a1966SDavid S. Miller int thresh = max_retries; 527a019d6feSArnaldo Carvalho de Melo unsigned long now = jiffies; 528a019d6feSArnaldo Carvalho de Melo struct request_sock **reqp, *req; 529a019d6feSArnaldo Carvalho de Melo int i, budget; 530a019d6feSArnaldo Carvalho de Melo 531a019d6feSArnaldo Carvalho de Melo if (lopt == NULL || lopt->qlen == 0) 532a019d6feSArnaldo Carvalho de Melo return; 533a019d6feSArnaldo Carvalho de Melo 534a019d6feSArnaldo Carvalho de Melo /* Normally all the openreqs are young and become mature 535a019d6feSArnaldo Carvalho de Melo * (i.e. converted to established socket) for first timeout. 536fd4f2ceaSEric Dumazet * If synack was not acknowledged for 1 second, it means 537a019d6feSArnaldo Carvalho de Melo * one of the following things: synack was lost, ack was lost, 538a019d6feSArnaldo Carvalho de Melo * rtt is high or nobody planned to ack (i.e. synflood). 539a019d6feSArnaldo Carvalho de Melo * When server is a bit loaded, queue is populated with old 540a019d6feSArnaldo Carvalho de Melo * open requests, reducing effective size of queue. 541a019d6feSArnaldo Carvalho de Melo * When server is well loaded, queue size reduces to zero 542a019d6feSArnaldo Carvalho de Melo * after several minutes of work. It is not synflood, 543a019d6feSArnaldo Carvalho de Melo * it is normal operation. The solution is pruning 544a019d6feSArnaldo Carvalho de Melo * too old entries overriding normal timeout, when 545a019d6feSArnaldo Carvalho de Melo * situation becomes dangerous. 546a019d6feSArnaldo Carvalho de Melo * 547a019d6feSArnaldo Carvalho de Melo * Essentially, we reserve half of room for young 548a019d6feSArnaldo Carvalho de Melo * embrions; and abort old ones without pity, if old 549a019d6feSArnaldo Carvalho de Melo * ones are about to clog our table. 550a019d6feSArnaldo Carvalho de Melo */ 551a019d6feSArnaldo Carvalho de Melo if (lopt->qlen>>(lopt->max_qlen_log-1)) { 552a019d6feSArnaldo Carvalho de Melo int young = (lopt->qlen_young<<1); 553a019d6feSArnaldo Carvalho de Melo 554a019d6feSArnaldo Carvalho de Melo while (thresh > 2) { 555a019d6feSArnaldo Carvalho de Melo if (lopt->qlen < young) 556a019d6feSArnaldo Carvalho de Melo break; 557a019d6feSArnaldo Carvalho de Melo thresh--; 558a019d6feSArnaldo Carvalho de Melo young <<= 1; 559a019d6feSArnaldo Carvalho de Melo } 560a019d6feSArnaldo Carvalho de Melo } 561a019d6feSArnaldo Carvalho de Melo 562ec0a1966SDavid S. Miller if (queue->rskq_defer_accept) 563ec0a1966SDavid S. Miller max_retries = queue->rskq_defer_accept; 564ec0a1966SDavid S. Miller 565a019d6feSArnaldo Carvalho de Melo budget = 2 * (lopt->nr_table_entries / (timeout / interval)); 566a019d6feSArnaldo Carvalho de Melo i = lopt->clock_hand; 567a019d6feSArnaldo Carvalho de Melo 568a019d6feSArnaldo Carvalho de Melo do { 569a019d6feSArnaldo Carvalho de Melo reqp=&lopt->syn_table[i]; 570a019d6feSArnaldo Carvalho de Melo while ((req = *reqp) != NULL) { 571a019d6feSArnaldo Carvalho de Melo if (time_after_eq(now, req->expires)) { 5720c3d79bcSJulian Anastasov int expire = 0, resend = 0; 5730c3d79bcSJulian Anastasov 5740c3d79bcSJulian Anastasov syn_ack_recalc(req, thresh, max_retries, 5750c3d79bcSJulian Anastasov queue->rskq_defer_accept, 5760c3d79bcSJulian Anastasov &expire, &resend); 57772659eccSOctavian Purdila req->rsk_ops->syn_ack_timeout(parent, req); 5780c3d79bcSJulian Anastasov if (!expire && 5790c3d79bcSJulian Anastasov (!resend || 580e6b4d113SWilliam Allen Simpson !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || 5810c3d79bcSJulian Anastasov inet_rsk(req)->acked)) { 582a019d6feSArnaldo Carvalho de Melo unsigned long timeo; 583a019d6feSArnaldo Carvalho de Melo 584a019d6feSArnaldo Carvalho de Melo if (req->retrans++ == 0) 585a019d6feSArnaldo Carvalho de Melo lopt->qlen_young--; 586a019d6feSArnaldo Carvalho de Melo timeo = min((timeout << req->retrans), max_rto); 587a019d6feSArnaldo Carvalho de Melo req->expires = now + timeo; 588a019d6feSArnaldo Carvalho de Melo reqp = &req->dl_next; 589a019d6feSArnaldo Carvalho de Melo continue; 590a019d6feSArnaldo Carvalho de Melo } 591a019d6feSArnaldo Carvalho de Melo 592a019d6feSArnaldo Carvalho de Melo /* Drop this request */ 593a019d6feSArnaldo Carvalho de Melo inet_csk_reqsk_queue_unlink(parent, req, reqp); 594a019d6feSArnaldo Carvalho de Melo reqsk_queue_removed(queue, req); 595a019d6feSArnaldo Carvalho de Melo reqsk_free(req); 596a019d6feSArnaldo Carvalho de Melo continue; 597a019d6feSArnaldo Carvalho de Melo } 598a019d6feSArnaldo Carvalho de Melo reqp = &req->dl_next; 599a019d6feSArnaldo Carvalho de Melo } 600a019d6feSArnaldo Carvalho de Melo 601a019d6feSArnaldo Carvalho de Melo i = (i + 1) & (lopt->nr_table_entries - 1); 602a019d6feSArnaldo Carvalho de Melo 603a019d6feSArnaldo Carvalho de Melo } while (--budget > 0); 604a019d6feSArnaldo Carvalho de Melo 605a019d6feSArnaldo Carvalho de Melo lopt->clock_hand = i; 606a019d6feSArnaldo Carvalho de Melo 607a019d6feSArnaldo Carvalho de Melo if (lopt->qlen) 608a019d6feSArnaldo Carvalho de Melo inet_csk_reset_keepalive_timer(parent, interval); 609a019d6feSArnaldo Carvalho de Melo } 610a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); 611a019d6feSArnaldo Carvalho de Melo 612e56c57d0SEric Dumazet /** 613e56c57d0SEric Dumazet * inet_csk_clone_lock - clone an inet socket, and lock its clone 614e56c57d0SEric Dumazet * @sk: the socket to clone 615e56c57d0SEric Dumazet * @req: request_sock 616e56c57d0SEric Dumazet * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 617e56c57d0SEric Dumazet * 618e56c57d0SEric Dumazet * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) 619e56c57d0SEric Dumazet */ 620e56c57d0SEric Dumazet struct sock *inet_csk_clone_lock(const struct sock *sk, 621e56c57d0SEric Dumazet const struct request_sock *req, 622dd0fc66fSAl Viro const gfp_t priority) 6239f1d2604SArnaldo Carvalho de Melo { 624e56c57d0SEric Dumazet struct sock *newsk = sk_clone_lock(sk, priority); 6259f1d2604SArnaldo Carvalho de Melo 6269f1d2604SArnaldo Carvalho de Melo if (newsk != NULL) { 6279f1d2604SArnaldo Carvalho de Melo struct inet_connection_sock *newicsk = inet_csk(newsk); 6289f1d2604SArnaldo Carvalho de Melo 6299f1d2604SArnaldo Carvalho de Melo newsk->sk_state = TCP_SYN_RECV; 6309f1d2604SArnaldo Carvalho de Melo newicsk->icsk_bind_hash = NULL; 6319f1d2604SArnaldo Carvalho de Melo 632c720c7e8SEric Dumazet inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port; 633c720c7e8SEric Dumazet inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port); 634c720c7e8SEric Dumazet inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; 6359f1d2604SArnaldo Carvalho de Melo newsk->sk_write_space = sk_stream_write_space; 6369f1d2604SArnaldo Carvalho de Melo 6379f1d2604SArnaldo Carvalho de Melo newicsk->icsk_retransmits = 0; 6389f1d2604SArnaldo Carvalho de Melo newicsk->icsk_backoff = 0; 6396687e988SArnaldo Carvalho de Melo newicsk->icsk_probes_out = 0; 6409f1d2604SArnaldo Carvalho de Melo 6419f1d2604SArnaldo Carvalho de Melo /* Deinitialize accept_queue to trap illegal accesses. */ 6429f1d2604SArnaldo Carvalho de Melo memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); 6434237c75cSVenkat Yekkirala 6444237c75cSVenkat Yekkirala security_inet_csk_clone(newsk, req); 6459f1d2604SArnaldo Carvalho de Melo } 6469f1d2604SArnaldo Carvalho de Melo return newsk; 6479f1d2604SArnaldo Carvalho de Melo } 648e56c57d0SEric Dumazet EXPORT_SYMBOL_GPL(inet_csk_clone_lock); 649a019d6feSArnaldo Carvalho de Melo 650a019d6feSArnaldo Carvalho de Melo /* 651a019d6feSArnaldo Carvalho de Melo * At this point, there should be no process reference to this 652a019d6feSArnaldo Carvalho de Melo * socket, and thus no user references at all. Therefore we 653a019d6feSArnaldo Carvalho de Melo * can assume the socket waitqueue is inactive and nobody will 654a019d6feSArnaldo Carvalho de Melo * try to jump onto it. 655a019d6feSArnaldo Carvalho de Melo */ 656a019d6feSArnaldo Carvalho de Melo void inet_csk_destroy_sock(struct sock *sk) 657a019d6feSArnaldo Carvalho de Melo { 658547b792cSIlpo Järvinen WARN_ON(sk->sk_state != TCP_CLOSE); 659547b792cSIlpo Järvinen WARN_ON(!sock_flag(sk, SOCK_DEAD)); 660a019d6feSArnaldo Carvalho de Melo 661a019d6feSArnaldo Carvalho de Melo /* It cannot be in hash table! */ 662547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk)); 663a019d6feSArnaldo Carvalho de Melo 664c720c7e8SEric Dumazet /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ 665c720c7e8SEric Dumazet WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); 666a019d6feSArnaldo Carvalho de Melo 667a019d6feSArnaldo Carvalho de Melo sk->sk_prot->destroy(sk); 668a019d6feSArnaldo Carvalho de Melo 669a019d6feSArnaldo Carvalho de Melo sk_stream_kill_queues(sk); 670a019d6feSArnaldo Carvalho de Melo 671a019d6feSArnaldo Carvalho de Melo xfrm_sk_free_policy(sk); 672a019d6feSArnaldo Carvalho de Melo 673a019d6feSArnaldo Carvalho de Melo sk_refcnt_debug_release(sk); 674a019d6feSArnaldo Carvalho de Melo 675dd24c001SEric Dumazet percpu_counter_dec(sk->sk_prot->orphan_count); 676a019d6feSArnaldo Carvalho de Melo sock_put(sk); 677a019d6feSArnaldo Carvalho de Melo } 678a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_destroy_sock); 679a019d6feSArnaldo Carvalho de Melo 680a019d6feSArnaldo Carvalho de Melo int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) 681a019d6feSArnaldo Carvalho de Melo { 682a019d6feSArnaldo Carvalho de Melo struct inet_sock *inet = inet_sk(sk); 683a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 684a019d6feSArnaldo Carvalho de Melo int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); 685a019d6feSArnaldo Carvalho de Melo 686a019d6feSArnaldo Carvalho de Melo if (rc != 0) 687a019d6feSArnaldo Carvalho de Melo return rc; 688a019d6feSArnaldo Carvalho de Melo 689a019d6feSArnaldo Carvalho de Melo sk->sk_max_ack_backlog = 0; 690a019d6feSArnaldo Carvalho de Melo sk->sk_ack_backlog = 0; 691a019d6feSArnaldo Carvalho de Melo inet_csk_delack_init(sk); 692a019d6feSArnaldo Carvalho de Melo 693a019d6feSArnaldo Carvalho de Melo /* There is race window here: we announce ourselves listening, 694a019d6feSArnaldo Carvalho de Melo * but this transition is still not validated by get_port(). 695a019d6feSArnaldo Carvalho de Melo * It is OK, because this socket enters to hash table only 696a019d6feSArnaldo Carvalho de Melo * after validation is complete. 697a019d6feSArnaldo Carvalho de Melo */ 698a019d6feSArnaldo Carvalho de Melo sk->sk_state = TCP_LISTEN; 699c720c7e8SEric Dumazet if (!sk->sk_prot->get_port(sk, inet->inet_num)) { 700c720c7e8SEric Dumazet inet->inet_sport = htons(inet->inet_num); 701a019d6feSArnaldo Carvalho de Melo 702a019d6feSArnaldo Carvalho de Melo sk_dst_reset(sk); 703a019d6feSArnaldo Carvalho de Melo sk->sk_prot->hash(sk); 704a019d6feSArnaldo Carvalho de Melo 705a019d6feSArnaldo Carvalho de Melo return 0; 706a019d6feSArnaldo Carvalho de Melo } 707a019d6feSArnaldo Carvalho de Melo 708a019d6feSArnaldo Carvalho de Melo sk->sk_state = TCP_CLOSE; 709a019d6feSArnaldo Carvalho de Melo __reqsk_queue_destroy(&icsk->icsk_accept_queue); 710a019d6feSArnaldo Carvalho de Melo return -EADDRINUSE; 711a019d6feSArnaldo Carvalho de Melo } 712a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_start); 713a019d6feSArnaldo Carvalho de Melo 714a019d6feSArnaldo Carvalho de Melo /* 715a019d6feSArnaldo Carvalho de Melo * This routine closes sockets which have been at least partially 716a019d6feSArnaldo Carvalho de Melo * opened, but not yet accepted. 717a019d6feSArnaldo Carvalho de Melo */ 718a019d6feSArnaldo Carvalho de Melo void inet_csk_listen_stop(struct sock *sk) 719a019d6feSArnaldo Carvalho de Melo { 720a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 721a019d6feSArnaldo Carvalho de Melo struct request_sock *acc_req; 722a019d6feSArnaldo Carvalho de Melo struct request_sock *req; 723a019d6feSArnaldo Carvalho de Melo 724a019d6feSArnaldo Carvalho de Melo inet_csk_delete_keepalive_timer(sk); 725a019d6feSArnaldo Carvalho de Melo 726a019d6feSArnaldo Carvalho de Melo /* make all the listen_opt local to us */ 727a019d6feSArnaldo Carvalho de Melo acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); 728a019d6feSArnaldo Carvalho de Melo 729a019d6feSArnaldo Carvalho de Melo /* Following specs, it would be better either to send FIN 730a019d6feSArnaldo Carvalho de Melo * (and enter FIN-WAIT-1, it is normal close) 731a019d6feSArnaldo Carvalho de Melo * or to send active reset (abort). 732a019d6feSArnaldo Carvalho de Melo * Certainly, it is pretty dangerous while synflood, but it is 733a019d6feSArnaldo Carvalho de Melo * bad justification for our negligence 8) 734a019d6feSArnaldo Carvalho de Melo * To be honest, we are not able to make either 735a019d6feSArnaldo Carvalho de Melo * of the variants now. --ANK 736a019d6feSArnaldo Carvalho de Melo */ 737a019d6feSArnaldo Carvalho de Melo reqsk_queue_destroy(&icsk->icsk_accept_queue); 738a019d6feSArnaldo Carvalho de Melo 739a019d6feSArnaldo Carvalho de Melo while ((req = acc_req) != NULL) { 740a019d6feSArnaldo Carvalho de Melo struct sock *child = req->sk; 741a019d6feSArnaldo Carvalho de Melo 742a019d6feSArnaldo Carvalho de Melo acc_req = req->dl_next; 743a019d6feSArnaldo Carvalho de Melo 744a019d6feSArnaldo Carvalho de Melo local_bh_disable(); 745a019d6feSArnaldo Carvalho de Melo bh_lock_sock(child); 746547b792cSIlpo Järvinen WARN_ON(sock_owned_by_user(child)); 747a019d6feSArnaldo Carvalho de Melo sock_hold(child); 748a019d6feSArnaldo Carvalho de Melo 749a019d6feSArnaldo Carvalho de Melo sk->sk_prot->disconnect(child, O_NONBLOCK); 750a019d6feSArnaldo Carvalho de Melo 751a019d6feSArnaldo Carvalho de Melo sock_orphan(child); 752a019d6feSArnaldo Carvalho de Melo 753eb4dea58SHerbert Xu percpu_counter_inc(sk->sk_prot->orphan_count); 754eb4dea58SHerbert Xu 755a019d6feSArnaldo Carvalho de Melo inet_csk_destroy_sock(child); 756a019d6feSArnaldo Carvalho de Melo 757a019d6feSArnaldo Carvalho de Melo bh_unlock_sock(child); 758a019d6feSArnaldo Carvalho de Melo local_bh_enable(); 759a019d6feSArnaldo Carvalho de Melo sock_put(child); 760a019d6feSArnaldo Carvalho de Melo 761a019d6feSArnaldo Carvalho de Melo sk_acceptq_removed(sk); 762a019d6feSArnaldo Carvalho de Melo __reqsk_free(req); 763a019d6feSArnaldo Carvalho de Melo } 764547b792cSIlpo Järvinen WARN_ON(sk->sk_ack_backlog); 765a019d6feSArnaldo Carvalho de Melo } 766a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 767af05dc93SArnaldo Carvalho de Melo 768af05dc93SArnaldo Carvalho de Melo void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 769af05dc93SArnaldo Carvalho de Melo { 770af05dc93SArnaldo Carvalho de Melo struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; 771af05dc93SArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 772af05dc93SArnaldo Carvalho de Melo 773af05dc93SArnaldo Carvalho de Melo sin->sin_family = AF_INET; 774c720c7e8SEric Dumazet sin->sin_addr.s_addr = inet->inet_daddr; 775c720c7e8SEric Dumazet sin->sin_port = inet->inet_dport; 776af05dc93SArnaldo Carvalho de Melo } 777af05dc93SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 778c4d93909SArnaldo Carvalho de Melo 779dec73ff0SArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 780dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, 781dec73ff0SArnaldo Carvalho de Melo char __user *optval, int __user *optlen) 782dec73ff0SArnaldo Carvalho de Melo { 783dbeff12bSDavid S. Miller const struct inet_connection_sock *icsk = inet_csk(sk); 784dec73ff0SArnaldo Carvalho de Melo 785dec73ff0SArnaldo Carvalho de Melo if (icsk->icsk_af_ops->compat_getsockopt != NULL) 786dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname, 787dec73ff0SArnaldo Carvalho de Melo optval, optlen); 788dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->getsockopt(sk, level, optname, 789dec73ff0SArnaldo Carvalho de Melo optval, optlen); 790dec73ff0SArnaldo Carvalho de Melo } 791dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); 792dec73ff0SArnaldo Carvalho de Melo 793dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, 794b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 795dec73ff0SArnaldo Carvalho de Melo { 796dbeff12bSDavid S. Miller const struct inet_connection_sock *icsk = inet_csk(sk); 797dec73ff0SArnaldo Carvalho de Melo 798dec73ff0SArnaldo Carvalho de Melo if (icsk->icsk_af_ops->compat_setsockopt != NULL) 799dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname, 800dec73ff0SArnaldo Carvalho de Melo optval, optlen); 801dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->setsockopt(sk, level, optname, 802dec73ff0SArnaldo Carvalho de Melo optval, optlen); 803dec73ff0SArnaldo Carvalho de Melo } 804dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); 805dec73ff0SArnaldo Carvalho de Melo #endif 806