13f421baaSArnaldo Carvalho de Melo /* 23f421baaSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 33f421baaSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 43f421baaSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 53f421baaSArnaldo Carvalho de Melo * 63f421baaSArnaldo Carvalho de Melo * Support for INET connection oriented protocols. 73f421baaSArnaldo Carvalho de Melo * 83f421baaSArnaldo Carvalho de Melo * Authors: See the TCP sources 93f421baaSArnaldo Carvalho de Melo * 103f421baaSArnaldo Carvalho de Melo * This program is free software; you can redistribute it and/or 113f421baaSArnaldo Carvalho de Melo * modify it under the terms of the GNU General Public License 123f421baaSArnaldo Carvalho de Melo * as published by the Free Software Foundation; either version 133f421baaSArnaldo Carvalho de Melo * 2 of the License, or(at your option) any later version. 143f421baaSArnaldo Carvalho de Melo */ 153f421baaSArnaldo Carvalho de Melo 163f421baaSArnaldo Carvalho de Melo #include <linux/module.h> 173f421baaSArnaldo Carvalho de Melo #include <linux/jhash.h> 183f421baaSArnaldo Carvalho de Melo 193f421baaSArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 203f421baaSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 213f421baaSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 223f421baaSArnaldo Carvalho de Melo #include <net/ip.h> 233f421baaSArnaldo Carvalho de Melo #include <net/route.h> 243f421baaSArnaldo Carvalho de Melo #include <net/tcp_states.h> 25a019d6feSArnaldo Carvalho de Melo #include <net/xfrm.h> 263f421baaSArnaldo Carvalho de Melo 273f421baaSArnaldo Carvalho de Melo #ifdef INET_CSK_DEBUG 283f421baaSArnaldo Carvalho de Melo const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; 293f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_timer_bug_msg); 303f421baaSArnaldo Carvalho de Melo #endif 313f421baaSArnaldo Carvalho de Melo 323f421baaSArnaldo Carvalho de Melo /* 333c689b73SEric Dumazet * This struct holds the first and last local port number. 343f421baaSArnaldo Carvalho de Melo */ 353c689b73SEric Dumazet struct local_ports sysctl_local_ports __read_mostly = { 363c689b73SEric Dumazet .lock = SEQLOCK_UNLOCKED, 373c689b73SEric Dumazet .range = { 32768, 61000 }, 383c689b73SEric Dumazet }; 39227b60f5SStephen Hemminger 40227b60f5SStephen Hemminger void inet_get_local_port_range(int *low, int *high) 41227b60f5SStephen Hemminger { 42227b60f5SStephen Hemminger unsigned seq; 43227b60f5SStephen Hemminger do { 443c689b73SEric Dumazet seq = read_seqbegin(&sysctl_local_ports.lock); 45227b60f5SStephen Hemminger 463c689b73SEric Dumazet *low = sysctl_local_ports.range[0]; 473c689b73SEric Dumazet *high = sysctl_local_ports.range[1]; 483c689b73SEric Dumazet } while (read_seqretry(&sysctl_local_ports.lock, seq)); 49227b60f5SStephen Hemminger } 50227b60f5SStephen Hemminger EXPORT_SYMBOL(inet_get_local_port_range); 513f421baaSArnaldo Carvalho de Melo 52971af18bSArnaldo Carvalho de Melo int inet_csk_bind_conflict(const struct sock *sk, 53971af18bSArnaldo Carvalho de Melo const struct inet_bind_bucket *tb) 543f421baaSArnaldo Carvalho de Melo { 5582103232SAl Viro const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); 563f421baaSArnaldo Carvalho de Melo struct sock *sk2; 573f421baaSArnaldo Carvalho de Melo struct hlist_node *node; 583f421baaSArnaldo Carvalho de Melo int reuse = sk->sk_reuse; 593f421baaSArnaldo Carvalho de Melo 607477fd2eSPavel Emelyanov /* 617477fd2eSPavel Emelyanov * Unlike other sk lookup places we do not check 627477fd2eSPavel Emelyanov * for sk_net here, since _all_ the socks listed 637477fd2eSPavel Emelyanov * in tb->owners list belong to the same net - the 647477fd2eSPavel Emelyanov * one this bucket belongs to. 657477fd2eSPavel Emelyanov */ 667477fd2eSPavel Emelyanov 673f421baaSArnaldo Carvalho de Melo sk_for_each_bound(sk2, node, &tb->owners) { 683f421baaSArnaldo Carvalho de Melo if (sk != sk2 && 693f421baaSArnaldo Carvalho de Melo !inet_v6_ipv6only(sk2) && 703f421baaSArnaldo Carvalho de Melo (!sk->sk_bound_dev_if || 713f421baaSArnaldo Carvalho de Melo !sk2->sk_bound_dev_if || 723f421baaSArnaldo Carvalho de Melo sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { 733f421baaSArnaldo Carvalho de Melo if (!reuse || !sk2->sk_reuse || 743f421baaSArnaldo Carvalho de Melo sk2->sk_state == TCP_LISTEN) { 7582103232SAl Viro const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); 763f421baaSArnaldo Carvalho de Melo if (!sk2_rcv_saddr || !sk_rcv_saddr || 773f421baaSArnaldo Carvalho de Melo sk2_rcv_saddr == sk_rcv_saddr) 783f421baaSArnaldo Carvalho de Melo break; 793f421baaSArnaldo Carvalho de Melo } 803f421baaSArnaldo Carvalho de Melo } 813f421baaSArnaldo Carvalho de Melo } 823f421baaSArnaldo Carvalho de Melo return node != NULL; 833f421baaSArnaldo Carvalho de Melo } 843f421baaSArnaldo Carvalho de Melo 85971af18bSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); 86971af18bSArnaldo Carvalho de Melo 873f421baaSArnaldo Carvalho de Melo /* Obtain a reference to a local port for the given sock, 883f421baaSArnaldo Carvalho de Melo * if snum is zero it means select any available local port. 893f421baaSArnaldo Carvalho de Melo */ 90ab1e0a13SArnaldo Carvalho de Melo int inet_csk_get_port(struct sock *sk, unsigned short snum) 913f421baaSArnaldo Carvalho de Melo { 9239d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 933f421baaSArnaldo Carvalho de Melo struct inet_bind_hashbucket *head; 943f421baaSArnaldo Carvalho de Melo struct hlist_node *node; 953f421baaSArnaldo Carvalho de Melo struct inet_bind_bucket *tb; 96a9d8f911SEvgeniy Polyakov int ret, attempts = 5; 973b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(sk); 98a9d8f911SEvgeniy Polyakov int smallest_size = -1, smallest_rover; 993f421baaSArnaldo Carvalho de Melo 1003f421baaSArnaldo Carvalho de Melo local_bh_disable(); 1013f421baaSArnaldo Carvalho de Melo if (!snum) { 102227b60f5SStephen Hemminger int remaining, rover, low, high; 103227b60f5SStephen Hemminger 104a9d8f911SEvgeniy Polyakov again: 105227b60f5SStephen Hemminger inet_get_local_port_range(&low, &high); 106a25de534SAnton Arapov remaining = (high - low) + 1; 107a9d8f911SEvgeniy Polyakov smallest_rover = rover = net_random() % remaining + low; 1083f421baaSArnaldo Carvalho de Melo 109a9d8f911SEvgeniy Polyakov smallest_size = -1; 1103f421baaSArnaldo Carvalho de Melo do { 1117f635ab7SPavel Emelyanov head = &hashinfo->bhash[inet_bhashfn(net, rover, 1127f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 1133f421baaSArnaldo Carvalho de Melo spin_lock(&head->lock); 1143f421baaSArnaldo Carvalho de Melo inet_bind_bucket_for_each(tb, node, &head->chain) 115*09ad9bc7SOctavian Purdila if (net_eq(ib_net(tb), net) && tb->port == rover) { 116a9d8f911SEvgeniy Polyakov if (tb->fastreuse > 0 && 117a9d8f911SEvgeniy Polyakov sk->sk_reuse && 118a9d8f911SEvgeniy Polyakov sk->sk_state != TCP_LISTEN && 119a9d8f911SEvgeniy Polyakov (tb->num_owners < smallest_size || smallest_size == -1)) { 120a9d8f911SEvgeniy Polyakov smallest_size = tb->num_owners; 121a9d8f911SEvgeniy Polyakov smallest_rover = rover; 12224dd1fa1SEric Dumazet if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { 123a9d8f911SEvgeniy Polyakov spin_unlock(&head->lock); 124a9d8f911SEvgeniy Polyakov snum = smallest_rover; 125a9d8f911SEvgeniy Polyakov goto have_snum; 126a9d8f911SEvgeniy Polyakov } 127a9d8f911SEvgeniy Polyakov } 1283f421baaSArnaldo Carvalho de Melo goto next; 129a9d8f911SEvgeniy Polyakov } 1303f421baaSArnaldo Carvalho de Melo break; 1313f421baaSArnaldo Carvalho de Melo next: 1323f421baaSArnaldo Carvalho de Melo spin_unlock(&head->lock); 1336df71634SStephen Hemminger if (++rover > high) 1346df71634SStephen Hemminger rover = low; 1353f421baaSArnaldo Carvalho de Melo } while (--remaining > 0); 1363f421baaSArnaldo Carvalho de Melo 1373f421baaSArnaldo Carvalho de Melo /* Exhausted local port range during search? It is not 1383f421baaSArnaldo Carvalho de Melo * possible for us to be holding one of the bind hash 1393f421baaSArnaldo Carvalho de Melo * locks if this test triggers, because if 'remaining' 1403f421baaSArnaldo Carvalho de Melo * drops to zero, we broke out of the do/while loop at 1413f421baaSArnaldo Carvalho de Melo * the top level, not from the 'break;' statement. 1423f421baaSArnaldo Carvalho de Melo */ 1433f421baaSArnaldo Carvalho de Melo ret = 1; 144a9d8f911SEvgeniy Polyakov if (remaining <= 0) { 145a9d8f911SEvgeniy Polyakov if (smallest_size != -1) { 146a9d8f911SEvgeniy Polyakov snum = smallest_rover; 147a9d8f911SEvgeniy Polyakov goto have_snum; 148a9d8f911SEvgeniy Polyakov } 1493f421baaSArnaldo Carvalho de Melo goto fail; 150a9d8f911SEvgeniy Polyakov } 1513f421baaSArnaldo Carvalho de Melo /* OK, here is the one we will use. HEAD is 1523f421baaSArnaldo Carvalho de Melo * non-NULL and we hold it's mutex. 1533f421baaSArnaldo Carvalho de Melo */ 1543f421baaSArnaldo Carvalho de Melo snum = rover; 1553f421baaSArnaldo Carvalho de Melo } else { 156a9d8f911SEvgeniy Polyakov have_snum: 1577f635ab7SPavel Emelyanov head = &hashinfo->bhash[inet_bhashfn(net, snum, 1587f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 1593f421baaSArnaldo Carvalho de Melo spin_lock(&head->lock); 1603f421baaSArnaldo Carvalho de Melo inet_bind_bucket_for_each(tb, node, &head->chain) 161*09ad9bc7SOctavian Purdila if (net_eq(ib_net(tb), net) && tb->port == snum) 1623f421baaSArnaldo Carvalho de Melo goto tb_found; 1633f421baaSArnaldo Carvalho de Melo } 1643f421baaSArnaldo Carvalho de Melo tb = NULL; 1653f421baaSArnaldo Carvalho de Melo goto tb_not_found; 1663f421baaSArnaldo Carvalho de Melo tb_found: 1673f421baaSArnaldo Carvalho de Melo if (!hlist_empty(&tb->owners)) { 1683f421baaSArnaldo Carvalho de Melo if (tb->fastreuse > 0 && 169a9d8f911SEvgeniy Polyakov sk->sk_reuse && sk->sk_state != TCP_LISTEN && 170a9d8f911SEvgeniy Polyakov smallest_size == -1) { 1713f421baaSArnaldo Carvalho de Melo goto success; 1723f421baaSArnaldo Carvalho de Melo } else { 1733f421baaSArnaldo Carvalho de Melo ret = 1; 174a9d8f911SEvgeniy Polyakov if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { 1755add3009SStephen Hemminger if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && 1765add3009SStephen Hemminger smallest_size != -1 && --attempts >= 0) { 177a9d8f911SEvgeniy Polyakov spin_unlock(&head->lock); 178a9d8f911SEvgeniy Polyakov goto again; 179a9d8f911SEvgeniy Polyakov } 1803f421baaSArnaldo Carvalho de Melo goto fail_unlock; 1813f421baaSArnaldo Carvalho de Melo } 1823f421baaSArnaldo Carvalho de Melo } 183a9d8f911SEvgeniy Polyakov } 1843f421baaSArnaldo Carvalho de Melo tb_not_found: 1853f421baaSArnaldo Carvalho de Melo ret = 1; 186941b1d22SPavel Emelyanov if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, 187941b1d22SPavel Emelyanov net, head, snum)) == NULL) 1883f421baaSArnaldo Carvalho de Melo goto fail_unlock; 1893f421baaSArnaldo Carvalho de Melo if (hlist_empty(&tb->owners)) { 1903f421baaSArnaldo Carvalho de Melo if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) 1913f421baaSArnaldo Carvalho de Melo tb->fastreuse = 1; 1923f421baaSArnaldo Carvalho de Melo else 1933f421baaSArnaldo Carvalho de Melo tb->fastreuse = 0; 1943f421baaSArnaldo Carvalho de Melo } else if (tb->fastreuse && 1953f421baaSArnaldo Carvalho de Melo (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) 1963f421baaSArnaldo Carvalho de Melo tb->fastreuse = 0; 1973f421baaSArnaldo Carvalho de Melo success: 1983f421baaSArnaldo Carvalho de Melo if (!inet_csk(sk)->icsk_bind_hash) 1993f421baaSArnaldo Carvalho de Melo inet_bind_hash(sk, tb, snum); 200547b792cSIlpo Järvinen WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 2013f421baaSArnaldo Carvalho de Melo ret = 0; 2023f421baaSArnaldo Carvalho de Melo 2033f421baaSArnaldo Carvalho de Melo fail_unlock: 2043f421baaSArnaldo Carvalho de Melo spin_unlock(&head->lock); 2053f421baaSArnaldo Carvalho de Melo fail: 2063f421baaSArnaldo Carvalho de Melo local_bh_enable(); 2073f421baaSArnaldo Carvalho de Melo return ret; 2083f421baaSArnaldo Carvalho de Melo } 2093f421baaSArnaldo Carvalho de Melo 2103f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_get_port); 2113f421baaSArnaldo Carvalho de Melo 2123f421baaSArnaldo Carvalho de Melo /* 2133f421baaSArnaldo Carvalho de Melo * Wait for an incoming connection, avoid race conditions. This must be called 2143f421baaSArnaldo Carvalho de Melo * with the socket locked. 2153f421baaSArnaldo Carvalho de Melo */ 2163f421baaSArnaldo Carvalho de Melo static int inet_csk_wait_for_connect(struct sock *sk, long timeo) 2173f421baaSArnaldo Carvalho de Melo { 2183f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 2193f421baaSArnaldo Carvalho de Melo DEFINE_WAIT(wait); 2203f421baaSArnaldo Carvalho de Melo int err; 2213f421baaSArnaldo Carvalho de Melo 2223f421baaSArnaldo Carvalho de Melo /* 2233f421baaSArnaldo Carvalho de Melo * True wake-one mechanism for incoming connections: only 2243f421baaSArnaldo Carvalho de Melo * one process gets woken up, not the 'whole herd'. 2253f421baaSArnaldo Carvalho de Melo * Since we do not 'race & poll' for established sockets 2263f421baaSArnaldo Carvalho de Melo * anymore, the common case will execute the loop only once. 2273f421baaSArnaldo Carvalho de Melo * 2283f421baaSArnaldo Carvalho de Melo * Subtle issue: "add_wait_queue_exclusive()" will be added 2293f421baaSArnaldo Carvalho de Melo * after any current non-exclusive waiters, and we know that 2303f421baaSArnaldo Carvalho de Melo * it will always _stay_ after any new non-exclusive waiters 2313f421baaSArnaldo Carvalho de Melo * because all non-exclusive waiters are added at the 2323f421baaSArnaldo Carvalho de Melo * beginning of the wait-queue. As such, it's ok to "drop" 2333f421baaSArnaldo Carvalho de Melo * our exclusiveness temporarily when we get woken up without 2343f421baaSArnaldo Carvalho de Melo * having to remove and re-insert us on the wait queue. 2353f421baaSArnaldo Carvalho de Melo */ 2363f421baaSArnaldo Carvalho de Melo for (;;) { 2373f421baaSArnaldo Carvalho de Melo prepare_to_wait_exclusive(sk->sk_sleep, &wait, 2383f421baaSArnaldo Carvalho de Melo TASK_INTERRUPTIBLE); 2393f421baaSArnaldo Carvalho de Melo release_sock(sk); 2403f421baaSArnaldo Carvalho de Melo if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 2413f421baaSArnaldo Carvalho de Melo timeo = schedule_timeout(timeo); 2423f421baaSArnaldo Carvalho de Melo lock_sock(sk); 2433f421baaSArnaldo Carvalho de Melo err = 0; 2443f421baaSArnaldo Carvalho de Melo if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) 2453f421baaSArnaldo Carvalho de Melo break; 2463f421baaSArnaldo Carvalho de Melo err = -EINVAL; 2473f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 2483f421baaSArnaldo Carvalho de Melo break; 2493f421baaSArnaldo Carvalho de Melo err = sock_intr_errno(timeo); 2503f421baaSArnaldo Carvalho de Melo if (signal_pending(current)) 2513f421baaSArnaldo Carvalho de Melo break; 2523f421baaSArnaldo Carvalho de Melo err = -EAGAIN; 2533f421baaSArnaldo Carvalho de Melo if (!timeo) 2543f421baaSArnaldo Carvalho de Melo break; 2553f421baaSArnaldo Carvalho de Melo } 2563f421baaSArnaldo Carvalho de Melo finish_wait(sk->sk_sleep, &wait); 2573f421baaSArnaldo Carvalho de Melo return err; 2583f421baaSArnaldo Carvalho de Melo } 2593f421baaSArnaldo Carvalho de Melo 2603f421baaSArnaldo Carvalho de Melo /* 2613f421baaSArnaldo Carvalho de Melo * This will accept the next outstanding connection. 2623f421baaSArnaldo Carvalho de Melo */ 2633f421baaSArnaldo Carvalho de Melo struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) 2643f421baaSArnaldo Carvalho de Melo { 2653f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 2663f421baaSArnaldo Carvalho de Melo struct sock *newsk; 2673f421baaSArnaldo Carvalho de Melo int error; 2683f421baaSArnaldo Carvalho de Melo 2693f421baaSArnaldo Carvalho de Melo lock_sock(sk); 2703f421baaSArnaldo Carvalho de Melo 2713f421baaSArnaldo Carvalho de Melo /* We need to make sure that this socket is listening, 2723f421baaSArnaldo Carvalho de Melo * and that it has something pending. 2733f421baaSArnaldo Carvalho de Melo */ 2743f421baaSArnaldo Carvalho de Melo error = -EINVAL; 2753f421baaSArnaldo Carvalho de Melo if (sk->sk_state != TCP_LISTEN) 2763f421baaSArnaldo Carvalho de Melo goto out_err; 2773f421baaSArnaldo Carvalho de Melo 2783f421baaSArnaldo Carvalho de Melo /* Find already established connection */ 2793f421baaSArnaldo Carvalho de Melo if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { 2803f421baaSArnaldo Carvalho de Melo long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 2813f421baaSArnaldo Carvalho de Melo 2823f421baaSArnaldo Carvalho de Melo /* If this is a non blocking socket don't sleep */ 2833f421baaSArnaldo Carvalho de Melo error = -EAGAIN; 2843f421baaSArnaldo Carvalho de Melo if (!timeo) 2853f421baaSArnaldo Carvalho de Melo goto out_err; 2863f421baaSArnaldo Carvalho de Melo 2873f421baaSArnaldo Carvalho de Melo error = inet_csk_wait_for_connect(sk, timeo); 2883f421baaSArnaldo Carvalho de Melo if (error) 2893f421baaSArnaldo Carvalho de Melo goto out_err; 2903f421baaSArnaldo Carvalho de Melo } 2913f421baaSArnaldo Carvalho de Melo 2923f421baaSArnaldo Carvalho de Melo newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); 293547b792cSIlpo Järvinen WARN_ON(newsk->sk_state == TCP_SYN_RECV); 2943f421baaSArnaldo Carvalho de Melo out: 2953f421baaSArnaldo Carvalho de Melo release_sock(sk); 2963f421baaSArnaldo Carvalho de Melo return newsk; 2973f421baaSArnaldo Carvalho de Melo out_err: 2983f421baaSArnaldo Carvalho de Melo newsk = NULL; 2993f421baaSArnaldo Carvalho de Melo *err = error; 3003f421baaSArnaldo Carvalho de Melo goto out; 3013f421baaSArnaldo Carvalho de Melo } 3023f421baaSArnaldo Carvalho de Melo 3033f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_accept); 3043f421baaSArnaldo Carvalho de Melo 3053f421baaSArnaldo Carvalho de Melo /* 3063f421baaSArnaldo Carvalho de Melo * Using different timers for retransmit, delayed acks and probes 3073f421baaSArnaldo Carvalho de Melo * We may wish use just one timer maintaining a list of expire jiffies 3083f421baaSArnaldo Carvalho de Melo * to optimize. 3093f421baaSArnaldo Carvalho de Melo */ 3103f421baaSArnaldo Carvalho de Melo void inet_csk_init_xmit_timers(struct sock *sk, 3113f421baaSArnaldo Carvalho de Melo void (*retransmit_handler)(unsigned long), 3123f421baaSArnaldo Carvalho de Melo void (*delack_handler)(unsigned long), 3133f421baaSArnaldo Carvalho de Melo void (*keepalive_handler)(unsigned long)) 3143f421baaSArnaldo Carvalho de Melo { 3153f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3163f421baaSArnaldo Carvalho de Melo 317b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, 318b24b8a24SPavel Emelyanov (unsigned long)sk); 319b24b8a24SPavel Emelyanov setup_timer(&icsk->icsk_delack_timer, delack_handler, 320b24b8a24SPavel Emelyanov (unsigned long)sk); 321b24b8a24SPavel Emelyanov setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); 3223f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = 0; 3233f421baaSArnaldo Carvalho de Melo } 3243f421baaSArnaldo Carvalho de Melo 3253f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_init_xmit_timers); 3263f421baaSArnaldo Carvalho de Melo 3273f421baaSArnaldo Carvalho de Melo void inet_csk_clear_xmit_timers(struct sock *sk) 3283f421baaSArnaldo Carvalho de Melo { 3293f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 3303f421baaSArnaldo Carvalho de Melo 3313f421baaSArnaldo Carvalho de Melo icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; 3323f421baaSArnaldo Carvalho de Melo 3333f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 3343f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &icsk->icsk_delack_timer); 3353f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 3363f421baaSArnaldo Carvalho de Melo } 3373f421baaSArnaldo Carvalho de Melo 3383f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 3393f421baaSArnaldo Carvalho de Melo 3403f421baaSArnaldo Carvalho de Melo void inet_csk_delete_keepalive_timer(struct sock *sk) 3413f421baaSArnaldo Carvalho de Melo { 3423f421baaSArnaldo Carvalho de Melo sk_stop_timer(sk, &sk->sk_timer); 3433f421baaSArnaldo Carvalho de Melo } 3443f421baaSArnaldo Carvalho de Melo 3453f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 3463f421baaSArnaldo Carvalho de Melo 3473f421baaSArnaldo Carvalho de Melo void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 3483f421baaSArnaldo Carvalho de Melo { 3493f421baaSArnaldo Carvalho de Melo sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 3503f421baaSArnaldo Carvalho de Melo } 3513f421baaSArnaldo Carvalho de Melo 3523f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 3533f421baaSArnaldo Carvalho de Melo 3543f421baaSArnaldo Carvalho de Melo struct dst_entry *inet_csk_route_req(struct sock *sk, 3553f421baaSArnaldo Carvalho de Melo const struct request_sock *req) 3563f421baaSArnaldo Carvalho de Melo { 3573f421baaSArnaldo Carvalho de Melo struct rtable *rt; 3583f421baaSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 3593f421baaSArnaldo Carvalho de Melo struct ip_options *opt = inet_rsk(req)->opt; 3603f421baaSArnaldo Carvalho de Melo struct flowi fl = { .oif = sk->sk_bound_dev_if, 361ffce9082SAtis Elsts .mark = sk->sk_mark, 3623f421baaSArnaldo Carvalho de Melo .nl_u = { .ip4_u = 3633f421baaSArnaldo Carvalho de Melo { .daddr = ((opt && opt->srr) ? 3643f421baaSArnaldo Carvalho de Melo opt->faddr : 3653f421baaSArnaldo Carvalho de Melo ireq->rmt_addr), 3663f421baaSArnaldo Carvalho de Melo .saddr = ireq->loc_addr, 3673f421baaSArnaldo Carvalho de Melo .tos = RT_CONN_FLAGS(sk) } }, 3683f421baaSArnaldo Carvalho de Melo .proto = sk->sk_protocol, 36986b08d86SKOVACS Krisztian .flags = inet_sk_flowi_flags(sk), 3703f421baaSArnaldo Carvalho de Melo .uli_u = { .ports = 371c720c7e8SEric Dumazet { .sport = inet_sk(sk)->inet_sport, 3723f421baaSArnaldo Carvalho de Melo .dport = ireq->rmt_port } } }; 37384a3aa00SPavel Emelyanov struct net *net = sock_net(sk); 3743f421baaSArnaldo Carvalho de Melo 3754237c75cSVenkat Yekkirala security_req_classify_flow(req, &fl); 376857a6e0aSIlpo Järvinen if (ip_route_output_flow(net, &rt, &fl, sk, 0)) 377857a6e0aSIlpo Järvinen goto no_route; 378857a6e0aSIlpo Järvinen if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 379857a6e0aSIlpo Järvinen goto route_err; 3803f421baaSArnaldo Carvalho de Melo return &rt->u.dst; 381857a6e0aSIlpo Järvinen 382857a6e0aSIlpo Järvinen route_err: 383857a6e0aSIlpo Järvinen ip_rt_put(rt); 384857a6e0aSIlpo Järvinen no_route: 385857a6e0aSIlpo Järvinen IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 386857a6e0aSIlpo Järvinen return NULL; 3873f421baaSArnaldo Carvalho de Melo } 3883f421baaSArnaldo Carvalho de Melo 3893f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_route_req); 3903f421baaSArnaldo Carvalho de Melo 3916b72977bSAl Viro static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 39272a3effaSEric Dumazet const u32 rnd, const u32 synq_hsize) 3933f421baaSArnaldo Carvalho de Melo { 3946b72977bSAl Viro return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); 3953f421baaSArnaldo Carvalho de Melo } 3963f421baaSArnaldo Carvalho de Melo 3973f421baaSArnaldo Carvalho de Melo #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 3983f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) ((fam) == AF_INET) 3993f421baaSArnaldo Carvalho de Melo #else 4003f421baaSArnaldo Carvalho de Melo #define AF_INET_FAMILY(fam) 1 4013f421baaSArnaldo Carvalho de Melo #endif 4023f421baaSArnaldo Carvalho de Melo 4033f421baaSArnaldo Carvalho de Melo struct request_sock *inet_csk_search_req(const struct sock *sk, 4043f421baaSArnaldo Carvalho de Melo struct request_sock ***prevp, 4056b72977bSAl Viro const __be16 rport, const __be32 raddr, 4067f25afbbSAl Viro const __be32 laddr) 4073f421baaSArnaldo Carvalho de Melo { 4083f421baaSArnaldo Carvalho de Melo const struct inet_connection_sock *icsk = inet_csk(sk); 4093f421baaSArnaldo Carvalho de Melo struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 4103f421baaSArnaldo Carvalho de Melo struct request_sock *req, **prev; 4113f421baaSArnaldo Carvalho de Melo 4123f421baaSArnaldo Carvalho de Melo for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, 4133f421baaSArnaldo Carvalho de Melo lopt->nr_table_entries)]; 4143f421baaSArnaldo Carvalho de Melo (req = *prev) != NULL; 4153f421baaSArnaldo Carvalho de Melo prev = &req->dl_next) { 4163f421baaSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 4173f421baaSArnaldo Carvalho de Melo 4183f421baaSArnaldo Carvalho de Melo if (ireq->rmt_port == rport && 4193f421baaSArnaldo Carvalho de Melo ireq->rmt_addr == raddr && 4203f421baaSArnaldo Carvalho de Melo ireq->loc_addr == laddr && 4213f421baaSArnaldo Carvalho de Melo AF_INET_FAMILY(req->rsk_ops->family)) { 422547b792cSIlpo Järvinen WARN_ON(req->sk); 4233f421baaSArnaldo Carvalho de Melo *prevp = prev; 4243f421baaSArnaldo Carvalho de Melo break; 4253f421baaSArnaldo Carvalho de Melo } 4263f421baaSArnaldo Carvalho de Melo } 4273f421baaSArnaldo Carvalho de Melo 4283f421baaSArnaldo Carvalho de Melo return req; 4293f421baaSArnaldo Carvalho de Melo } 4303f421baaSArnaldo Carvalho de Melo 4313f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_search_req); 4323f421baaSArnaldo Carvalho de Melo 4333f421baaSArnaldo Carvalho de Melo void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 434c2977c22SArnaldo Carvalho de Melo unsigned long timeout) 4353f421baaSArnaldo Carvalho de Melo { 4363f421baaSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 4373f421baaSArnaldo Carvalho de Melo struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 4383f421baaSArnaldo Carvalho de Melo const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, 4393f421baaSArnaldo Carvalho de Melo lopt->hash_rnd, lopt->nr_table_entries); 4403f421baaSArnaldo Carvalho de Melo 4413f421baaSArnaldo Carvalho de Melo reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 4423f421baaSArnaldo Carvalho de Melo inet_csk_reqsk_queue_added(sk, timeout); 4433f421baaSArnaldo Carvalho de Melo } 4443f421baaSArnaldo Carvalho de Melo 445a019d6feSArnaldo Carvalho de Melo /* Only thing we need from tcp.h */ 446a019d6feSArnaldo Carvalho de Melo extern int sysctl_tcp_synack_retries; 447a019d6feSArnaldo Carvalho de Melo 4483f421baaSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 4499f1d2604SArnaldo Carvalho de Melo 4500c3d79bcSJulian Anastasov /* Decide when to expire the request and when to resend SYN-ACK */ 4510c3d79bcSJulian Anastasov static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 4520c3d79bcSJulian Anastasov const int max_retries, 4530c3d79bcSJulian Anastasov const u8 rskq_defer_accept, 4540c3d79bcSJulian Anastasov int *expire, int *resend) 4550c3d79bcSJulian Anastasov { 4560c3d79bcSJulian Anastasov if (!rskq_defer_accept) { 4570c3d79bcSJulian Anastasov *expire = req->retrans >= thresh; 4580c3d79bcSJulian Anastasov *resend = 1; 4590c3d79bcSJulian Anastasov return; 4600c3d79bcSJulian Anastasov } 4610c3d79bcSJulian Anastasov *expire = req->retrans >= thresh && 4620c3d79bcSJulian Anastasov (!inet_rsk(req)->acked || req->retrans >= max_retries); 4630c3d79bcSJulian Anastasov /* 4640c3d79bcSJulian Anastasov * Do not resend while waiting for data after ACK, 4650c3d79bcSJulian Anastasov * start to resend on end of deferring period to give 4660c3d79bcSJulian Anastasov * last chance for data or ACK to create established socket. 4670c3d79bcSJulian Anastasov */ 4680c3d79bcSJulian Anastasov *resend = !inet_rsk(req)->acked || 4690c3d79bcSJulian Anastasov req->retrans >= rskq_defer_accept - 1; 4700c3d79bcSJulian Anastasov } 4710c3d79bcSJulian Anastasov 472a019d6feSArnaldo Carvalho de Melo void inet_csk_reqsk_queue_prune(struct sock *parent, 473a019d6feSArnaldo Carvalho de Melo const unsigned long interval, 474a019d6feSArnaldo Carvalho de Melo const unsigned long timeout, 475a019d6feSArnaldo Carvalho de Melo const unsigned long max_rto) 476a019d6feSArnaldo Carvalho de Melo { 477a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(parent); 478a019d6feSArnaldo Carvalho de Melo struct request_sock_queue *queue = &icsk->icsk_accept_queue; 479a019d6feSArnaldo Carvalho de Melo struct listen_sock *lopt = queue->listen_opt; 480ec0a1966SDavid S. Miller int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 481ec0a1966SDavid S. Miller int thresh = max_retries; 482a019d6feSArnaldo Carvalho de Melo unsigned long now = jiffies; 483a019d6feSArnaldo Carvalho de Melo struct request_sock **reqp, *req; 484a019d6feSArnaldo Carvalho de Melo int i, budget; 485a019d6feSArnaldo Carvalho de Melo 486a019d6feSArnaldo Carvalho de Melo if (lopt == NULL || lopt->qlen == 0) 487a019d6feSArnaldo Carvalho de Melo return; 488a019d6feSArnaldo Carvalho de Melo 489a019d6feSArnaldo Carvalho de Melo /* Normally all the openreqs are young and become mature 490a019d6feSArnaldo Carvalho de Melo * (i.e. converted to established socket) for first timeout. 491a019d6feSArnaldo Carvalho de Melo * If synack was not acknowledged for 3 seconds, it means 492a019d6feSArnaldo Carvalho de Melo * one of the following things: synack was lost, ack was lost, 493a019d6feSArnaldo Carvalho de Melo * rtt is high or nobody planned to ack (i.e. synflood). 494a019d6feSArnaldo Carvalho de Melo * When server is a bit loaded, queue is populated with old 495a019d6feSArnaldo Carvalho de Melo * open requests, reducing effective size of queue. 496a019d6feSArnaldo Carvalho de Melo * When server is well loaded, queue size reduces to zero 497a019d6feSArnaldo Carvalho de Melo * after several minutes of work. It is not synflood, 498a019d6feSArnaldo Carvalho de Melo * it is normal operation. The solution is pruning 499a019d6feSArnaldo Carvalho de Melo * too old entries overriding normal timeout, when 500a019d6feSArnaldo Carvalho de Melo * situation becomes dangerous. 501a019d6feSArnaldo Carvalho de Melo * 502a019d6feSArnaldo Carvalho de Melo * Essentially, we reserve half of room for young 503a019d6feSArnaldo Carvalho de Melo * embrions; and abort old ones without pity, if old 504a019d6feSArnaldo Carvalho de Melo * ones are about to clog our table. 505a019d6feSArnaldo Carvalho de Melo */ 506a019d6feSArnaldo Carvalho de Melo if (lopt->qlen>>(lopt->max_qlen_log-1)) { 507a019d6feSArnaldo Carvalho de Melo int young = (lopt->qlen_young<<1); 508a019d6feSArnaldo Carvalho de Melo 509a019d6feSArnaldo Carvalho de Melo while (thresh > 2) { 510a019d6feSArnaldo Carvalho de Melo if (lopt->qlen < young) 511a019d6feSArnaldo Carvalho de Melo break; 512a019d6feSArnaldo Carvalho de Melo thresh--; 513a019d6feSArnaldo Carvalho de Melo young <<= 1; 514a019d6feSArnaldo Carvalho de Melo } 515a019d6feSArnaldo Carvalho de Melo } 516a019d6feSArnaldo Carvalho de Melo 517ec0a1966SDavid S. Miller if (queue->rskq_defer_accept) 518ec0a1966SDavid S. Miller max_retries = queue->rskq_defer_accept; 519ec0a1966SDavid S. Miller 520a019d6feSArnaldo Carvalho de Melo budget = 2 * (lopt->nr_table_entries / (timeout / interval)); 521a019d6feSArnaldo Carvalho de Melo i = lopt->clock_hand; 522a019d6feSArnaldo Carvalho de Melo 523a019d6feSArnaldo Carvalho de Melo do { 524a019d6feSArnaldo Carvalho de Melo reqp=&lopt->syn_table[i]; 525a019d6feSArnaldo Carvalho de Melo while ((req = *reqp) != NULL) { 526a019d6feSArnaldo Carvalho de Melo if (time_after_eq(now, req->expires)) { 5270c3d79bcSJulian Anastasov int expire = 0, resend = 0; 5280c3d79bcSJulian Anastasov 5290c3d79bcSJulian Anastasov syn_ack_recalc(req, thresh, max_retries, 5300c3d79bcSJulian Anastasov queue->rskq_defer_accept, 5310c3d79bcSJulian Anastasov &expire, &resend); 5320c3d79bcSJulian Anastasov if (!expire && 5330c3d79bcSJulian Anastasov (!resend || 5340c3d79bcSJulian Anastasov !req->rsk_ops->rtx_syn_ack(parent, req) || 5350c3d79bcSJulian Anastasov inet_rsk(req)->acked)) { 536a019d6feSArnaldo Carvalho de Melo unsigned long timeo; 537a019d6feSArnaldo Carvalho de Melo 538a019d6feSArnaldo Carvalho de Melo if (req->retrans++ == 0) 539a019d6feSArnaldo Carvalho de Melo lopt->qlen_young--; 540a019d6feSArnaldo Carvalho de Melo timeo = min((timeout << req->retrans), max_rto); 541a019d6feSArnaldo Carvalho de Melo req->expires = now + timeo; 542a019d6feSArnaldo Carvalho de Melo reqp = &req->dl_next; 543a019d6feSArnaldo Carvalho de Melo continue; 544a019d6feSArnaldo Carvalho de Melo } 545a019d6feSArnaldo Carvalho de Melo 546a019d6feSArnaldo Carvalho de Melo /* Drop this request */ 547a019d6feSArnaldo Carvalho de Melo inet_csk_reqsk_queue_unlink(parent, req, reqp); 548a019d6feSArnaldo Carvalho de Melo reqsk_queue_removed(queue, req); 549a019d6feSArnaldo Carvalho de Melo reqsk_free(req); 550a019d6feSArnaldo Carvalho de Melo continue; 551a019d6feSArnaldo Carvalho de Melo } 552a019d6feSArnaldo Carvalho de Melo reqp = &req->dl_next; 553a019d6feSArnaldo Carvalho de Melo } 554a019d6feSArnaldo Carvalho de Melo 555a019d6feSArnaldo Carvalho de Melo i = (i + 1) & (lopt->nr_table_entries - 1); 556a019d6feSArnaldo Carvalho de Melo 557a019d6feSArnaldo Carvalho de Melo } while (--budget > 0); 558a019d6feSArnaldo Carvalho de Melo 559a019d6feSArnaldo Carvalho de Melo lopt->clock_hand = i; 560a019d6feSArnaldo Carvalho de Melo 561a019d6feSArnaldo Carvalho de Melo if (lopt->qlen) 562a019d6feSArnaldo Carvalho de Melo inet_csk_reset_keepalive_timer(parent, interval); 563a019d6feSArnaldo Carvalho de Melo } 564a019d6feSArnaldo Carvalho de Melo 565a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); 566a019d6feSArnaldo Carvalho de Melo 5679f1d2604SArnaldo Carvalho de Melo struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, 568dd0fc66fSAl Viro const gfp_t priority) 5699f1d2604SArnaldo Carvalho de Melo { 5709f1d2604SArnaldo Carvalho de Melo struct sock *newsk = sk_clone(sk, priority); 5719f1d2604SArnaldo Carvalho de Melo 5729f1d2604SArnaldo Carvalho de Melo if (newsk != NULL) { 5739f1d2604SArnaldo Carvalho de Melo struct inet_connection_sock *newicsk = inet_csk(newsk); 5749f1d2604SArnaldo Carvalho de Melo 5759f1d2604SArnaldo Carvalho de Melo newsk->sk_state = TCP_SYN_RECV; 5769f1d2604SArnaldo Carvalho de Melo newicsk->icsk_bind_hash = NULL; 5779f1d2604SArnaldo Carvalho de Melo 578c720c7e8SEric Dumazet inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port; 579c720c7e8SEric Dumazet inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port); 580c720c7e8SEric Dumazet inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; 5819f1d2604SArnaldo Carvalho de Melo newsk->sk_write_space = sk_stream_write_space; 5829f1d2604SArnaldo Carvalho de Melo 5839f1d2604SArnaldo Carvalho de Melo newicsk->icsk_retransmits = 0; 5849f1d2604SArnaldo Carvalho de Melo newicsk->icsk_backoff = 0; 5856687e988SArnaldo Carvalho de Melo newicsk->icsk_probes_out = 0; 5869f1d2604SArnaldo Carvalho de Melo 5879f1d2604SArnaldo Carvalho de Melo /* Deinitialize accept_queue to trap illegal accesses. */ 5889f1d2604SArnaldo Carvalho de Melo memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); 5894237c75cSVenkat Yekkirala 5904237c75cSVenkat Yekkirala security_inet_csk_clone(newsk, req); 5919f1d2604SArnaldo Carvalho de Melo } 5929f1d2604SArnaldo Carvalho de Melo return newsk; 5939f1d2604SArnaldo Carvalho de Melo } 5949f1d2604SArnaldo Carvalho de Melo 5959f1d2604SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_clone); 596a019d6feSArnaldo Carvalho de Melo 597a019d6feSArnaldo Carvalho de Melo /* 598a019d6feSArnaldo Carvalho de Melo * At this point, there should be no process reference to this 599a019d6feSArnaldo Carvalho de Melo * socket, and thus no user references at all. Therefore we 600a019d6feSArnaldo Carvalho de Melo * can assume the socket waitqueue is inactive and nobody will 601a019d6feSArnaldo Carvalho de Melo * try to jump onto it. 602a019d6feSArnaldo Carvalho de Melo */ 603a019d6feSArnaldo Carvalho de Melo void inet_csk_destroy_sock(struct sock *sk) 604a019d6feSArnaldo Carvalho de Melo { 605547b792cSIlpo Järvinen WARN_ON(sk->sk_state != TCP_CLOSE); 606547b792cSIlpo Järvinen WARN_ON(!sock_flag(sk, SOCK_DEAD)); 607a019d6feSArnaldo Carvalho de Melo 608a019d6feSArnaldo Carvalho de Melo /* It cannot be in hash table! */ 609547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk)); 610a019d6feSArnaldo Carvalho de Melo 611c720c7e8SEric Dumazet /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ 612c720c7e8SEric Dumazet WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); 613a019d6feSArnaldo Carvalho de Melo 614a019d6feSArnaldo Carvalho de Melo sk->sk_prot->destroy(sk); 615a019d6feSArnaldo Carvalho de Melo 616a019d6feSArnaldo Carvalho de Melo sk_stream_kill_queues(sk); 617a019d6feSArnaldo Carvalho de Melo 618a019d6feSArnaldo Carvalho de Melo xfrm_sk_free_policy(sk); 619a019d6feSArnaldo Carvalho de Melo 620a019d6feSArnaldo Carvalho de Melo sk_refcnt_debug_release(sk); 621a019d6feSArnaldo Carvalho de Melo 622dd24c001SEric Dumazet percpu_counter_dec(sk->sk_prot->orphan_count); 623a019d6feSArnaldo Carvalho de Melo sock_put(sk); 624a019d6feSArnaldo Carvalho de Melo } 625a019d6feSArnaldo Carvalho de Melo 626a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_csk_destroy_sock); 627a019d6feSArnaldo Carvalho de Melo 628a019d6feSArnaldo Carvalho de Melo int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) 629a019d6feSArnaldo Carvalho de Melo { 630a019d6feSArnaldo Carvalho de Melo struct inet_sock *inet = inet_sk(sk); 631a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 632a019d6feSArnaldo Carvalho de Melo int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); 633a019d6feSArnaldo Carvalho de Melo 634a019d6feSArnaldo Carvalho de Melo if (rc != 0) 635a019d6feSArnaldo Carvalho de Melo return rc; 636a019d6feSArnaldo Carvalho de Melo 637a019d6feSArnaldo Carvalho de Melo sk->sk_max_ack_backlog = 0; 638a019d6feSArnaldo Carvalho de Melo sk->sk_ack_backlog = 0; 639a019d6feSArnaldo Carvalho de Melo inet_csk_delack_init(sk); 640a019d6feSArnaldo Carvalho de Melo 641a019d6feSArnaldo Carvalho de Melo /* There is race window here: we announce ourselves listening, 642a019d6feSArnaldo Carvalho de Melo * but this transition is still not validated by get_port(). 643a019d6feSArnaldo Carvalho de Melo * It is OK, because this socket enters to hash table only 644a019d6feSArnaldo Carvalho de Melo * after validation is complete. 645a019d6feSArnaldo Carvalho de Melo */ 646a019d6feSArnaldo Carvalho de Melo sk->sk_state = TCP_LISTEN; 647c720c7e8SEric Dumazet if (!sk->sk_prot->get_port(sk, inet->inet_num)) { 648c720c7e8SEric Dumazet inet->inet_sport = htons(inet->inet_num); 649a019d6feSArnaldo Carvalho de Melo 650a019d6feSArnaldo Carvalho de Melo sk_dst_reset(sk); 651a019d6feSArnaldo Carvalho de Melo sk->sk_prot->hash(sk); 652a019d6feSArnaldo Carvalho de Melo 653a019d6feSArnaldo Carvalho de Melo return 0; 654a019d6feSArnaldo Carvalho de Melo } 655a019d6feSArnaldo Carvalho de Melo 656a019d6feSArnaldo Carvalho de Melo sk->sk_state = TCP_CLOSE; 657a019d6feSArnaldo Carvalho de Melo __reqsk_queue_destroy(&icsk->icsk_accept_queue); 658a019d6feSArnaldo Carvalho de Melo return -EADDRINUSE; 659a019d6feSArnaldo Carvalho de Melo } 660a019d6feSArnaldo Carvalho de Melo 661a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_start); 662a019d6feSArnaldo Carvalho de Melo 663a019d6feSArnaldo Carvalho de Melo /* 664a019d6feSArnaldo Carvalho de Melo * This routine closes sockets which have been at least partially 665a019d6feSArnaldo Carvalho de Melo * opened, but not yet accepted. 666a019d6feSArnaldo Carvalho de Melo */ 667a019d6feSArnaldo Carvalho de Melo void inet_csk_listen_stop(struct sock *sk) 668a019d6feSArnaldo Carvalho de Melo { 669a019d6feSArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 670a019d6feSArnaldo Carvalho de Melo struct request_sock *acc_req; 671a019d6feSArnaldo Carvalho de Melo struct request_sock *req; 672a019d6feSArnaldo Carvalho de Melo 673a019d6feSArnaldo Carvalho de Melo inet_csk_delete_keepalive_timer(sk); 674a019d6feSArnaldo Carvalho de Melo 675a019d6feSArnaldo Carvalho de Melo /* make all the listen_opt local to us */ 676a019d6feSArnaldo Carvalho de Melo acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); 677a019d6feSArnaldo Carvalho de Melo 678a019d6feSArnaldo Carvalho de Melo /* Following specs, it would be better either to send FIN 679a019d6feSArnaldo Carvalho de Melo * (and enter FIN-WAIT-1, it is normal close) 680a019d6feSArnaldo Carvalho de Melo * or to send active reset (abort). 681a019d6feSArnaldo Carvalho de Melo * Certainly, it is pretty dangerous while synflood, but it is 682a019d6feSArnaldo Carvalho de Melo * bad justification for our negligence 8) 683a019d6feSArnaldo Carvalho de Melo * To be honest, we are not able to make either 684a019d6feSArnaldo Carvalho de Melo * of the variants now. --ANK 685a019d6feSArnaldo Carvalho de Melo */ 686a019d6feSArnaldo Carvalho de Melo reqsk_queue_destroy(&icsk->icsk_accept_queue); 687a019d6feSArnaldo Carvalho de Melo 688a019d6feSArnaldo Carvalho de Melo while ((req = acc_req) != NULL) { 689a019d6feSArnaldo Carvalho de Melo struct sock *child = req->sk; 690a019d6feSArnaldo Carvalho de Melo 691a019d6feSArnaldo Carvalho de Melo acc_req = req->dl_next; 692a019d6feSArnaldo Carvalho de Melo 693a019d6feSArnaldo Carvalho de Melo local_bh_disable(); 694a019d6feSArnaldo Carvalho de Melo bh_lock_sock(child); 695547b792cSIlpo Järvinen WARN_ON(sock_owned_by_user(child)); 696a019d6feSArnaldo Carvalho de Melo sock_hold(child); 697a019d6feSArnaldo Carvalho de Melo 698a019d6feSArnaldo Carvalho de Melo sk->sk_prot->disconnect(child, O_NONBLOCK); 699a019d6feSArnaldo Carvalho de Melo 700a019d6feSArnaldo Carvalho de Melo sock_orphan(child); 701a019d6feSArnaldo Carvalho de Melo 702eb4dea58SHerbert Xu percpu_counter_inc(sk->sk_prot->orphan_count); 703eb4dea58SHerbert Xu 704a019d6feSArnaldo Carvalho de Melo inet_csk_destroy_sock(child); 705a019d6feSArnaldo Carvalho de Melo 706a019d6feSArnaldo Carvalho de Melo bh_unlock_sock(child); 707a019d6feSArnaldo Carvalho de Melo local_bh_enable(); 708a019d6feSArnaldo Carvalho de Melo sock_put(child); 709a019d6feSArnaldo Carvalho de Melo 710a019d6feSArnaldo Carvalho de Melo sk_acceptq_removed(sk); 711a019d6feSArnaldo Carvalho de Melo __reqsk_free(req); 712a019d6feSArnaldo Carvalho de Melo } 713547b792cSIlpo Järvinen WARN_ON(sk->sk_ack_backlog); 714a019d6feSArnaldo Carvalho de Melo } 715a019d6feSArnaldo Carvalho de Melo 716a019d6feSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 717af05dc93SArnaldo Carvalho de Melo 718af05dc93SArnaldo Carvalho de Melo void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 719af05dc93SArnaldo Carvalho de Melo { 720af05dc93SArnaldo Carvalho de Melo struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; 721af05dc93SArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 722af05dc93SArnaldo Carvalho de Melo 723af05dc93SArnaldo Carvalho de Melo sin->sin_family = AF_INET; 724c720c7e8SEric Dumazet sin->sin_addr.s_addr = inet->inet_daddr; 725c720c7e8SEric Dumazet sin->sin_port = inet->inet_dport; 726af05dc93SArnaldo Carvalho de Melo } 727af05dc93SArnaldo Carvalho de Melo 728af05dc93SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 729c4d93909SArnaldo Carvalho de Melo 730dec73ff0SArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 731dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, 732dec73ff0SArnaldo Carvalho de Melo char __user *optval, int __user *optlen) 733dec73ff0SArnaldo Carvalho de Melo { 734dbeff12bSDavid S. Miller const struct inet_connection_sock *icsk = inet_csk(sk); 735dec73ff0SArnaldo Carvalho de Melo 736dec73ff0SArnaldo Carvalho de Melo if (icsk->icsk_af_ops->compat_getsockopt != NULL) 737dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname, 738dec73ff0SArnaldo Carvalho de Melo optval, optlen); 739dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->getsockopt(sk, level, optname, 740dec73ff0SArnaldo Carvalho de Melo optval, optlen); 741dec73ff0SArnaldo Carvalho de Melo } 742dec73ff0SArnaldo Carvalho de Melo 743dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); 744dec73ff0SArnaldo Carvalho de Melo 745dec73ff0SArnaldo Carvalho de Melo int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, 746b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 747dec73ff0SArnaldo Carvalho de Melo { 748dbeff12bSDavid S. Miller const struct inet_connection_sock *icsk = inet_csk(sk); 749dec73ff0SArnaldo Carvalho de Melo 750dec73ff0SArnaldo Carvalho de Melo if (icsk->icsk_af_ops->compat_setsockopt != NULL) 751dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname, 752dec73ff0SArnaldo Carvalho de Melo optval, optlen); 753dec73ff0SArnaldo Carvalho de Melo return icsk->icsk_af_ops->setsockopt(sk, level, optname, 754dec73ff0SArnaldo Carvalho de Melo optval, optlen); 755dec73ff0SArnaldo Carvalho de Melo } 756dec73ff0SArnaldo Carvalho de Melo 757dec73ff0SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); 758dec73ff0SArnaldo Carvalho de Melo #endif 759