177d8bf9cSArnaldo Carvalho de Melo /* 277d8bf9cSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 377d8bf9cSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 477d8bf9cSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 577d8bf9cSArnaldo Carvalho de Melo * 677d8bf9cSArnaldo Carvalho de Melo * Generic INET transport hashtables 777d8bf9cSArnaldo Carvalho de Melo * 877d8bf9cSArnaldo Carvalho de Melo * Authors: Lotsa people, from code originally in tcp 977d8bf9cSArnaldo Carvalho de Melo * 1077d8bf9cSArnaldo Carvalho de Melo * This program is free software; you can redistribute it and/or 1177d8bf9cSArnaldo Carvalho de Melo * modify it under the terms of the GNU General Public License 1277d8bf9cSArnaldo Carvalho de Melo * as published by the Free Software Foundation; either version 1377d8bf9cSArnaldo Carvalho de Melo * 2 of the License, or (at your option) any later version. 1477d8bf9cSArnaldo Carvalho de Melo */ 1577d8bf9cSArnaldo Carvalho de Melo 162d8c4ce5SArnaldo Carvalho de Melo #include <linux/module.h> 17a7f5e7f1SArnaldo Carvalho de Melo #include <linux/random.h> 18f3f05f70SArnaldo Carvalho de Melo #include <linux/sched.h> 1977d8bf9cSArnaldo Carvalho de Melo #include <linux/slab.h> 20f3f05f70SArnaldo Carvalho de Melo #include <linux/wait.h> 21095dc8e0SEric Dumazet #include <linux/vmalloc.h> 2257c8a661SMike Rapoport #include <linux/memblock.h> 2377d8bf9cSArnaldo Carvalho de Melo 24c125e80bSCraig Gallek #include <net/addrconf.h> 25463c84b9SArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 2677d8bf9cSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 276e5714eaSDavid S. Miller #include <net/secure_seq.h> 28a7f5e7f1SArnaldo Carvalho de Melo #include <net/ip.h> 29a04a480dSDavid Ahern #include <net/tcp.h> 30c125e80bSCraig Gallek #include <net/sock_reuseport.h> 3177d8bf9cSArnaldo Carvalho de Melo 326eada011SEric Dumazet static u32 inet_ehashfn(const struct net *net, const __be32 laddr, 3365cd8033SHannes Frederic Sowa const __u16 lport, const __be32 faddr, 3465cd8033SHannes Frederic Sowa const __be16 fport) 3565cd8033SHannes Frederic Sowa { 361bbdceefSHannes Frederic Sowa static u32 inet_ehash_secret __read_mostly; 371bbdceefSHannes Frederic Sowa 381bbdceefSHannes Frederic Sowa net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); 391bbdceefSHannes Frederic Sowa 4065cd8033SHannes Frederic Sowa return __inet_ehashfn(laddr, lport, faddr, fport, 4165cd8033SHannes Frederic Sowa inet_ehash_secret + net_hash_mix(net)); 4265cd8033SHannes Frederic Sowa } 4365cd8033SHannes Frederic Sowa 44d1e559d0SEric Dumazet /* This function handles inet_sock, but also timewait and request sockets 45d1e559d0SEric Dumazet * for IPv4/IPv6. 46d1e559d0SEric Dumazet */ 47784c372aSEric Dumazet static u32 sk_ehashfn(const struct sock *sk) 4865cd8033SHannes Frederic Sowa { 49d1e559d0SEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 50d1e559d0SEric Dumazet if (sk->sk_family == AF_INET6 && 51d1e559d0SEric Dumazet !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) 52d1e559d0SEric Dumazet return inet6_ehashfn(sock_net(sk), 53d1e559d0SEric Dumazet &sk->sk_v6_rcv_saddr, sk->sk_num, 54d1e559d0SEric Dumazet &sk->sk_v6_daddr, sk->sk_dport); 55d1e559d0SEric Dumazet #endif 565b441f76SEric Dumazet return inet_ehashfn(sock_net(sk), 575b441f76SEric Dumazet sk->sk_rcv_saddr, sk->sk_num, 585b441f76SEric Dumazet sk->sk_daddr, sk->sk_dport); 5965cd8033SHannes Frederic Sowa } 6065cd8033SHannes Frederic Sowa 6177d8bf9cSArnaldo Carvalho de Melo /* 6277d8bf9cSArnaldo Carvalho de Melo * Allocate and initialize a new local port bind bucket. 6377d8bf9cSArnaldo Carvalho de Melo * The bindhash mutex for snum's hash chain must be held here. 6477d8bf9cSArnaldo Carvalho de Melo */ 65e18b890bSChristoph Lameter struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, 66941b1d22SPavel Emelyanov struct net *net, 6777d8bf9cSArnaldo Carvalho de Melo struct inet_bind_hashbucket *head, 68*3c82a21fSRobert Shearman const unsigned short snum, 69*3c82a21fSRobert Shearman int l3mdev) 7077d8bf9cSArnaldo Carvalho de Melo { 7154e6ecb2SChristoph Lameter struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); 7277d8bf9cSArnaldo Carvalho de Melo 7300db4124SIan Morris if (tb) { 74efd7ef1cSEric W. Biederman write_pnet(&tb->ib_net, net); 75*3c82a21fSRobert Shearman tb->l3mdev = l3mdev; 7677d8bf9cSArnaldo Carvalho de Melo tb->port = snum; 7777d8bf9cSArnaldo Carvalho de Melo tb->fastreuse = 0; 78da5e3630STom Herbert tb->fastreuseport = 0; 7977d8bf9cSArnaldo Carvalho de Melo INIT_HLIST_HEAD(&tb->owners); 8077d8bf9cSArnaldo Carvalho de Melo hlist_add_head(&tb->node, &head->chain); 8177d8bf9cSArnaldo Carvalho de Melo } 8277d8bf9cSArnaldo Carvalho de Melo return tb; 8377d8bf9cSArnaldo Carvalho de Melo } 8477d8bf9cSArnaldo Carvalho de Melo 8577d8bf9cSArnaldo Carvalho de Melo /* 8677d8bf9cSArnaldo Carvalho de Melo * Caller must hold hashbucket lock for this tb with local BH disabled 8777d8bf9cSArnaldo Carvalho de Melo */ 88e18b890bSChristoph Lameter void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) 8977d8bf9cSArnaldo Carvalho de Melo { 9077d8bf9cSArnaldo Carvalho de Melo if (hlist_empty(&tb->owners)) { 9177d8bf9cSArnaldo Carvalho de Melo __hlist_del(&tb->node); 9277d8bf9cSArnaldo Carvalho de Melo kmem_cache_free(cachep, tb); 9377d8bf9cSArnaldo Carvalho de Melo } 9477d8bf9cSArnaldo Carvalho de Melo } 952d8c4ce5SArnaldo Carvalho de Melo 962d8c4ce5SArnaldo Carvalho de Melo void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, 972d8c4ce5SArnaldo Carvalho de Melo const unsigned short snum) 982d8c4ce5SArnaldo Carvalho de Melo { 99c720c7e8SEric Dumazet inet_sk(sk)->inet_num = snum; 1002d8c4ce5SArnaldo Carvalho de Melo sk_add_bind_node(sk, &tb->owners); 101463c84b9SArnaldo Carvalho de Melo inet_csk(sk)->icsk_bind_hash = tb; 1022d8c4ce5SArnaldo Carvalho de Melo } 1032d8c4ce5SArnaldo Carvalho de Melo 1042d8c4ce5SArnaldo Carvalho de Melo /* 1052d8c4ce5SArnaldo Carvalho de Melo * Get rid of any references to a local port held by the given sock. 1062d8c4ce5SArnaldo Carvalho de Melo */ 107ab1e0a13SArnaldo Carvalho de Melo static void __inet_put_port(struct sock *sk) 1082d8c4ce5SArnaldo Carvalho de Melo { 10939d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 110c720c7e8SEric Dumazet const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num, 1117f635ab7SPavel Emelyanov hashinfo->bhash_size); 1122d8c4ce5SArnaldo Carvalho de Melo struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 1132d8c4ce5SArnaldo Carvalho de Melo struct inet_bind_bucket *tb; 1142d8c4ce5SArnaldo Carvalho de Melo 1152d8c4ce5SArnaldo Carvalho de Melo spin_lock(&head->lock); 116463c84b9SArnaldo Carvalho de Melo tb = inet_csk(sk)->icsk_bind_hash; 1172d8c4ce5SArnaldo Carvalho de Melo __sk_del_bind_node(sk); 118463c84b9SArnaldo Carvalho de Melo inet_csk(sk)->icsk_bind_hash = NULL; 119c720c7e8SEric Dumazet inet_sk(sk)->inet_num = 0; 1202d8c4ce5SArnaldo Carvalho de Melo inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 1212d8c4ce5SArnaldo Carvalho de Melo spin_unlock(&head->lock); 1222d8c4ce5SArnaldo Carvalho de Melo } 1232d8c4ce5SArnaldo Carvalho de Melo 124ab1e0a13SArnaldo Carvalho de Melo void inet_put_port(struct sock *sk) 1252d8c4ce5SArnaldo Carvalho de Melo { 1262d8c4ce5SArnaldo Carvalho de Melo local_bh_disable(); 127ab1e0a13SArnaldo Carvalho de Melo __inet_put_port(sk); 1282d8c4ce5SArnaldo Carvalho de Melo local_bh_enable(); 1292d8c4ce5SArnaldo Carvalho de Melo } 1302d8c4ce5SArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_put_port); 131f3f05f70SArnaldo Carvalho de Melo 1321ce31c9eSEric Dumazet int __inet_inherit_port(const struct sock *sk, struct sock *child) 13353083773SPavel Emelyanov { 13453083773SPavel Emelyanov struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; 135093d2823SBalazs Scheidler unsigned short port = inet_sk(child)->inet_num; 136093d2823SBalazs Scheidler const int bhash = inet_bhashfn(sock_net(sk), port, 1377f635ab7SPavel Emelyanov table->bhash_size); 13853083773SPavel Emelyanov struct inet_bind_hashbucket *head = &table->bhash[bhash]; 13953083773SPavel Emelyanov struct inet_bind_bucket *tb; 140*3c82a21fSRobert Shearman int l3mdev; 14153083773SPavel Emelyanov 14253083773SPavel Emelyanov spin_lock(&head->lock); 14353083773SPavel Emelyanov tb = inet_csk(sk)->icsk_bind_hash; 144c2f34a65SEric Dumazet if (unlikely(!tb)) { 145c2f34a65SEric Dumazet spin_unlock(&head->lock); 146c2f34a65SEric Dumazet return -ENOENT; 147c2f34a65SEric Dumazet } 148093d2823SBalazs Scheidler if (tb->port != port) { 149*3c82a21fSRobert Shearman l3mdev = inet_sk_bound_l3mdev(sk); 150*3c82a21fSRobert Shearman 151093d2823SBalazs Scheidler /* NOTE: using tproxy and redirecting skbs to a proxy 152093d2823SBalazs Scheidler * on a different listener port breaks the assumption 153093d2823SBalazs Scheidler * that the listener socket's icsk_bind_hash is the same 154093d2823SBalazs Scheidler * as that of the child socket. We have to look up or 155093d2823SBalazs Scheidler * create a new bind bucket for the child here. */ 156b67bfe0dSSasha Levin inet_bind_bucket_for_each(tb, &head->chain) { 157093d2823SBalazs Scheidler if (net_eq(ib_net(tb), sock_net(sk)) && 158*3c82a21fSRobert Shearman tb->l3mdev == l3mdev && tb->port == port) 159093d2823SBalazs Scheidler break; 160093d2823SBalazs Scheidler } 161b67bfe0dSSasha Levin if (!tb) { 162093d2823SBalazs Scheidler tb = inet_bind_bucket_create(table->bind_bucket_cachep, 163*3c82a21fSRobert Shearman sock_net(sk), head, port, 164*3c82a21fSRobert Shearman l3mdev); 165093d2823SBalazs Scheidler if (!tb) { 166093d2823SBalazs Scheidler spin_unlock(&head->lock); 167093d2823SBalazs Scheidler return -ENOMEM; 168093d2823SBalazs Scheidler } 169093d2823SBalazs Scheidler } 170093d2823SBalazs Scheidler } 171b4ff3c90SNagendra Tomar inet_bind_hash(child, tb, port); 17253083773SPavel Emelyanov spin_unlock(&head->lock); 173093d2823SBalazs Scheidler 174093d2823SBalazs Scheidler return 0; 17553083773SPavel Emelyanov } 17653083773SPavel Emelyanov EXPORT_SYMBOL_GPL(__inet_inherit_port); 17753083773SPavel Emelyanov 17861b7c691SMartin KaFai Lau static struct inet_listen_hashbucket * 17961b7c691SMartin KaFai Lau inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk) 18061b7c691SMartin KaFai Lau { 18161b7c691SMartin KaFai Lau u32 hash; 18261b7c691SMartin KaFai Lau 18361b7c691SMartin KaFai Lau #if IS_ENABLED(CONFIG_IPV6) 18461b7c691SMartin KaFai Lau if (sk->sk_family == AF_INET6) 18561b7c691SMartin KaFai Lau hash = ipv6_portaddr_hash(sock_net(sk), 18661b7c691SMartin KaFai Lau &sk->sk_v6_rcv_saddr, 18761b7c691SMartin KaFai Lau inet_sk(sk)->inet_num); 18861b7c691SMartin KaFai Lau else 18961b7c691SMartin KaFai Lau #endif 19061b7c691SMartin KaFai Lau hash = ipv4_portaddr_hash(sock_net(sk), 19161b7c691SMartin KaFai Lau inet_sk(sk)->inet_rcv_saddr, 19261b7c691SMartin KaFai Lau inet_sk(sk)->inet_num); 19361b7c691SMartin KaFai Lau return inet_lhash2_bucket(h, hash); 19461b7c691SMartin KaFai Lau } 19561b7c691SMartin KaFai Lau 19661b7c691SMartin KaFai Lau static void inet_hash2(struct inet_hashinfo *h, struct sock *sk) 19761b7c691SMartin KaFai Lau { 19861b7c691SMartin KaFai Lau struct inet_listen_hashbucket *ilb2; 19961b7c691SMartin KaFai Lau 20061b7c691SMartin KaFai Lau if (!h->lhash2) 20161b7c691SMartin KaFai Lau return; 20261b7c691SMartin KaFai Lau 20361b7c691SMartin KaFai Lau ilb2 = inet_lhash2_bucket_sk(h, sk); 20461b7c691SMartin KaFai Lau 20561b7c691SMartin KaFai Lau spin_lock(&ilb2->lock); 20661b7c691SMartin KaFai Lau if (sk->sk_reuseport && sk->sk_family == AF_INET6) 20761b7c691SMartin KaFai Lau hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, 20861b7c691SMartin KaFai Lau &ilb2->head); 20961b7c691SMartin KaFai Lau else 21061b7c691SMartin KaFai Lau hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, 21161b7c691SMartin KaFai Lau &ilb2->head); 21261b7c691SMartin KaFai Lau ilb2->count++; 21361b7c691SMartin KaFai Lau spin_unlock(&ilb2->lock); 21461b7c691SMartin KaFai Lau } 21561b7c691SMartin KaFai Lau 21661b7c691SMartin KaFai Lau static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk) 21761b7c691SMartin KaFai Lau { 21861b7c691SMartin KaFai Lau struct inet_listen_hashbucket *ilb2; 21961b7c691SMartin KaFai Lau 22061b7c691SMartin KaFai Lau if (!h->lhash2 || 22161b7c691SMartin KaFai Lau WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node))) 22261b7c691SMartin KaFai Lau return; 22361b7c691SMartin KaFai Lau 22461b7c691SMartin KaFai Lau ilb2 = inet_lhash2_bucket_sk(h, sk); 22561b7c691SMartin KaFai Lau 22661b7c691SMartin KaFai Lau spin_lock(&ilb2->lock); 22761b7c691SMartin KaFai Lau hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node); 22861b7c691SMartin KaFai Lau ilb2->count--; 22961b7c691SMartin KaFai Lau spin_unlock(&ilb2->lock); 23061b7c691SMartin KaFai Lau } 23161b7c691SMartin KaFai Lau 232c25eb3bfSEric Dumazet static inline int compute_score(struct sock *sk, struct net *net, 233c25eb3bfSEric Dumazet const unsigned short hnum, const __be32 daddr, 2343fa6f616SDavid Ahern const int dif, const int sdif, bool exact_dif) 235c25eb3bfSEric Dumazet { 236c25eb3bfSEric Dumazet int score = -1; 237c25eb3bfSEric Dumazet struct inet_sock *inet = inet_sk(sk); 238c25eb3bfSEric Dumazet 239c720c7e8SEric Dumazet if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && 240c25eb3bfSEric Dumazet !ipv6_only_sock(sk)) { 241c720c7e8SEric Dumazet __be32 rcv_saddr = inet->inet_rcv_saddr; 242da5e3630STom Herbert score = sk->sk_family == PF_INET ? 2 : 1; 243c25eb3bfSEric Dumazet if (rcv_saddr) { 244c25eb3bfSEric Dumazet if (rcv_saddr != daddr) 245c25eb3bfSEric Dumazet return -1; 246da5e3630STom Herbert score += 4; 247c25eb3bfSEric Dumazet } 248a04a480dSDavid Ahern if (sk->sk_bound_dev_if || exact_dif) { 2493fa6f616SDavid Ahern bool dev_match = (sk->sk_bound_dev_if == dif || 2503fa6f616SDavid Ahern sk->sk_bound_dev_if == sdif); 2513fa6f616SDavid Ahern 2528c43bd17SDavid Ahern if (!dev_match) 253c25eb3bfSEric Dumazet return -1; 2548c43bd17SDavid Ahern if (sk->sk_bound_dev_if) 255da5e3630STom Herbert score += 4; 256c25eb3bfSEric Dumazet } 25770da268bSEric Dumazet if (sk->sk_incoming_cpu == raw_smp_processor_id()) 25870da268bSEric Dumazet score++; 259c25eb3bfSEric Dumazet } 260c25eb3bfSEric Dumazet return score; 261c25eb3bfSEric Dumazet } 262c25eb3bfSEric Dumazet 263f3f05f70SArnaldo Carvalho de Melo /* 2643b24d854SEric Dumazet * Here are some nice properties to exploit here. The BSD API 2653b24d854SEric Dumazet * does not allow a listening sock to specify the remote port nor the 26633b62231SArnaldo Carvalho de Melo * remote address for the connection. So always assume those are both 26733b62231SArnaldo Carvalho de Melo * wildcarded during the search since they can never be otherwise. 26833b62231SArnaldo Carvalho de Melo */ 26933b62231SArnaldo Carvalho de Melo 2703b24d854SEric Dumazet /* called with rcu_read_lock() : No refcount taken on the socket */ 27161b7c691SMartin KaFai Lau static struct sock *inet_lhash2_lookup(struct net *net, 27261b7c691SMartin KaFai Lau struct inet_listen_hashbucket *ilb2, 27361b7c691SMartin KaFai Lau struct sk_buff *skb, int doff, 27461b7c691SMartin KaFai Lau const __be32 saddr, __be16 sport, 27561b7c691SMartin KaFai Lau const __be32 daddr, const unsigned short hnum, 27661b7c691SMartin KaFai Lau const int dif, const int sdif) 27761b7c691SMartin KaFai Lau { 27861b7c691SMartin KaFai Lau bool exact_dif = inet_exact_dif_match(net, skb); 27961b7c691SMartin KaFai Lau struct inet_connection_sock *icsk; 28061b7c691SMartin KaFai Lau struct sock *sk, *result = NULL; 28161b7c691SMartin KaFai Lau int score, hiscore = 0; 28261b7c691SMartin KaFai Lau u32 phash = 0; 28361b7c691SMartin KaFai Lau 28461b7c691SMartin KaFai Lau inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { 28561b7c691SMartin KaFai Lau sk = (struct sock *)icsk; 28661b7c691SMartin KaFai Lau score = compute_score(sk, net, hnum, daddr, 28761b7c691SMartin KaFai Lau dif, sdif, exact_dif); 28861b7c691SMartin KaFai Lau if (score > hiscore) { 28961b7c691SMartin KaFai Lau if (sk->sk_reuseport) { 29061b7c691SMartin KaFai Lau phash = inet_ehashfn(net, daddr, hnum, 29161b7c691SMartin KaFai Lau saddr, sport); 29261b7c691SMartin KaFai Lau result = reuseport_select_sock(sk, phash, 29361b7c691SMartin KaFai Lau skb, doff); 29461b7c691SMartin KaFai Lau if (result) 29561b7c691SMartin KaFai Lau return result; 29661b7c691SMartin KaFai Lau } 29761b7c691SMartin KaFai Lau result = sk; 29861b7c691SMartin KaFai Lau hiscore = score; 29961b7c691SMartin KaFai Lau } 30061b7c691SMartin KaFai Lau } 30161b7c691SMartin KaFai Lau 30261b7c691SMartin KaFai Lau return result; 30361b7c691SMartin KaFai Lau } 30461b7c691SMartin KaFai Lau 305c67499c0SPavel Emelyanov struct sock *__inet_lookup_listener(struct net *net, 306c67499c0SPavel Emelyanov struct inet_hashinfo *hashinfo, 307a583636aSCraig Gallek struct sk_buff *skb, int doff, 308da5e3630STom Herbert const __be32 saddr, __be16 sport, 309fb99c848SAl Viro const __be32 daddr, const unsigned short hnum, 3103fa6f616SDavid Ahern const int dif, const int sdif) 31199a92ff5SHerbert Xu { 312c25eb3bfSEric Dumazet unsigned int hash = inet_lhashfn(net, hnum); 313c25eb3bfSEric Dumazet struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; 314a04a480dSDavid Ahern bool exact_dif = inet_exact_dif_match(net, skb); 31561b7c691SMartin KaFai Lau struct inet_listen_hashbucket *ilb2; 3163b24d854SEric Dumazet struct sock *sk, *result = NULL; 317e94a62f5SPaolo Abeni int score, hiscore = 0; 31861b7c691SMartin KaFai Lau unsigned int hash2; 319da5e3630STom Herbert u32 phash = 0; 32099a92ff5SHerbert Xu 32161b7c691SMartin KaFai Lau if (ilb->count <= 10 || !hashinfo->lhash2) 32261b7c691SMartin KaFai Lau goto port_lookup; 32361b7c691SMartin KaFai Lau 32461b7c691SMartin KaFai Lau /* Too many sk in the ilb bucket (which is hashed by port alone). 32561b7c691SMartin KaFai Lau * Try lhash2 (which is hashed by port and addr) instead. 32661b7c691SMartin KaFai Lau */ 32761b7c691SMartin KaFai Lau 32861b7c691SMartin KaFai Lau hash2 = ipv4_portaddr_hash(net, daddr, hnum); 32961b7c691SMartin KaFai Lau ilb2 = inet_lhash2_bucket(hashinfo, hash2); 33061b7c691SMartin KaFai Lau if (ilb2->count > ilb->count) 33161b7c691SMartin KaFai Lau goto port_lookup; 33261b7c691SMartin KaFai Lau 33361b7c691SMartin KaFai Lau result = inet_lhash2_lookup(net, ilb2, skb, doff, 33461b7c691SMartin KaFai Lau saddr, sport, daddr, hnum, 33561b7c691SMartin KaFai Lau dif, sdif); 33661b7c691SMartin KaFai Lau if (result) 3378217ca65SMartin KaFai Lau goto done; 33861b7c691SMartin KaFai Lau 33961b7c691SMartin KaFai Lau /* Lookup lhash2 with INADDR_ANY */ 34061b7c691SMartin KaFai Lau 34161b7c691SMartin KaFai Lau hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 34261b7c691SMartin KaFai Lau ilb2 = inet_lhash2_bucket(hashinfo, hash2); 34361b7c691SMartin KaFai Lau if (ilb2->count > ilb->count) 34461b7c691SMartin KaFai Lau goto port_lookup; 34561b7c691SMartin KaFai Lau 3468217ca65SMartin KaFai Lau result = inet_lhash2_lookup(net, ilb2, skb, doff, 34761b7c691SMartin KaFai Lau saddr, sport, daddr, hnum, 34861b7c691SMartin KaFai Lau dif, sdif); 3498217ca65SMartin KaFai Lau goto done; 35061b7c691SMartin KaFai Lau 35161b7c691SMartin KaFai Lau port_lookup: 3523b24d854SEric Dumazet sk_for_each_rcu(sk, &ilb->head) { 3533fa6f616SDavid Ahern score = compute_score(sk, net, hnum, daddr, 3543fa6f616SDavid Ahern dif, sdif, exact_dif); 355c25eb3bfSEric Dumazet if (score > hiscore) { 356e94a62f5SPaolo Abeni if (sk->sk_reuseport) { 357da5e3630STom Herbert phash = inet_ehashfn(net, daddr, hnum, 358da5e3630STom Herbert saddr, sport); 3593b24d854SEric Dumazet result = reuseport_select_sock(sk, phash, 360c125e80bSCraig Gallek skb, doff); 3613b24d854SEric Dumazet if (result) 3628217ca65SMartin KaFai Lau goto done; 363da5e3630STom Herbert } 3643b24d854SEric Dumazet result = sk; 3653b24d854SEric Dumazet hiscore = score; 36699a92ff5SHerbert Xu } 36799a92ff5SHerbert Xu } 3688217ca65SMartin KaFai Lau done: 3698217ca65SMartin KaFai Lau if (unlikely(IS_ERR(result))) 3708217ca65SMartin KaFai Lau return NULL; 371c25eb3bfSEric Dumazet return result; 37299a92ff5SHerbert Xu } 3738f491069SHerbert Xu EXPORT_SYMBOL_GPL(__inet_lookup_listener); 374a7f5e7f1SArnaldo Carvalho de Melo 37505dbc7b5SEric Dumazet /* All sockets share common refcount, but have different destructors */ 37605dbc7b5SEric Dumazet void sock_gen_put(struct sock *sk) 37705dbc7b5SEric Dumazet { 37841c6d650SReshetova, Elena if (!refcount_dec_and_test(&sk->sk_refcnt)) 37905dbc7b5SEric Dumazet return; 38005dbc7b5SEric Dumazet 38105dbc7b5SEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 38205dbc7b5SEric Dumazet inet_twsk_free(inet_twsk(sk)); 38341b822c5SEric Dumazet else if (sk->sk_state == TCP_NEW_SYN_RECV) 38441b822c5SEric Dumazet reqsk_free(inet_reqsk(sk)); 38505dbc7b5SEric Dumazet else 38605dbc7b5SEric Dumazet sk_free(sk); 38705dbc7b5SEric Dumazet } 38805dbc7b5SEric Dumazet EXPORT_SYMBOL_GPL(sock_gen_put); 38905dbc7b5SEric Dumazet 3902c13270bSEric Dumazet void sock_edemux(struct sk_buff *skb) 3912c13270bSEric Dumazet { 3922c13270bSEric Dumazet sock_gen_put(skb->sk); 3932c13270bSEric Dumazet } 3942c13270bSEric Dumazet EXPORT_SYMBOL(sock_edemux); 3952c13270bSEric Dumazet 396c67499c0SPavel Emelyanov struct sock *__inet_lookup_established(struct net *net, 397c67499c0SPavel Emelyanov struct inet_hashinfo *hashinfo, 39877a5ba55SPavel Emelyanov const __be32 saddr, const __be16 sport, 39977a5ba55SPavel Emelyanov const __be32 daddr, const u16 hnum, 4003fa6f616SDavid Ahern const int dif, const int sdif) 40177a5ba55SPavel Emelyanov { 402c7228317SJoe Perches INET_ADDR_COOKIE(acookie, saddr, daddr); 40377a5ba55SPavel Emelyanov const __portpair ports = INET_COMBINED_PORTS(sport, hnum); 40477a5ba55SPavel Emelyanov struct sock *sk; 4053ab5aee7SEric Dumazet const struct hlist_nulls_node *node; 40677a5ba55SPavel Emelyanov /* Optimize here for direct hit, only listening connections can 40777a5ba55SPavel Emelyanov * have wildcards anyways. 40877a5ba55SPavel Emelyanov */ 4099f26b3adSPavel Emelyanov unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); 410f373b53bSEric Dumazet unsigned int slot = hash & hashinfo->ehash_mask; 4113ab5aee7SEric Dumazet struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; 41277a5ba55SPavel Emelyanov 4133ab5aee7SEric Dumazet begin: 4143ab5aee7SEric Dumazet sk_nulls_for_each_rcu(sk, node, &head->chain) { 415ce43b03eSEric Dumazet if (sk->sk_hash != hash) 416ce43b03eSEric Dumazet continue; 417ce43b03eSEric Dumazet if (likely(INET_MATCH(sk, net, acookie, 4183fa6f616SDavid Ahern saddr, daddr, ports, dif, sdif))) { 41941c6d650SReshetova, Elena if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) 42005dbc7b5SEric Dumazet goto out; 421ce43b03eSEric Dumazet if (unlikely(!INET_MATCH(sk, net, acookie, 4223fa6f616SDavid Ahern saddr, daddr, ports, 4233fa6f616SDavid Ahern dif, sdif))) { 42405dbc7b5SEric Dumazet sock_gen_put(sk); 4253ab5aee7SEric Dumazet goto begin; 42677a5ba55SPavel Emelyanov } 42705dbc7b5SEric Dumazet goto found; 4283ab5aee7SEric Dumazet } 4293ab5aee7SEric Dumazet } 4303ab5aee7SEric Dumazet /* 4313ab5aee7SEric Dumazet * if the nulls value we got at the end of this lookup is 4323ab5aee7SEric Dumazet * not the expected one, we must restart lookup. 4333ab5aee7SEric Dumazet * We probably met an item that was moved to another chain. 4343ab5aee7SEric Dumazet */ 4353ab5aee7SEric Dumazet if (get_nulls_value(node) != slot) 4363ab5aee7SEric Dumazet goto begin; 43777a5ba55SPavel Emelyanov out: 43805dbc7b5SEric Dumazet sk = NULL; 43905dbc7b5SEric Dumazet found: 44077a5ba55SPavel Emelyanov return sk; 44177a5ba55SPavel Emelyanov } 44277a5ba55SPavel Emelyanov EXPORT_SYMBOL_GPL(__inet_lookup_established); 44377a5ba55SPavel Emelyanov 444a7f5e7f1SArnaldo Carvalho de Melo /* called with local bh disabled */ 445a7f5e7f1SArnaldo Carvalho de Melo static int __inet_check_established(struct inet_timewait_death_row *death_row, 446a7f5e7f1SArnaldo Carvalho de Melo struct sock *sk, __u16 lport, 447a7f5e7f1SArnaldo Carvalho de Melo struct inet_timewait_sock **twp) 448a7f5e7f1SArnaldo Carvalho de Melo { 449a7f5e7f1SArnaldo Carvalho de Melo struct inet_hashinfo *hinfo = death_row->hashinfo; 450a7f5e7f1SArnaldo Carvalho de Melo struct inet_sock *inet = inet_sk(sk); 451c720c7e8SEric Dumazet __be32 daddr = inet->inet_rcv_saddr; 452c720c7e8SEric Dumazet __be32 saddr = inet->inet_daddr; 453a7f5e7f1SArnaldo Carvalho de Melo int dif = sk->sk_bound_dev_if; 4543fa6f616SDavid Ahern struct net *net = sock_net(sk); 4553fa6f616SDavid Ahern int sdif = l3mdev_master_ifindex_by_index(net, dif); 456c7228317SJoe Perches INET_ADDR_COOKIE(acookie, saddr, daddr); 457c720c7e8SEric Dumazet const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); 458c720c7e8SEric Dumazet unsigned int hash = inet_ehashfn(net, daddr, lport, 459c720c7e8SEric Dumazet saddr, inet->inet_dport); 460a7f5e7f1SArnaldo Carvalho de Melo struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 4619db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(hinfo, hash); 462a7f5e7f1SArnaldo Carvalho de Melo struct sock *sk2; 4633ab5aee7SEric Dumazet const struct hlist_nulls_node *node; 46405dbc7b5SEric Dumazet struct inet_timewait_sock *tw = NULL; 465a7f5e7f1SArnaldo Carvalho de Melo 4669db66bdcSEric Dumazet spin_lock(lock); 467a7f5e7f1SArnaldo Carvalho de Melo 4683ab5aee7SEric Dumazet sk_nulls_for_each(sk2, node, &head->chain) { 469ce43b03eSEric Dumazet if (sk2->sk_hash != hash) 470ce43b03eSEric Dumazet continue; 47105dbc7b5SEric Dumazet 472ce43b03eSEric Dumazet if (likely(INET_MATCH(sk2, net, acookie, 4733fa6f616SDavid Ahern saddr, daddr, ports, dif, sdif))) { 47405dbc7b5SEric Dumazet if (sk2->sk_state == TCP_TIME_WAIT) { 47505dbc7b5SEric Dumazet tw = inet_twsk(sk2); 47605dbc7b5SEric Dumazet if (twsk_unique(sk, sk2, twp)) 47705dbc7b5SEric Dumazet break; 47805dbc7b5SEric Dumazet } 479a7f5e7f1SArnaldo Carvalho de Melo goto not_unique; 480a7f5e7f1SArnaldo Carvalho de Melo } 48105dbc7b5SEric Dumazet } 482a7f5e7f1SArnaldo Carvalho de Melo 483a7f5e7f1SArnaldo Carvalho de Melo /* Must record num and sport now. Otherwise we will see 48405dbc7b5SEric Dumazet * in hash table socket with a funny identity. 48505dbc7b5SEric Dumazet */ 486c720c7e8SEric Dumazet inet->inet_num = lport; 487c720c7e8SEric Dumazet inet->inet_sport = htons(lport); 488a7f5e7f1SArnaldo Carvalho de Melo sk->sk_hash = hash; 489547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk)); 4903ab5aee7SEric Dumazet __sk_nulls_add_node_rcu(sk, &head->chain); 49113475a30SEric Dumazet if (tw) { 492fc01538fSEric Dumazet sk_nulls_del_node_init_rcu((struct sock *)tw); 49302a1d6e7SEric Dumazet __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); 49413475a30SEric Dumazet } 4959db66bdcSEric Dumazet spin_unlock(lock); 496c29a0bc4SPavel Emelyanov sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 497a7f5e7f1SArnaldo Carvalho de Melo 498a7f5e7f1SArnaldo Carvalho de Melo if (twp) { 499a7f5e7f1SArnaldo Carvalho de Melo *twp = tw; 500a7f5e7f1SArnaldo Carvalho de Melo } else if (tw) { 501a7f5e7f1SArnaldo Carvalho de Melo /* Silly. Should hash-dance instead... */ 502dbe7faa4SEric Dumazet inet_twsk_deschedule_put(tw); 503a7f5e7f1SArnaldo Carvalho de Melo } 504a7f5e7f1SArnaldo Carvalho de Melo return 0; 505a7f5e7f1SArnaldo Carvalho de Melo 506a7f5e7f1SArnaldo Carvalho de Melo not_unique: 5079db66bdcSEric Dumazet spin_unlock(lock); 508a7f5e7f1SArnaldo Carvalho de Melo return -EADDRNOTAVAIL; 509a7f5e7f1SArnaldo Carvalho de Melo } 510a7f5e7f1SArnaldo Carvalho de Melo 511e2baad9eSEric Dumazet static u32 inet_sk_port_offset(const struct sock *sk) 512a7f5e7f1SArnaldo Carvalho de Melo { 513a7f5e7f1SArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 514e2baad9eSEric Dumazet 515c720c7e8SEric Dumazet return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr, 516c720c7e8SEric Dumazet inet->inet_daddr, 517c720c7e8SEric Dumazet inet->inet_dport); 518a7f5e7f1SArnaldo Carvalho de Melo } 519a7f5e7f1SArnaldo Carvalho de Melo 520079096f1SEric Dumazet /* insert a socket into ehash, and eventually remove another one 521079096f1SEric Dumazet * (The another one can be a SYN_RECV or TIMEWAIT 522079096f1SEric Dumazet */ 5235e0724d0SEric Dumazet bool inet_ehash_insert(struct sock *sk, struct sock *osk) 524152da81dSPavel Emelyanov { 52539d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 5263ab5aee7SEric Dumazet struct hlist_nulls_head *list; 527152da81dSPavel Emelyanov struct inet_ehash_bucket *head; 5285b441f76SEric Dumazet spinlock_t *lock; 5295e0724d0SEric Dumazet bool ret = true; 530152da81dSPavel Emelyanov 531079096f1SEric Dumazet WARN_ON_ONCE(!sk_unhashed(sk)); 532152da81dSPavel Emelyanov 5335b441f76SEric Dumazet sk->sk_hash = sk_ehashfn(sk); 534152da81dSPavel Emelyanov head = inet_ehash_bucket(hashinfo, sk->sk_hash); 535152da81dSPavel Emelyanov list = &head->chain; 536152da81dSPavel Emelyanov lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 537152da81dSPavel Emelyanov 5389db66bdcSEric Dumazet spin_lock(lock); 539fc01538fSEric Dumazet if (osk) { 5405e0724d0SEric Dumazet WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); 5415e0724d0SEric Dumazet ret = sk_nulls_del_node_init_rcu(osk); 5429327f705SEric Dumazet } 5435e0724d0SEric Dumazet if (ret) 5445e0724d0SEric Dumazet __sk_nulls_add_node_rcu(sk, list); 5459db66bdcSEric Dumazet spin_unlock(lock); 546079096f1SEric Dumazet return ret; 547079096f1SEric Dumazet } 548079096f1SEric Dumazet 5495e0724d0SEric Dumazet bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) 550079096f1SEric Dumazet { 5515e0724d0SEric Dumazet bool ok = inet_ehash_insert(sk, osk); 5525e0724d0SEric Dumazet 5535e0724d0SEric Dumazet if (ok) { 554c29a0bc4SPavel Emelyanov sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 5555e0724d0SEric Dumazet } else { 5565e0724d0SEric Dumazet percpu_counter_inc(sk->sk_prot->orphan_count); 557563e0bb0SYafang Shao inet_sk_set_state(sk, TCP_CLOSE); 5585e0724d0SEric Dumazet sock_set_flag(sk, SOCK_DEAD); 5595e0724d0SEric Dumazet inet_csk_destroy_sock(sk); 560152da81dSPavel Emelyanov } 5615e0724d0SEric Dumazet return ok; 5625e0724d0SEric Dumazet } 5635e0724d0SEric Dumazet EXPORT_SYMBOL_GPL(inet_ehash_nolisten); 564152da81dSPavel Emelyanov 565c125e80bSCraig Gallek static int inet_reuseport_add_sock(struct sock *sk, 566fe38d2a1SJosef Bacik struct inet_listen_hashbucket *ilb) 567c125e80bSCraig Gallek { 56890e5d0dbSCraig Gallek struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; 569c125e80bSCraig Gallek struct sock *sk2; 570c125e80bSCraig Gallek kuid_t uid = sock_i_uid(sk); 571c125e80bSCraig Gallek 57285017869SEric Dumazet sk_for_each_rcu(sk2, &ilb->head) { 573c125e80bSCraig Gallek if (sk2 != sk && 574c125e80bSCraig Gallek sk2->sk_family == sk->sk_family && 575c125e80bSCraig Gallek ipv6_only_sock(sk2) == ipv6_only_sock(sk) && 576c125e80bSCraig Gallek sk2->sk_bound_dev_if == sk->sk_bound_dev_if && 57790e5d0dbSCraig Gallek inet_csk(sk2)->icsk_bind_hash == tb && 578c125e80bSCraig Gallek sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && 579fe38d2a1SJosef Bacik inet_rcv_saddr_equal(sk, sk2, false)) 5802dbb9b9eSMartin KaFai Lau return reuseport_add_sock(sk, sk2, 5812dbb9b9eSMartin KaFai Lau inet_rcv_saddr_any(sk)); 582c125e80bSCraig Gallek } 583c125e80bSCraig Gallek 5842dbb9b9eSMartin KaFai Lau return reuseport_alloc(sk, inet_rcv_saddr_any(sk)); 585c125e80bSCraig Gallek } 586c125e80bSCraig Gallek 587fe38d2a1SJosef Bacik int __inet_hash(struct sock *sk, struct sock *osk) 588152da81dSPavel Emelyanov { 58939d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 5905caea4eaSEric Dumazet struct inet_listen_hashbucket *ilb; 591c125e80bSCraig Gallek int err = 0; 592152da81dSPavel Emelyanov 5935e0724d0SEric Dumazet if (sk->sk_state != TCP_LISTEN) { 5945e0724d0SEric Dumazet inet_ehash_nolisten(sk, osk); 595c125e80bSCraig Gallek return 0; 5965e0724d0SEric Dumazet } 597547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk)); 5985caea4eaSEric Dumazet ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; 599152da81dSPavel Emelyanov 6005caea4eaSEric Dumazet spin_lock(&ilb->lock); 601c125e80bSCraig Gallek if (sk->sk_reuseport) { 602fe38d2a1SJosef Bacik err = inet_reuseport_add_sock(sk, ilb); 603c125e80bSCraig Gallek if (err) 604c125e80bSCraig Gallek goto unlock; 605c125e80bSCraig Gallek } 606d296ba60SCraig Gallek if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && 607d296ba60SCraig Gallek sk->sk_family == AF_INET6) 608d296ba60SCraig Gallek hlist_add_tail_rcu(&sk->sk_node, &ilb->head); 609d296ba60SCraig Gallek else 6103b24d854SEric Dumazet hlist_add_head_rcu(&sk->sk_node, &ilb->head); 61161b7c691SMartin KaFai Lau inet_hash2(hashinfo, sk); 61276d013b2SMartin KaFai Lau ilb->count++; 6133b24d854SEric Dumazet sock_set_flag(sk, SOCK_RCU_FREE); 614c29a0bc4SPavel Emelyanov sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 615c125e80bSCraig Gallek unlock: 6165caea4eaSEric Dumazet spin_unlock(&ilb->lock); 617c125e80bSCraig Gallek 618c125e80bSCraig Gallek return err; 619152da81dSPavel Emelyanov } 62077a6a471SEric Dumazet EXPORT_SYMBOL(__inet_hash); 621ab1e0a13SArnaldo Carvalho de Melo 622086c653fSCraig Gallek int inet_hash(struct sock *sk) 623ab1e0a13SArnaldo Carvalho de Melo { 624c125e80bSCraig Gallek int err = 0; 625c125e80bSCraig Gallek 626ab1e0a13SArnaldo Carvalho de Melo if (sk->sk_state != TCP_CLOSE) { 627ab1e0a13SArnaldo Carvalho de Melo local_bh_disable(); 628fe38d2a1SJosef Bacik err = __inet_hash(sk, NULL); 629ab1e0a13SArnaldo Carvalho de Melo local_bh_enable(); 630ab1e0a13SArnaldo Carvalho de Melo } 631086c653fSCraig Gallek 632c125e80bSCraig Gallek return err; 633ab1e0a13SArnaldo Carvalho de Melo } 634ab1e0a13SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_hash); 635ab1e0a13SArnaldo Carvalho de Melo 636ab1e0a13SArnaldo Carvalho de Melo void inet_unhash(struct sock *sk) 637ab1e0a13SArnaldo Carvalho de Melo { 63839d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 6390ba98718SGeert Uytterhoeven struct inet_listen_hashbucket *ilb = NULL; 640c25eb3bfSEric Dumazet spinlock_t *lock; 641ab1e0a13SArnaldo Carvalho de Melo 642ab1e0a13SArnaldo Carvalho de Melo if (sk_unhashed(sk)) 6435caea4eaSEric Dumazet return; 644ab1e0a13SArnaldo Carvalho de Melo 6453b24d854SEric Dumazet if (sk->sk_state == TCP_LISTEN) { 64676d013b2SMartin KaFai Lau ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; 64776d013b2SMartin KaFai Lau lock = &ilb->lock; 6483b24d854SEric Dumazet } else { 649c25eb3bfSEric Dumazet lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 6503b24d854SEric Dumazet } 6519db66bdcSEric Dumazet spin_lock_bh(lock); 65261b7c691SMartin KaFai Lau if (sk_unhashed(sk)) 65361b7c691SMartin KaFai Lau goto unlock; 65461b7c691SMartin KaFai Lau 655c125e80bSCraig Gallek if (rcu_access_pointer(sk->sk_reuseport_cb)) 656c125e80bSCraig Gallek reuseport_detach_sock(sk); 6570ba98718SGeert Uytterhoeven if (ilb) { 65861b7c691SMartin KaFai Lau inet_unhash2(hashinfo, sk); 65961b7c691SMartin KaFai Lau __sk_del_node_init(sk); 66076d013b2SMartin KaFai Lau ilb->count--; 66161b7c691SMartin KaFai Lau } else { 66261b7c691SMartin KaFai Lau __sk_nulls_del_node_init_rcu(sk); 66376d013b2SMartin KaFai Lau } 66461b7c691SMartin KaFai Lau sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 66561b7c691SMartin KaFai Lau unlock: 666920de804SEric Dumazet spin_unlock_bh(lock); 667ab1e0a13SArnaldo Carvalho de Melo } 668ab1e0a13SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_unhash); 669152da81dSPavel Emelyanov 6705ee31fc1SPavel Emelyanov int __inet_hash_connect(struct inet_timewait_death_row *death_row, 6715d8c0aa9SPavel Emelyanov struct sock *sk, u32 port_offset, 6725ee31fc1SPavel Emelyanov int (*check_established)(struct inet_timewait_death_row *, 673b4d6444eSEric Dumazet struct sock *, __u16, struct inet_timewait_sock **)) 674a7f5e7f1SArnaldo Carvalho de Melo { 675a7f5e7f1SArnaldo Carvalho de Melo struct inet_hashinfo *hinfo = death_row->hashinfo; 676a7f5e7f1SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = NULL; 6771580ab63SEric Dumazet struct inet_bind_hashbucket *head; 6781580ab63SEric Dumazet int port = inet_sk(sk)->inet_num; 6791580ab63SEric Dumazet struct net *net = sock_net(sk); 6801580ab63SEric Dumazet struct inet_bind_bucket *tb; 6811580ab63SEric Dumazet u32 remaining, offset; 6821580ab63SEric Dumazet int ret, i, low, high; 6831580ab63SEric Dumazet static u32 hint; 684*3c82a21fSRobert Shearman int l3mdev; 6851580ab63SEric Dumazet 6861580ab63SEric Dumazet if (port) { 6871580ab63SEric Dumazet head = &hinfo->bhash[inet_bhashfn(net, port, 6881580ab63SEric Dumazet hinfo->bhash_size)]; 6891580ab63SEric Dumazet tb = inet_csk(sk)->icsk_bind_hash; 6901580ab63SEric Dumazet spin_lock_bh(&head->lock); 6911580ab63SEric Dumazet if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 6921580ab63SEric Dumazet inet_ehash_nolisten(sk, NULL); 6931580ab63SEric Dumazet spin_unlock_bh(&head->lock); 6941580ab63SEric Dumazet return 0; 6951580ab63SEric Dumazet } 6961580ab63SEric Dumazet spin_unlock(&head->lock); 6971580ab63SEric Dumazet /* No definite answer... Walk to established hash table */ 6981580ab63SEric Dumazet ret = check_established(death_row, sk, port, NULL); 6991580ab63SEric Dumazet local_bh_enable(); 7001580ab63SEric Dumazet return ret; 7011580ab63SEric Dumazet } 702a7f5e7f1SArnaldo Carvalho de Melo 703*3c82a21fSRobert Shearman l3mdev = inet_sk_bound_l3mdev(sk); 704*3c82a21fSRobert Shearman 7050bbf87d8SEric W. Biederman inet_get_local_port_range(net, &low, &high); 7061580ab63SEric Dumazet high++; /* [32768, 60999] -> [32768, 61000[ */ 7071580ab63SEric Dumazet remaining = high - low; 7081580ab63SEric Dumazet if (likely(remaining > 1)) 7091580ab63SEric Dumazet remaining &= ~1U; 710227b60f5SStephen Hemminger 7111580ab63SEric Dumazet offset = (hint + port_offset) % remaining; 7121580ab63SEric Dumazet /* In first pass we try ports of @low parity. 7131580ab63SEric Dumazet * inet_csk_get_port() does the opposite choice. 71407f4c900SEric Dumazet */ 7151580ab63SEric Dumazet offset &= ~1U; 7161580ab63SEric Dumazet other_parity_scan: 7171580ab63SEric Dumazet port = low + offset; 7181580ab63SEric Dumazet for (i = 0; i < remaining; i += 2, port += 2) { 7191580ab63SEric Dumazet if (unlikely(port >= high)) 7201580ab63SEric Dumazet port -= remaining; 721122ff243SWANG Cong if (inet_is_local_reserved_port(net, port)) 722e3826f1eSAmerigo Wang continue; 7237f635ab7SPavel Emelyanov head = &hinfo->bhash[inet_bhashfn(net, port, 7247f635ab7SPavel Emelyanov hinfo->bhash_size)]; 7251580ab63SEric Dumazet spin_lock_bh(&head->lock); 726a7f5e7f1SArnaldo Carvalho de Melo 7271580ab63SEric Dumazet /* Does not bother with rcv_saddr checks, because 7281580ab63SEric Dumazet * the established check is already unique enough. 729a7f5e7f1SArnaldo Carvalho de Melo */ 730b67bfe0dSSasha Levin inet_bind_bucket_for_each(tb, &head->chain) { 731*3c82a21fSRobert Shearman if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && 732*3c82a21fSRobert Shearman tb->port == port) { 733da5e3630STom Herbert if (tb->fastreuse >= 0 || 734da5e3630STom Herbert tb->fastreuseport >= 0) 735a7f5e7f1SArnaldo Carvalho de Melo goto next_port; 736a9d8f911SEvgeniy Polyakov WARN_ON(hlist_empty(&tb->owners)); 7375ee31fc1SPavel Emelyanov if (!check_established(death_row, sk, 7385ee31fc1SPavel Emelyanov port, &tw)) 739a7f5e7f1SArnaldo Carvalho de Melo goto ok; 740a7f5e7f1SArnaldo Carvalho de Melo goto next_port; 741a7f5e7f1SArnaldo Carvalho de Melo } 742a7f5e7f1SArnaldo Carvalho de Melo } 743a7f5e7f1SArnaldo Carvalho de Melo 744941b1d22SPavel Emelyanov tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, 745*3c82a21fSRobert Shearman net, head, port, l3mdev); 746a7f5e7f1SArnaldo Carvalho de Melo if (!tb) { 7471580ab63SEric Dumazet spin_unlock_bh(&head->lock); 7481580ab63SEric Dumazet return -ENOMEM; 749a7f5e7f1SArnaldo Carvalho de Melo } 750a7f5e7f1SArnaldo Carvalho de Melo tb->fastreuse = -1; 751da5e3630STom Herbert tb->fastreuseport = -1; 752a7f5e7f1SArnaldo Carvalho de Melo goto ok; 753a7f5e7f1SArnaldo Carvalho de Melo next_port: 7541580ab63SEric Dumazet spin_unlock_bh(&head->lock); 7551580ab63SEric Dumazet cond_resched(); 756a7f5e7f1SArnaldo Carvalho de Melo } 7571580ab63SEric Dumazet 7581580ab63SEric Dumazet offset++; 7591580ab63SEric Dumazet if ((offset & 1) && remaining > 1) 7601580ab63SEric Dumazet goto other_parity_scan; 761a7f5e7f1SArnaldo Carvalho de Melo 762a7f5e7f1SArnaldo Carvalho de Melo return -EADDRNOTAVAIL; 763a7f5e7f1SArnaldo Carvalho de Melo 764a7f5e7f1SArnaldo Carvalho de Melo ok: 7651580ab63SEric Dumazet hint += i + 2; 766a7f5e7f1SArnaldo Carvalho de Melo 767a7f5e7f1SArnaldo Carvalho de Melo /* Head lock still held and bh's disabled */ 768a7f5e7f1SArnaldo Carvalho de Melo inet_bind_hash(sk, tb, port); 769a7f5e7f1SArnaldo Carvalho de Melo if (sk_unhashed(sk)) { 770c720c7e8SEric Dumazet inet_sk(sk)->inet_sport = htons(port); 7715e0724d0SEric Dumazet inet_ehash_nolisten(sk, (struct sock *)tw); 772a7f5e7f1SArnaldo Carvalho de Melo } 7733cdaedaeSEric Dumazet if (tw) 774fc01538fSEric Dumazet inet_twsk_bind_unhash(tw, hinfo); 775a7f5e7f1SArnaldo Carvalho de Melo spin_unlock(&head->lock); 776dbe7faa4SEric Dumazet if (tw) 777dbe7faa4SEric Dumazet inet_twsk_deschedule_put(tw); 778a7f5e7f1SArnaldo Carvalho de Melo local_bh_enable(); 7791580ab63SEric Dumazet return 0; 780a7f5e7f1SArnaldo Carvalho de Melo } 7815ee31fc1SPavel Emelyanov 7825ee31fc1SPavel Emelyanov /* 7835ee31fc1SPavel Emelyanov * Bind a port for a connect operation and hash it. 7845ee31fc1SPavel Emelyanov */ 7855ee31fc1SPavel Emelyanov int inet_hash_connect(struct inet_timewait_death_row *death_row, 7865ee31fc1SPavel Emelyanov struct sock *sk) 7875ee31fc1SPavel Emelyanov { 788e2baad9eSEric Dumazet u32 port_offset = 0; 789e2baad9eSEric Dumazet 790e2baad9eSEric Dumazet if (!inet_sk(sk)->inet_num) 791e2baad9eSEric Dumazet port_offset = inet_sk_port_offset(sk); 792e2baad9eSEric Dumazet return __inet_hash_connect(death_row, sk, port_offset, 793b4d6444eSEric Dumazet __inet_check_established); 7945ee31fc1SPavel Emelyanov } 795a7f5e7f1SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_hash_connect); 7965caea4eaSEric Dumazet 7975caea4eaSEric Dumazet void inet_hashinfo_init(struct inet_hashinfo *h) 7985caea4eaSEric Dumazet { 7995caea4eaSEric Dumazet int i; 8005caea4eaSEric Dumazet 801c25eb3bfSEric Dumazet for (i = 0; i < INET_LHTABLE_SIZE; i++) { 8025caea4eaSEric Dumazet spin_lock_init(&h->listening_hash[i].lock); 8033b24d854SEric Dumazet INIT_HLIST_HEAD(&h->listening_hash[i].head); 80476d013b2SMartin KaFai Lau h->listening_hash[i].count = 0; 805c25eb3bfSEric Dumazet } 80661b7c691SMartin KaFai Lau 80761b7c691SMartin KaFai Lau h->lhash2 = NULL; 8085caea4eaSEric Dumazet } 8095caea4eaSEric Dumazet EXPORT_SYMBOL_GPL(inet_hashinfo_init); 810095dc8e0SEric Dumazet 81161b7c691SMartin KaFai Lau void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, 81261b7c691SMartin KaFai Lau unsigned long numentries, int scale, 81361b7c691SMartin KaFai Lau unsigned long low_limit, 81461b7c691SMartin KaFai Lau unsigned long high_limit) 81561b7c691SMartin KaFai Lau { 81661b7c691SMartin KaFai Lau unsigned int i; 81761b7c691SMartin KaFai Lau 81861b7c691SMartin KaFai Lau h->lhash2 = alloc_large_system_hash(name, 81961b7c691SMartin KaFai Lau sizeof(*h->lhash2), 82061b7c691SMartin KaFai Lau numentries, 82161b7c691SMartin KaFai Lau scale, 82261b7c691SMartin KaFai Lau 0, 82361b7c691SMartin KaFai Lau NULL, 82461b7c691SMartin KaFai Lau &h->lhash2_mask, 82561b7c691SMartin KaFai Lau low_limit, 82661b7c691SMartin KaFai Lau high_limit); 82761b7c691SMartin KaFai Lau 82861b7c691SMartin KaFai Lau for (i = 0; i <= h->lhash2_mask; i++) { 82961b7c691SMartin KaFai Lau spin_lock_init(&h->lhash2[i].lock); 83061b7c691SMartin KaFai Lau INIT_HLIST_HEAD(&h->lhash2[i].head); 83161b7c691SMartin KaFai Lau h->lhash2[i].count = 0; 83261b7c691SMartin KaFai Lau } 83361b7c691SMartin KaFai Lau } 83461b7c691SMartin KaFai Lau 835095dc8e0SEric Dumazet int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) 836095dc8e0SEric Dumazet { 83789e478a2SEric Dumazet unsigned int locksz = sizeof(spinlock_t); 838095dc8e0SEric Dumazet unsigned int i, nblocks = 1; 839095dc8e0SEric Dumazet 84089e478a2SEric Dumazet if (locksz != 0) { 841095dc8e0SEric Dumazet /* allocate 2 cache lines or at least one spinlock per cpu */ 84289e478a2SEric Dumazet nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U); 843095dc8e0SEric Dumazet nblocks = roundup_pow_of_two(nblocks * num_possible_cpus()); 844095dc8e0SEric Dumazet 845095dc8e0SEric Dumazet /* no more locks than number of hash buckets */ 846095dc8e0SEric Dumazet nblocks = min(nblocks, hashinfo->ehash_mask + 1); 847095dc8e0SEric Dumazet 848752ade68SMichal Hocko hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL); 849095dc8e0SEric Dumazet if (!hashinfo->ehash_locks) 850095dc8e0SEric Dumazet return -ENOMEM; 851095dc8e0SEric Dumazet 852095dc8e0SEric Dumazet for (i = 0; i < nblocks; i++) 853095dc8e0SEric Dumazet spin_lock_init(&hashinfo->ehash_locks[i]); 854095dc8e0SEric Dumazet } 855095dc8e0SEric Dumazet hashinfo->ehash_locks_mask = nblocks - 1; 856095dc8e0SEric Dumazet return 0; 857095dc8e0SEric Dumazet } 858095dc8e0SEric Dumazet EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc); 859