177d8bf9cSArnaldo Carvalho de Melo /* 277d8bf9cSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 377d8bf9cSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 477d8bf9cSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 577d8bf9cSArnaldo Carvalho de Melo * 677d8bf9cSArnaldo Carvalho de Melo * Generic INET transport hashtables 777d8bf9cSArnaldo Carvalho de Melo * 877d8bf9cSArnaldo Carvalho de Melo * Authors: Lotsa people, from code originally in tcp 977d8bf9cSArnaldo Carvalho de Melo * 1077d8bf9cSArnaldo Carvalho de Melo * This program is free software; you can redistribute it and/or 1177d8bf9cSArnaldo Carvalho de Melo * modify it under the terms of the GNU General Public License 1277d8bf9cSArnaldo Carvalho de Melo * as published by the Free Software Foundation; either version 1377d8bf9cSArnaldo Carvalho de Melo * 2 of the License, or (at your option) any later version. 1477d8bf9cSArnaldo Carvalho de Melo */ 1577d8bf9cSArnaldo Carvalho de Melo 162d8c4ce5SArnaldo Carvalho de Melo #include <linux/module.h> 17a7f5e7f1SArnaldo Carvalho de Melo #include <linux/random.h> 18f3f05f70SArnaldo Carvalho de Melo #include <linux/sched.h> 1977d8bf9cSArnaldo Carvalho de Melo #include <linux/slab.h> 20f3f05f70SArnaldo Carvalho de Melo #include <linux/wait.h> 21095dc8e0SEric Dumazet #include <linux/vmalloc.h> 2277d8bf9cSArnaldo Carvalho de Melo 23c125e80bSCraig Gallek #include <net/addrconf.h> 24463c84b9SArnaldo Carvalho de Melo #include <net/inet_connection_sock.h> 2577d8bf9cSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 266e5714eaSDavid S. 
Miller #include <net/secure_seq.h> 27a7f5e7f1SArnaldo Carvalho de Melo #include <net/ip.h> 28c125e80bSCraig Gallek #include <net/sock_reuseport.h> 2977d8bf9cSArnaldo Carvalho de Melo 306eada011SEric Dumazet static u32 inet_ehashfn(const struct net *net, const __be32 laddr, 3165cd8033SHannes Frederic Sowa const __u16 lport, const __be32 faddr, 3265cd8033SHannes Frederic Sowa const __be16 fport) 3365cd8033SHannes Frederic Sowa { 341bbdceefSHannes Frederic Sowa static u32 inet_ehash_secret __read_mostly; 351bbdceefSHannes Frederic Sowa 361bbdceefSHannes Frederic Sowa net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); 371bbdceefSHannes Frederic Sowa 3865cd8033SHannes Frederic Sowa return __inet_ehashfn(laddr, lport, faddr, fport, 3965cd8033SHannes Frederic Sowa inet_ehash_secret + net_hash_mix(net)); 4065cd8033SHannes Frederic Sowa } 4165cd8033SHannes Frederic Sowa 42d1e559d0SEric Dumazet /* This function handles inet_sock, but also timewait and request sockets 43d1e559d0SEric Dumazet * for IPv4/IPv6. 44d1e559d0SEric Dumazet */ 455b441f76SEric Dumazet u32 sk_ehashfn(const struct sock *sk) 4665cd8033SHannes Frederic Sowa { 47d1e559d0SEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 48d1e559d0SEric Dumazet if (sk->sk_family == AF_INET6 && 49d1e559d0SEric Dumazet !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) 50d1e559d0SEric Dumazet return inet6_ehashfn(sock_net(sk), 51d1e559d0SEric Dumazet &sk->sk_v6_rcv_saddr, sk->sk_num, 52d1e559d0SEric Dumazet &sk->sk_v6_daddr, sk->sk_dport); 53d1e559d0SEric Dumazet #endif 545b441f76SEric Dumazet return inet_ehashfn(sock_net(sk), 555b441f76SEric Dumazet sk->sk_rcv_saddr, sk->sk_num, 565b441f76SEric Dumazet sk->sk_daddr, sk->sk_dport); 5765cd8033SHannes Frederic Sowa } 5865cd8033SHannes Frederic Sowa 5977d8bf9cSArnaldo Carvalho de Melo /* 6077d8bf9cSArnaldo Carvalho de Melo * Allocate and initialize a new local port bind bucket. 6177d8bf9cSArnaldo Carvalho de Melo * The bindhash mutex for snum's hash chain must be held here. 
6277d8bf9cSArnaldo Carvalho de Melo */ 63e18b890bSChristoph Lameter struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, 64941b1d22SPavel Emelyanov struct net *net, 6577d8bf9cSArnaldo Carvalho de Melo struct inet_bind_hashbucket *head, 6677d8bf9cSArnaldo Carvalho de Melo const unsigned short snum) 6777d8bf9cSArnaldo Carvalho de Melo { 6854e6ecb2SChristoph Lameter struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); 6977d8bf9cSArnaldo Carvalho de Melo 7000db4124SIan Morris if (tb) { 71efd7ef1cSEric W. Biederman write_pnet(&tb->ib_net, net); 7277d8bf9cSArnaldo Carvalho de Melo tb->port = snum; 7377d8bf9cSArnaldo Carvalho de Melo tb->fastreuse = 0; 74da5e3630STom Herbert tb->fastreuseport = 0; 75a9d8f911SEvgeniy Polyakov tb->num_owners = 0; 7677d8bf9cSArnaldo Carvalho de Melo INIT_HLIST_HEAD(&tb->owners); 7777d8bf9cSArnaldo Carvalho de Melo hlist_add_head(&tb->node, &head->chain); 7877d8bf9cSArnaldo Carvalho de Melo } 7977d8bf9cSArnaldo Carvalho de Melo return tb; 8077d8bf9cSArnaldo Carvalho de Melo } 8177d8bf9cSArnaldo Carvalho de Melo 8277d8bf9cSArnaldo Carvalho de Melo /* 8377d8bf9cSArnaldo Carvalho de Melo * Caller must hold hashbucket lock for this tb with local BH disabled 8477d8bf9cSArnaldo Carvalho de Melo */ 85e18b890bSChristoph Lameter void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) 8677d8bf9cSArnaldo Carvalho de Melo { 8777d8bf9cSArnaldo Carvalho de Melo if (hlist_empty(&tb->owners)) { 8877d8bf9cSArnaldo Carvalho de Melo __hlist_del(&tb->node); 8977d8bf9cSArnaldo Carvalho de Melo kmem_cache_free(cachep, tb); 9077d8bf9cSArnaldo Carvalho de Melo } 9177d8bf9cSArnaldo Carvalho de Melo } 922d8c4ce5SArnaldo Carvalho de Melo 932d8c4ce5SArnaldo Carvalho de Melo void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, 942d8c4ce5SArnaldo Carvalho de Melo const unsigned short snum) 952d8c4ce5SArnaldo Carvalho de Melo { 96c720c7e8SEric Dumazet inet_sk(sk)->inet_num = snum; 
972d8c4ce5SArnaldo Carvalho de Melo sk_add_bind_node(sk, &tb->owners); 98a9d8f911SEvgeniy Polyakov tb->num_owners++; 99463c84b9SArnaldo Carvalho de Melo inet_csk(sk)->icsk_bind_hash = tb; 1002d8c4ce5SArnaldo Carvalho de Melo } 1012d8c4ce5SArnaldo Carvalho de Melo 1022d8c4ce5SArnaldo Carvalho de Melo /* 1032d8c4ce5SArnaldo Carvalho de Melo * Get rid of any references to a local port held by the given sock. 1042d8c4ce5SArnaldo Carvalho de Melo */ 105ab1e0a13SArnaldo Carvalho de Melo static void __inet_put_port(struct sock *sk) 1062d8c4ce5SArnaldo Carvalho de Melo { 10739d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 108c720c7e8SEric Dumazet const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num, 1097f635ab7SPavel Emelyanov hashinfo->bhash_size); 1102d8c4ce5SArnaldo Carvalho de Melo struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 1112d8c4ce5SArnaldo Carvalho de Melo struct inet_bind_bucket *tb; 1122d8c4ce5SArnaldo Carvalho de Melo 1132d8c4ce5SArnaldo Carvalho de Melo spin_lock(&head->lock); 114463c84b9SArnaldo Carvalho de Melo tb = inet_csk(sk)->icsk_bind_hash; 1152d8c4ce5SArnaldo Carvalho de Melo __sk_del_bind_node(sk); 116a9d8f911SEvgeniy Polyakov tb->num_owners--; 117463c84b9SArnaldo Carvalho de Melo inet_csk(sk)->icsk_bind_hash = NULL; 118c720c7e8SEric Dumazet inet_sk(sk)->inet_num = 0; 1192d8c4ce5SArnaldo Carvalho de Melo inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 1202d8c4ce5SArnaldo Carvalho de Melo spin_unlock(&head->lock); 1212d8c4ce5SArnaldo Carvalho de Melo } 1222d8c4ce5SArnaldo Carvalho de Melo 123ab1e0a13SArnaldo Carvalho de Melo void inet_put_port(struct sock *sk) 1242d8c4ce5SArnaldo Carvalho de Melo { 1252d8c4ce5SArnaldo Carvalho de Melo local_bh_disable(); 126ab1e0a13SArnaldo Carvalho de Melo __inet_put_port(sk); 1272d8c4ce5SArnaldo Carvalho de Melo local_bh_enable(); 1282d8c4ce5SArnaldo Carvalho de Melo } 1292d8c4ce5SArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_put_port); 
130f3f05f70SArnaldo Carvalho de Melo 1311ce31c9eSEric Dumazet int __inet_inherit_port(const struct sock *sk, struct sock *child) 13253083773SPavel Emelyanov { 13353083773SPavel Emelyanov struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; 134093d2823SBalazs Scheidler unsigned short port = inet_sk(child)->inet_num; 135093d2823SBalazs Scheidler const int bhash = inet_bhashfn(sock_net(sk), port, 1367f635ab7SPavel Emelyanov table->bhash_size); 13753083773SPavel Emelyanov struct inet_bind_hashbucket *head = &table->bhash[bhash]; 13853083773SPavel Emelyanov struct inet_bind_bucket *tb; 13953083773SPavel Emelyanov 14053083773SPavel Emelyanov spin_lock(&head->lock); 14153083773SPavel Emelyanov tb = inet_csk(sk)->icsk_bind_hash; 142c2f34a65SEric Dumazet if (unlikely(!tb)) { 143c2f34a65SEric Dumazet spin_unlock(&head->lock); 144c2f34a65SEric Dumazet return -ENOENT; 145c2f34a65SEric Dumazet } 146093d2823SBalazs Scheidler if (tb->port != port) { 147093d2823SBalazs Scheidler /* NOTE: using tproxy and redirecting skbs to a proxy 148093d2823SBalazs Scheidler * on a different listener port breaks the assumption 149093d2823SBalazs Scheidler * that the listener socket's icsk_bind_hash is the same 150093d2823SBalazs Scheidler * as that of the child socket. We have to look up or 151093d2823SBalazs Scheidler * create a new bind bucket for the child here. 
*/ 152b67bfe0dSSasha Levin inet_bind_bucket_for_each(tb, &head->chain) { 153093d2823SBalazs Scheidler if (net_eq(ib_net(tb), sock_net(sk)) && 154093d2823SBalazs Scheidler tb->port == port) 155093d2823SBalazs Scheidler break; 156093d2823SBalazs Scheidler } 157b67bfe0dSSasha Levin if (!tb) { 158093d2823SBalazs Scheidler tb = inet_bind_bucket_create(table->bind_bucket_cachep, 159093d2823SBalazs Scheidler sock_net(sk), head, port); 160093d2823SBalazs Scheidler if (!tb) { 161093d2823SBalazs Scheidler spin_unlock(&head->lock); 162093d2823SBalazs Scheidler return -ENOMEM; 163093d2823SBalazs Scheidler } 164093d2823SBalazs Scheidler } 165093d2823SBalazs Scheidler } 166b4ff3c90SNagendra Tomar inet_bind_hash(child, tb, port); 16753083773SPavel Emelyanov spin_unlock(&head->lock); 168093d2823SBalazs Scheidler 169093d2823SBalazs Scheidler return 0; 17053083773SPavel Emelyanov } 17153083773SPavel Emelyanov EXPORT_SYMBOL_GPL(__inet_inherit_port); 17253083773SPavel Emelyanov 173c25eb3bfSEric Dumazet static inline int compute_score(struct sock *sk, struct net *net, 174c25eb3bfSEric Dumazet const unsigned short hnum, const __be32 daddr, 175c25eb3bfSEric Dumazet const int dif) 176c25eb3bfSEric Dumazet { 177c25eb3bfSEric Dumazet int score = -1; 178c25eb3bfSEric Dumazet struct inet_sock *inet = inet_sk(sk); 179c25eb3bfSEric Dumazet 180c720c7e8SEric Dumazet if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && 181c25eb3bfSEric Dumazet !ipv6_only_sock(sk)) { 182c720c7e8SEric Dumazet __be32 rcv_saddr = inet->inet_rcv_saddr; 183da5e3630STom Herbert score = sk->sk_family == PF_INET ? 
2 : 1; 184c25eb3bfSEric Dumazet if (rcv_saddr) { 185c25eb3bfSEric Dumazet if (rcv_saddr != daddr) 186c25eb3bfSEric Dumazet return -1; 187da5e3630STom Herbert score += 4; 188c25eb3bfSEric Dumazet } 189c25eb3bfSEric Dumazet if (sk->sk_bound_dev_if) { 190c25eb3bfSEric Dumazet if (sk->sk_bound_dev_if != dif) 191c25eb3bfSEric Dumazet return -1; 192da5e3630STom Herbert score += 4; 193c25eb3bfSEric Dumazet } 19470da268bSEric Dumazet if (sk->sk_incoming_cpu == raw_smp_processor_id()) 19570da268bSEric Dumazet score++; 196c25eb3bfSEric Dumazet } 197c25eb3bfSEric Dumazet return score; 198c25eb3bfSEric Dumazet } 199c25eb3bfSEric Dumazet 200f3f05f70SArnaldo Carvalho de Melo /* 2013b24d854SEric Dumazet * Here are some nice properties to exploit here. The BSD API 2023b24d854SEric Dumazet * does not allow a listening sock to specify the remote port nor the 20333b62231SArnaldo Carvalho de Melo * remote address for the connection. So always assume those are both 20433b62231SArnaldo Carvalho de Melo * wildcarded during the search since they can never be otherwise. 
20533b62231SArnaldo Carvalho de Melo */ 20633b62231SArnaldo Carvalho de Melo 2073b24d854SEric Dumazet /* called with rcu_read_lock() : No refcount taken on the socket */ 208c67499c0SPavel Emelyanov struct sock *__inet_lookup_listener(struct net *net, 209c67499c0SPavel Emelyanov struct inet_hashinfo *hashinfo, 210a583636aSCraig Gallek struct sk_buff *skb, int doff, 211da5e3630STom Herbert const __be32 saddr, __be16 sport, 212fb99c848SAl Viro const __be32 daddr, const unsigned short hnum, 21399a92ff5SHerbert Xu const int dif) 21499a92ff5SHerbert Xu { 215c25eb3bfSEric Dumazet unsigned int hash = inet_lhashfn(net, hnum); 216c25eb3bfSEric Dumazet struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; 2173b24d854SEric Dumazet int score, hiscore = 0, matches = 0, reuseport = 0; 2183b24d854SEric Dumazet struct sock *sk, *result = NULL; 219da5e3630STom Herbert u32 phash = 0; 22099a92ff5SHerbert Xu 2213b24d854SEric Dumazet sk_for_each_rcu(sk, &ilb->head) { 222c25eb3bfSEric Dumazet score = compute_score(sk, net, hnum, daddr, dif); 223c25eb3bfSEric Dumazet if (score > hiscore) { 224da5e3630STom Herbert reuseport = sk->sk_reuseport; 225da5e3630STom Herbert if (reuseport) { 226da5e3630STom Herbert phash = inet_ehashfn(net, daddr, hnum, 227da5e3630STom Herbert saddr, sport); 2283b24d854SEric Dumazet result = reuseport_select_sock(sk, phash, 229c125e80bSCraig Gallek skb, doff); 2303b24d854SEric Dumazet if (result) 2313b24d854SEric Dumazet return result; 232da5e3630STom Herbert matches = 1; 233da5e3630STom Herbert } 2343b24d854SEric Dumazet result = sk; 2353b24d854SEric Dumazet hiscore = score; 236da5e3630STom Herbert } else if (score == hiscore && reuseport) { 237da5e3630STom Herbert matches++; 2388fc54f68SDaniel Borkmann if (reciprocal_scale(phash, matches) == 0) 239da5e3630STom Herbert result = sk; 240da5e3630STom Herbert phash = next_pseudo_random32(phash); 24199a92ff5SHerbert Xu } 24299a92ff5SHerbert Xu } 243c25eb3bfSEric Dumazet return result; 
24499a92ff5SHerbert Xu } 2458f491069SHerbert Xu EXPORT_SYMBOL_GPL(__inet_lookup_listener); 246a7f5e7f1SArnaldo Carvalho de Melo 24705dbc7b5SEric Dumazet /* All sockets share common refcount, but have different destructors */ 24805dbc7b5SEric Dumazet void sock_gen_put(struct sock *sk) 24905dbc7b5SEric Dumazet { 25005dbc7b5SEric Dumazet if (!atomic_dec_and_test(&sk->sk_refcnt)) 25105dbc7b5SEric Dumazet return; 25205dbc7b5SEric Dumazet 25305dbc7b5SEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 25405dbc7b5SEric Dumazet inet_twsk_free(inet_twsk(sk)); 25541b822c5SEric Dumazet else if (sk->sk_state == TCP_NEW_SYN_RECV) 25641b822c5SEric Dumazet reqsk_free(inet_reqsk(sk)); 25705dbc7b5SEric Dumazet else 25805dbc7b5SEric Dumazet sk_free(sk); 25905dbc7b5SEric Dumazet } 26005dbc7b5SEric Dumazet EXPORT_SYMBOL_GPL(sock_gen_put); 26105dbc7b5SEric Dumazet 2622c13270bSEric Dumazet void sock_edemux(struct sk_buff *skb) 2632c13270bSEric Dumazet { 2642c13270bSEric Dumazet sock_gen_put(skb->sk); 2652c13270bSEric Dumazet } 2662c13270bSEric Dumazet EXPORT_SYMBOL(sock_edemux); 2672c13270bSEric Dumazet 268c67499c0SPavel Emelyanov struct sock *__inet_lookup_established(struct net *net, 269c67499c0SPavel Emelyanov struct inet_hashinfo *hashinfo, 27077a5ba55SPavel Emelyanov const __be32 saddr, const __be16 sport, 27177a5ba55SPavel Emelyanov const __be32 daddr, const u16 hnum, 27277a5ba55SPavel Emelyanov const int dif) 27377a5ba55SPavel Emelyanov { 274c7228317SJoe Perches INET_ADDR_COOKIE(acookie, saddr, daddr); 27577a5ba55SPavel Emelyanov const __portpair ports = INET_COMBINED_PORTS(sport, hnum); 27677a5ba55SPavel Emelyanov struct sock *sk; 2773ab5aee7SEric Dumazet const struct hlist_nulls_node *node; 27877a5ba55SPavel Emelyanov /* Optimize here for direct hit, only listening connections can 27977a5ba55SPavel Emelyanov * have wildcards anyways. 
28077a5ba55SPavel Emelyanov */ 2819f26b3adSPavel Emelyanov unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); 282f373b53bSEric Dumazet unsigned int slot = hash & hashinfo->ehash_mask; 2833ab5aee7SEric Dumazet struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; 28477a5ba55SPavel Emelyanov 2853ab5aee7SEric Dumazet begin: 2863ab5aee7SEric Dumazet sk_nulls_for_each_rcu(sk, node, &head->chain) { 287ce43b03eSEric Dumazet if (sk->sk_hash != hash) 288ce43b03eSEric Dumazet continue; 289ce43b03eSEric Dumazet if (likely(INET_MATCH(sk, net, acookie, 290ce43b03eSEric Dumazet saddr, daddr, ports, dif))) { 2913ab5aee7SEric Dumazet if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) 29205dbc7b5SEric Dumazet goto out; 293ce43b03eSEric Dumazet if (unlikely(!INET_MATCH(sk, net, acookie, 2943ab5aee7SEric Dumazet saddr, daddr, ports, dif))) { 29505dbc7b5SEric Dumazet sock_gen_put(sk); 2963ab5aee7SEric Dumazet goto begin; 29777a5ba55SPavel Emelyanov } 29805dbc7b5SEric Dumazet goto found; 2993ab5aee7SEric Dumazet } 3003ab5aee7SEric Dumazet } 3013ab5aee7SEric Dumazet /* 3023ab5aee7SEric Dumazet * if the nulls value we got at the end of this lookup is 3033ab5aee7SEric Dumazet * not the expected one, we must restart lookup. 3043ab5aee7SEric Dumazet * We probably met an item that was moved to another chain. 
3053ab5aee7SEric Dumazet */ 3063ab5aee7SEric Dumazet if (get_nulls_value(node) != slot) 3073ab5aee7SEric Dumazet goto begin; 30877a5ba55SPavel Emelyanov out: 30905dbc7b5SEric Dumazet sk = NULL; 31005dbc7b5SEric Dumazet found: 31177a5ba55SPavel Emelyanov return sk; 31277a5ba55SPavel Emelyanov } 31377a5ba55SPavel Emelyanov EXPORT_SYMBOL_GPL(__inet_lookup_established); 31477a5ba55SPavel Emelyanov 315a7f5e7f1SArnaldo Carvalho de Melo /* called with local bh disabled */ 316a7f5e7f1SArnaldo Carvalho de Melo static int __inet_check_established(struct inet_timewait_death_row *death_row, 317a7f5e7f1SArnaldo Carvalho de Melo struct sock *sk, __u16 lport, 318a7f5e7f1SArnaldo Carvalho de Melo struct inet_timewait_sock **twp) 319a7f5e7f1SArnaldo Carvalho de Melo { 320a7f5e7f1SArnaldo Carvalho de Melo struct inet_hashinfo *hinfo = death_row->hashinfo; 321a7f5e7f1SArnaldo Carvalho de Melo struct inet_sock *inet = inet_sk(sk); 322c720c7e8SEric Dumazet __be32 daddr = inet->inet_rcv_saddr; 323c720c7e8SEric Dumazet __be32 saddr = inet->inet_daddr; 324a7f5e7f1SArnaldo Carvalho de Melo int dif = sk->sk_bound_dev_if; 325c7228317SJoe Perches INET_ADDR_COOKIE(acookie, saddr, daddr); 326c720c7e8SEric Dumazet const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); 3279f26b3adSPavel Emelyanov struct net *net = sock_net(sk); 328c720c7e8SEric Dumazet unsigned int hash = inet_ehashfn(net, daddr, lport, 329c720c7e8SEric Dumazet saddr, inet->inet_dport); 330a7f5e7f1SArnaldo Carvalho de Melo struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 3319db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(hinfo, hash); 332a7f5e7f1SArnaldo Carvalho de Melo struct sock *sk2; 3333ab5aee7SEric Dumazet const struct hlist_nulls_node *node; 33405dbc7b5SEric Dumazet struct inet_timewait_sock *tw = NULL; 335a7f5e7f1SArnaldo Carvalho de Melo 3369db66bdcSEric Dumazet spin_lock(lock); 337a7f5e7f1SArnaldo Carvalho de Melo 3383ab5aee7SEric Dumazet sk_nulls_for_each(sk2, node, 
&head->chain) { 339ce43b03eSEric Dumazet if (sk2->sk_hash != hash) 340ce43b03eSEric Dumazet continue; 34105dbc7b5SEric Dumazet 342ce43b03eSEric Dumazet if (likely(INET_MATCH(sk2, net, acookie, 34305dbc7b5SEric Dumazet saddr, daddr, ports, dif))) { 34405dbc7b5SEric Dumazet if (sk2->sk_state == TCP_TIME_WAIT) { 34505dbc7b5SEric Dumazet tw = inet_twsk(sk2); 34605dbc7b5SEric Dumazet if (twsk_unique(sk, sk2, twp)) 34705dbc7b5SEric Dumazet break; 34805dbc7b5SEric Dumazet } 349a7f5e7f1SArnaldo Carvalho de Melo goto not_unique; 350a7f5e7f1SArnaldo Carvalho de Melo } 35105dbc7b5SEric Dumazet } 352a7f5e7f1SArnaldo Carvalho de Melo 353a7f5e7f1SArnaldo Carvalho de Melo /* Must record num and sport now. Otherwise we will see 35405dbc7b5SEric Dumazet * in hash table socket with a funny identity. 35505dbc7b5SEric Dumazet */ 356c720c7e8SEric Dumazet inet->inet_num = lport; 357c720c7e8SEric Dumazet inet->inet_sport = htons(lport); 358a7f5e7f1SArnaldo Carvalho de Melo sk->sk_hash = hash; 359547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk)); 3603ab5aee7SEric Dumazet __sk_nulls_add_node_rcu(sk, &head->chain); 36113475a30SEric Dumazet if (tw) { 362fc01538fSEric Dumazet sk_nulls_del_node_init_rcu((struct sock *)tw); 363*02a1d6e7SEric Dumazet __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); 36413475a30SEric Dumazet } 3659db66bdcSEric Dumazet spin_unlock(lock); 366c29a0bc4SPavel Emelyanov sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 367a7f5e7f1SArnaldo Carvalho de Melo 368a7f5e7f1SArnaldo Carvalho de Melo if (twp) { 369a7f5e7f1SArnaldo Carvalho de Melo *twp = tw; 370a7f5e7f1SArnaldo Carvalho de Melo } else if (tw) { 371a7f5e7f1SArnaldo Carvalho de Melo /* Silly. Should hash-dance instead... 
*/ 372dbe7faa4SEric Dumazet inet_twsk_deschedule_put(tw); 373a7f5e7f1SArnaldo Carvalho de Melo } 374a7f5e7f1SArnaldo Carvalho de Melo return 0; 375a7f5e7f1SArnaldo Carvalho de Melo 376a7f5e7f1SArnaldo Carvalho de Melo not_unique: 3779db66bdcSEric Dumazet spin_unlock(lock); 378a7f5e7f1SArnaldo Carvalho de Melo return -EADDRNOTAVAIL; 379a7f5e7f1SArnaldo Carvalho de Melo } 380a7f5e7f1SArnaldo Carvalho de Melo 381e2baad9eSEric Dumazet static u32 inet_sk_port_offset(const struct sock *sk) 382a7f5e7f1SArnaldo Carvalho de Melo { 383a7f5e7f1SArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 384e2baad9eSEric Dumazet 385c720c7e8SEric Dumazet return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr, 386c720c7e8SEric Dumazet inet->inet_daddr, 387c720c7e8SEric Dumazet inet->inet_dport); 388a7f5e7f1SArnaldo Carvalho de Melo } 389a7f5e7f1SArnaldo Carvalho de Melo 390079096f1SEric Dumazet /* insert a socket into ehash, and eventually remove another one 391079096f1SEric Dumazet * (The another one can be a SYN_RECV or TIMEWAIT 392079096f1SEric Dumazet */ 3935e0724d0SEric Dumazet bool inet_ehash_insert(struct sock *sk, struct sock *osk) 394152da81dSPavel Emelyanov { 39539d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 3963ab5aee7SEric Dumazet struct hlist_nulls_head *list; 397152da81dSPavel Emelyanov struct inet_ehash_bucket *head; 3985b441f76SEric Dumazet spinlock_t *lock; 3995e0724d0SEric Dumazet bool ret = true; 400152da81dSPavel Emelyanov 401079096f1SEric Dumazet WARN_ON_ONCE(!sk_unhashed(sk)); 402152da81dSPavel Emelyanov 4035b441f76SEric Dumazet sk->sk_hash = sk_ehashfn(sk); 404152da81dSPavel Emelyanov head = inet_ehash_bucket(hashinfo, sk->sk_hash); 405152da81dSPavel Emelyanov list = &head->chain; 406152da81dSPavel Emelyanov lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 407152da81dSPavel Emelyanov 4089db66bdcSEric Dumazet spin_lock(lock); 409fc01538fSEric Dumazet if (osk) { 4105e0724d0SEric Dumazet WARN_ON_ONCE(sk->sk_hash != 
osk->sk_hash); 4115e0724d0SEric Dumazet ret = sk_nulls_del_node_init_rcu(osk); 4129327f705SEric Dumazet } 4135e0724d0SEric Dumazet if (ret) 4145e0724d0SEric Dumazet __sk_nulls_add_node_rcu(sk, list); 4159db66bdcSEric Dumazet spin_unlock(lock); 416079096f1SEric Dumazet return ret; 417079096f1SEric Dumazet } 418079096f1SEric Dumazet 4195e0724d0SEric Dumazet bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) 420079096f1SEric Dumazet { 4215e0724d0SEric Dumazet bool ok = inet_ehash_insert(sk, osk); 4225e0724d0SEric Dumazet 4235e0724d0SEric Dumazet if (ok) { 424c29a0bc4SPavel Emelyanov sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 4255e0724d0SEric Dumazet } else { 4265e0724d0SEric Dumazet percpu_counter_inc(sk->sk_prot->orphan_count); 4275e0724d0SEric Dumazet sk->sk_state = TCP_CLOSE; 4285e0724d0SEric Dumazet sock_set_flag(sk, SOCK_DEAD); 4295e0724d0SEric Dumazet inet_csk_destroy_sock(sk); 430152da81dSPavel Emelyanov } 4315e0724d0SEric Dumazet return ok; 4325e0724d0SEric Dumazet } 4335e0724d0SEric Dumazet EXPORT_SYMBOL_GPL(inet_ehash_nolisten); 434152da81dSPavel Emelyanov 435c125e80bSCraig Gallek static int inet_reuseport_add_sock(struct sock *sk, 436c125e80bSCraig Gallek struct inet_listen_hashbucket *ilb, 437c125e80bSCraig Gallek int (*saddr_same)(const struct sock *sk1, 438c125e80bSCraig Gallek const struct sock *sk2, 439c125e80bSCraig Gallek bool match_wildcard)) 440c125e80bSCraig Gallek { 441c125e80bSCraig Gallek struct sock *sk2; 442c125e80bSCraig Gallek kuid_t uid = sock_i_uid(sk); 443c125e80bSCraig Gallek 44485017869SEric Dumazet sk_for_each_rcu(sk2, &ilb->head) { 445c125e80bSCraig Gallek if (sk2 != sk && 446c125e80bSCraig Gallek sk2->sk_family == sk->sk_family && 447c125e80bSCraig Gallek ipv6_only_sock(sk2) == ipv6_only_sock(sk) && 448c125e80bSCraig Gallek sk2->sk_bound_dev_if == sk->sk_bound_dev_if && 449c125e80bSCraig Gallek sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && 450c125e80bSCraig Gallek saddr_same(sk, sk2, false)) 451c125e80bSCraig 
Gallek return reuseport_add_sock(sk, sk2); 452c125e80bSCraig Gallek } 453c125e80bSCraig Gallek 454c125e80bSCraig Gallek /* Initial allocation may have already happened via setsockopt */ 455c125e80bSCraig Gallek if (!rcu_access_pointer(sk->sk_reuseport_cb)) 456c125e80bSCraig Gallek return reuseport_alloc(sk); 457c125e80bSCraig Gallek return 0; 458c125e80bSCraig Gallek } 459c125e80bSCraig Gallek 460c125e80bSCraig Gallek int __inet_hash(struct sock *sk, struct sock *osk, 461c125e80bSCraig Gallek int (*saddr_same)(const struct sock *sk1, 462c125e80bSCraig Gallek const struct sock *sk2, 463c125e80bSCraig Gallek bool match_wildcard)) 464152da81dSPavel Emelyanov { 46539d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 4665caea4eaSEric Dumazet struct inet_listen_hashbucket *ilb; 467c125e80bSCraig Gallek int err = 0; 468152da81dSPavel Emelyanov 4695e0724d0SEric Dumazet if (sk->sk_state != TCP_LISTEN) { 4705e0724d0SEric Dumazet inet_ehash_nolisten(sk, osk); 471c125e80bSCraig Gallek return 0; 4725e0724d0SEric Dumazet } 473547b792cSIlpo Järvinen WARN_ON(!sk_unhashed(sk)); 4745caea4eaSEric Dumazet ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; 475152da81dSPavel Emelyanov 4765caea4eaSEric Dumazet spin_lock(&ilb->lock); 477c125e80bSCraig Gallek if (sk->sk_reuseport) { 478c125e80bSCraig Gallek err = inet_reuseport_add_sock(sk, ilb, saddr_same); 479c125e80bSCraig Gallek if (err) 480c125e80bSCraig Gallek goto unlock; 481c125e80bSCraig Gallek } 482d296ba60SCraig Gallek if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && 483d296ba60SCraig Gallek sk->sk_family == AF_INET6) 484d296ba60SCraig Gallek hlist_add_tail_rcu(&sk->sk_node, &ilb->head); 485d296ba60SCraig Gallek else 4863b24d854SEric Dumazet hlist_add_head_rcu(&sk->sk_node, &ilb->head); 4873b24d854SEric Dumazet sock_set_flag(sk, SOCK_RCU_FREE); 488c29a0bc4SPavel Emelyanov sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 489c125e80bSCraig Gallek unlock: 4905caea4eaSEric Dumazet 
spin_unlock(&ilb->lock); 491c125e80bSCraig Gallek 492c125e80bSCraig Gallek return err; 493152da81dSPavel Emelyanov } 49477a6a471SEric Dumazet EXPORT_SYMBOL(__inet_hash); 495ab1e0a13SArnaldo Carvalho de Melo 496086c653fSCraig Gallek int inet_hash(struct sock *sk) 497ab1e0a13SArnaldo Carvalho de Melo { 498c125e80bSCraig Gallek int err = 0; 499c125e80bSCraig Gallek 500ab1e0a13SArnaldo Carvalho de Melo if (sk->sk_state != TCP_CLOSE) { 501ab1e0a13SArnaldo Carvalho de Melo local_bh_disable(); 502c125e80bSCraig Gallek err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal); 503ab1e0a13SArnaldo Carvalho de Melo local_bh_enable(); 504ab1e0a13SArnaldo Carvalho de Melo } 505086c653fSCraig Gallek 506c125e80bSCraig Gallek return err; 507ab1e0a13SArnaldo Carvalho de Melo } 508ab1e0a13SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_hash); 509ab1e0a13SArnaldo Carvalho de Melo 510ab1e0a13SArnaldo Carvalho de Melo void inet_unhash(struct sock *sk) 511ab1e0a13SArnaldo Carvalho de Melo { 51239d8cda7SPavel Emelyanov struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 513c25eb3bfSEric Dumazet spinlock_t *lock; 5143b24d854SEric Dumazet bool listener = false; 515c25eb3bfSEric Dumazet int done; 516ab1e0a13SArnaldo Carvalho de Melo 517ab1e0a13SArnaldo Carvalho de Melo if (sk_unhashed(sk)) 5185caea4eaSEric Dumazet return; 519ab1e0a13SArnaldo Carvalho de Melo 5203b24d854SEric Dumazet if (sk->sk_state == TCP_LISTEN) { 521c25eb3bfSEric Dumazet lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; 5223b24d854SEric Dumazet listener = true; 5233b24d854SEric Dumazet } else { 524c25eb3bfSEric Dumazet lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 5253b24d854SEric Dumazet } 5269db66bdcSEric Dumazet spin_lock_bh(lock); 527c125e80bSCraig Gallek if (rcu_access_pointer(sk->sk_reuseport_cb)) 528c125e80bSCraig Gallek reuseport_detach_sock(sk); 5293b24d854SEric Dumazet if (listener) 5303b24d854SEric Dumazet done = __sk_del_node_init(sk); 5313b24d854SEric Dumazet else 532c25eb3bfSEric Dumazet 
done = __sk_nulls_del_node_init_rcu(sk); 533c25eb3bfSEric Dumazet if (done) 534c25eb3bfSEric Dumazet sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 535920de804SEric Dumazet spin_unlock_bh(lock); 536ab1e0a13SArnaldo Carvalho de Melo } 537ab1e0a13SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_unhash); 538152da81dSPavel Emelyanov 5395ee31fc1SPavel Emelyanov int __inet_hash_connect(struct inet_timewait_death_row *death_row, 5405d8c0aa9SPavel Emelyanov struct sock *sk, u32 port_offset, 5415ee31fc1SPavel Emelyanov int (*check_established)(struct inet_timewait_death_row *, 542b4d6444eSEric Dumazet struct sock *, __u16, struct inet_timewait_sock **)) 543a7f5e7f1SArnaldo Carvalho de Melo { 544a7f5e7f1SArnaldo Carvalho de Melo struct inet_hashinfo *hinfo = death_row->hashinfo; 545a7f5e7f1SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = NULL; 5461580ab63SEric Dumazet struct inet_bind_hashbucket *head; 5471580ab63SEric Dumazet int port = inet_sk(sk)->inet_num; 5481580ab63SEric Dumazet struct net *net = sock_net(sk); 5491580ab63SEric Dumazet struct inet_bind_bucket *tb; 5501580ab63SEric Dumazet u32 remaining, offset; 5511580ab63SEric Dumazet int ret, i, low, high; 5521580ab63SEric Dumazet static u32 hint; 5531580ab63SEric Dumazet 5541580ab63SEric Dumazet if (port) { 5551580ab63SEric Dumazet head = &hinfo->bhash[inet_bhashfn(net, port, 5561580ab63SEric Dumazet hinfo->bhash_size)]; 5571580ab63SEric Dumazet tb = inet_csk(sk)->icsk_bind_hash; 5581580ab63SEric Dumazet spin_lock_bh(&head->lock); 5591580ab63SEric Dumazet if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 5601580ab63SEric Dumazet inet_ehash_nolisten(sk, NULL); 5611580ab63SEric Dumazet spin_unlock_bh(&head->lock); 5621580ab63SEric Dumazet return 0; 5631580ab63SEric Dumazet } 5641580ab63SEric Dumazet spin_unlock(&head->lock); 5651580ab63SEric Dumazet /* No definite answer... 
Walk to established hash table */ 5661580ab63SEric Dumazet ret = check_established(death_row, sk, port, NULL); 5671580ab63SEric Dumazet local_bh_enable(); 5681580ab63SEric Dumazet return ret; 5691580ab63SEric Dumazet } 570a7f5e7f1SArnaldo Carvalho de Melo 5710bbf87d8SEric W. Biederman inet_get_local_port_range(net, &low, &high); 5721580ab63SEric Dumazet high++; /* [32768, 60999] -> [32768, 61000[ */ 5731580ab63SEric Dumazet remaining = high - low; 5741580ab63SEric Dumazet if (likely(remaining > 1)) 5751580ab63SEric Dumazet remaining &= ~1U; 576227b60f5SStephen Hemminger 5771580ab63SEric Dumazet offset = (hint + port_offset) % remaining; 5781580ab63SEric Dumazet /* In first pass we try ports of @low parity. 5791580ab63SEric Dumazet * inet_csk_get_port() does the opposite choice. 58007f4c900SEric Dumazet */ 5811580ab63SEric Dumazet offset &= ~1U; 5821580ab63SEric Dumazet other_parity_scan: 5831580ab63SEric Dumazet port = low + offset; 5841580ab63SEric Dumazet for (i = 0; i < remaining; i += 2, port += 2) { 5851580ab63SEric Dumazet if (unlikely(port >= high)) 5861580ab63SEric Dumazet port -= remaining; 587122ff243SWANG Cong if (inet_is_local_reserved_port(net, port)) 588e3826f1eSAmerigo Wang continue; 5897f635ab7SPavel Emelyanov head = &hinfo->bhash[inet_bhashfn(net, port, 5907f635ab7SPavel Emelyanov hinfo->bhash_size)]; 5911580ab63SEric Dumazet spin_lock_bh(&head->lock); 592a7f5e7f1SArnaldo Carvalho de Melo 5931580ab63SEric Dumazet /* Does not bother with rcv_saddr checks, because 5941580ab63SEric Dumazet * the established check is already unique enough. 
595a7f5e7f1SArnaldo Carvalho de Melo */ 596b67bfe0dSSasha Levin inet_bind_bucket_for_each(tb, &head->chain) { 5971580ab63SEric Dumazet if (net_eq(ib_net(tb), net) && tb->port == port) { 598da5e3630STom Herbert if (tb->fastreuse >= 0 || 599da5e3630STom Herbert tb->fastreuseport >= 0) 600a7f5e7f1SArnaldo Carvalho de Melo goto next_port; 601a9d8f911SEvgeniy Polyakov WARN_ON(hlist_empty(&tb->owners)); 6025ee31fc1SPavel Emelyanov if (!check_established(death_row, sk, 6035ee31fc1SPavel Emelyanov port, &tw)) 604a7f5e7f1SArnaldo Carvalho de Melo goto ok; 605a7f5e7f1SArnaldo Carvalho de Melo goto next_port; 606a7f5e7f1SArnaldo Carvalho de Melo } 607a7f5e7f1SArnaldo Carvalho de Melo } 608a7f5e7f1SArnaldo Carvalho de Melo 609941b1d22SPavel Emelyanov tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, 610941b1d22SPavel Emelyanov net, head, port); 611a7f5e7f1SArnaldo Carvalho de Melo if (!tb) { 6121580ab63SEric Dumazet spin_unlock_bh(&head->lock); 6131580ab63SEric Dumazet return -ENOMEM; 614a7f5e7f1SArnaldo Carvalho de Melo } 615a7f5e7f1SArnaldo Carvalho de Melo tb->fastreuse = -1; 616da5e3630STom Herbert tb->fastreuseport = -1; 617a7f5e7f1SArnaldo Carvalho de Melo goto ok; 618a7f5e7f1SArnaldo Carvalho de Melo next_port: 6191580ab63SEric Dumazet spin_unlock_bh(&head->lock); 6201580ab63SEric Dumazet cond_resched(); 621a7f5e7f1SArnaldo Carvalho de Melo } 6221580ab63SEric Dumazet 6231580ab63SEric Dumazet offset++; 6241580ab63SEric Dumazet if ((offset & 1) && remaining > 1) 6251580ab63SEric Dumazet goto other_parity_scan; 626a7f5e7f1SArnaldo Carvalho de Melo 627a7f5e7f1SArnaldo Carvalho de Melo return -EADDRNOTAVAIL; 628a7f5e7f1SArnaldo Carvalho de Melo 629a7f5e7f1SArnaldo Carvalho de Melo ok: 6301580ab63SEric Dumazet hint += i + 2; 631a7f5e7f1SArnaldo Carvalho de Melo 632a7f5e7f1SArnaldo Carvalho de Melo /* Head lock still held and bh's disabled */ 633a7f5e7f1SArnaldo Carvalho de Melo inet_bind_hash(sk, tb, port); 634a7f5e7f1SArnaldo Carvalho de Melo if (sk_unhashed(sk)) { 
635c720c7e8SEric Dumazet inet_sk(sk)->inet_sport = htons(port); 6365e0724d0SEric Dumazet inet_ehash_nolisten(sk, (struct sock *)tw); 637a7f5e7f1SArnaldo Carvalho de Melo } 6383cdaedaeSEric Dumazet if (tw) 639fc01538fSEric Dumazet inet_twsk_bind_unhash(tw, hinfo); 640a7f5e7f1SArnaldo Carvalho de Melo spin_unlock(&head->lock); 641dbe7faa4SEric Dumazet if (tw) 642dbe7faa4SEric Dumazet inet_twsk_deschedule_put(tw); 643a7f5e7f1SArnaldo Carvalho de Melo local_bh_enable(); 6441580ab63SEric Dumazet return 0; 645a7f5e7f1SArnaldo Carvalho de Melo } 6465ee31fc1SPavel Emelyanov 6475ee31fc1SPavel Emelyanov /* 6485ee31fc1SPavel Emelyanov * Bind a port for a connect operation and hash it. 6495ee31fc1SPavel Emelyanov */ /* inet_hash_connect(): thin wrapper around __inet_hash_connect(). death_row and sk are forwarded unchanged; port_offset is 0 unless inet_sk(sk)->inet_num is zero, in which case it is taken from inet_sk_port_offset(sk); __inet_check_established is passed as the check callback. Returns whatever __inet_hash_connect() returns. */ 6505ee31fc1SPavel Emelyanov int inet_hash_connect(struct inet_timewait_death_row *death_row, 6515ee31fc1SPavel Emelyanov struct sock *sk) 6525ee31fc1SPavel Emelyanov { 653e2baad9eSEric Dumazet u32 port_offset = 0; 654e2baad9eSEric Dumazet /* NOTE(review): a nonzero inet_num presumably means a local port is already chosen, so no offset is needed - confirm against __inet_hash_connect() */ 655e2baad9eSEric Dumazet if (!inet_sk(sk)->inet_num) 656e2baad9eSEric Dumazet port_offset = inet_sk_port_offset(sk); 657e2baad9eSEric Dumazet return __inet_hash_connect(death_row, sk, port_offset, 658b4d6444eSEric Dumazet __inet_check_established); 6595ee31fc1SPavel Emelyanov } 660a7f5e7f1SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_hash_connect); 6615caea4eaSEric Dumazet /* inet_hashinfo_init(): walks all INET_LHTABLE_SIZE entries of h->listening_hash and, for each one, initialises its spinlock (.lock) and its hlist head (.head). No other field of h is touched here and nothing is returned. */ 6625caea4eaSEric Dumazet void inet_hashinfo_init(struct inet_hashinfo *h) 6635caea4eaSEric Dumazet { 6645caea4eaSEric Dumazet int i; 6655caea4eaSEric Dumazet 666c25eb3bfSEric Dumazet for (i = 0; i < INET_LHTABLE_SIZE; i++) { 6675caea4eaSEric Dumazet spin_lock_init(&h->listening_hash[i].lock); 6683b24d854SEric Dumazet INIT_HLIST_HEAD(&h->listening_hash[i].head); 669c25eb3bfSEric Dumazet } 6705caea4eaSEric Dumazet } 6715caea4eaSEric Dumazet EXPORT_SYMBOL_GPL(inet_hashinfo_init); 672095dc8e0SEric Dumazet /* inet_ehash_locks_alloc(): allocates and initialises the spinlock array hashinfo->ehash_locks and stores (lock count - 1) in hashinfo->ehash_locks_mask. The lock count starts from max(2 * L1_CACHE_BYTES / sizeof(spinlock_t), 1), is multiplied by num_possible_cpus(), rounded up to a power of two, then capped at hashinfo->ehash_mask + 1. Returns 0 on success or -ENOMEM when both kmalloc_array() and vmalloc() fail. When sizeof(spinlock_t) is 0 nothing is allocated and the mask is set to 0. */ 673095dc8e0SEric Dumazet int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) 674095dc8e0SEric Dumazet { 67589e478a2SEric Dumazet unsigned int locksz = 
sizeof(spinlock_t); 676095dc8e0SEric Dumazet unsigned int i, nblocks = 1; 677095dc8e0SEric Dumazet /* locksz is also the divisor below, so the zero case must be excluded first. NOTE(review): presumably spinlock_t can be zero-sized on some kernel configurations - confirm */ 67889e478a2SEric Dumazet if (locksz != 0) { 679095dc8e0SEric Dumazet /* allocate 2 cache lines or at least one spinlock per cpu */ 68089e478a2SEric Dumazet nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U); 681095dc8e0SEric Dumazet nblocks = roundup_pow_of_two(nblocks * num_possible_cpus()); 682095dc8e0SEric Dumazet 683095dc8e0SEric Dumazet /* no more locks than number of hash buckets */ 684095dc8e0SEric Dumazet nblocks = min(nblocks, hashinfo->ehash_mask + 1); 685095dc8e0SEric Dumazet /* first attempt: kmalloc_array(); a NULL result is handled by the vmalloc() fallback below. NOTE(review): __GFP_NOWARN presumably silences the failure warning because of that fallback - confirm */ 68689e478a2SEric Dumazet hashinfo->ehash_locks = kmalloc_array(nblocks, locksz, 687095dc8e0SEric Dumazet GFP_KERNEL | __GFP_NOWARN); 688095dc8e0SEric Dumazet if (!hashinfo->ehash_locks) 68989e478a2SEric Dumazet hashinfo->ehash_locks = vmalloc(nblocks * locksz); 690095dc8e0SEric Dumazet /* both allocators failed */ 691095dc8e0SEric Dumazet if (!hashinfo->ehash_locks) 692095dc8e0SEric Dumazet return -ENOMEM; 693095dc8e0SEric Dumazet 694095dc8e0SEric Dumazet for (i = 0; i < nblocks; i++) 695095dc8e0SEric Dumazet spin_lock_init(&hashinfo->ehash_locks[i]); 696095dc8e0SEric Dumazet } /* nblocks is still 1 here when locksz was 0, which makes the mask 0 */ 697095dc8e0SEric Dumazet hashinfo->ehash_locks_mask = nblocks - 1; 698095dc8e0SEric Dumazet return 0; 699095dc8e0SEric Dumazet } 700095dc8e0SEric Dumazet EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc); 701