1 /* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* util/support/hash.c - hash table implementation */
3 /*
4  * Copyright (C) 2018 by the Massachusetts Institute of Technology.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright
15  *   notice, this list of conditions and the following disclaimer in
16  *   the documentation and/or other materials provided with the
17  *   distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
24  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
30  * OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include "k5-platform.h"
34 #include "k5-hashtab.h"
35 #include "k5-queue.h"
36 
37 struct entry {
38     const void *key;
39     size_t klen;
40     void *val;
41     K5_SLIST_ENTRY(entry) next;
42 };
43 
44 struct k5_hashtab {
45     uint64_t k0;
46     uint64_t k1;
47     size_t nbuckets;
48     size_t nentries;
49     K5_SLIST_HEAD(bucket_list, entry) *buckets;
50 };
51 
52 /* Return x rotated to the left by r bits. */
53 static inline uint64_t
rotl64(uint64_t x,int r)54 rotl64(uint64_t x, int r)
55 {
56     return (x << r) | (x >> (64 - r));
57 }
58 
59 static inline void
sipround(uint64_t * v0,uint64_t * v1,uint64_t * v2,uint64_t * v3)60 sipround(uint64_t *v0, uint64_t *v1, uint64_t *v2, uint64_t *v3)
61 {
62     *v0 += *v1;
63     *v2 += *v3;
64     *v1 = rotl64(*v1, 13) ^ *v0;
65     *v3 = rotl64(*v3, 16) ^ *v2;
66     *v0 = rotl64(*v0, 32);
67     *v2 += *v1;
68     *v0 += *v3;
69     *v1 = rotl64(*v1, 17) ^ *v2;
70     *v3 = rotl64(*v3, 21) ^ *v0;
71     *v2 = rotl64(*v2, 32);
72 }
73 
74 /* SipHash-2-4 from https://131002.net/siphash/siphash.pdf (Jean-Philippe
75  * Aumasson and Daniel J. Bernstein) */
76 static uint64_t
siphash24(const uint8_t * data,size_t len,uint64_t k0,uint64_t k1)77 siphash24(const uint8_t *data, size_t len, uint64_t k0, uint64_t k1)
78 {
79     uint64_t v0 = k0 ^ 0x736F6D6570736575;
80     uint64_t v1 = k1 ^ 0x646F72616E646F6D;
81     uint64_t v2 = k0 ^ 0x6C7967656E657261;
82     uint64_t v3 = k1 ^ 0x7465646279746573;
83     uint64_t mi;
84     const uint8_t *p, *end = data + (len - len % 8);
85     uint8_t last[8] = { 0 };
86 
87     /* Process each full 8-byte chunk of data. */
88     for (p = data; p < end; p += 8) {
89         mi = load_64_le(p);
90         v3 ^= mi;
91         sipround(&v0, &v1, &v2, &v3);
92         sipround(&v0, &v1, &v2, &v3);
93         v0 ^= mi;
94     }
95 
96     /* Process the last 0-7 bytes followed by the length mod 256. */
97     memcpy(last, end, len % 8);
98     last[7] = len & 0xFF;
99     mi = load_64_le(last);
100     v3 ^= mi;
101     sipround(&v0, &v1, &v2, &v3);
102     sipround(&v0, &v1, &v2, &v3);
103     v0 ^= mi;
104 
105     /* Finalize. */
106     v2 ^= 0xFF;
107     sipround(&v0, &v1, &v2, &v3);
108     sipround(&v0, &v1, &v2, &v3);
109     sipround(&v0, &v1, &v2, &v3);
110     sipround(&v0, &v1, &v2, &v3);
111     return v0 ^ v1 ^ v2 ^ v3;
112 }
113 
114 uint64_t
k5_siphash24(const uint8_t * data,size_t len,const uint8_t seed[K5_HASH_SEED_LEN])115 k5_siphash24(const uint8_t *data, size_t len,
116              const uint8_t seed[K5_HASH_SEED_LEN])
117 {
118     uint64_t k0 = load_64_le(seed), k1 = load_64_le(seed + 8);
119 
120     return siphash24(data, len, k0, k1);
121 }
122 
123 int
k5_hashtab_create(const uint8_t seed[K5_HASH_SEED_LEN],size_t initial_buckets,struct k5_hashtab ** ht_out)124 k5_hashtab_create(const uint8_t seed[K5_HASH_SEED_LEN], size_t initial_buckets,
125                   struct k5_hashtab **ht_out)
126 {
127     struct k5_hashtab *ht;
128 
129     *ht_out = NULL;
130 
131     ht = malloc(sizeof(*ht));
132     if (ht == NULL)
133         return ENOMEM;
134 
135     if (seed != NULL) {
136         ht->k0 = load_64_le(seed);
137         ht->k1 = load_64_le(seed + 8);
138     } else {
139         ht->k0 = ht->k1 = 0;
140     }
141     ht->nbuckets = (initial_buckets > 0) ? initial_buckets : 64;
142     ht->nentries = 0;
143     ht->buckets = calloc(ht->nbuckets, sizeof(*ht->buckets));
144     if (ht->buckets == NULL) {
145         free(ht);
146         return ENOMEM;
147     }
148 
149     *ht_out = ht;
150     return 0;
151 }
152 
153 void
k5_hashtab_free(struct k5_hashtab * ht)154 k5_hashtab_free(struct k5_hashtab *ht)
155 {
156     size_t i;
157     struct entry *ent;
158 
159     for (i = 0; i < ht->nbuckets; i++) {
160         while (!K5_SLIST_EMPTY(&ht->buckets[i])) {
161             ent = K5_SLIST_FIRST(&ht->buckets[i]);
162             K5_SLIST_REMOVE_HEAD(&ht->buckets[i], next);
163             free(ent);
164         }
165     }
166     free(ht->buckets);
167     free(ht);
168 }
169 
170 static int
resize_table(struct k5_hashtab * ht)171 resize_table(struct k5_hashtab *ht)
172 {
173     size_t i, j, newsize = ht->nbuckets * 2;
174     struct bucket_list *newbuckets;
175     struct entry *ent;
176 
177     newbuckets = calloc(newsize, sizeof(*newbuckets));
178     if (newbuckets == NULL)
179         return ENOMEM;
180 
181     /* Rehash all the entries into the new buckets. */
182     for (i = 0; i < ht->nbuckets; i++) {
183         while (!K5_SLIST_EMPTY(&ht->buckets[i])) {
184             ent = K5_SLIST_FIRST(&ht->buckets[i]);
185             j = siphash24(ent->key, ent->klen, ht->k0, ht->k1) % newsize;
186             K5_SLIST_REMOVE_HEAD(&ht->buckets[i], next);
187             K5_SLIST_INSERT_HEAD(&newbuckets[j], ent, next);
188         }
189     }
190 
191     free(ht->buckets);
192     ht->buckets = newbuckets;
193     ht->nbuckets = newsize;
194     return 0;
195 }
196 
197 int
k5_hashtab_add(struct k5_hashtab * ht,const void * key,size_t klen,void * val)198 k5_hashtab_add(struct k5_hashtab *ht, const void *key, size_t klen, void *val)
199 {
200     size_t i;
201     struct entry *ent;
202 
203     if (ht->nentries == ht->nbuckets) {
204         if (resize_table(ht) != 0)
205             return ENOMEM;
206     }
207 
208     ent = malloc(sizeof(*ent));
209     if (ent == NULL)
210         return ENOMEM;
211     ent->key = key;
212     ent->klen = klen;
213     ent->val = val;
214 
215     i = siphash24(key, klen, ht->k0, ht->k1) % ht->nbuckets;
216     K5_SLIST_INSERT_HEAD(&ht->buckets[i], ent, next);
217 
218     ht->nentries++;
219     return 0;
220 }
221 
222 int
k5_hashtab_remove(struct k5_hashtab * ht,const void * key,size_t klen)223 k5_hashtab_remove(struct k5_hashtab *ht, const void *key, size_t klen)
224 {
225     size_t i;
226     struct entry *ent;
227 
228     i = siphash24(key, klen, ht->k0, ht->k1) % ht->nbuckets;
229     K5_SLIST_FOREACH(ent, &ht->buckets[i], next) {
230         if (ent->klen == klen && memcmp(ent->key, key, klen) == 0) {
231             K5_SLIST_REMOVE(&ht->buckets[i], ent, entry, next);
232             free(ent);
233             ht->nentries--;
234             return 1;
235         }
236     }
237     return 0;
238 }
239 
240 void *
k5_hashtab_get(struct k5_hashtab * ht,const void * key,size_t klen)241 k5_hashtab_get(struct k5_hashtab *ht, const void *key, size_t klen)
242 {
243     size_t i;
244     struct entry *ent;
245 
246     i = siphash24(key, klen, ht->k0, ht->k1) % ht->nbuckets;
247     K5_SLIST_FOREACH(ent, &ht->buckets[i], next) {
248         if (ent->klen == klen && memcmp(ent->key, key, klen) == 0)
249             return ent->val;
250     }
251     return NULL;
252 }
253