1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #include "ucl_internal.h" 25 #include "ucl_hash.h" 26 #include "khash.h" 27 #include "kvec.h" 28 #include "mum.h" 29 30 #include <time.h> 31 #include <limits.h> 32 33 struct ucl_hash_elt { 34 const ucl_object_t *obj; 35 struct ucl_hash_elt *prev, *next; 36 }; 37 38 struct ucl_hash_struct { 39 void *hash; 40 struct ucl_hash_elt *head; 41 bool caseless; 42 }; 43 44 static uint64_t 45 ucl_hash_seed(void) 46 { 47 static uint64_t seed; 48 if (seed == 0) { 49 #ifdef UCL_RANDOM_FUNCTION 50 seed = UCL_RANDOM_FUNCTION; 51 #else 52 /* Not very random but can be useful for our purposes */ 53 seed = time(NULL); 54 #endif 55 } 56 57 return seed; 58 } 59 60 static const unsigned char lc_map[256] = { 61 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 62 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 63 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 64 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 65 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 66 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 67 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 68 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 69 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 70 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 71 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 72 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 73 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 74 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 75 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 76 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 77 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 78 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 79 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 80 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 81 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 82 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 83 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 84 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 85 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 86 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 87 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 88 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 89 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 90 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 91 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 92 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff}; 93 94 #if (defined(WORD_BIT) && WORD_BIT == 64) || \ 95 (defined(__WORDSIZE) && __WORDSIZE == 64) || \ 96 defined(__x86_64__) || \ 97 defined(__amd64__) 98 #define UCL64_BIT_HASH 1 99 #endif 100 101 static inline uint32_t 102 ucl_hash_func(const ucl_object_t *o) 103 { 104 return mum_hash(o->key, o->keylen, ucl_hash_seed()); 105 } 106 static inline int 107 ucl_hash_equal(const ucl_object_t *k1, const ucl_object_t *k2) 108 { 109 if (k1->keylen == k2->keylen) { 110 return memcmp(k1->key, k2->key, k1->keylen) == 0; 111 } 112 113 return 0; 114 } 115 116 KHASH_INIT(ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt *, 1, 117 ucl_hash_func, ucl_hash_equal) 118 119 static inline uint32_t 120 ucl_hash_caseless_func(const ucl_object_t *o) 121 { 122 unsigned len = o->keylen; 123 unsigned leftover = o->keylen % 8; 124 unsigned fp, i; 125 const uint8_t *s = (const uint8_t *) o->key; 126 union { 127 struct { 128 unsigned char c1, c2, c3, c4, c5, c6, c7, c8; 129 } c; 130 uint64_t pp; 131 } u; 132 uint64_t r; 133 134 fp = len - leftover; 135 r = ucl_hash_seed(); 136 137 for (i = 0; i != fp; i += 8) { 138 u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; 139 u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7]; 140 u.c.c1 = lc_map[u.c.c1]; 141 u.c.c2 = lc_map[u.c.c2]; 142 u.c.c3 = lc_map[u.c.c3]; 143 u.c.c4 = lc_map[u.c.c4]; 144 u.c.c5 = lc_map[u.c.c5]; 145 u.c.c6 = lc_map[u.c.c6]; 146 u.c.c7 = lc_map[u.c.c7]; 147 u.c.c8 = lc_map[u.c.c8]; 148 r = mum_hash_step(r, u.pp); 149 } 150 151 u.pp = 0; 152 switch (leftover) { 153 case 7: 154 u.c.c7 = lc_map[(unsigned char) s[i++]]; 155 /* FALLTHRU */ 156 case 6: 157 u.c.c6 = lc_map[(unsigned char) s[i++]]; 158 /* FALLTHRU */ 159 case 5: 160 u.c.c5 = lc_map[(unsigned char) s[i++]]; 161 /* FALLTHRU */ 162 case 4: 163 u.c.c4 = lc_map[(unsigned char) s[i++]]; 164 /* FALLTHRU */ 165 case 3: 166 u.c.c3 = lc_map[(unsigned char) s[i++]]; 167 /* FALLTHRU */ 168 case 2: 169 u.c.c2 = lc_map[(unsigned char) s[i++]]; 170 /* FALLTHRU */ 171 case 1: 172 u.c.c1 = lc_map[(unsigned char) s[i]]; 173 r = mum_hash_step(r, u.pp); 174 break; 175 } 176 177 return mum_hash_finish(r); 178 } 179 180 static inline int 181 ucl_hash_caseless_equal(const ucl_object_t *k1, const ucl_object_t *k2) 182 { 183 if (k1->keylen == k2->keylen) { 184 unsigned fp, i; 185 const char *s = k1->key, *d = k2->key; 186 unsigned char c1, c2, c3, c4; 187 union { 188 unsigned char c[4]; 189 uint32_t n; 190 } cmp1, cmp2; 191 size_t leftover = k1->keylen % 4; 192 193 fp = k1->keylen - leftover; 194 195 for (i = 0; i != fp; i += 4) { 196 c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3]; 197 cmp1.c[0] = lc_map[c1]; 198 cmp1.c[1] = lc_map[c2]; 199 cmp1.c[2] = lc_map[c3]; 200 cmp1.c[3] = lc_map[c4]; 201 202 c1 = d[i], c2 = d[i + 1], c3 = d[i + 2], c4 = d[i + 3]; 203 cmp2.c[0] = lc_map[c1]; 204 cmp2.c[1] = lc_map[c2]; 205 cmp2.c[2] = lc_map[c3]; 206 cmp2.c[3] = lc_map[c4]; 207 208 if (cmp1.n != cmp2.n) { 209 return 0; 210 } 211 } 212 213 while (leftover > 0) { 214 if (lc_map[(unsigned char) s[i]] != lc_map[(unsigned char) d[i]]) { 215 return 0; 216 } 217 218 leftover--; 219 i++; 220 } 221 222 return 1; 223 } 224 225 return 0; 226 } 227 228 KHASH_INIT(ucl_hash_caseless_node, const ucl_object_t *, struct ucl_hash_elt *, 1, 229 ucl_hash_caseless_func, ucl_hash_caseless_equal) 230 231 ucl_hash_t * 232 ucl_hash_create(bool ignore_case) 233 { 234 ucl_hash_t *new; 235 236 new = UCL_ALLOC(sizeof(ucl_hash_t)); 237 if (new != NULL) { 238 void *h; 239 new->head = NULL; 240 new->caseless = ignore_case; 241 if (ignore_case) { 242 h = (void *) kh_init(ucl_hash_caseless_node); 243 } 244 else { 245 h = (void *) kh_init(ucl_hash_node); 246 } 247 if (h == NULL) { 248 UCL_FREE(sizeof(ucl_hash_t), new); 249 return NULL; 250 } 251 new->hash = h; 252 } 253 return new; 254 } 255 256 void ucl_hash_destroy(ucl_hash_t *hashlin, ucl_hash_free_func func) 257 { 258 259 if (hashlin == NULL) { 260 return; 261 } 262 263 if (func != NULL) { 264 /* Iterate over the hash first */ 265 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 266 hashlin->hash; 267 khiter_t k; 268 const ucl_object_t *cur, *tmp; 269 270 for (k = kh_begin(h); k != kh_end(h); ++k) { 271 if (kh_exist(h, k)) { 272 cur = (kh_value(h, k))->obj; 273 while (cur != NULL) { 274 tmp = cur->next; 275 func(__DECONST(ucl_object_t *, cur)); 276 cur = tmp; 277 } 278 } 279 } 280 } 281 282 if (hashlin->caseless) { 283 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 284 hashlin->hash; 285 kh_destroy(ucl_hash_caseless_node, h); 286 } 287 else { 288 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 289 hashlin->hash; 290 kh_destroy(ucl_hash_node, h); 291 } 292 293 struct ucl_hash_elt *cur, *tmp; 294 295 DL_FOREACH_SAFE(hashlin->head, cur, tmp) 296 { 297 UCL_FREE(sizeof(*cur), cur); 298 } 299 300 UCL_FREE(sizeof(*hashlin), hashlin); 301 } 302 303 bool ucl_hash_insert(ucl_hash_t *hashlin, const ucl_object_t *obj, 304 const char *key, unsigned keylen) 305 { 306 khiter_t k; 307 int ret; 308 struct ucl_hash_elt **pelt, *elt; 309 310 if (hashlin == NULL) { 311 return false; 312 } 313 314 if (hashlin->caseless) { 315 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 316 hashlin->hash; 317 k = kh_put(ucl_hash_caseless_node, h, obj, &ret); 318 if (ret > 0) { 319 elt = UCL_ALLOC(sizeof(*elt)); 320 pelt = &kh_value(h, k); 321 *pelt = elt; 322 DL_APPEND(hashlin->head, elt); 323 elt->obj = obj; 324 } 325 else if (ret < 0) { 326 goto e0; 327 } 328 } 329 else { 330 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 331 hashlin->hash; 332 k = kh_put(ucl_hash_node, h, obj, &ret); 333 if (ret > 0) { 334 elt = UCL_ALLOC(sizeof(*elt)); 335 pelt = &kh_value(h, k); 336 *pelt = elt; 337 DL_APPEND(hashlin->head, elt); 338 elt->obj = obj; 339 } 340 else if (ret < 0) { 341 goto e0; 342 } 343 } 344 return true; 345 e0: 346 return false; 347 } 348 349 void ucl_hash_replace(ucl_hash_t *hashlin, const ucl_object_t *old, 350 const ucl_object_t *new) 351 { 352 khiter_t k; 353 int ret; 354 struct ucl_hash_elt *elt, *nelt; 355 356 if (hashlin == NULL) { 357 return; 358 } 359 360 if (hashlin->caseless) { 361 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 362 hashlin->hash; 363 k = kh_put(ucl_hash_caseless_node, h, old, &ret); 364 if (ret == 0) { 365 elt = kh_value(h, k); 366 kh_del(ucl_hash_caseless_node, h, k); 367 k = kh_put(ucl_hash_caseless_node, h, new, &ret); 368 nelt = UCL_ALLOC(sizeof(*nelt)); 369 nelt->obj = new; 370 kh_value(h, k) = nelt; 371 DL_REPLACE_ELEM(hashlin->head, elt, nelt); 372 UCL_FREE(sizeof(*elt), elt); 373 } 374 } 375 else { 376 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 377 hashlin->hash; 378 k = kh_put(ucl_hash_node, h, old, &ret); 379 if (ret == 0) { 380 elt = kh_value(h, k); 381 kh_del(ucl_hash_node, h, k); 382 k = kh_put(ucl_hash_node, h, new, &ret); 383 nelt = UCL_ALLOC(sizeof(*nelt)); 384 nelt->obj = new; 385 kh_value(h, k) = nelt; 386 DL_REPLACE_ELEM(hashlin->head, elt, nelt); 387 UCL_FREE(sizeof(*elt), elt); 388 } 389 } 390 } 391 392 struct ucl_hash_real_iter { 393 const struct ucl_hash_elt *cur; 394 }; 395 396 #define UHI_SETERR(ep, ern) \ 397 { \ 398 if (ep != NULL) *ep = (ern); \ 399 } 400 401 const void * 402 ucl_hash_iterate2(ucl_hash_t *hashlin, ucl_hash_iter_t *iter, int *ep) 403 { 404 struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *) (*iter); 405 const ucl_object_t *ret = NULL; 406 407 if (hashlin == NULL) { 408 UHI_SETERR(ep, EINVAL); 409 return NULL; 410 } 411 412 if (it == NULL) { 413 it = UCL_ALLOC(sizeof(*it)); 414 415 if (it == NULL) { 416 UHI_SETERR(ep, ENOMEM); 417 return NULL; 418 } 419 420 it->cur = hashlin->head; 421 } 422 423 UHI_SETERR(ep, 0); 424 if (it->cur) { 425 ret = it->cur->obj; 426 it->cur = it->cur->next; 427 } 428 else { 429 UCL_FREE(sizeof(*it), it); 430 *iter = NULL; 431 return NULL; 432 } 433 434 *iter = it; 435 436 return ret; 437 } 438 439 bool ucl_hash_iter_has_next(ucl_hash_t *hashlin, ucl_hash_iter_t iter) 440 { 441 struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *) (iter); 442 443 return it->cur != NULL; 444 } 445 446 447 const ucl_object_t * 448 ucl_hash_search(ucl_hash_t *hashlin, const char *key, unsigned keylen) 449 { 450 khiter_t k; 451 const ucl_object_t *ret = NULL; 452 ucl_object_t search; 453 struct ucl_hash_elt *elt; 454 455 search.key = key; 456 search.keylen = keylen; 457 458 if (hashlin == NULL) { 459 return NULL; 460 } 461 462 if (hashlin->caseless) { 463 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 464 hashlin->hash; 465 466 k = kh_get(ucl_hash_caseless_node, h, &search); 467 if (k != kh_end(h)) { 468 elt = kh_value(h, k); 469 ret = elt->obj; 470 } 471 } 472 else { 473 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 474 hashlin->hash; 475 k = kh_get(ucl_hash_node, h, &search); 476 if (k != kh_end(h)) { 477 elt = kh_value(h, k); 478 ret = elt->obj; 479 } 480 } 481 482 return ret; 483 } 484 485 void ucl_hash_delete(ucl_hash_t *hashlin, const ucl_object_t *obj) 486 { 487 khiter_t k; 488 struct ucl_hash_elt *elt; 489 490 if (hashlin == NULL) { 491 return; 492 } 493 494 if (hashlin->caseless) { 495 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 496 hashlin->hash; 497 498 k = kh_get(ucl_hash_caseless_node, h, obj); 499 if (k != kh_end(h)) { 500 elt = kh_value(h, k); 501 DL_DELETE(hashlin->head, elt); 502 kh_del(ucl_hash_caseless_node, h, k); 503 UCL_FREE(sizeof(*elt), elt); 504 } 505 } 506 else { 507 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 508 hashlin->hash; 509 k = kh_get(ucl_hash_node, h, obj); 510 if (k != kh_end(h)) { 511 elt = kh_value(h, k); 512 DL_DELETE(hashlin->head, elt); 513 kh_del(ucl_hash_node, h, k); 514 UCL_FREE(sizeof(*elt), elt); 515 } 516 } 517 } 518 519 bool ucl_hash_reserve(ucl_hash_t *hashlin, size_t sz) 520 { 521 if (hashlin == NULL) { 522 return false; 523 } 524 525 if (sz > kh_size((khash_t(ucl_hash_node) *) hashlin->hash)) { 526 if (hashlin->caseless) { 527 khash_t(ucl_hash_caseless_node) *h = (khash_t( 528 ucl_hash_caseless_node) *) 529 hashlin->hash; 530 kh_resize(ucl_hash_caseless_node, h, sz * 2); 531 } 532 else { 533 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 534 hashlin->hash; 535 kh_resize(ucl_hash_node, h, sz * 2); 536 } 537 } 538 return true; 539 } 540 541 static int 542 ucl_lc_cmp(const char *s, const char *d, size_t l) 543 { 544 unsigned int fp, i; 545 unsigned char c1, c2, c3, c4; 546 union { 547 unsigned char c[4]; 548 uint32_t n; 549 } cmp1, cmp2; 550 size_t leftover = l % 4; 551 int ret = 0; 552 553 fp = l - leftover; 554 555 for (i = 0; i != fp; i += 4) { 556 c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3]; 557 cmp1.c[0] = lc_map[c1]; 558 cmp1.c[1] = lc_map[c2]; 559 cmp1.c[2] = lc_map[c3]; 560 cmp1.c[3] = lc_map[c4]; 561 562 c1 = d[i], c2 = d[i + 1], c3 = d[i + 2], c4 = d[i + 3]; 563 cmp2.c[0] = lc_map[c1]; 564 cmp2.c[1] = lc_map[c2]; 565 cmp2.c[2] = lc_map[c3]; 566 cmp2.c[3] = lc_map[c4]; 567 568 if (cmp1.n != cmp2.n) { 569 return cmp1.n - cmp2.n; 570 } 571 } 572 573 while (leftover > 0) { 574 if (lc_map[(unsigned char) s[i]] != lc_map[(unsigned char) d[i]]) { 575 return s[i] - d[i]; 576 } 577 578 leftover--; 579 i++; 580 } 581 582 return ret; 583 } 584 585 static int 586 ucl_hash_cmp_icase(const void *a, const void *b) 587 { 588 const struct ucl_hash_elt *oa = (const struct ucl_hash_elt *) a, 589 *ob = (const struct ucl_hash_elt *) b; 590 591 if (oa->obj->keylen == ob->obj->keylen) { 592 return ucl_lc_cmp(oa->obj->key, ob->obj->key, oa->obj->keylen); 593 } 594 595 return ((int) (oa->obj->keylen)) - ob->obj->keylen; 596 } 597 598 static int 599 ucl_hash_cmp_case_sens(const void *a, const void *b) 600 { 601 const struct ucl_hash_elt *oa = (const struct ucl_hash_elt *) a, 602 *ob = (const struct ucl_hash_elt *) b; 603 604 if (oa->obj->keylen == ob->obj->keylen) { 605 return memcmp(oa->obj->key, ob->obj->key, oa->obj->keylen); 606 } 607 608 return ((int) (oa->obj->keylen)) - ob->obj->keylen; 609 } 610 611 void ucl_hash_sort(ucl_hash_t *hashlin, enum ucl_object_keys_sort_flags fl) 612 { 613 614 if (fl & UCL_SORT_KEYS_ICASE) { 615 DL_SORT(hashlin->head, ucl_hash_cmp_icase); 616 } 617 else { 618 DL_SORT(hashlin->head, ucl_hash_cmp_case_sens); 619 } 620 621 if (fl & UCL_SORT_KEYS_RECURSIVE) { 622 struct ucl_hash_elt *elt; 623 624 DL_FOREACH(hashlin->head, elt) 625 { 626 if (ucl_object_type(elt->obj) == UCL_OBJECT) { 627 ucl_hash_sort(elt->obj->value.ov, fl); 628 } 629 } 630 } 631 } 632