1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #include "ucl_internal.h" 25 #include "ucl_hash.h" 26 #include "khash.h" 27 #include "kvec.h" 28 #include "mum.h" 29 30 #include <time.h> 31 #include <limits.h> 32 33 struct ucl_hash_elt { 34 const ucl_object_t *obj; 35 struct ucl_hash_elt *prev, *next; 36 }; 37 38 struct ucl_hash_struct { 39 void *hash; 40 struct ucl_hash_elt *head; 41 bool caseless; 42 }; 43 44 static uint64_t 45 ucl_hash_seed (void) 46 { 47 static uint64_t seed; 48 if (seed == 0) { 49 #ifdef UCL_RANDOM_FUNCTION 50 seed = UCL_RANDOM_FUNCTION; 51 #else 52 /* Not very random but can be useful for our purposes */ 53 seed = time (NULL); 54 #endif 55 } 56 57 return seed; 58 } 59 60 static const unsigned char lc_map[256] = { 61 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 62 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 63 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 64 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 65 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 66 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 67 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 68 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 69 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 70 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 71 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 72 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 73 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 74 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 75 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 76 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 77 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 78 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 79 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 80 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 81 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 82 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 83 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 84 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 85 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 86 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 87 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 88 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 89 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 90 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 91 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 92 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff 93 }; 94 95 #if (defined(WORD_BIT) && WORD_BIT == 64) || \ 96 (defined(__WORDSIZE) && __WORDSIZE == 64) || \ 97 defined(__x86_64__) || \ 98 defined(__amd64__) 99 #define UCL64_BIT_HASH 1 100 #endif 101 102 static inline uint32_t 103 ucl_hash_func (const ucl_object_t *o) 104 { 105 return mum_hash (o->key, o->keylen, ucl_hash_seed ()); 106 } 107 static inline int 108 ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2) 109 { 110 if (k1->keylen == k2->keylen) { 111 return memcmp (k1->key, k2->key, k1->keylen) == 0; 112 } 113 114 return 0; 115 } 116 117 KHASH_INIT (ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt *, 1, 118 ucl_hash_func, ucl_hash_equal) 119 120 static inline uint32_t 121 ucl_hash_caseless_func (const ucl_object_t *o) 122 { 123 unsigned len = o->keylen; 124 unsigned leftover = o->keylen % 8; 125 unsigned fp, i; 126 const uint8_t* s = (const uint8_t*)o->key; 127 union { 128 struct { 129 unsigned char c1, c2, c3, c4, c5, c6, c7, c8; 130 } c; 131 uint64_t pp; 132 } u; 133 uint64_t r; 134 135 fp = len - leftover; 136 r = ucl_hash_seed (); 137 138 for (i = 0; i != fp; i += 8) { 139 u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; 140 u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7]; 141 u.c.c1 = lc_map[u.c.c1]; 142 u.c.c2 = lc_map[u.c.c2]; 143 u.c.c3 = lc_map[u.c.c3]; 144 u.c.c4 = lc_map[u.c.c4]; 145 u.c.c5 = lc_map[u.c.c5]; 146 u.c.c6 = lc_map[u.c.c6]; 147 u.c.c7 = lc_map[u.c.c7]; 148 u.c.c8 = lc_map[u.c.c8]; 149 r = mum_hash_step (r, u.pp); 150 } 151 152 u.pp = 0; 153 switch (leftover) { 154 case 7: 155 u.c.c7 = lc_map[(unsigned char)s[i++]]; 156 /* FALLTHRU */ 157 case 6: 158 u.c.c6 = lc_map[(unsigned char)s[i++]]; 159 /* FALLTHRU */ 160 case 5: 161 u.c.c5 = lc_map[(unsigned char)s[i++]]; 162 /* FALLTHRU */ 163 case 4: 164 u.c.c4 = lc_map[(unsigned char)s[i++]]; 165 /* FALLTHRU */ 166 case 3: 167 u.c.c3 = lc_map[(unsigned char)s[i++]]; 168 /* FALLTHRU */ 169 case 2: 170 u.c.c2 = lc_map[(unsigned char)s[i++]]; 171 /* FALLTHRU */ 172 case 1: 173 u.c.c1 = lc_map[(unsigned char)s[i]]; 174 r = mum_hash_step (r, u.pp); 175 break; 176 } 177 178 return mum_hash_finish (r); 179 } 180 181 static inline int 182 ucl_hash_caseless_equal (const ucl_object_t *k1, const ucl_object_t *k2) 183 { 184 if (k1->keylen == k2->keylen) { 185 unsigned fp, i; 186 const char *s = k1->key, *d = k2->key; 187 unsigned char c1, c2, c3, c4; 188 union { 189 unsigned char c[4]; 190 uint32_t n; 191 } cmp1, cmp2; 192 size_t leftover = k1->keylen % 4; 193 194 fp = k1->keylen - leftover; 195 196 for (i = 0; i != fp; i += 4) { 197 c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3]; 198 cmp1.c[0] = lc_map[c1]; 199 cmp1.c[1] = lc_map[c2]; 200 cmp1.c[2] = lc_map[c3]; 201 cmp1.c[3] = lc_map[c4]; 202 203 c1 = d[i], c2 = d[i + 1], c3 = d[i + 2], c4 = d[i + 3]; 204 cmp2.c[0] = lc_map[c1]; 205 cmp2.c[1] = lc_map[c2]; 206 cmp2.c[2] = lc_map[c3]; 207 cmp2.c[3] = lc_map[c4]; 208 209 if (cmp1.n != cmp2.n) { 210 return 0; 211 } 212 } 213 214 while (leftover > 0) { 215 if (lc_map[(unsigned char)s[i]] != lc_map[(unsigned char)d[i]]) { 216 return 0; 217 } 218 219 leftover--; 220 i++; 221 } 222 223 return 1; 224 } 225 226 return 0; 227 } 228 229 KHASH_INIT (ucl_hash_caseless_node, const ucl_object_t *, struct ucl_hash_elt *, 1, 230 ucl_hash_caseless_func, ucl_hash_caseless_equal) 231 232 ucl_hash_t* 233 ucl_hash_create (bool ignore_case) 234 { 235 ucl_hash_t *new; 236 237 new = UCL_ALLOC (sizeof (ucl_hash_t)); 238 if (new != NULL) { 239 void *h; 240 new->head = NULL; 241 new->caseless = ignore_case; 242 if (ignore_case) { 243 h = (void *)kh_init (ucl_hash_caseless_node); 244 } 245 else { 246 h = (void *)kh_init (ucl_hash_node); 247 } 248 if (h == NULL) { 249 UCL_FREE (sizeof (ucl_hash_t), new); 250 return NULL; 251 } 252 new->hash = h; 253 } 254 return new; 255 } 256 257 void ucl_hash_destroy (ucl_hash_t* hashlin, ucl_hash_free_func func) 258 { 259 260 if (hashlin == NULL) { 261 return; 262 } 263 264 if (func != NULL) { 265 /* Iterate over the hash first */ 266 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 267 hashlin->hash; 268 khiter_t k; 269 const ucl_object_t *cur, *tmp; 270 271 for (k = kh_begin (h); k != kh_end (h); ++k) { 272 if (kh_exist (h, k)) { 273 cur = (kh_value (h, k))->obj; 274 while (cur != NULL) { 275 tmp = cur->next; 276 func (__DECONST (ucl_object_t *, cur)); 277 cur = tmp; 278 } 279 } 280 } 281 } 282 283 if (hashlin->caseless) { 284 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 285 hashlin->hash; 286 kh_destroy (ucl_hash_caseless_node, h); 287 } 288 else { 289 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 290 hashlin->hash; 291 kh_destroy (ucl_hash_node, h); 292 } 293 294 struct ucl_hash_elt *cur, *tmp; 295 296 DL_FOREACH_SAFE(hashlin->head, cur, tmp) { 297 UCL_FREE(sizeof(*cur), cur); 298 } 299 300 UCL_FREE (sizeof (*hashlin), hashlin); 301 } 302 303 bool 304 ucl_hash_insert (ucl_hash_t* hashlin, const ucl_object_t *obj, 305 const char *key, unsigned keylen) 306 { 307 khiter_t k; 308 int ret; 309 struct ucl_hash_elt **pelt, *elt; 310 311 if (hashlin == NULL) { 312 return false; 313 } 314 315 if (hashlin->caseless) { 316 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 317 hashlin->hash; 318 k = kh_put (ucl_hash_caseless_node, h, obj, &ret); 319 if (ret > 0) { 320 elt = UCL_ALLOC(sizeof(*elt)); 321 pelt = &kh_value (h, k); 322 *pelt = elt; 323 DL_APPEND(hashlin->head, elt); 324 elt->obj = obj; 325 } 326 else if (ret < 0) { 327 goto e0; 328 } 329 } 330 else { 331 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 332 hashlin->hash; 333 k = kh_put (ucl_hash_node, h, obj, &ret); 334 if (ret > 0) { 335 elt = UCL_ALLOC(sizeof(*elt)); 336 pelt = &kh_value (h, k); 337 *pelt = elt; 338 DL_APPEND(hashlin->head, elt); 339 elt->obj = obj; 340 } else if (ret < 0) { 341 goto e0; 342 } 343 } 344 return true; 345 e0: 346 return false; 347 } 348 349 void ucl_hash_replace (ucl_hash_t* hashlin, const ucl_object_t *old, 350 const ucl_object_t *new) 351 { 352 khiter_t k; 353 int ret; 354 struct ucl_hash_elt *elt, *nelt; 355 356 if (hashlin == NULL) { 357 return; 358 } 359 360 if (hashlin->caseless) { 361 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 362 hashlin->hash; 363 k = kh_put (ucl_hash_caseless_node, h, old, &ret); 364 if (ret == 0) { 365 elt = kh_value(h, k); 366 kh_del (ucl_hash_caseless_node, h, k); 367 k = kh_put (ucl_hash_caseless_node, h, new, &ret); 368 nelt = UCL_ALLOC(sizeof(*nelt)); 369 nelt->obj = new; 370 kh_value(h, k) = nelt; 371 DL_REPLACE_ELEM(hashlin->head, elt, nelt); 372 UCL_FREE(sizeof(*elt), elt); 373 } 374 } 375 else { 376 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 377 hashlin->hash; 378 k = kh_put (ucl_hash_node, h, old, &ret); 379 if (ret == 0) { 380 elt = kh_value (h, k); 381 kh_del (ucl_hash_node, h, k); 382 k = kh_put (ucl_hash_node, h, new, &ret); 383 nelt = UCL_ALLOC(sizeof(*nelt)); 384 nelt->obj = new; 385 kh_value(h, k) = nelt; 386 DL_REPLACE_ELEM(hashlin->head, elt, nelt); 387 UCL_FREE(sizeof(*elt), elt); 388 } 389 } 390 } 391 392 struct ucl_hash_real_iter { 393 const struct ucl_hash_elt *cur; 394 }; 395 396 #define UHI_SETERR(ep, ern) {if (ep != NULL) *ep = (ern);} 397 398 const void* 399 ucl_hash_iterate2 (ucl_hash_t *hashlin, ucl_hash_iter_t *iter, int *ep) 400 { 401 struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *)(*iter); 402 const ucl_object_t *ret = NULL; 403 404 if (hashlin == NULL) { 405 UHI_SETERR(ep, EINVAL); 406 return NULL; 407 } 408 409 if (it == NULL) { 410 it = UCL_ALLOC (sizeof (*it)); 411 412 if (it == NULL) { 413 UHI_SETERR(ep, ENOMEM); 414 return NULL; 415 } 416 417 it->cur = hashlin->head; 418 } 419 420 UHI_SETERR(ep, 0); 421 if (it->cur) { 422 ret = it->cur->obj; 423 it->cur = it->cur->next; 424 } 425 else { 426 UCL_FREE (sizeof (*it), it); 427 *iter = NULL; 428 return NULL; 429 } 430 431 *iter = it; 432 433 return ret; 434 } 435 436 bool 437 ucl_hash_iter_has_next (ucl_hash_t *hashlin, ucl_hash_iter_t iter) 438 { 439 struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *)(iter); 440 441 return it->cur != NULL; 442 } 443 444 445 const ucl_object_t* 446 ucl_hash_search (ucl_hash_t* hashlin, const char *key, unsigned keylen) 447 { 448 khiter_t k; 449 const ucl_object_t *ret = NULL; 450 ucl_object_t search; 451 struct ucl_hash_elt *elt; 452 453 search.key = key; 454 search.keylen = keylen; 455 456 if (hashlin == NULL) { 457 return NULL; 458 } 459 460 if (hashlin->caseless) { 461 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 462 hashlin->hash; 463 464 k = kh_get (ucl_hash_caseless_node, h, &search); 465 if (k != kh_end (h)) { 466 elt = kh_value (h, k); 467 ret = elt->obj; 468 } 469 } 470 else { 471 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 472 hashlin->hash; 473 k = kh_get (ucl_hash_node, h, &search); 474 if (k != kh_end (h)) { 475 elt = kh_value (h, k); 476 ret = elt->obj; 477 } 478 } 479 480 return ret; 481 } 482 483 void 484 ucl_hash_delete (ucl_hash_t* hashlin, const ucl_object_t *obj) 485 { 486 khiter_t k; 487 struct ucl_hash_elt *elt; 488 489 if (hashlin == NULL) { 490 return; 491 } 492 493 if (hashlin->caseless) { 494 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *) 495 hashlin->hash; 496 497 k = kh_get (ucl_hash_caseless_node, h, obj); 498 if (k != kh_end (h)) { 499 elt = kh_value (h, k); 500 DL_DELETE(hashlin->head, elt); 501 kh_del (ucl_hash_caseless_node, h, k); 502 UCL_FREE(sizeof(*elt), elt); 503 } 504 } 505 else { 506 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 507 hashlin->hash; 508 k = kh_get (ucl_hash_node, h, obj); 509 if (k != kh_end (h)) { 510 elt = kh_value (h, k); 511 DL_DELETE(hashlin->head, elt); 512 kh_del (ucl_hash_node, h, k); 513 UCL_FREE(sizeof(*elt), elt); 514 } 515 } 516 } 517 518 bool ucl_hash_reserve (ucl_hash_t *hashlin, size_t sz) 519 { 520 if (hashlin == NULL) { 521 return false; 522 } 523 524 if (sz > kh_size((khash_t(ucl_hash_node) *)hashlin->hash)) { 525 if (hashlin->caseless) { 526 khash_t(ucl_hash_caseless_node) *h = (khash_t( 527 ucl_hash_caseless_node) *) 528 hashlin->hash; 529 kh_resize (ucl_hash_caseless_node, h, sz * 2); 530 } else { 531 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *) 532 hashlin->hash; 533 kh_resize (ucl_hash_node, h, sz * 2); 534 } 535 } 536 return true; 537 } 538 539 static int 540 ucl_lc_cmp (const char *s, const char *d, size_t l) 541 { 542 unsigned int fp, i; 543 unsigned char c1, c2, c3, c4; 544 union { 545 unsigned char c[4]; 546 uint32_t n; 547 } cmp1, cmp2; 548 size_t leftover = l % 4; 549 int ret = 0; 550 551 fp = l - leftover; 552 553 for (i = 0; i != fp; i += 4) { 554 c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3]; 555 cmp1.c[0] = lc_map[c1]; 556 cmp1.c[1] = lc_map[c2]; 557 cmp1.c[2] = lc_map[c3]; 558 cmp1.c[3] = lc_map[c4]; 559 560 c1 = d[i], c2 = d[i + 1], c3 = d[i + 2], c4 = d[i + 3]; 561 cmp2.c[0] = lc_map[c1]; 562 cmp2.c[1] = lc_map[c2]; 563 cmp2.c[2] = lc_map[c3]; 564 cmp2.c[3] = lc_map[c4]; 565 566 if (cmp1.n != cmp2.n) { 567 return cmp1.n - cmp2.n; 568 } 569 } 570 571 while (leftover > 0) { 572 if (lc_map[(unsigned char)s[i]] != lc_map[(unsigned char)d[i]]) { 573 return s[i] - d[i]; 574 } 575 576 leftover--; 577 i++; 578 } 579 580 return ret; 581 } 582 583 static int 584 ucl_hash_cmp_icase (const void *a, const void *b) 585 { 586 const struct ucl_hash_elt *oa = (const struct ucl_hash_elt *)a, 587 *ob = (const struct ucl_hash_elt *)b; 588 589 if (oa->obj->keylen == ob->obj->keylen) { 590 return ucl_lc_cmp (oa->obj->key, ob->obj->key, oa->obj->keylen); 591 } 592 593 return ((int)(oa->obj->keylen)) - ob->obj->keylen; 594 } 595 596 static int 597 ucl_hash_cmp_case_sens (const void *a, const void *b) 598 { 599 const struct ucl_hash_elt *oa = (const struct ucl_hash_elt *)a, 600 *ob = (const struct ucl_hash_elt *)b; 601 602 if (oa->obj->keylen == ob->obj->keylen) { 603 return memcmp (oa->obj->key, ob->obj->key, oa->obj->keylen); 604 } 605 606 return ((int)(oa->obj->keylen)) - ob->obj->keylen; 607 } 608 609 void 610 ucl_hash_sort (ucl_hash_t *hashlin, enum ucl_object_keys_sort_flags fl) 611 { 612 613 if (fl & UCL_SORT_KEYS_ICASE) { 614 DL_SORT(hashlin->head, ucl_hash_cmp_icase); 615 } 616 else { 617 DL_SORT(hashlin->head, ucl_hash_cmp_case_sens); 618 } 619 620 if (fl & UCL_SORT_KEYS_RECURSIVE) { 621 struct ucl_hash_elt *elt; 622 623 DL_FOREACH(hashlin->head, elt) { 624 if (ucl_object_type (elt->obj) == UCL_OBJECT) { 625 ucl_hash_sort (elt->obj->value.ov, fl); 626 } 627 } 628 } 629 } 630