/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The 512-byte leaf is broken into 32 16-byte chunks.
 * chunk number n means l_chunk[n], even though the header precedes it.
 * the names are stored null-terminated.
 */

#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/zfs_context.h>
#include <sys/fs/zfs.h>
#include <sys/zap.h>
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>

static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);

#define	CHAIN_END 0xffff /* end of the chunk chain */

/* half the (current) minimum block size */
#define	MAX_ARRAY_BYTES (8<<10)

#define	LEAF_HASH(l, h) \
	((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
	((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len)))

#define	LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])
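
/*
 * Worked example (added commentary, not from the original source):
 * LEAF_HASH selects a bucket from the hash bits just below the
 * lh_prefix_len bits already consumed by the pointer table.  Assuming
 * a leaf with lh_prefix_len == 3 and ZAP_LEAF_HASH_SHIFT(l) == 5 (so
 * ZAP_LEAF_HASH_NUMENTRIES(l) == 32), the shift is 64 - 5 - 3 == 56
 * and the mask is 31, i.e. hash bits [60:56] become the bucket index.
 */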

static void
zap_memset(void *a, int c, size_t n)
{
	char *cp = a;
	char *cpend = cp + n;

	while (cp < cpend)
		*cp++ = c;
}

static void
stv(int len, void *addr, uint64_t value)
{
	switch (len) {
	case 1:
		*(uint8_t *)addr = value;
		return;
	case 2:
		*(uint16_t *)addr = value;
		return;
	case 4:
		*(uint32_t *)addr = value;
		return;
	case 8:
		*(uint64_t *)addr = value;
		return;
	}
	ASSERT(!"bad int len");
}

static uint64_t
ldv(int len, const void *addr)
{
	switch (len) {
	case 1:
		return (*(uint8_t *)addr);
	case 2:
		return (*(uint16_t *)addr);
	case 4:
		return (*(uint32_t *)addr);
	case 8:
		return (*(uint64_t *)addr);
	}
	ASSERT(!"bad int len");
	return (0xFEEDFACEDEADBEEFULL);
}

void
zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
{
	int i;
	zap_leaf_t l;
	l.l_bs = highbit(size)-1;
	l.l_phys = buf;

	buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type);
	buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix);
	buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic);
	buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree);
	buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries);
	buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len);
	buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist);

	for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
		buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);

	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
		zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
		struct zap_leaf_entry *le;

		switch (lc->l_free.lf_type) {
		case ZAP_CHUNK_ENTRY:
			le = &lc->l_entry;

			le->le_type = BSWAP_8(le->le_type);
			le->le_int_size = BSWAP_8(le->le_int_size);
			le->le_next = BSWAP_16(le->le_next);
			le->le_name_chunk = BSWAP_16(le->le_name_chunk);
			le->le_name_length = BSWAP_16(le->le_name_length);
			le->le_value_chunk = BSWAP_16(le->le_value_chunk);
			le->le_value_length = BSWAP_16(le->le_value_length);
			le->le_cd = BSWAP_32(le->le_cd);
			le->le_hash = BSWAP_64(le->le_hash);
			break;
		case ZAP_CHUNK_FREE:
			lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type);
			lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next);
			break;
		case ZAP_CHUNK_ARRAY:
			lc->l_array.la_type = BSWAP_8(lc->l_array.la_type);
			lc->l_array.la_next = BSWAP_16(lc->l_array.la_next);
			/* la_array doesn't need swapping */
			break;
		default:
			ASSERT(!"bad leaf type");
		}
	}
}

void
zap_leaf_init(zap_leaf_t *l, boolean_t sort)
{
	int i;

	l->l_bs = highbit(l->l_dbuf->db_size)-1;
	zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
	zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
		ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
		ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
	}
	ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
	l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
	l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
	l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
	if (sort)
		l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
}

/*
 * Routines which manipulate leaf chunks (l_chunk[]).
 */

static uint16_t
zap_leaf_chunk_alloc(zap_leaf_t *l)
{
	int chunk;

	ASSERT(l->l_phys->l_hdr.lh_nfree > 0);

	chunk = l->l_phys->l_hdr.lh_freelist;
	ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
	ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);

	l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;

	l->l_phys->l_hdr.lh_nfree--;

	return (chunk);
}

static void
zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
{
	struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
	ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
	ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
	ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);

	zlf->lf_type = ZAP_CHUNK_FREE;
	zlf->lf_next = l->l_phys->l_hdr.lh_freelist;
	bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */
	l->l_phys->l_hdr.lh_freelist = chunk;

	l->l_phys->l_hdr.lh_nfree++;
}
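
/*
 * Added commentary: the free chunks form a LIFO singly-linked list
 * threaded through lf_next and headed by lh_freelist, so both
 * zap_leaf_chunk_alloc() and zap_leaf_chunk_free() are O(1).  For
 * example, if lh_freelist == 5 and chunk 5's lf_next == 9, an alloc
 * returns chunk 5 and leaves lh_freelist == 9; freeing chunk 5 pushes
 * it back onto the head of the list.
 */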

/*
 * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
 */

static uint16_t
zap_leaf_array_create(zap_leaf_t *l, const char *buf,
    int integer_size, int num_integers)
{
	uint16_t chunk_head;
	uint16_t *chunkp = &chunk_head;
	int byten = 0;
	uint64_t value;
	int shift = (integer_size-1)*8;
	int len = num_integers;

	ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES);

	while (len > 0) {
		uint16_t chunk = zap_leaf_chunk_alloc(l);
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int i;

		la->la_type = ZAP_CHUNK_ARRAY;
		for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
			if (byten == 0)
				value = ldv(integer_size, buf);
			la->la_array[i] = value >> shift;
			value <<= 8;
			if (++byten == integer_size) {
				byten = 0;
				buf += integer_size;
				if (--len == 0)
					break;
			}
		}

		*chunkp = chunk;
		chunkp = &la->la_next;
	}
	*chunkp = CHAIN_END;

	return (chunk_head);
}

static void
zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
{
	uint16_t chunk = *chunkp;

	*chunkp = CHAIN_END;

	while (chunk != CHAIN_END) {
		int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
		ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
		    ZAP_CHUNK_ARRAY);
		zap_leaf_chunk_free(l, chunk);
		chunk = nextchunk;
	}
}

/* array_len and buf_len are in integers, not bytes */
static void
zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
    int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
    char *buf)
{
	int len = MIN(array_len, buf_len);
	int byten = 0;
	uint64_t value = 0;

	ASSERT3U(array_int_len, <=, buf_int_len);

	/* Fast path for one 8-byte integer */
	if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		uint8_t *ip = la->la_array;
		uint64_t *buf64 = (uint64_t *)buf;

		*buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
		    (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
		    (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
		    (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
		return;
	}

	/* Fast path for an array of 1-byte integers (eg. the entry name) */
	if (array_int_len == 1 && buf_int_len == 1 &&
	    buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
		while (chunk != CHAIN_END) {
			struct zap_leaf_array *la =
			    &ZAP_LEAF_CHUNK(l, chunk).l_array;
			bcopy(la->la_array, buf, ZAP_LEAF_ARRAY_BYTES);
			buf += ZAP_LEAF_ARRAY_BYTES;
			chunk = la->la_next;
		}
		return;
	}

	while (len > 0) {
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int i;

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
			value = (value << 8) | la->la_array[i];
			byten++;
			if (byten == array_int_len) {
				stv(buf_int_len, buf, value);
				byten = 0;
				len--;
				if (len == 0)
					return;
				buf += buf_int_len;
			}
		}
		chunk = la->la_next;
	}
}
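
/*
 * Worked example (added commentary, not from the original source):
 * zap_leaf_array_create() stores each integer most-significant byte
 * first, regardless of host endianness, and zap_leaf_array_read()
 * reverses the transform.  For integer_size == 2 and host value
 * 0x1234, shift == 8, so la_array receives 0x12 and then 0x34.  An
 * array that does not fill its final chunk leaves the remaining
 * la_array bytes unused; the chunks are linked through la_next and
 * terminated by CHAIN_END.
 */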

/*
 * Only to be used on 8-bit arrays.
 * array_len is actual len in bytes (not encoded le_value_length).
 * namenorm is null-terminated.
 */
static boolean_t
zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, int chunk, int array_len)
{
	int bseen = 0;

	if (zn->zn_matchtype == MT_FIRST) {
		char *thisname = kmem_alloc(array_len, KM_SLEEP);
		boolean_t match;

		zap_leaf_array_read(l, chunk, 1, array_len, 1,
		    array_len, thisname);
		match = zap_match(zn, thisname);
		kmem_free(thisname, array_len);
		return (match);
	}

	/* Fast path for exact matching */
	while (bseen < array_len) {
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		if (bcmp(la->la_array, zn->zn_name_orij + bseen, toread))
			break;
		chunk = la->la_next;
		bseen += toread;
	}
	return (bseen == array_len);
}

/*
 * Routines which manipulate leaf entries.
 */

int
zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
{
	uint16_t *chunkp;
	struct zap_leaf_entry *le;

	ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

again:
	for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
	    *chunkp != CHAIN_END; chunkp = &le->le_next) {
		uint16_t chunk = *chunkp;
		le = ZAP_LEAF_ENTRY(l, chunk);

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

		if (le->le_hash != zn->zn_hash)
			continue;

		/*
		 * NB: the entry chain is always sorted by cd on
		 * normalized zap objects, so this will find the
		 * lowest-cd match for MT_FIRST.
		 */
		ASSERT(zn->zn_matchtype == MT_EXACT ||
		    (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
		if (zap_leaf_array_match(l, zn, le->le_name_chunk,
		    le->le_name_length)) {
			zeh->zeh_num_integers = le->le_value_length;
			zeh->zeh_integer_size = le->le_int_size;
			zeh->zeh_cd = le->le_cd;
			zeh->zeh_hash = le->le_hash;
			zeh->zeh_chunkp = chunkp;
			zeh->zeh_leaf = l;
			return (0);
		}
	}

	/*
	 * NB: we could of course do this in one pass, but that would be
	 * a pain.  We'll see if MT_BEST is even used much.
	 */
	if (zn->zn_matchtype == MT_BEST) {
		zn->zn_matchtype = MT_FIRST;
		goto again;
	}

	return (ENOENT);
}
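
/*
 * Added commentary: a lookup is two pointer chases.  LEAF_HASH(l, h)
 * picks a bucket in l_hash[]; the bucket holds the chunk number of
 * the first ZAP_CHUNK_ENTRY in that bucket's chain, and le_next links
 * the rest.  MT_BEST therefore makes two passes: a first pass that
 * requires an exact (bcmp) name match, then, via the goto above, a
 * second pass that accepts the first normalization-equivalent name.
 */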

/* Return (h1,cd1 >= h2,cd2) */
#define	HCD_GTEQ(h1, cd1, h2, cd2) \
	((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))

int
zap_leaf_lookup_closest(zap_leaf_t *l,
    uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
{
	uint16_t chunk;
	uint64_t besth = -1ULL;
	uint32_t bestcd = ZAP_MAXCD;
	uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
	uint16_t lh;
	struct zap_leaf_entry *le;

	ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

	for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
		for (chunk = l->l_phys->l_hash[lh];
		    chunk != CHAIN_END; chunk = le->le_next) {
			le = ZAP_LEAF_ENTRY(l, chunk);

			ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
			ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

			if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
			    HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
				ASSERT3U(bestlh, >=, lh);
				bestlh = lh;
				besth = le->le_hash;
				bestcd = le->le_cd;

				zeh->zeh_num_integers = le->le_value_length;
				zeh->zeh_integer_size = le->le_int_size;
				zeh->zeh_cd = le->le_cd;
				zeh->zeh_hash = le->le_hash;
				zeh->zeh_fakechunk = chunk;
				zeh->zeh_chunkp = &zeh->zeh_fakechunk;
				zeh->zeh_leaf = l;
			}
		}
	}

	return (bestcd == ZAP_MAXCD ? ENOENT : 0);
}

int
zap_entry_read(const zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, void *buf)
{
	struct zap_leaf_entry *le =
	    ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	if (le->le_int_size > integer_size)
		return (EINVAL);

	zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, le->le_int_size,
	    le->le_value_length, integer_size, num_integers, buf);

	if (zeh->zeh_num_integers > num_integers)
		return (EOVERFLOW);
	return (0);
}

int
zap_entry_read_name(const zap_entry_handle_t *zeh, uint16_t buflen, char *buf)
{
	struct zap_leaf_entry *le =
	    ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
	    le->le_name_length, 1, buflen, buf);
	if (le->le_name_length > buflen)
		return (EOVERFLOW);
	return (0);
}

int
zap_entry_update(zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, const void *buf)
{
	int delta_chunks;
	zap_leaf_t *l = zeh->zeh_leaf;
	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);

	delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
	    ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length * le->le_int_size);

	if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks)
		return (EAGAIN);

	/*
	 * We should search other chained leaves (via
	 * zap_entry_remove,create?) otherwise returning EAGAIN will
	 * just send us into an infinite loop if we have to chain
	 * another leaf block, rather than being able to split this
	 * block.
	 */

	zap_leaf_array_free(l, &le->le_value_chunk);
	le->le_value_chunk =
	    zap_leaf_array_create(l, buf, integer_size, num_integers);
	le->le_value_length = num_integers;
	le->le_int_size = integer_size;
	return (0);
}
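
/*
 * Worked example (added commentary): delta_chunks is the change in
 * value-array chunks, so only growth is charged against lh_nfree.
 * If the old value occupied one chunk and the new value needs three,
 * delta_chunks == 2 and the update fails with EAGAIN unless at least
 * two chunks are free; a shrinking update (delta_chunks <= 0) always
 * succeeds.
 */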

void
zap_entry_remove(zap_entry_handle_t *zeh)
{
	uint16_t entry_chunk;
	struct zap_leaf_entry *le;
	zap_leaf_t *l = zeh->zeh_leaf;

	ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);

	entry_chunk = *zeh->zeh_chunkp;
	le = ZAP_LEAF_ENTRY(l, entry_chunk);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	zap_leaf_array_free(l, &le->le_name_chunk);
	zap_leaf_array_free(l, &le->le_value_chunk);

	*zeh->zeh_chunkp = le->le_next;
	zap_leaf_chunk_free(l, entry_chunk);

	l->l_phys->l_hdr.lh_nentries--;
}

int
zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd,
    uint8_t integer_size, uint64_t num_integers, const void *buf,
    zap_entry_handle_t *zeh)
{
	uint16_t chunk;
	uint16_t *chunkp;
	struct zap_leaf_entry *le;
	uint64_t namelen, valuelen;
	int numchunks;

	valuelen = integer_size * num_integers;
	namelen = strlen(name) + 1;
	ASSERT(namelen >= 2);

	numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(namelen) +
	    ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
	if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
		return (E2BIG);

	if (cd == ZAP_MAXCD) {
		/* find the lowest unused cd */
		if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
			cd = 0;

			for (chunk = *LEAF_HASH_ENTPTR(l, h);
			    chunk != CHAIN_END; chunk = le->le_next) {
				le = ZAP_LEAF_ENTRY(l, chunk);
				if (le->le_cd > cd)
					break;
				if (le->le_hash == h) {
					ASSERT3U(cd, ==, le->le_cd);
					cd++;
				}
			}
		} else {
			/* old unsorted format; do it the O(n^2) way */
			for (cd = 0; cd < ZAP_MAXCD; cd++) {
				for (chunk = *LEAF_HASH_ENTPTR(l, h);
				    chunk != CHAIN_END; chunk = le->le_next) {
					le = ZAP_LEAF_ENTRY(l, chunk);
					if (le->le_hash == h &&
					    le->le_cd == cd) {
						break;
					}
				}
				/* If this cd is not in use, we are good. */
				if (chunk == CHAIN_END)
					break;
			}
		}
		/*
		 * we would run out of space in a block before we could
		 * have ZAP_MAXCD entries
		 */
		ASSERT3U(cd, <, ZAP_MAXCD);
	}

	if (l->l_phys->l_hdr.lh_nfree < numchunks)
		return (EAGAIN);

	/* make the entry */
	chunk = zap_leaf_chunk_alloc(l);
	le = ZAP_LEAF_ENTRY(l, chunk);
	le->le_type = ZAP_CHUNK_ENTRY;
	le->le_name_chunk = zap_leaf_array_create(l, name, 1, namelen);
	le->le_name_length = namelen;
	le->le_value_chunk =
	    zap_leaf_array_create(l, buf, integer_size, num_integers);
	le->le_value_length = num_integers;
	le->le_int_size = integer_size;
	le->le_hash = h;
	le->le_cd = cd;

	/* link it into the hash chain */
	/* XXX if we did the search above, we could just use that */
	chunkp = zap_leaf_rehash_entry(l, chunk);

	l->l_phys->l_hdr.lh_nentries++;

	zeh->zeh_leaf = l;
	zeh->zeh_num_integers = num_integers;
	zeh->zeh_integer_size = le->le_int_size;
	zeh->zeh_cd = le->le_cd;
	zeh->zeh_hash = le->le_hash;
	zeh->zeh_chunkp = chunkp;

	return (0);
}
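
/*
 * Worked example (added commentary): cd (the collision differentiator)
 * disambiguates entries whose full 64-bit hashes collide.  If two
 * entries in this bucket already have hash h with cd 0 and cd 1, the
 * CDSORTED scan above walks the chain in cd order, increments cd past
 * each of them, and assigns cd == 2 to the new entry, i.e. the lowest
 * unused value for that hash.
 */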

/*
 * Determine if there is another entry with the same normalized form.
 * For performance purposes, either zn or name must be provided (the
 * other can be NULL).  Note, there usually won't be any hash
 * conflicts, in which case we don't need the concatenated/normalized
 * form of the name.  But all callers have one of these on hand anyway,
 * so might as well take advantage.  A cleaner but slower interface
 * would accept neither argument, and compute the normalized name as
 * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
 */
boolean_t
zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
    const char *name, zap_t *zap)
{
	uint64_t chunk;
	struct zap_leaf_entry *le;
	boolean_t allocdzn = B_FALSE;

	if (zap->zap_normflags == 0)
		return (B_FALSE);

	for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
	    chunk != CHAIN_END; chunk = le->le_next) {
		le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
		if (le->le_hash != zeh->zeh_hash)
			continue;
		if (le->le_cd == zeh->zeh_cd)
			continue;

		if (zn == NULL) {
			zn = zap_name_alloc(zap, name, MT_FIRST);
			allocdzn = B_TRUE;
		}
		if (zap_leaf_array_match(zeh->zeh_leaf, zn,
		    le->le_name_chunk, le->le_name_length)) {
			if (allocdzn)
				zap_name_free(zn);
			return (B_TRUE);
		}
	}
	if (allocdzn)
		zap_name_free(zn);
	return (B_FALSE);
}

/*
 * Routines for transferring entries between leafs.
 */

static uint16_t *
zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
{
	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
	struct zap_leaf_entry *le2;
	uint16_t *chunkp;

	/*
	 * keep the entry chain sorted by cd
	 * NB: this will not cause problems for unsorted leafs, though
	 * it is unnecessary there.
	 */
	for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
	    *chunkp != CHAIN_END; chunkp = &le2->le_next) {
		le2 = ZAP_LEAF_ENTRY(l, *chunkp);
		if (le2->le_cd > le->le_cd)
			break;
	}

	le->le_next = *chunkp;
	*chunkp = entry;
	return (chunkp);
}

static uint16_t
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
	uint16_t new_chunk;
	uint16_t *nchunkp = &new_chunk;

	while (chunk != CHAIN_END) {
		uint16_t nchunk = zap_leaf_chunk_alloc(nl);
		struct zap_leaf_array *nla =
		    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
		struct zap_leaf_array *la =
		    &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int nextchunk = la->la_next;

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));

		*nla = *la; /* structure assignment */

		zap_leaf_chunk_free(l, chunk);
		chunk = nextchunk;
		*nchunkp = nchunk;
		nchunkp = &nla->la_next;
	}
	*nchunkp = CHAIN_END;
	return (new_chunk);
}

static void
zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
{
	struct zap_leaf_entry *le, *nle;
	uint16_t chunk;

	le = ZAP_LEAF_ENTRY(l, entry);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	chunk = zap_leaf_chunk_alloc(nl);
	nle = ZAP_LEAF_ENTRY(nl, chunk);
	*nle = *le; /* structure assignment */

	(void) zap_leaf_rehash_entry(nl, chunk);

	nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
	nle->le_value_chunk =
	    zap_leaf_transfer_array(l, le->le_value_chunk, nl);

	zap_leaf_chunk_free(l, entry);

	l->l_phys->l_hdr.lh_nentries--;
	nl->l_phys->l_hdr.lh_nentries++;
}

/*
 * Transfer the entries whose hash prefix ends in 1 to the new leaf.
 */
void
zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
{
	int i;
	int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len;

	/* set new prefix and prefix_len */
	l->l_phys->l_hdr.lh_prefix <<= 1;
	l->l_phys->l_hdr.lh_prefix_len++;
	nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1;
	nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len;

	/* break existing hash chains */
	zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));

	if (sort)
		l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;

	/*
	 * Transfer entries whose hash bit 'bit' is set to nl; rehash
	 * the remaining entries
	 *
	 * NB: We could find entries via the hashtable instead. That
	 * would be O(hashents+numents) rather than O(numblks+numents),
	 * but this accesses memory more sequentially, and when we're
	 * called, the block is usually pretty full.
	 */
	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
		struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
		if (le->le_type != ZAP_CHUNK_ENTRY)
			continue;

		if (le->le_hash & (1ULL << bit))
			zap_leaf_transfer_entry(l, i, nl);
		else
			(void) zap_leaf_rehash_entry(l, i);
	}
}
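
/*
 * Worked example (added commentary): suppose l covers prefix 0b101
 * with lh_prefix_len == 3, so the next undecided hash bit is
 * bit == 64 - 1 - 3 == 60.  After the split, l covers prefix 0b1010
 * (lh_prefix_len == 4) and nl covers 0b1011; every entry whose hash
 * has bit 60 set is transferred to nl, and the rest are rehashed in
 * place because LEAF_HASH now consumes one more prefix bit.
 */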

void
zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
{
	int i, n;

	n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
	    l->l_phys->l_hdr.lh_prefix_len;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_leafs_with_2n_pointers[n]++;

	n = l->l_phys->l_hdr.lh_nentries/5;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_blocks_with_n5_entries[n]++;

	n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
	    l->l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
	    (1<<FZAP_BLOCK_SHIFT(zap));
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_blocks_n_tenths_full[n]++;

	for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
		int nentries = 0;
		int chunk = l->l_phys->l_hash[i];

		while (chunk != CHAIN_END) {
			struct zap_leaf_entry *le =
			    ZAP_LEAF_ENTRY(l, chunk);

			n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_length) +
			    ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length *
			    le->le_int_size);
			n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
			zs->zs_entries_using_n_chunks[n]++;

			chunk = le->le_next;
			nentries++;
		}

		n = nentries;
		n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
		zs->zs_buckets_with_n_entries[n]++;
	}
}
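
/*
 * Added commentary on zap_leaf_stats() above: zs_blocks_n_tenths_full
 * estimates occupancy in tenths of a block.  It approximates the bytes
 * in use as the block size minus ZAP_LEAF_ARRAY_BYTES+1 bytes per free
 * chunk, then scales by 10 over the block size and clamps the result
 * to the histogram range.
 */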