1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * The 512-byte leaf is broken into 32 16-byte chunks. 31 * chunk number n means l_chunk[n], even though the header precedes it. 32 * the names are stored null-terminated. 33 */ 34 35 #include <sys/zfs_context.h> 36 #include <sys/zap.h> 37 #include <sys/zap_impl.h> 38 #include <sys/zap_leaf.h> 39 #include <sys/spa.h> 40 #include <sys/dmu.h> 41 42 #define CHAIN_END 0xffff /* end of the chunk chain */ 43 44 /* somewhat arbitrary, could go up to around 100k ... */ 45 #define MAX_ARRAY_BYTES (8<<10) 46 47 #define NCHUNKS(bytes) (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES) 48 49 /* 50 * XXX This will >> by a negative number when 51 * lh_prefix_len > 64-ZAP_LEAF_HASH_SHIFT. 52 */ 53 #define LEAF_HASH(l, h) \ 54 ((ZAP_LEAF_HASH_NUMENTRIES-1) & \ 55 ((h) >> (64 - ZAP_LEAF_HASH_SHIFT-(l)->lh_prefix_len))) 56 57 #define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)]) 58 59 /* #define MEMCHECK */ 60 61 62 static void 63 zap_memset(void *a, int c, size_t n) 64 { 65 char *cp = a; 66 char *cpend = cp + n; 67 68 while (cp < cpend) 69 *cp++ = c; 70 } 71 72 static void 73 stv(int len, void *addr, uint64_t value) 74 { 75 switch (len) { 76 case 1: 77 *(uint8_t *)addr = value; 78 return; 79 case 2: 80 *(uint16_t *)addr = value; 81 return; 82 case 4: 83 *(uint32_t *)addr = value; 84 return; 85 case 8: 86 *(uint64_t *)addr = value; 87 return; 88 } 89 ASSERT(!"bad int len"); 90 } 91 92 static uint64_t 93 ldv(int len, const void *addr) 94 { 95 switch (len) { 96 case 1: 97 return (*(uint8_t *)addr); 98 case 2: 99 return (*(uint16_t *)addr); 100 case 4: 101 return (*(uint32_t *)addr); 102 case 8: 103 return (*(uint64_t *)addr); 104 } 105 ASSERT(!"bad int len"); 106 return (0xFEEDFACEDEADBEEF); 107 } 108 109 void 110 zap_leaf_byteswap(zap_leaf_phys_t *buf) 111 { 112 int i; 113 114 buf->l_hdr.lhr_block_type = BSWAP_64(buf->l_hdr.lhr_block_type); 115 buf->l_hdr.lhr_next = BSWAP_64(buf->l_hdr.lhr_next); 116 buf->l_hdr.lhr_prefix = BSWAP_64(buf->l_hdr.lhr_prefix); 117 buf->l_hdr.lhr_magic = BSWAP_32(buf->l_hdr.lhr_magic); 118 buf->l_hdr.lhr_nfree = BSWAP_16(buf->l_hdr.lhr_nfree); 119 buf->l_hdr.lhr_nentries = BSWAP_16(buf->l_hdr.lhr_nentries); 120 buf->l_hdr.lhr_prefix_len = BSWAP_16(buf->l_hdr.lhr_prefix_len); 121 buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist); 122 123 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES; i++) 124 buf->l_hash[i] = BSWAP_16(buf->l_hash[i]); 125 126 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 127 struct zap_leaf_entry *le; 128 129 switch (buf->l_chunk[i].l_free.lf_type) { 130 case ZAP_LEAF_ENTRY: 131 le = &buf->l_chunk[i].l_entry; 132 133 le->le_type = BSWAP_8(le->le_type); 134 le->le_int_size = BSWAP_8(le->le_int_size); 135 le->le_next = BSWAP_16(le->le_next); 136 le->le_name_chunk = BSWAP_16(le->le_name_chunk); 137 le->le_name_length = BSWAP_16(le->le_name_length); 138 le->le_value_chunk = BSWAP_16(le->le_value_chunk); 139 le->le_value_length = BSWAP_16(le->le_value_length); 140 le->le_cd = BSWAP_32(le->le_cd); 141 le->le_hash = BSWAP_64(le->le_hash); 142 break; 143 case ZAP_LEAF_FREE: 144 buf->l_chunk[i].l_free.lf_type = 145 BSWAP_8(buf->l_chunk[i].l_free.lf_type); 146 buf->l_chunk[i].l_free.lf_next = 147 BSWAP_16(buf->l_chunk[i].l_free.lf_next); 148 break; 149 case ZAP_LEAF_ARRAY: 150 /* zap_leaf_array */ 151 buf->l_chunk[i].l_array.la_type = 152 BSWAP_8(buf->l_chunk[i].l_array.la_type); 153 buf->l_chunk[i].l_array.la_next = 154 BSWAP_16(buf->l_chunk[i].l_array.la_next); 155 /* la_array doesn't need swapping */ 156 break; 157 default: 158 ASSERT(!"bad leaf type"); 159 } 160 } 161 } 162 163 void 164 zap_leaf_init(zap_leaf_t *l) 165 { 166 int i; 167 168 ASSERT3U(sizeof (zap_leaf_phys_t), ==, l->l_dbuf->db_size); 169 zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header)); 170 zap_memset(&l->l_phys->l_hash, CHAIN_END, sizeof (l->l_phys->l_hash)); 171 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 172 l->l_phys->l_chunk[i].l_free.lf_type = ZAP_LEAF_FREE; 173 l->l_phys->l_chunk[i].l_free.lf_next = i+1; 174 } 175 l->l_phys->l_chunk[ZAP_LEAF_NUMCHUNKS-1].l_free.lf_next = CHAIN_END; 176 l->lh_block_type = ZBT_LEAF; 177 l->lh_magic = ZAP_LEAF_MAGIC; 178 l->lh_nfree = ZAP_LEAF_NUMCHUNKS; 179 } 180 181 zap_leaf_t * 182 zap_leaf_chainmore(zap_leaf_t *l, zap_leaf_t *nl) 183 { 184 nl->lh_prefix = l->lh_prefix; 185 nl->lh_prefix_len = l->lh_prefix_len; 186 nl->l_next = l->l_next; 187 l->l_next = nl; 188 nl->lh_next = l->lh_next; 189 l->lh_next = nl->l_blkid; 190 return (nl); 191 } 192 193 /* 194 * Routines which manipulate leaf chunks (l_chunk[]). 195 */ 196 197 static uint16_t 198 zap_leaf_chunk_alloc(zap_leaf_t *l) 199 { 200 int chunk; 201 202 ASSERT(l->lh_nfree > 0); 203 204 chunk = l->l_phys->l_hdr.lh_freelist; 205 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 206 ASSERT3U(l->l_phys->l_chunk[chunk].l_free.lf_type, ==, ZAP_LEAF_FREE); 207 208 l->l_phys->l_hdr.lh_freelist = l->l_phys->l_chunk[chunk].l_free.lf_next; 209 210 #ifdef MEMCHECK 211 zap_memset(&l->l_phys->l_chunk[chunk], 0xa1, 212 sizeof (l->l_phys->l_chunk[chunk])); 213 #endif 214 215 l->lh_nfree--; 216 217 return (chunk); 218 } 219 220 static void 221 zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) 222 { 223 struct zap_leaf_free *zlf = &l->l_phys->l_chunk[chunk].l_free; 224 ASSERT3U(l->lh_nfree, <, ZAP_LEAF_NUMCHUNKS); 225 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 226 ASSERT(zlf->lf_type != ZAP_LEAF_FREE); 227 228 #ifdef MEMCHECK 229 zap_memset(&l->l_phys->l_chunk[chunk], 0xf4, 230 sizeof (l->l_phys->l_chunk[chunk])); 231 #endif 232 233 zlf->lf_type = ZAP_LEAF_FREE; 234 zlf->lf_next = l->l_phys->l_hdr.lh_freelist; 235 bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ 236 l->l_phys->l_hdr.lh_freelist = chunk; 237 238 l->lh_nfree++; 239 } 240 241 242 /* 243 * Routines which manipulate leaf arrays (zap_leaf_array type chunks). 244 */ 245 246 static uint16_t 247 zap_leaf_array_create(const zap_entry_handle_t *zeh, const char *buf, 248 int integer_size, int num_integers) 249 { 250 uint16_t chunk_head; 251 uint16_t *chunkp = &chunk_head; 252 int byten = 0; 253 uint64_t value; 254 int shift = (integer_size-1)*8; 255 int len = num_integers; 256 zap_leaf_t *l = zeh->zeh_found_leaf; 257 258 ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); 259 260 while (len > 0) { 261 uint16_t chunk = zap_leaf_chunk_alloc(l); 262 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 263 int i; 264 265 la->la_type = ZAP_LEAF_ARRAY; 266 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { 267 if (byten == 0) 268 value = ldv(integer_size, buf); 269 la->la_array[i] = (value & (0xff << shift)) >> shift; 270 value <<= 8; 271 if (++byten == integer_size) { 272 byten = 0; 273 buf += integer_size; 274 if (--len == 0) 275 break; 276 } 277 } 278 279 *chunkp = chunk; 280 chunkp = &la->la_next; 281 } 282 *chunkp = CHAIN_END; 283 284 return (chunk_head); 285 } 286 287 static void 288 zap_leaf_array_free(zap_entry_handle_t *zeh, uint16_t *chunkp) 289 { 290 uint16_t chunk = *chunkp; 291 zap_leaf_t *l = zeh->zeh_found_leaf; 292 293 *chunkp = CHAIN_END; 294 295 while (chunk != CHAIN_END) { 296 int nextchunk = l->l_phys->l_chunk[chunk].l_array.la_next; 297 ASSERT3U(l->l_phys->l_chunk[chunk].l_array.la_type, ==, 298 ZAP_LEAF_ARRAY); 299 zap_leaf_chunk_free(l, chunk); 300 chunk = nextchunk; 301 } 302 } 303 304 /* array_len and buf_len are in integers, not bytes */ 305 static void 306 zap_leaf_array_read(const zap_entry_handle_t *zeh, uint16_t chunk, 307 int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, 308 char *buf) 309 { 310 int len = MIN(array_len, buf_len); 311 int byten = 0; 312 uint64_t value = 0; 313 zap_leaf_t *l = zeh->zeh_found_leaf; 314 315 ASSERT3U(array_int_len, <=, buf_int_len); 316 317 while (len > 0) { 318 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 319 int i; 320 321 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 322 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { 323 value = (value << 8) | la->la_array[i]; 324 byten++; 325 if (byten == array_int_len) { 326 stv(buf_int_len, buf, value); 327 byten = 0; 328 len--; 329 if (len == 0) 330 return; 331 buf += buf_int_len; 332 } 333 } 334 chunk = la->la_next; 335 } 336 } 337 338 /* 339 * Only to be used on 8-bit arrays. 340 * array_len is actual len in bytes (not encoded le_value_length). 341 * buf is null-terminated. 342 */ 343 static int 344 zap_leaf_array_equal(const zap_entry_handle_t *zeh, int chunk, 345 int array_len, const char *buf) 346 { 347 int bseen = 0; 348 zap_leaf_t *l = zeh->zeh_found_leaf; 349 350 while (bseen < array_len) { 351 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 352 int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); 353 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 354 if (bcmp(la->la_array, buf + bseen, toread)) 355 break; 356 chunk = la->la_next; 357 bseen += toread; 358 } 359 return (bseen == array_len); 360 } 361 362 /* 363 * Routines which manipulate leaf entries. 364 */ 365 366 int 367 zap_leaf_lookup(zap_leaf_t *l, 368 const char *name, uint64_t h, zap_entry_handle_t *zeh) 369 { 370 uint16_t *chunkp; 371 struct zap_leaf_entry *le; 372 373 zeh->zeh_head_leaf = l; 374 375 again: 376 ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC); 377 378 for (chunkp = LEAF_HASH_ENTPTR(l, h); 379 *chunkp != CHAIN_END; chunkp = &le->le_next) { 380 uint16_t chunk = *chunkp; 381 le = &l->l_phys->l_chunk[chunk].l_entry; 382 383 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 384 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 385 386 if (le->le_hash != h) 387 continue; 388 389 zeh->zeh_found_leaf = l; 390 if (zap_leaf_array_equal(zeh, le->le_name_chunk, 391 le->le_name_length, name)) { 392 zeh->zeh_num_integers = le->le_value_length; 393 zeh->zeh_integer_size = le->le_int_size; 394 zeh->zeh_cd = le->le_cd; 395 zeh->zeh_hash = le->le_hash; 396 zeh->zeh_chunkp = chunkp; 397 zeh->zeh_found_leaf = l; 398 return (0); 399 } 400 } 401 402 if (l->l_next) { 403 l = l->l_next; 404 goto again; 405 } 406 407 return (ENOENT); 408 } 409 410 /* Return (h1,cd1 >= h2,cd2) */ 411 static int 412 hcd_gteq(uint64_t h1, uint32_t cd1, uint64_t h2, uint32_t cd2) 413 { 414 if (h1 > h2) 415 return (TRUE); 416 if (h1 == h2 && cd1 >= cd2) 417 return (TRUE); 418 return (FALSE); 419 } 420 421 int 422 zap_leaf_lookup_closest(zap_leaf_t *l, 423 uint64_t h, uint32_t cd, zap_entry_handle_t *zeh) 424 { 425 uint16_t chunk; 426 uint64_t besth = -1ULL; 427 uint32_t bestcd = ZAP_MAXCD; 428 uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES-1; 429 uint16_t lh; 430 struct zap_leaf_entry *le; 431 432 zeh->zeh_head_leaf = l; 433 434 again: 435 ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC); 436 437 for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { 438 for (chunk = l->l_phys->l_hash[lh]; 439 chunk != CHAIN_END; chunk = le->le_next) { 440 le = &l->l_phys->l_chunk[chunk].l_entry; 441 442 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 443 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 444 445 if (hcd_gteq(le->le_hash, le->le_cd, h, cd) && 446 hcd_gteq(besth, bestcd, le->le_hash, le->le_cd)) { 447 ASSERT3U(bestlh, >=, lh); 448 bestlh = lh; 449 besth = le->le_hash; 450 bestcd = le->le_cd; 451 452 zeh->zeh_num_integers = le->le_value_length; 453 zeh->zeh_integer_size = le->le_int_size; 454 zeh->zeh_cd = le->le_cd; 455 zeh->zeh_hash = le->le_hash; 456 zeh->zeh_fakechunk = chunk; 457 zeh->zeh_chunkp = &zeh->zeh_fakechunk; 458 zeh->zeh_found_leaf = l; 459 } 460 } 461 } 462 463 if (l->l_next) { 464 l = l->l_next; 465 goto again; 466 } 467 468 return (bestcd == ZAP_MAXCD ? ENOENT : 0); 469 } 470 471 int 472 zap_entry_read(const zap_entry_handle_t *zeh, 473 uint8_t integer_size, uint64_t num_integers, void *buf) 474 { 475 struct zap_leaf_entry *le; 476 477 le = &zeh->zeh_found_leaf->l_phys->l_chunk[*zeh->zeh_chunkp].l_entry; 478 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 479 480 if (le->le_int_size > integer_size) 481 return (EINVAL); 482 483 zap_leaf_array_read(zeh, le->le_value_chunk, le->le_int_size, 484 le->le_value_length, integer_size, num_integers, buf); 485 486 if (zeh->zeh_num_integers > num_integers) 487 return (EOVERFLOW); 488 return (0); 489 490 } 491 492 int 493 zap_entry_read_name(const zap_entry_handle_t *zeh, uint16_t buflen, char *buf) 494 { 495 struct zap_leaf_entry *le; 496 497 le = &zeh->zeh_found_leaf->l_phys->l_chunk[*zeh->zeh_chunkp].l_entry; 498 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 499 500 zap_leaf_array_read(zeh, le->le_name_chunk, 1, 501 le->le_name_length, 1, buflen, buf); 502 if (le->le_name_length > buflen) 503 return (EOVERFLOW); 504 return (0); 505 } 506 507 int 508 zap_entry_update(zap_entry_handle_t *zeh, 509 uint8_t integer_size, uint64_t num_integers, const void *buf) 510 { 511 int delta_chunks; 512 struct zap_leaf_entry *le; 513 le = &zeh->zeh_found_leaf->l_phys->l_chunk[*zeh->zeh_chunkp].l_entry; 514 515 delta_chunks = NCHUNKS(num_integers * integer_size) - 516 NCHUNKS(le->le_value_length * le->le_int_size); 517 518 if (zeh->zeh_found_leaf->lh_nfree < delta_chunks) 519 return (EAGAIN); 520 521 /* 522 * We should search other chained leaves (via 523 * zap_entry_remove,create?) otherwise returning EAGAIN will 524 * just send us into an infinite loop if we have to chain 525 * another leaf block, rather than being able to split this 526 * block. 527 */ 528 529 zap_leaf_array_free(zeh, &le->le_value_chunk); 530 le->le_value_chunk = 531 zap_leaf_array_create(zeh, buf, integer_size, num_integers); 532 le->le_value_length = (num_integers*integer_size > MAX_ARRAY_BYTES) ? 533 (MAX_ARRAY_BYTES + 1) : (num_integers); 534 le->le_int_size = integer_size; 535 return (0); 536 } 537 538 void 539 zap_entry_remove(zap_entry_handle_t *zeh) 540 { 541 uint16_t entry_chunk; 542 struct zap_leaf_entry *le; 543 zap_leaf_t *l = zeh->zeh_found_leaf; 544 545 ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk); 546 547 entry_chunk = *zeh->zeh_chunkp; 548 le = &l->l_phys->l_chunk[entry_chunk].l_entry; 549 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 550 551 zap_leaf_array_free(zeh, &le->le_name_chunk); 552 zap_leaf_array_free(zeh, &le->le_value_chunk); 553 554 *zeh->zeh_chunkp = le->le_next; 555 zap_leaf_chunk_free(l, entry_chunk); 556 557 l->lh_nentries--; 558 } 559 560 int 561 zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd, 562 uint8_t integer_size, uint64_t num_integers, const void *buf, 563 zap_entry_handle_t *zeh) 564 { 565 uint16_t chunk; 566 uint16_t *chunkp; 567 struct zap_leaf_entry *le; 568 uint64_t namelen, valuelen; 569 int numchunks; 570 571 valuelen = integer_size * num_integers; 572 namelen = strlen(name) + 1; 573 ASSERT(namelen >= 2); 574 575 zeh->zeh_head_leaf = l; 576 577 if (namelen > MAXNAMELEN) 578 return (ENAMETOOLONG); 579 /* find the first leaf in the chain that has sufficient free space */ 580 numchunks = 1 + NCHUNKS(namelen) + NCHUNKS(valuelen); 581 if (numchunks > ZAP_LEAF_NUMCHUNKS) 582 return (E2BIG); 583 584 if (cd == ZAP_MAXCD) { 585 for (cd = 0; cd < ZAP_MAXCD; cd++) { 586 zap_leaf_t *ll; 587 for (ll = l; ll; ll = ll->l_next) { 588 for (chunk = *LEAF_HASH_ENTPTR(ll, h); 589 chunk != CHAIN_END; chunk = le->le_next) { 590 le = &ll->l_phys->l_chunk 591 [chunk].l_entry; 592 if (le->le_hash == h && 593 le->le_cd == cd) { 594 break; 595 } 596 } 597 /* 598 * if this cd is in use, no need to 599 * check more chained leafs 600 */ 601 if (chunk != CHAIN_END) 602 break; 603 } 604 /* If this cd is not in use, we are good. */ 605 if (chunk == CHAIN_END) 606 break; 607 } 608 /* If we tried all the cd's, we lose. */ 609 if (cd == ZAP_MAXCD) 610 return (ENOSPC); 611 } 612 613 for (; l; l = l->l_next) 614 if (l->lh_nfree >= numchunks) 615 break; 616 if (l == NULL) 617 return (EAGAIN); 618 619 zeh->zeh_found_leaf = l; 620 621 /* make the entry */ 622 chunk = zap_leaf_chunk_alloc(l); 623 le = &l->l_phys->l_chunk[chunk].l_entry; 624 le->le_type = ZAP_LEAF_ENTRY; 625 le->le_name_chunk = zap_leaf_array_create(zeh, name, 1, namelen); 626 le->le_name_length = namelen; 627 le->le_value_chunk = 628 zap_leaf_array_create(zeh, buf, integer_size, num_integers); 629 le->le_value_length = (num_integers*integer_size > MAX_ARRAY_BYTES) ? 630 (MAX_ARRAY_BYTES + 1) : (num_integers); 631 le->le_int_size = integer_size; 632 le->le_hash = h; 633 le->le_cd = cd; 634 635 /* link it into the hash chain */ 636 chunkp = LEAF_HASH_ENTPTR(l, h); 637 le->le_next = *chunkp; 638 *chunkp = chunk; 639 640 l->lh_nentries++; 641 642 zeh->zeh_num_integers = num_integers; 643 zeh->zeh_integer_size = le->le_int_size; 644 zeh->zeh_cd = le->le_cd; 645 zeh->zeh_hash = le->le_hash; 646 zeh->zeh_chunkp = chunkp; 647 648 return (0); 649 } 650 651 /* 652 * Routines for transferring entries between leafs. 653 */ 654 655 static void 656 zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) 657 { 658 struct zap_leaf_entry *le = &l->l_phys->l_chunk[entry].l_entry; 659 uint16_t *ptr = LEAF_HASH_ENTPTR(l, le->le_hash); 660 le->le_next = *ptr; 661 *ptr = entry; 662 } 663 664 static void 665 zap_leaf_rehash_entries(zap_leaf_t *l) 666 { 667 int i; 668 669 if (l->lh_nentries == 0) 670 return; 671 672 /* break existing hash chains */ 673 zap_memset(l->l_phys->l_hash, CHAIN_END, sizeof (l->l_phys->l_hash)); 674 675 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 676 struct zap_leaf_entry *le = &l->l_phys->l_chunk[i].l_entry; 677 if (le->le_type != ZAP_LEAF_ENTRY) 678 continue; 679 zap_leaf_rehash_entry(l, i); 680 } 681 } 682 683 static uint16_t 684 zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl) 685 { 686 uint16_t new_chunk; 687 uint16_t *nchunkp = &new_chunk; 688 689 while (chunk != CHAIN_END) { 690 uint16_t nchunk = zap_leaf_chunk_alloc(nl); 691 struct zap_leaf_array *nla = 692 &nl->l_phys->l_chunk[nchunk].l_array; 693 struct zap_leaf_array *la = 694 &l->l_phys->l_chunk[chunk].l_array; 695 int nextchunk = la->la_next; 696 697 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 698 ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS); 699 700 *nla = *la; 701 702 zap_leaf_chunk_free(l, chunk); 703 chunk = nextchunk; 704 *nchunkp = nchunk; 705 nchunkp = &nla->la_next; 706 } 707 *nchunkp = CHAIN_END; 708 return (new_chunk); 709 } 710 711 static void 712 zap_leaf_transfer_entry(zap_t *zap, zap_leaf_t *l, int entry, zap_leaf_t *nhl, 713 dmu_tx_t *tx) 714 { 715 zap_leaf_t *nl; 716 struct zap_leaf_entry *le, *nle; 717 uint16_t chunk, nchunks; 718 719 le = &l->l_phys->l_chunk[entry].l_entry; 720 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 721 722 /* find a leaf in the destination leaf chain with enough free space */ 723 nchunks = 1 + NCHUNKS(le->le_name_length) + 724 NCHUNKS(le->le_value_length * le->le_int_size); 725 for (nl = nhl; nl; nl = nl->l_next) 726 if (nl->lh_nfree >= nchunks) 727 break; 728 if (nl == NULL) { 729 nl = zap_leaf_chainmore(nhl, zap_create_leaf(zap, tx)); 730 dprintf("transfer_entry: chaining leaf %x/%d\n", 731 nl->lh_prefix, nl->lh_prefix_len); 732 } 733 734 chunk = zap_leaf_chunk_alloc(nl); 735 nle = &nl->l_phys->l_chunk[chunk].l_entry; 736 *nle = *le; 737 738 zap_leaf_rehash_entry(nl, chunk); 739 740 nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); 741 nle->le_value_chunk = 742 zap_leaf_transfer_array(l, le->le_value_chunk, nl); 743 744 zap_leaf_chunk_free(l, entry); 745 746 l->lh_nentries--; 747 nl->lh_nentries++; 748 } 749 750 /* 751 * Transfer entries whose hash bit 'bit' is 1 to nl1, and 0 to nl0. 752 * Ignore leaf chaining in source (l), but chain in destinations. 753 * We'll re-chain all the entries in l as we go along. 754 */ 755 static void 756 zap_leaf_transfer_entries(zap_t *zap, zap_leaf_t *l, 757 zap_leaf_t *nl0, zap_leaf_t *nl1, int bit, dmu_tx_t *tx) 758 { 759 int i; 760 761 ASSERT(bit < 64 && bit >= 0); 762 /* break existing hash chains */ 763 zap_memset(l->l_phys->l_hash, CHAIN_END, sizeof (l->l_phys->l_hash)); 764 765 if (nl0 != l) 766 zap_leaf_rehash_entries(nl0); 767 if (nl1 != nl0) 768 zap_leaf_rehash_entries(nl1); 769 770 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 771 struct zap_leaf_entry *le = &l->l_phys->l_chunk[i].l_entry; 772 if (le->le_type != ZAP_LEAF_ENTRY) 773 continue; 774 775 /* 776 * We could find entries via hashtable instead. That 777 * would be O(hashents+numents) rather than 778 * O(numblks+numents), but this accesses memory more 779 * sequentially, and when we're called, the block is 780 * usually pretty full. 781 */ 782 783 if (le->le_hash & (1ULL << bit)) { 784 zap_leaf_transfer_entry(zap, l, i, nl1, tx); 785 } else { 786 if (nl0 == l) 787 zap_leaf_rehash_entry(l, i); 788 else 789 zap_leaf_transfer_entry(zap, l, i, nl0, tx); 790 } 791 } 792 793 } 794 795 /* 796 * nl will contain the entries whose hash prefix ends in 1 797 * handles leaf chaining 798 */ 799 zap_leaf_t * 800 zap_leaf_split(zap_t *zap, zap_leaf_t *hl, dmu_tx_t *tx) 801 { 802 zap_leaf_t *l = hl; 803 int bit = 64 - 1 - hl->lh_prefix_len; 804 zap_leaf_t *nl = zap_create_leaf(zap, tx); 805 806 /* set new prefix and prefix_len */ 807 hl->lh_prefix <<= 1; 808 hl->lh_prefix_len++; 809 nl->lh_prefix = hl->lh_prefix | 1; 810 nl->lh_prefix_len = hl->lh_prefix_len; 811 812 /* transfer odd entries from first leaf in hl chain to nl */ 813 zap_leaf_transfer_entries(zap, hl, hl, nl, bit, tx); 814 815 /* take rest of chain off hl */ 816 l = hl->l_next; 817 hl->l_next = NULL; 818 hl->lh_next = 0; 819 820 /* transfer even entries from hl chain back to hl, odd entries to nl */ 821 while (l) { 822 zap_leaf_t *next = l->l_next; 823 zap_leaf_transfer_entries(zap, l, hl, nl, bit, tx); 824 zap_destroy_leaf(zap, l, tx); 825 l = next; 826 } 827 828 return (nl); 829 } 830 831 void 832 zap_stats_leaf(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) 833 { 834 int n, nchained = 0; 835 836 n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - l->lh_prefix_len; 837 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 838 zs->zs_leafs_with_2n_pointers[n]++; 839 840 do { 841 int i; 842 843 n = l->lh_nentries/5; 844 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 845 zs->zs_blocks_with_n5_entries[n]++; 846 847 n = ((1<<ZAP_BLOCK_SHIFT) - 848 l->lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / 849 (1<<ZAP_BLOCK_SHIFT); 850 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 851 zs->zs_blocks_n_tenths_full[n]++; 852 853 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES; i++) { 854 int nentries = 0; 855 int chunk = l->l_phys->l_hash[i]; 856 857 while (chunk != CHAIN_END) { 858 struct zap_leaf_entry *le = 859 &l->l_phys->l_chunk[chunk].l_entry; 860 861 n = 1 + NCHUNKS(le->le_name_length) + 862 NCHUNKS(le->le_value_length * 863 le->le_int_size); 864 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 865 zs->zs_entries_using_n_chunks[n]++; 866 867 chunk = le->le_next; 868 nentries++; 869 } 870 871 n = nentries; 872 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 873 zs->zs_buckets_with_n_entries[n]++; 874 } 875 876 nchained++; 877 l = l->l_next; 878 } while (l); 879 880 n = nchained-1; 881 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 882 zs->zs_leafs_with_n_chained[n]++; 883 } 884