1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * The 512-byte leaf is broken into 32 16-byte chunks. 30 * chunk number n means l_chunk[n], even though the header precedes it. 31 * the names are stored null-terminated. 32 */ 33 34 #include <sys/zfs_context.h> 35 #include <sys/zap.h> 36 #include <sys/zap_impl.h> 37 #include <sys/zap_leaf.h> 38 #include <sys/spa.h> 39 #include <sys/dmu.h> 40 41 #define CHAIN_END 0xffff /* end of the chunk chain */ 42 43 /* half the (current) minimum block size */ 44 #define MAX_ARRAY_BYTES (8<<10) 45 46 #define NCHUNKS(bytes) (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES) 47 48 /* 49 * XXX This will >> by a negative number when 50 * lh_prefix_len > 64-ZAP_LEAF_HASH_SHIFT. 
 */
#define	LEAF_HASH(l, h) \
	((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
	((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->lh_prefix_len)))

/* Pointer to the hash-table bucket that hash 'h' falls into */
#define	LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])

/* #define MEMCHECK */


/*
 * Fill 'n' bytes at 'a' with byte value 'c' (only the low byte of 'c'
 * is stored).  Note that callers pass CHAIN_END (0xffff) as 'c': every
 * byte becomes 0xff, so every 16-bit slot ends up equal to CHAIN_END.
 */
static void
zap_memset(void *a, int c, size_t n)
{
	char *cp = a;
	char *cpend = cp + n;

	while (cp < cpend)
		*cp++ = c;
}

/*
 * Store an integer of width 'len' bytes (1, 2, 4, or 8) at 'addr' in
 * native byte order.  Asserts on any other width.
 */
static void
stv(int len, void *addr, uint64_t value)
{
	switch (len) {
	case 1:
		*(uint8_t *)addr = value;
		return;
	case 2:
		*(uint16_t *)addr = value;
		return;
	case 4:
		*(uint32_t *)addr = value;
		return;
	case 8:
		*(uint64_t *)addr = value;
		return;
	}
	ASSERT(!"bad int len");
}

/*
 * Load an integer of width 'len' bytes (1, 2, 4, or 8) from 'addr'.
 * Asserts on any other width; the poison value is returned only in
 * non-debug builds where the ASSERT compiles away.
 */
static uint64_t
ldv(int len, const void *addr)
{
	switch (len) {
	case 1:
		return (*(uint8_t *)addr);
	case 2:
		return (*(uint16_t *)addr);
	case 4:
		return (*(uint32_t *)addr);
	case 8:
		return (*(uint64_t *)addr);
	}
	ASSERT(!"bad int len");
	return (0xFEEDFACEDEADBEEF);
}

/*
 * Byteswap an entire leaf block in place.  'size' is the block size in
 * bytes; a stack zap_leaf_t is populated just enough (l_bs, l_phys) for
 * the ZAP_LEAF_* macros to compute hash-table and chunk counts.
 * The header is swapped first, then the hash table, then each chunk
 * according to its type (free chunks and array chunks have no 64-bit
 * fields; la_array is a byte array and needs no swapping).
 */
void
zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
{
	int i;
	zap_leaf_t l;
	l.l_bs = highbit(size)-1;
	l.l_phys = buf;

	buf->l_hdr.lhr_block_type = BSWAP_64(buf->l_hdr.lhr_block_type);
	buf->l_hdr.lhr_next = BSWAP_64(buf->l_hdr.lhr_next);
	buf->l_hdr.lhr_prefix = BSWAP_64(buf->l_hdr.lhr_prefix);
	buf->l_hdr.lhr_magic = BSWAP_32(buf->l_hdr.lhr_magic);
	buf->l_hdr.lhr_nfree = BSWAP_16(buf->l_hdr.lhr_nfree);
	buf->l_hdr.lhr_nentries = BSWAP_16(buf->l_hdr.lhr_nentries);
	buf->l_hdr.lhr_prefix_len = BSWAP_16(buf->l_hdr.lhr_prefix_len);
	buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist);

	for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
		buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);

	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
		zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
		struct zap_leaf_entry *le;

		switch (lc->l_free.lf_type) {
		case ZAP_CHUNK_ENTRY:
			le = &lc->l_entry;

			le->le_type = BSWAP_8(le->le_type);
			le->le_int_size = BSWAP_8(le->le_int_size);
			le->le_next = BSWAP_16(le->le_next);
			le->le_name_chunk = BSWAP_16(le->le_name_chunk);
			le->le_name_length = BSWAP_16(le->le_name_length);
			le->le_value_chunk = BSWAP_16(le->le_value_chunk);
			le->le_value_length = BSWAP_16(le->le_value_length);
			le->le_cd = BSWAP_32(le->le_cd);
			le->le_hash = BSWAP_64(le->le_hash);
			break;
		case ZAP_CHUNK_FREE:
			lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type);
			lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next);
			break;
		case ZAP_CHUNK_ARRAY:
			lc->l_array.la_type = BSWAP_8(lc->l_array.la_type);
			lc->l_array.la_next = BSWAP_16(lc->l_array.la_next);
			/* la_array doesn't need swapping */
			break;
		default:
			ASSERT(!"bad leaf type");
		}
	}
}

/*
 * Initialize a freshly-created leaf: zero the header, set every hash
 * bucket to CHAIN_END, and thread all chunks onto the freelist (the
 * zeroed header leaves lh_freelist pointing at chunk 0).
 */
void
zap_leaf_init(zap_leaf_t *l)
{
	int i;

	l->l_bs = highbit(l->l_dbuf->db_size)-1;
	zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
	/* 2 bytes per hash slot; 0xff-fill yields CHAIN_END in each */
	zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
		ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
		ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
	}
	ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
	l->lh_block_type = ZBT_LEAF;
	l->lh_magic = ZAP_LEAF_MAGIC;
	l->lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
}

/*
 * Link new leaf 'nl' into the chain immediately after 'l', both
 * in memory (l_next) and on disk (lh_next block id).  The new leaf
 * inherits l's hash prefix.  Returns nl.
 */
zap_leaf_t *
zap_leaf_chainmore(zap_leaf_t *l, zap_leaf_t *nl)
{
	nl->lh_prefix = l->lh_prefix;
	nl->lh_prefix_len = l->lh_prefix_len;
	nl->l_next = l->l_next;
	l->l_next = nl;
	nl->lh_next = l->lh_next;
	l->lh_next = nl->l_blkid;
	return (nl);
}

/*
 * Routines which manipulate leaf chunks (l_chunk[]).
193 */ 194 195 static uint16_t 196 zap_leaf_chunk_alloc(zap_leaf_t *l) 197 { 198 int chunk; 199 200 ASSERT(l->lh_nfree > 0); 201 202 chunk = l->l_phys->l_hdr.lh_freelist; 203 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 204 ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE); 205 206 l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next; 207 208 #ifdef MEMCHECK 209 zap_memset(&ZAP_LEAF_CHUNK(l, chunk), 0xa1, sizeof (zap_leaf_chunk_t)); 210 #endif 211 212 l->lh_nfree--; 213 214 return (chunk); 215 } 216 217 static void 218 zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) 219 { 220 struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free; 221 ASSERT3U(l->lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l)); 222 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 223 ASSERT(zlf->lf_type != ZAP_CHUNK_FREE); 224 225 #ifdef MEMCHECK 226 zap_memset(zlf, 0xf4, sizeof (zap_leaf_chunk_t)); 227 #endif 228 229 zlf->lf_type = ZAP_CHUNK_FREE; 230 zlf->lf_next = l->l_phys->l_hdr.lh_freelist; 231 bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ 232 l->l_phys->l_hdr.lh_freelist = chunk; 233 234 l->lh_nfree++; 235 } 236 237 238 /* 239 * Routines which manipulate leaf arrays (zap_leaf_array type chunks). 
240 */ 241 242 static uint16_t 243 zap_leaf_array_create(const zap_entry_handle_t *zeh, const char *buf, 244 int integer_size, int num_integers) 245 { 246 uint16_t chunk_head; 247 uint16_t *chunkp = &chunk_head; 248 int byten = 0; 249 uint64_t value; 250 int shift = (integer_size-1)*8; 251 int len = num_integers; 252 zap_leaf_t *l = zeh->zeh_found_leaf; 253 254 ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); 255 256 while (len > 0) { 257 uint16_t chunk = zap_leaf_chunk_alloc(l); 258 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 259 int i; 260 261 la->la_type = ZAP_CHUNK_ARRAY; 262 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { 263 if (byten == 0) 264 value = ldv(integer_size, buf); 265 la->la_array[i] = (value & (0xff << shift)) >> shift; 266 value <<= 8; 267 if (++byten == integer_size) { 268 byten = 0; 269 buf += integer_size; 270 if (--len == 0) 271 break; 272 } 273 } 274 275 *chunkp = chunk; 276 chunkp = &la->la_next; 277 } 278 *chunkp = CHAIN_END; 279 280 return (chunk_head); 281 } 282 283 static void 284 zap_leaf_array_free(zap_entry_handle_t *zeh, uint16_t *chunkp) 285 { 286 uint16_t chunk = *chunkp; 287 zap_leaf_t *l = zeh->zeh_found_leaf; 288 289 *chunkp = CHAIN_END; 290 291 while (chunk != CHAIN_END) { 292 int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next; 293 ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==, 294 ZAP_CHUNK_ARRAY); 295 zap_leaf_chunk_free(l, chunk); 296 chunk = nextchunk; 297 } 298 } 299 300 /* array_len and buf_len are in integers, not bytes */ 301 static void 302 zap_leaf_array_read(const zap_entry_handle_t *zeh, uint16_t chunk, 303 int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, 304 char *buf) 305 { 306 int len = MIN(array_len, buf_len); 307 int byten = 0; 308 uint64_t value = 0; 309 zap_leaf_t *l = zeh->zeh_found_leaf; 310 311 ASSERT3U(array_int_len, <=, buf_int_len); 312 313 /* Fast path for one 8-byte integer */ 314 if (array_int_len == 8 && buf_int_len == 8 && len == 1) { 
315 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 316 uint8_t *ip = la->la_array; 317 uint64_t *buf64 = (uint64_t *)buf; 318 319 *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | 320 (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | 321 (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | 322 (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; 323 return; 324 } 325 326 /* Fast path for an array of 1-byte integers (eg. the entry name) */ 327 if (array_int_len == 1 && buf_int_len == 1 && 328 buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) { 329 while (chunk != CHAIN_END) { 330 struct zap_leaf_array *la = 331 &ZAP_LEAF_CHUNK(l, chunk).l_array; 332 bcopy(la->la_array, buf, ZAP_LEAF_ARRAY_BYTES); 333 buf += ZAP_LEAF_ARRAY_BYTES; 334 chunk = la->la_next; 335 } 336 return; 337 } 338 339 while (len > 0) { 340 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 341 int i; 342 343 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 344 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { 345 value = (value << 8) | la->la_array[i]; 346 byten++; 347 if (byten == array_int_len) { 348 stv(buf_int_len, buf, value); 349 byten = 0; 350 len--; 351 if (len == 0) 352 return; 353 buf += buf_int_len; 354 } 355 } 356 chunk = la->la_next; 357 } 358 } 359 360 /* 361 * Only to be used on 8-bit arrays. 362 * array_len is actual len in bytes (not encoded le_value_length). 363 * buf is null-terminated. 
364 */ 365 static int 366 zap_leaf_array_equal(const zap_entry_handle_t *zeh, int chunk, 367 int array_len, const char *buf) 368 { 369 int bseen = 0; 370 zap_leaf_t *l = zeh->zeh_found_leaf; 371 372 while (bseen < array_len) { 373 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 374 int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); 375 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 376 if (bcmp(la->la_array, buf + bseen, toread)) 377 break; 378 chunk = la->la_next; 379 bseen += toread; 380 } 381 return (bseen == array_len); 382 } 383 384 /* 385 * Routines which manipulate leaf entries. 386 */ 387 388 int 389 zap_leaf_lookup(zap_leaf_t *l, 390 const char *name, uint64_t h, zap_entry_handle_t *zeh) 391 { 392 uint16_t *chunkp; 393 struct zap_leaf_entry *le; 394 395 zeh->zeh_head_leaf = l; 396 397 again: 398 ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC); 399 400 for (chunkp = LEAF_HASH_ENTPTR(l, h); 401 *chunkp != CHAIN_END; chunkp = &le->le_next) { 402 uint16_t chunk = *chunkp; 403 le = ZAP_LEAF_ENTRY(l, chunk); 404 405 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 406 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 407 408 if (le->le_hash != h) 409 continue; 410 411 zeh->zeh_found_leaf = l; 412 if (zap_leaf_array_equal(zeh, le->le_name_chunk, 413 le->le_name_length, name)) { 414 zeh->zeh_num_integers = le->le_value_length; 415 zeh->zeh_integer_size = le->le_int_size; 416 zeh->zeh_cd = le->le_cd; 417 zeh->zeh_hash = le->le_hash; 418 zeh->zeh_chunkp = chunkp; 419 zeh->zeh_found_leaf = l; 420 return (0); 421 } 422 } 423 424 if (l->l_next) { 425 l = l->l_next; 426 goto again; 427 } 428 429 return (ENOENT); 430 } 431 432 /* Return (h1,cd1 >= h2,cd2) */ 433 #define HCD_GTEQ(h1, cd1, h2, cd2) \ 434 ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? 
TRUE : FALSE)) 435 436 int 437 zap_leaf_lookup_closest(zap_leaf_t *l, 438 uint64_t h, uint32_t cd, zap_entry_handle_t *zeh) 439 { 440 uint16_t chunk; 441 uint64_t besth = -1ULL; 442 uint32_t bestcd = ZAP_MAXCD; 443 uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1; 444 uint16_t lh; 445 struct zap_leaf_entry *le; 446 447 zeh->zeh_head_leaf = l; 448 449 again: 450 ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC); 451 452 for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { 453 for (chunk = l->l_phys->l_hash[lh]; 454 chunk != CHAIN_END; chunk = le->le_next) { 455 le = ZAP_LEAF_ENTRY(l, chunk); 456 457 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 458 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 459 460 if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) && 461 HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) { 462 ASSERT3U(bestlh, >=, lh); 463 bestlh = lh; 464 besth = le->le_hash; 465 bestcd = le->le_cd; 466 467 zeh->zeh_num_integers = le->le_value_length; 468 zeh->zeh_integer_size = le->le_int_size; 469 zeh->zeh_cd = le->le_cd; 470 zeh->zeh_hash = le->le_hash; 471 zeh->zeh_fakechunk = chunk; 472 zeh->zeh_chunkp = &zeh->zeh_fakechunk; 473 zeh->zeh_found_leaf = l; 474 } 475 } 476 } 477 478 if (l->l_next) { 479 l = l->l_next; 480 goto again; 481 } 482 483 return (bestcd == ZAP_MAXCD ? 
ENOENT : 0); 484 } 485 486 int 487 zap_entry_read(const zap_entry_handle_t *zeh, 488 uint8_t integer_size, uint64_t num_integers, void *buf) 489 { 490 struct zap_leaf_entry *le = 491 ZAP_LEAF_ENTRY(zeh->zeh_found_leaf, *zeh->zeh_chunkp); 492 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 493 494 if (le->le_int_size > integer_size) 495 return (EINVAL); 496 497 zap_leaf_array_read(zeh, le->le_value_chunk, le->le_int_size, 498 le->le_value_length, integer_size, num_integers, buf); 499 500 if (zeh->zeh_num_integers > num_integers) 501 return (EOVERFLOW); 502 return (0); 503 504 } 505 506 int 507 zap_entry_read_name(const zap_entry_handle_t *zeh, uint16_t buflen, char *buf) 508 { 509 struct zap_leaf_entry *le = 510 ZAP_LEAF_ENTRY(zeh->zeh_found_leaf, *zeh->zeh_chunkp); 511 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 512 513 zap_leaf_array_read(zeh, le->le_name_chunk, 1, 514 le->le_name_length, 1, buflen, buf); 515 if (le->le_name_length > buflen) 516 return (EOVERFLOW); 517 return (0); 518 } 519 520 int 521 zap_entry_update(zap_entry_handle_t *zeh, 522 uint8_t integer_size, uint64_t num_integers, const void *buf) 523 { 524 int delta_chunks; 525 struct zap_leaf_entry *le = 526 ZAP_LEAF_ENTRY(zeh->zeh_found_leaf, *zeh->zeh_chunkp); 527 528 delta_chunks = NCHUNKS(num_integers * integer_size) - 529 NCHUNKS(le->le_value_length * le->le_int_size); 530 531 if (zeh->zeh_found_leaf->lh_nfree < delta_chunks) 532 return (EAGAIN); 533 534 /* 535 * We should search other chained leaves (via 536 * zap_entry_remove,create?) otherwise returning EAGAIN will 537 * just send us into an infinite loop if we have to chain 538 * another leaf block, rather than being able to split this 539 * block. 
540 */ 541 542 zap_leaf_array_free(zeh, &le->le_value_chunk); 543 le->le_value_chunk = 544 zap_leaf_array_create(zeh, buf, integer_size, num_integers); 545 le->le_value_length = num_integers; 546 le->le_int_size = integer_size; 547 return (0); 548 } 549 550 void 551 zap_entry_remove(zap_entry_handle_t *zeh) 552 { 553 uint16_t entry_chunk; 554 struct zap_leaf_entry *le; 555 zap_leaf_t *l = zeh->zeh_found_leaf; 556 557 ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk); 558 559 entry_chunk = *zeh->zeh_chunkp; 560 le = ZAP_LEAF_ENTRY(l, entry_chunk); 561 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 562 563 zap_leaf_array_free(zeh, &le->le_name_chunk); 564 zap_leaf_array_free(zeh, &le->le_value_chunk); 565 566 *zeh->zeh_chunkp = le->le_next; 567 zap_leaf_chunk_free(l, entry_chunk); 568 569 l->lh_nentries--; 570 } 571 572 int 573 zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd, 574 uint8_t integer_size, uint64_t num_integers, const void *buf, 575 zap_entry_handle_t *zeh) 576 { 577 uint16_t chunk; 578 uint16_t *chunkp; 579 struct zap_leaf_entry *le; 580 uint64_t namelen, valuelen; 581 int numchunks; 582 583 valuelen = integer_size * num_integers; 584 namelen = strlen(name) + 1; 585 ASSERT(namelen >= 2); 586 587 zeh->zeh_head_leaf = l; 588 589 if (namelen > MAXNAMELEN) 590 return (ENAMETOOLONG); 591 /* find the first leaf in the chain that has sufficient free space */ 592 numchunks = 1 + NCHUNKS(namelen) + NCHUNKS(valuelen); 593 if (numchunks > ZAP_LEAF_NUMCHUNKS(l)) 594 return (E2BIG); 595 596 if (cd == ZAP_MAXCD) { 597 for (cd = 0; cd < ZAP_MAXCD; cd++) { 598 zap_leaf_t *ll; 599 for (ll = l; ll; ll = ll->l_next) { 600 for (chunk = *LEAF_HASH_ENTPTR(ll, h); 601 chunk != CHAIN_END; chunk = le->le_next) { 602 le = ZAP_LEAF_ENTRY(ll, chunk); 603 if (le->le_hash == h && 604 le->le_cd == cd) { 605 break; 606 } 607 } 608 /* 609 * if this cd is in use, no need to 610 * check more chained leafs 611 */ 612 if (chunk != CHAIN_END) 613 break; 614 } 615 /* 
If this cd is not in use, we are good. */ 616 if (chunk == CHAIN_END) 617 break; 618 } 619 /* If we tried all the cd's, we lose. */ 620 if (cd == ZAP_MAXCD) 621 return (ENOSPC); 622 } 623 624 for (; l; l = l->l_next) 625 if (l->lh_nfree >= numchunks) 626 break; 627 if (l == NULL) 628 return (EAGAIN); 629 630 zeh->zeh_found_leaf = l; 631 632 /* make the entry */ 633 chunk = zap_leaf_chunk_alloc(l); 634 le = ZAP_LEAF_ENTRY(l, chunk); 635 le->le_type = ZAP_CHUNK_ENTRY; 636 le->le_name_chunk = zap_leaf_array_create(zeh, name, 1, namelen); 637 le->le_name_length = namelen; 638 le->le_value_chunk = 639 zap_leaf_array_create(zeh, buf, integer_size, num_integers); 640 le->le_value_length = num_integers; 641 le->le_int_size = integer_size; 642 le->le_hash = h; 643 le->le_cd = cd; 644 645 /* link it into the hash chain */ 646 chunkp = LEAF_HASH_ENTPTR(l, h); 647 le->le_next = *chunkp; 648 *chunkp = chunk; 649 650 l->lh_nentries++; 651 652 zeh->zeh_num_integers = num_integers; 653 zeh->zeh_integer_size = le->le_int_size; 654 zeh->zeh_cd = le->le_cd; 655 zeh->zeh_hash = le->le_hash; 656 zeh->zeh_chunkp = chunkp; 657 658 return (0); 659 } 660 661 /* 662 * Routines for transferring entries between leafs. 
 */

/*
 * Link entry chunk 'entry' onto the front of its leaf hash chain,
 * using the hash stored in the entry itself.
 */
static void
zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
{
	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
	uint16_t *ptr = LEAF_HASH_ENTPTR(l, le->le_hash);
	le->le_next = *ptr;
	*ptr = entry;
}

/*
 * Rebuild the leaf's entire hash table from scratch by scanning all
 * chunks for entries.  No-op on an empty leaf.
 */
static void
zap_leaf_rehash_entries(zap_leaf_t *l)
{
	int i;

	if (l->lh_nentries == 0)
		return;

	/* break existing hash chains */
	zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));

	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
		struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
		if (le->le_type != ZAP_CHUNK_ENTRY)
			continue;
		zap_leaf_rehash_entry(l, i);
	}
}

/*
 * Move an array chunk chain from leaf 'l' to leaf 'nl', freeing the
 * source chunks as we go.  Returns the head chunk number in 'nl'.
 * The destination must have enough free chunks.
 */
static uint16_t
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
	uint16_t new_chunk;
	uint16_t *nchunkp = &new_chunk;

	while (chunk != CHAIN_END) {
		uint16_t nchunk = zap_leaf_chunk_alloc(nl);
		struct zap_leaf_array *nla =
		    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
		struct zap_leaf_array *la =
		    &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int nextchunk = la->la_next;

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		/*
		 * NOTE(review): this bounds nchunk against l, not nl;
		 * presumably harmless because all leaves of a zap share
		 * a block size (same NUMCHUNKS) — confirm.
		 */
		ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));

		*nla = *la;	/* struct copy; la_next fixed up below */

		zap_leaf_chunk_free(l, chunk);
		chunk = nextchunk;
		*nchunkp = nchunk;
		nchunkp = &nla->la_next;
	}
	*nchunkp = CHAIN_END;
	return (new_chunk);
}

/*
 * Move one entry (and its name/value arrays) from 'l' into the leaf
 * chain headed at 'nhl', chaining a new leaf onto the destination if
 * none has enough free chunks.
 */
static void
zap_leaf_transfer_entry(zap_t *zap, zap_leaf_t *l, int entry, zap_leaf_t *nhl,
    dmu_tx_t *tx)
{
	zap_leaf_t *nl;
	struct zap_leaf_entry *le, *nle;
	uint16_t chunk, nchunks;

	le = ZAP_LEAF_ENTRY(l, entry);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	/* find a leaf in the destination leaf chain with enough free space */
	nchunks = 1 + NCHUNKS(le->le_name_length) +
	    NCHUNKS(le->le_value_length * le->le_int_size);
	for (nl = nhl; nl; nl = nl->l_next)
		if (nl->lh_nfree >= nchunks)
			break;
	if (nl == NULL) {
		nl = zap_leaf_chainmore(nhl, zap_create_leaf(zap, tx));
		dprintf("transfer_entry: chaining leaf %x/%d\n",
		    nl->lh_prefix, nl->lh_prefix_len);
	}

	chunk = zap_leaf_chunk_alloc(nl);
	nle = ZAP_LEAF_ENTRY(nl, chunk);
	*nle = *le;	/* struct copy; array chunk pointers fixed below */

	zap_leaf_rehash_entry(nl, chunk);

	nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
	nle->le_value_chunk =
	    zap_leaf_transfer_array(l, le->le_value_chunk, nl);

	zap_leaf_chunk_free(l, entry);

	l->lh_nentries--;
	nl->lh_nentries++;
}

/*
 * Transfer entries whose hash bit 'bit' is 1 to nl1, and 0 to nl0.
 * Ignore leaf chaining in source (l), but chain in destinations.
 * We'll re-chain all the entries in l as we go along.
 */
static void
zap_leaf_transfer_entries(zap_t *zap, zap_leaf_t *l,
    zap_leaf_t *nl0, zap_leaf_t *nl1, int bit, dmu_tx_t *tx)
{
	int i;

	ASSERT(bit < 64 && bit >= 0);
	/* break existing hash chains */
	zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));

	/* rebuild destination hash tables unless they alias the source */
	if (nl0 != l)
		zap_leaf_rehash_entries(nl0);
	if (nl1 != nl0)
		zap_leaf_rehash_entries(nl1);

	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
		struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
		if (le->le_type != ZAP_CHUNK_ENTRY)
			continue;

		/*
		 * We could find entries via hashtable instead. That
		 * would be O(hashents+numents) rather than
		 * O(numblks+numents), but this accesses memory more
		 * sequentially, and when we're called, the block is
		 * usually pretty full.
		 */

		if (le->le_hash & (1ULL << bit)) {
			zap_leaf_transfer_entry(zap, l, i, nl1, tx);
		} else {
			if (nl0 == l)
				zap_leaf_rehash_entry(l, i);
			else
				zap_leaf_transfer_entry(zap, l, i, nl0, tx);
		}
	}
}

/*
 * nl will contain the entries whose hash prefix ends in 1
 * handles leaf chaining
 */
zap_leaf_t *
zap_leaf_split(zap_t *zap, zap_leaf_t *hl, dmu_tx_t *tx)
{
	zap_leaf_t *l = hl;
	/* the hash bit that distinguishes the two new prefixes */
	int bit = 64 - 1 - hl->lh_prefix_len;
	zap_leaf_t *nl = zap_create_leaf(zap, tx);

	/* set new prefix and prefix_len */
	hl->lh_prefix <<= 1;
	hl->lh_prefix_len++;
	nl->lh_prefix = hl->lh_prefix | 1;
	nl->lh_prefix_len = hl->lh_prefix_len;

	/* transfer odd entries from first leaf in hl chain to nl */
	zap_leaf_transfer_entries(zap, hl, hl, nl, bit, tx);

	/* take rest of chain off hl */
	l = hl->l_next;
	hl->l_next = NULL;
	hl->lh_next = 0;

	/* transfer even entries from hl chain back to hl, odd entries to nl */
	while (l) {
		zap_leaf_t *next = l->l_next;
		zap_leaf_transfer_entries(zap, l, hl, nl, bit, tx);
		zap_destroy_leaf(zap, l, tx);
		l = next;
	}

	return (nl);
}

/*
 * Accumulate usage histograms for the leaf chain starting at 'l' into
 * 'zs': pointer-table fanout, entries per leaf, fullness in tenths,
 * chunks per entry, entries per hash bucket, and chain length.
 */
void
zap_stats_leaf(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
{
	int n, nchained = 0;

	/* how many pointer-table entries point at this leaf (as 2^n) */
	n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - l->lh_prefix_len;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_leafs_with_2n_pointers[n]++;

	do {
		int i;

		n = l->lh_nentries/5;
		n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
		zs->zs_blocks_with_n5_entries[n]++;

		/* fraction of the block in use, in tenths */
		n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
		    l->lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
		    (1<<FZAP_BLOCK_SHIFT(zap));
		n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
		zs->zs_blocks_n_tenths_full[n]++;

		for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
			int nentries = 0;
			int chunk = l->l_phys->l_hash[i];

			/* walk this bucket's chain counting entries */
			while (chunk != CHAIN_END) {
				struct zap_leaf_entry *le =
				    ZAP_LEAF_ENTRY(l, chunk);

				n = 1 + NCHUNKS(le->le_name_length) +
				    NCHUNKS(le->le_value_length *
				    le->le_int_size);
				n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
				zs->zs_entries_using_n_chunks[n]++;

				chunk = le->le_next;
				nentries++;
			}

			n = nentries;
			n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
			zs->zs_buckets_with_n_entries[n]++;
		}

		nchained++;
		l = l->l_next;
	} while (l);

	n = nchained-1;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_leafs_with_n_chained[n]++;
}