1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * The 512-byte leaf is broken into 32 16-byte chunks. 31 * chunk number n means l_chunk[n], even though the header precedes it. 32 * the names are stored null-terminated. 33 */ 34 35 #include <sys/zfs_context.h> 36 #include <sys/zap.h> 37 #include <sys/zap_impl.h> 38 #include <sys/zap_leaf.h> 39 #include <sys/spa.h> 40 #include <sys/dmu.h> 41 42 #define CHAIN_END 0xffff /* end of the chunk chain */ 43 44 /* somewhat arbitrary, could go up to around 100k ... */ 45 #define MAX_ARRAY_BYTES (8<<10) 46 47 #define NCHUNKS(bytes) (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES) 48 49 /* 50 * XXX This will >> by a negative number when 51 * lh_prefix_len > 64-ZAP_LEAF_HASH_SHIFT. 52 */ 53 #define LEAF_HASH(l, h) \ 54 ((ZAP_LEAF_HASH_NUMENTRIES-1) & \ 55 ((h) >> (64 - ZAP_LEAF_HASH_SHIFT-(l)->lh_prefix_len))) 56 57 #define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)]) 58 59 /* #define MEMCHECK */ 60 61 62 static void 63 zap_memset(void *a, int c, size_t n) 64 { 65 char *cp = a; 66 char *cpend = cp + n; 67 68 while (cp < cpend) 69 *cp++ = c; 70 } 71 72 static void 73 stv(int len, void *addr, uint64_t value) 74 { 75 switch (len) { 76 case 1: 77 *(uint8_t *)addr = value; 78 return; 79 case 2: 80 *(uint16_t *)addr = value; 81 return; 82 case 4: 83 *(uint32_t *)addr = value; 84 return; 85 case 8: 86 *(uint64_t *)addr = value; 87 return; 88 } 89 ASSERT(!"bad int len"); 90 } 91 92 static uint64_t 93 ldv(int len, const void *addr) 94 { 95 switch (len) { 96 case 1: 97 return (*(uint8_t *)addr); 98 case 2: 99 return (*(uint16_t *)addr); 100 case 4: 101 return (*(uint32_t *)addr); 102 case 8: 103 return (*(uint64_t *)addr); 104 } 105 ASSERT(!"bad int len"); 106 return (0xFEEDFACEDEADBEEF); 107 } 108 109 void 110 zap_leaf_byteswap(zap_leaf_phys_t *buf) 111 { 112 int i; 113 114 buf->l_hdr.lhr_block_type = BSWAP_64(buf->l_hdr.lhr_block_type); 115 buf->l_hdr.lhr_next = BSWAP_64(buf->l_hdr.lhr_next); 116 buf->l_hdr.lhr_prefix = BSWAP_64(buf->l_hdr.lhr_prefix); 117 buf->l_hdr.lhr_magic = BSWAP_32(buf->l_hdr.lhr_magic); 118 buf->l_hdr.lhr_nfree = BSWAP_16(buf->l_hdr.lhr_nfree); 119 buf->l_hdr.lhr_nentries = BSWAP_16(buf->l_hdr.lhr_nentries); 120 buf->l_hdr.lhr_prefix_len = BSWAP_16(buf->l_hdr.lhr_prefix_len); 121 buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist); 122 123 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES; i++) 124 buf->l_hash[i] = BSWAP_16(buf->l_hash[i]); 125 126 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 127 struct zap_leaf_entry *le; 128 129 switch (buf->l_chunk[i].l_free.lf_type) { 130 case ZAP_LEAF_ENTRY: 131 le = &buf->l_chunk[i].l_entry; 132 133 le->le_type = BSWAP_8(le->le_type); 134 le->le_int_size = BSWAP_8(le->le_int_size); 135 le->le_next = BSWAP_16(le->le_next); 136 le->le_name_chunk = BSWAP_16(le->le_name_chunk); 137 le->le_name_length = BSWAP_16(le->le_name_length); 138 le->le_value_chunk = BSWAP_16(le->le_value_chunk); 139 le->le_value_length = BSWAP_16(le->le_value_length); 140 le->le_cd = BSWAP_32(le->le_cd); 141 le->le_hash = BSWAP_64(le->le_hash); 142 break; 143 case ZAP_LEAF_FREE: 144 buf->l_chunk[i].l_free.lf_type = 145 BSWAP_8(buf->l_chunk[i].l_free.lf_type); 146 buf->l_chunk[i].l_free.lf_next = 147 BSWAP_16(buf->l_chunk[i].l_free.lf_next); 148 break; 149 case ZAP_LEAF_ARRAY: 150 /* zap_leaf_array */ 151 buf->l_chunk[i].l_array.la_type = 152 BSWAP_8(buf->l_chunk[i].l_array.la_type); 153 buf->l_chunk[i].l_array.la_next = 154 BSWAP_16(buf->l_chunk[i].l_array.la_next); 155 /* la_array doesn't need swapping */ 156 break; 157 default: 158 ASSERT(!"bad leaf type"); 159 } 160 } 161 } 162 163 void 164 zap_leaf_init(zap_leaf_t *l) 165 { 166 int i; 167 168 ASSERT3U(sizeof (zap_leaf_phys_t), ==, l->l_dbuf->db_size); 169 zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header)); 170 zap_memset(&l->l_phys->l_hash, CHAIN_END, sizeof (l->l_phys->l_hash)); 171 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 172 l->l_phys->l_chunk[i].l_free.lf_type = ZAP_LEAF_FREE; 173 l->l_phys->l_chunk[i].l_free.lf_next = i+1; 174 } 175 l->l_phys->l_chunk[ZAP_LEAF_NUMCHUNKS-1].l_free.lf_next = CHAIN_END; 176 l->lh_block_type = ZBT_LEAF; 177 l->lh_magic = ZAP_LEAF_MAGIC; 178 l->lh_nfree = ZAP_LEAF_NUMCHUNKS; 179 } 180 181 zap_leaf_t * 182 zap_leaf_chainmore(zap_leaf_t *l, zap_leaf_t *nl) 183 { 184 nl->lh_prefix = l->lh_prefix; 185 nl->lh_prefix_len = l->lh_prefix_len; 186 nl->l_next = l->l_next; 187 l->l_next = nl; 188 nl->lh_next = l->lh_next; 189 l->lh_next = nl->l_blkid; 190 return (nl); 191 } 192 193 /* 194 * Routines which manipulate leaf chunks (l_chunk[]). 195 */ 196 197 static uint16_t 198 zap_leaf_chunk_alloc(zap_leaf_t *l) 199 { 200 int chunk; 201 202 ASSERT(l->lh_nfree > 0); 203 204 chunk = l->l_phys->l_hdr.lh_freelist; 205 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 206 ASSERT3U(l->l_phys->l_chunk[chunk].l_free.lf_type, ==, ZAP_LEAF_FREE); 207 208 l->l_phys->l_hdr.lh_freelist = l->l_phys->l_chunk[chunk].l_free.lf_next; 209 210 #ifdef MEMCHECK 211 zap_memset(&l->l_phys->l_chunk[chunk], 0xa1, 212 sizeof (l->l_phys->l_chunk[chunk])); 213 #endif 214 215 l->lh_nfree--; 216 217 return (chunk); 218 } 219 220 static void 221 zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) 222 { 223 struct zap_leaf_free *zlf = &l->l_phys->l_chunk[chunk].l_free; 224 ASSERT3U(l->lh_nfree, <, ZAP_LEAF_NUMCHUNKS); 225 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 226 ASSERT(zlf->lf_type != ZAP_LEAF_FREE); 227 228 #ifdef MEMCHECK 229 zap_memset(&l->l_phys->l_chunk[chunk], 0xf4, 230 sizeof (l->l_phys->l_chunk[chunk])); 231 #endif 232 233 zlf->lf_type = ZAP_LEAF_FREE; 234 zlf->lf_next = l->l_phys->l_hdr.lh_freelist; 235 bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ 236 l->l_phys->l_hdr.lh_freelist = chunk; 237 238 l->lh_nfree++; 239 } 240 241 242 /* 243 * Routines which manipulate leaf arrays (zap_leaf_array type chunks). 244 */ 245 246 static uint16_t 247 zap_leaf_array_create(const zap_entry_handle_t *zeh, const char *buf, 248 int integer_size, int num_integers) 249 { 250 uint16_t chunk_head; 251 uint16_t *chunkp = &chunk_head; 252 int byten = 0; 253 uint64_t value; 254 int shift = (integer_size-1)*8; 255 int len = num_integers; 256 zap_leaf_t *l = zeh->zeh_found_leaf; 257 258 ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); 259 260 while (len > 0) { 261 uint16_t chunk = zap_leaf_chunk_alloc(l); 262 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 263 int i; 264 265 la->la_type = ZAP_LEAF_ARRAY; 266 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { 267 if (byten == 0) 268 value = ldv(integer_size, buf); 269 la->la_array[i] = (value & (0xff << shift)) >> shift; 270 value <<= 8; 271 if (++byten == integer_size) { 272 byten = 0; 273 buf += integer_size; 274 if (--len == 0) 275 break; 276 } 277 } 278 279 *chunkp = chunk; 280 chunkp = &la->la_next; 281 } 282 *chunkp = CHAIN_END; 283 284 return (chunk_head); 285 } 286 287 static void 288 zap_leaf_array_free(zap_entry_handle_t *zeh, uint16_t *chunkp) 289 { 290 uint16_t chunk = *chunkp; 291 zap_leaf_t *l = zeh->zeh_found_leaf; 292 293 *chunkp = CHAIN_END; 294 295 while (chunk != CHAIN_END) { 296 int nextchunk = l->l_phys->l_chunk[chunk].l_array.la_next; 297 ASSERT3U(l->l_phys->l_chunk[chunk].l_array.la_type, ==, 298 ZAP_LEAF_ARRAY); 299 zap_leaf_chunk_free(l, chunk); 300 chunk = nextchunk; 301 } 302 } 303 304 /* array_len and buf_len are in integers, not bytes */ 305 static void 306 zap_leaf_array_read(const zap_entry_handle_t *zeh, uint16_t chunk, 307 int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, 308 char *buf) 309 { 310 int len = MIN(array_len, buf_len); 311 int byten = 0; 312 uint64_t value = 0; 313 zap_leaf_t *l = zeh->zeh_found_leaf; 314 315 ASSERT3U(array_int_len, <=, buf_int_len); 316 317 /* Fast path for one 8-byte integer */ 318 if (array_int_len == 8 && buf_int_len == 8 && len == 1) { 319 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 320 uint8_t *ip = la->la_array; 321 uint64_t *buf64 = (uint64_t *)buf; 322 323 *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | 324 (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | 325 (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | 326 (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; 327 return; 328 } 329 330 /* Fast path for an array of 1-byte integers (eg. the entry name) */ 331 if (array_int_len == 1 && buf_int_len == 1 && 332 buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) { 333 while (chunk != CHAIN_END) { 334 struct zap_leaf_array *la = 335 &l->l_phys->l_chunk[chunk].l_array; 336 bcopy(la->la_array, buf, ZAP_LEAF_ARRAY_BYTES); 337 buf += ZAP_LEAF_ARRAY_BYTES; 338 chunk = la->la_next; 339 } 340 return; 341 } 342 343 while (len > 0) { 344 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 345 int i; 346 347 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 348 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { 349 value = (value << 8) | la->la_array[i]; 350 byten++; 351 if (byten == array_int_len) { 352 stv(buf_int_len, buf, value); 353 byten = 0; 354 len--; 355 if (len == 0) 356 return; 357 buf += buf_int_len; 358 } 359 } 360 chunk = la->la_next; 361 } 362 } 363 364 /* 365 * Only to be used on 8-bit arrays. 366 * array_len is actual len in bytes (not encoded le_value_length). 367 * buf is null-terminated. 368 */ 369 static int 370 zap_leaf_array_equal(const zap_entry_handle_t *zeh, int chunk, 371 int array_len, const char *buf) 372 { 373 int bseen = 0; 374 zap_leaf_t *l = zeh->zeh_found_leaf; 375 376 while (bseen < array_len) { 377 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 378 int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); 379 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 380 if (bcmp(la->la_array, buf + bseen, toread)) 381 break; 382 chunk = la->la_next; 383 bseen += toread; 384 } 385 return (bseen == array_len); 386 } 387 388 /* 389 * Routines which manipulate leaf entries. 390 */ 391 392 int 393 zap_leaf_lookup(zap_leaf_t *l, 394 const char *name, uint64_t h, zap_entry_handle_t *zeh) 395 { 396 uint16_t *chunkp; 397 struct zap_leaf_entry *le; 398 399 zeh->zeh_head_leaf = l; 400 401 again: 402 ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC); 403 404 for (chunkp = LEAF_HASH_ENTPTR(l, h); 405 *chunkp != CHAIN_END; chunkp = &le->le_next) { 406 uint16_t chunk = *chunkp; 407 le = &l->l_phys->l_chunk[chunk].l_entry; 408 409 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 410 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 411 412 if (le->le_hash != h) 413 continue; 414 415 zeh->zeh_found_leaf = l; 416 if (zap_leaf_array_equal(zeh, le->le_name_chunk, 417 le->le_name_length, name)) { 418 zeh->zeh_num_integers = le->le_value_length; 419 zeh->zeh_integer_size = le->le_int_size; 420 zeh->zeh_cd = le->le_cd; 421 zeh->zeh_hash = le->le_hash; 422 zeh->zeh_chunkp = chunkp; 423 zeh->zeh_found_leaf = l; 424 return (0); 425 } 426 } 427 428 if (l->l_next) { 429 l = l->l_next; 430 goto again; 431 } 432 433 return (ENOENT); 434 } 435 436 /* Return (h1,cd1 >= h2,cd2) */ 437 #define HCD_GTEQ(h1, cd1, h2, cd2) \ 438 ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE)) 439 440 int 441 zap_leaf_lookup_closest(zap_leaf_t *l, 442 uint64_t h, uint32_t cd, zap_entry_handle_t *zeh) 443 { 444 uint16_t chunk; 445 uint64_t besth = -1ULL; 446 uint32_t bestcd = ZAP_MAXCD; 447 uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES-1; 448 uint16_t lh; 449 struct zap_leaf_entry *le; 450 451 zeh->zeh_head_leaf = l; 452 453 again: 454 ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC); 455 456 for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { 457 for (chunk = l->l_phys->l_hash[lh]; 458 chunk != CHAIN_END; chunk = le->le_next) { 459 le = &l->l_phys->l_chunk[chunk].l_entry; 460 461 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 462 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 463 464 if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) && 465 HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) { 466 ASSERT3U(bestlh, >=, lh); 467 bestlh = lh; 468 besth = le->le_hash; 469 bestcd = le->le_cd; 470 471 zeh->zeh_num_integers = le->le_value_length; 472 zeh->zeh_integer_size = le->le_int_size; 473 zeh->zeh_cd = le->le_cd; 474 zeh->zeh_hash = le->le_hash; 475 zeh->zeh_fakechunk = chunk; 476 zeh->zeh_chunkp = &zeh->zeh_fakechunk; 477 zeh->zeh_found_leaf = l; 478 } 479 } 480 } 481 482 if (l->l_next) { 483 l = l->l_next; 484 goto again; 485 } 486 487 return (bestcd == ZAP_MAXCD ? ENOENT : 0); 488 } 489 490 int 491 zap_entry_read(const zap_entry_handle_t *zeh, 492 uint8_t integer_size, uint64_t num_integers, void *buf) 493 { 494 struct zap_leaf_entry *le; 495 496 le = &zeh->zeh_found_leaf->l_phys->l_chunk[*zeh->zeh_chunkp].l_entry; 497 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 498 499 if (le->le_int_size > integer_size) 500 return (EINVAL); 501 502 zap_leaf_array_read(zeh, le->le_value_chunk, le->le_int_size, 503 le->le_value_length, integer_size, num_integers, buf); 504 505 if (zeh->zeh_num_integers > num_integers) 506 return (EOVERFLOW); 507 return (0); 508 509 } 510 511 int 512 zap_entry_read_name(const zap_entry_handle_t *zeh, uint16_t buflen, char *buf) 513 { 514 struct zap_leaf_entry *le; 515 516 le = &zeh->zeh_found_leaf->l_phys->l_chunk[*zeh->zeh_chunkp].l_entry; 517 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 518 519 zap_leaf_array_read(zeh, le->le_name_chunk, 1, 520 le->le_name_length, 1, buflen, buf); 521 if (le->le_name_length > buflen) 522 return (EOVERFLOW); 523 return (0); 524 } 525 526 int 527 zap_entry_update(zap_entry_handle_t *zeh, 528 uint8_t integer_size, uint64_t num_integers, const void *buf) 529 { 530 int delta_chunks; 531 struct zap_leaf_entry *le; 532 le = &zeh->zeh_found_leaf->l_phys->l_chunk[*zeh->zeh_chunkp].l_entry; 533 534 delta_chunks = NCHUNKS(num_integers * integer_size) - 535 NCHUNKS(le->le_value_length * le->le_int_size); 536 537 if (zeh->zeh_found_leaf->lh_nfree < delta_chunks) 538 return (EAGAIN); 539 540 /* 541 * We should search other chained leaves (via 542 * zap_entry_remove,create?) otherwise returning EAGAIN will 543 * just send us into an infinite loop if we have to chain 544 * another leaf block, rather than being able to split this 545 * block. 546 */ 547 548 zap_leaf_array_free(zeh, &le->le_value_chunk); 549 le->le_value_chunk = 550 zap_leaf_array_create(zeh, buf, integer_size, num_integers); 551 le->le_value_length = (num_integers*integer_size > MAX_ARRAY_BYTES) ? 552 (MAX_ARRAY_BYTES + 1) : (num_integers); 553 le->le_int_size = integer_size; 554 return (0); 555 } 556 557 void 558 zap_entry_remove(zap_entry_handle_t *zeh) 559 { 560 uint16_t entry_chunk; 561 struct zap_leaf_entry *le; 562 zap_leaf_t *l = zeh->zeh_found_leaf; 563 564 ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk); 565 566 entry_chunk = *zeh->zeh_chunkp; 567 le = &l->l_phys->l_chunk[entry_chunk].l_entry; 568 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 569 570 zap_leaf_array_free(zeh, &le->le_name_chunk); 571 zap_leaf_array_free(zeh, &le->le_value_chunk); 572 573 *zeh->zeh_chunkp = le->le_next; 574 zap_leaf_chunk_free(l, entry_chunk); 575 576 l->lh_nentries--; 577 } 578 579 int 580 zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd, 581 uint8_t integer_size, uint64_t num_integers, const void *buf, 582 zap_entry_handle_t *zeh) 583 { 584 uint16_t chunk; 585 uint16_t *chunkp; 586 struct zap_leaf_entry *le; 587 uint64_t namelen, valuelen; 588 int numchunks; 589 590 valuelen = integer_size * num_integers; 591 namelen = strlen(name) + 1; 592 ASSERT(namelen >= 2); 593 594 zeh->zeh_head_leaf = l; 595 596 if (namelen > MAXNAMELEN) 597 return (ENAMETOOLONG); 598 /* find the first leaf in the chain that has sufficient free space */ 599 numchunks = 1 + NCHUNKS(namelen) + NCHUNKS(valuelen); 600 if (numchunks > ZAP_LEAF_NUMCHUNKS) 601 return (E2BIG); 602 603 if (cd == ZAP_MAXCD) { 604 for (cd = 0; cd < ZAP_MAXCD; cd++) { 605 zap_leaf_t *ll; 606 for (ll = l; ll; ll = ll->l_next) { 607 for (chunk = *LEAF_HASH_ENTPTR(ll, h); 608 chunk != CHAIN_END; chunk = le->le_next) { 609 le = &ll->l_phys->l_chunk 610 [chunk].l_entry; 611 if (le->le_hash == h && 612 le->le_cd == cd) { 613 break; 614 } 615 } 616 /* 617 * if this cd is in use, no need to 618 * check more chained leafs 619 */ 620 if (chunk != CHAIN_END) 621 break; 622 } 623 /* If this cd is not in use, we are good. */ 624 if (chunk == CHAIN_END) 625 break; 626 } 627 /* If we tried all the cd's, we lose. */ 628 if (cd == ZAP_MAXCD) 629 return (ENOSPC); 630 } 631 632 for (; l; l = l->l_next) 633 if (l->lh_nfree >= numchunks) 634 break; 635 if (l == NULL) 636 return (EAGAIN); 637 638 zeh->zeh_found_leaf = l; 639 640 /* make the entry */ 641 chunk = zap_leaf_chunk_alloc(l); 642 le = &l->l_phys->l_chunk[chunk].l_entry; 643 le->le_type = ZAP_LEAF_ENTRY; 644 le->le_name_chunk = zap_leaf_array_create(zeh, name, 1, namelen); 645 le->le_name_length = namelen; 646 le->le_value_chunk = 647 zap_leaf_array_create(zeh, buf, integer_size, num_integers); 648 le->le_value_length = (num_integers*integer_size > MAX_ARRAY_BYTES) ? 649 (MAX_ARRAY_BYTES + 1) : (num_integers); 650 le->le_int_size = integer_size; 651 le->le_hash = h; 652 le->le_cd = cd; 653 654 /* link it into the hash chain */ 655 chunkp = LEAF_HASH_ENTPTR(l, h); 656 le->le_next = *chunkp; 657 *chunkp = chunk; 658 659 l->lh_nentries++; 660 661 zeh->zeh_num_integers = num_integers; 662 zeh->zeh_integer_size = le->le_int_size; 663 zeh->zeh_cd = le->le_cd; 664 zeh->zeh_hash = le->le_hash; 665 zeh->zeh_chunkp = chunkp; 666 667 return (0); 668 } 669 670 /* 671 * Routines for transferring entries between leafs. 672 */ 673 674 static void 675 zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) 676 { 677 struct zap_leaf_entry *le = &l->l_phys->l_chunk[entry].l_entry; 678 uint16_t *ptr = LEAF_HASH_ENTPTR(l, le->le_hash); 679 le->le_next = *ptr; 680 *ptr = entry; 681 } 682 683 static void 684 zap_leaf_rehash_entries(zap_leaf_t *l) 685 { 686 int i; 687 688 if (l->lh_nentries == 0) 689 return; 690 691 /* break existing hash chains */ 692 zap_memset(l->l_phys->l_hash, CHAIN_END, sizeof (l->l_phys->l_hash)); 693 694 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 695 struct zap_leaf_entry *le = &l->l_phys->l_chunk[i].l_entry; 696 if (le->le_type != ZAP_LEAF_ENTRY) 697 continue; 698 zap_leaf_rehash_entry(l, i); 699 } 700 } 701 702 static uint16_t 703 zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl) 704 { 705 uint16_t new_chunk; 706 uint16_t *nchunkp = &new_chunk; 707 708 while (chunk != CHAIN_END) { 709 uint16_t nchunk = zap_leaf_chunk_alloc(nl); 710 struct zap_leaf_array *nla = 711 &nl->l_phys->l_chunk[nchunk].l_array; 712 struct zap_leaf_array *la = 713 &l->l_phys->l_chunk[chunk].l_array; 714 int nextchunk = la->la_next; 715 716 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 717 ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS); 718 719 *nla = *la; 720 721 zap_leaf_chunk_free(l, chunk); 722 chunk = nextchunk; 723 *nchunkp = nchunk; 724 nchunkp = &nla->la_next; 725 } 726 *nchunkp = CHAIN_END; 727 return (new_chunk); 728 } 729 730 static void 731 zap_leaf_transfer_entry(zap_t *zap, zap_leaf_t *l, int entry, zap_leaf_t *nhl, 732 dmu_tx_t *tx) 733 { 734 zap_leaf_t *nl; 735 struct zap_leaf_entry *le, *nle; 736 uint16_t chunk, nchunks; 737 738 le = &l->l_phys->l_chunk[entry].l_entry; 739 ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY); 740 741 /* find a leaf in the destination leaf chain with enough free space */ 742 nchunks = 1 + NCHUNKS(le->le_name_length) + 743 NCHUNKS(le->le_value_length * le->le_int_size); 744 for (nl = nhl; nl; nl = nl->l_next) 745 if (nl->lh_nfree >= nchunks) 746 break; 747 if (nl == NULL) { 748 nl = zap_leaf_chainmore(nhl, zap_create_leaf(zap, tx)); 749 dprintf("transfer_entry: chaining leaf %x/%d\n", 750 nl->lh_prefix, nl->lh_prefix_len); 751 } 752 753 chunk = zap_leaf_chunk_alloc(nl); 754 nle = &nl->l_phys->l_chunk[chunk].l_entry; 755 *nle = *le; 756 757 zap_leaf_rehash_entry(nl, chunk); 758 759 nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); 760 nle->le_value_chunk = 761 zap_leaf_transfer_array(l, le->le_value_chunk, nl); 762 763 zap_leaf_chunk_free(l, entry); 764 765 l->lh_nentries--; 766 nl->lh_nentries++; 767 } 768 769 /* 770 * Transfer entries whose hash bit 'bit' is 1 to nl1, and 0 to nl0. 771 * Ignore leaf chaining in source (l), but chain in destinations. 772 * We'll re-chain all the entries in l as we go along. 773 */ 774 static void 775 zap_leaf_transfer_entries(zap_t *zap, zap_leaf_t *l, 776 zap_leaf_t *nl0, zap_leaf_t *nl1, int bit, dmu_tx_t *tx) 777 { 778 int i; 779 780 ASSERT(bit < 64 && bit >= 0); 781 /* break existing hash chains */ 782 zap_memset(l->l_phys->l_hash, CHAIN_END, sizeof (l->l_phys->l_hash)); 783 784 if (nl0 != l) 785 zap_leaf_rehash_entries(nl0); 786 if (nl1 != nl0) 787 zap_leaf_rehash_entries(nl1); 788 789 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 790 struct zap_leaf_entry *le = &l->l_phys->l_chunk[i].l_entry; 791 if (le->le_type != ZAP_LEAF_ENTRY) 792 continue; 793 794 /* 795 * We could find entries via hashtable instead. That 796 * would be O(hashents+numents) rather than 797 * O(numblks+numents), but this accesses memory more 798 * sequentially, and when we're called, the block is 799 * usually pretty full. 800 */ 801 802 if (le->le_hash & (1ULL << bit)) { 803 zap_leaf_transfer_entry(zap, l, i, nl1, tx); 804 } else { 805 if (nl0 == l) 806 zap_leaf_rehash_entry(l, i); 807 else 808 zap_leaf_transfer_entry(zap, l, i, nl0, tx); 809 } 810 } 811 812 } 813 814 /* 815 * nl will contain the entries whose hash prefix ends in 1 816 * handles leaf chaining 817 */ 818 zap_leaf_t * 819 zap_leaf_split(zap_t *zap, zap_leaf_t *hl, dmu_tx_t *tx) 820 { 821 zap_leaf_t *l = hl; 822 int bit = 64 - 1 - hl->lh_prefix_len; 823 zap_leaf_t *nl = zap_create_leaf(zap, tx); 824 825 /* set new prefix and prefix_len */ 826 hl->lh_prefix <<= 1; 827 hl->lh_prefix_len++; 828 nl->lh_prefix = hl->lh_prefix | 1; 829 nl->lh_prefix_len = hl->lh_prefix_len; 830 831 /* transfer odd entries from first leaf in hl chain to nl */ 832 zap_leaf_transfer_entries(zap, hl, hl, nl, bit, tx); 833 834 /* take rest of chain off hl */ 835 l = hl->l_next; 836 hl->l_next = NULL; 837 hl->lh_next = 0; 838 839 /* transfer even entries from hl chain back to hl, odd entries to nl */ 840 while (l) { 841 zap_leaf_t *next = l->l_next; 842 zap_leaf_transfer_entries(zap, l, hl, nl, bit, tx); 843 zap_destroy_leaf(zap, l, tx); 844 l = next; 845 } 846 847 return (nl); 848 } 849 850 void 851 zap_stats_leaf(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) 852 { 853 int n, nchained = 0; 854 855 n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - l->lh_prefix_len; 856 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 857 zs->zs_leafs_with_2n_pointers[n]++; 858 859 do { 860 int i; 861 862 n = l->lh_nentries/5; 863 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 864 zs->zs_blocks_with_n5_entries[n]++; 865 866 n = ((1<<ZAP_BLOCK_SHIFT) - 867 l->lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / 868 (1<<ZAP_BLOCK_SHIFT); 869 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 870 zs->zs_blocks_n_tenths_full[n]++; 871 872 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES; i++) { 873 int nentries = 0; 874 int chunk = l->l_phys->l_hash[i]; 875 876 while (chunk != CHAIN_END) { 877 struct zap_leaf_entry *le = 878 &l->l_phys->l_chunk[chunk].l_entry; 879 880 n = 1 + NCHUNKS(le->le_name_length) + 881 NCHUNKS(le->le_value_length * 882 le->le_int_size); 883 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 884 zs->zs_entries_using_n_chunks[n]++; 885 886 chunk = le->le_next; 887 nentries++; 888 } 889 890 n = nentries; 891 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 892 zs->zs_buckets_with_n_entries[n]++; 893 } 894 895 nchained++; 896 l = l->l_next; 897 } while (l); 898 899 n = nchained-1; 900 n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 901 zs->zs_leafs_with_n_chained[n]++; 902 } 903