/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 */

/*
 * The 512-byte leaf is broken into 32 16-byte chunks.
 * Chunk number n means l_chunk[n], even though the header precedes it.
 * The names are stored null-terminated.
 */

#include <sys/zio.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/zfs_context.h>
#include <sys/fs/zfs.h>
#include <sys/zap.h>
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
#include <sys/arc.h>

static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, struct zap_leaf_entry *le,
    uint16_t entry);

#define	CHAIN_END	0xffff	/* end of the chunk chain */

#define	LEAF_HASH(l, h) \
	((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
	((h) >> \
	(64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))

#define	LEAF_HASH_ENTPTR(l, h)	(&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])

/* Store an integer of the given byte width at addr, in native order. */
static void
stv(int len, void *addr, uint64_t value)
{
	switch (len) {
	case 1:
		*(uint8_t *)addr = value;
		return;
	case 2:
		*(uint16_t *)addr = value;
		return;
	case 4:
		*(uint32_t *)addr = value;
		return;
	case 8:
		*(uint64_t *)addr = value;
		return;
	default:
		PANIC("bad int len %d", len);
	}
}

/* Load an integer of the given byte width from addr. */
static uint64_t
ldv(int len, const void *addr)
{
	switch (len) {
	case 1:
		return (*(const uint8_t *)addr);
	case 2:
		return (*(const uint16_t *)addr);
	case 4:
		return (*(const uint32_t *)addr);
	case 8:
		return (*(const uint64_t *)addr);
	default:
		PANIC("bad int len %d", len);
	}
	return (0xFEEDFACEDEADBEEFULL);
}
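/*
 * Illustrative example of LEAF_HASH() (values assumed, not from any
 * particular pool): with ZAP_LEAF_HASH_SHIFT(l) == 4 and
 * lh_prefix_len == 3, the top 3 bits of the 64-bit hash h were
 * already consumed by the pointer table to select this leaf, so
 * LEAF_HASH() uses the next 4 bits -- (h >> (64 - 4 - 3)) masked to
 * 4 bits -- as the index into l_hash[].  Entries whose hashes
 * collide in those bits are chained together through le_next.
 */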
void
zap_leaf_byteswap(zap_leaf_phys_t *buf, size_t size)
{
	zap_leaf_t l;
	dmu_buf_t l_dbuf;

	l_dbuf.db_data = buf;
	l.l_bs = highbit64(size) - 1;
	l.l_dbuf = &l_dbuf;

	buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type);
	buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix);
	buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic);
	buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree);
	buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries);
	buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len);
	buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist);

	for (uint_t i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
		buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);

	for (uint_t i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
		zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
		struct zap_leaf_entry *le;

		switch (lc->l_free.lf_type) {
		case ZAP_CHUNK_ENTRY:
			le = &lc->l_entry;

			le->le_type = BSWAP_8(le->le_type);
			le->le_value_intlen = BSWAP_8(le->le_value_intlen);
			le->le_next = BSWAP_16(le->le_next);
			le->le_name_chunk = BSWAP_16(le->le_name_chunk);
			le->le_name_numints = BSWAP_16(le->le_name_numints);
			le->le_value_chunk = BSWAP_16(le->le_value_chunk);
			le->le_value_numints = BSWAP_16(le->le_value_numints);
			le->le_cd = BSWAP_32(le->le_cd);
			le->le_hash = BSWAP_64(le->le_hash);
			break;
		case ZAP_CHUNK_FREE:
			lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type);
			lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next);
			break;
		case ZAP_CHUNK_ARRAY:
			lc->l_array.la_type = BSWAP_8(lc->l_array.la_type);
			lc->l_array.la_next = BSWAP_16(lc->l_array.la_next);
			/* la_array doesn't need swapping */
			break;
		default:
			cmn_err(CE_PANIC, "bad leaf type %d",
			    lc->l_free.lf_type);
		}
	}
}

void
zap_leaf_init(zap_leaf_t *l, boolean_t sort)
{
	l->l_bs = highbit64(l->l_dbuf->db_size) - 1;
	memset(&zap_leaf_phys(l)->l_hdr, 0,
	    sizeof (struct zap_leaf_header));
	/*
	 * Each l_hash[] slot is 2 bytes; filling every byte with 0xff
	 * sets each slot to CHAIN_END (0xffff).
	 */
	memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
	    2*ZAP_LEAF_HASH_NUMENTRIES(l));
	for (uint_t i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
		ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
		ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
	}
	ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
	zap_leaf_phys(l)->l_hdr.lh_block_type = ZBT_LEAF;
	zap_leaf_phys(l)->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
	zap_leaf_phys(l)->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
	if (sort)
		zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
}

/*
 * Routines which manipulate leaf chunks (l_chunk[]).
 */

static uint16_t
zap_leaf_chunk_alloc(zap_leaf_t *l)
{
	ASSERT(zap_leaf_phys(l)->l_hdr.lh_nfree > 0);

	uint_t chunk = zap_leaf_phys(l)->l_hdr.lh_freelist;
	ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
	ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);

	zap_leaf_phys(l)->l_hdr.lh_freelist =
	    ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;

	zap_leaf_phys(l)->l_hdr.lh_nfree--;

	return (chunk);
}

static void
zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
{
	struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
	ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
	ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
	ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);

	zlf->lf_type = ZAP_CHUNK_FREE;
	zlf->lf_next = zap_leaf_phys(l)->l_hdr.lh_freelist;
	memset(zlf->lf_pad, 0, sizeof (zlf->lf_pad)); /* help it to compress */
	zap_leaf_phys(l)->l_hdr.lh_freelist = chunk;

	zap_leaf_phys(l)->l_hdr.lh_nfree++;
}
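/*
 * Freelist behavior, sketched for illustration: after
 * zap_leaf_init(), lh_freelist is 0 and each free chunk's lf_next
 * points to the following chunk, so successive calls to
 * zap_leaf_chunk_alloc() return chunks 0, 1, 2, ...  Freed chunks
 * are pushed onto the front of the list, so the most recently freed
 * chunk is the next one allocated (LIFO order).
 */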
/*
 * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
 */

static uint16_t
zap_leaf_array_create(zap_leaf_t *l, const char *buf,
    int integer_size, int num_integers)
{
	uint16_t chunk_head;
	uint16_t *chunkp = &chunk_head;
	int byten = integer_size;
	uint64_t value = 0;
	int shift = (integer_size - 1) * 8;
	int len = num_integers;

	ASSERT3U(num_integers * integer_size, <=, ZAP_MAXVALUELEN);

	if (len > 0)
		value = ldv(integer_size, buf);
	while (len > 0) {
		uint16_t chunk = zap_leaf_chunk_alloc(l);
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;

		la->la_type = ZAP_CHUNK_ARRAY;
		for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
			la->la_array[i] = value >> shift;
			value <<= 8;
			if (--byten == 0) {
				if (--len == 0)
					break;
				byten = integer_size;
				buf += integer_size;
				value = ldv(integer_size, buf);
			}
		}

		*chunkp = chunk;
		chunkp = &la->la_next;
	}
	*chunkp = CHAIN_END;

	return (chunk_head);
}

static void
zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
{
	uint16_t chunk = *chunkp;

	*chunkp = CHAIN_END;

	while (chunk != CHAIN_END) {
		uint_t nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
		ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
		    ZAP_CHUNK_ARRAY);
		zap_leaf_chunk_free(l, chunk);
		chunk = nextchunk;
	}
}
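/*
 * Worked example for zap_leaf_array_create() above (input assumed,
 * for illustration only): storing the single 2-byte integer 0x1234
 * (integer_size == 2, num_integers == 1) writes la_array[0] = 0x12
 * and la_array[1] = 0x34.  Integers are packed big-endian into the
 * ZAP_LEAF_ARRAY_BYTES data bytes of each chunk, spilling into
 * additional chained chunks as needed.
 */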
/* array_len and buf_len are in integers, not bytes */
static void
zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
    int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
    void *buf)
{
	int len = MIN(array_len, buf_len);
	int byten = 0;
	uint64_t value = 0;
	char *p = buf;

	ASSERT3U(array_int_len, <=, buf_int_len);

	/* Fast path for one 8-byte integer */
	if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		uint8_t *ip = la->la_array;
		uint64_t *buf64 = buf;

		*buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
		    (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
		    (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
		    (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
		return;
	}

	/*
	 * Fast path for an array of 1-byte integers (e.g. the entry
	 * name).  Each memcpy() below copies a whole chunk's worth of
	 * bytes, which can overshoot array_len by up to
	 * ZAP_LEAF_ARRAY_BYTES - 1 bytes, hence the slack required of
	 * buf_len.
	 */
	if (array_int_len == 1 && buf_int_len == 1 &&
	    buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
		while (chunk != CHAIN_END) {
			struct zap_leaf_array *la =
			    &ZAP_LEAF_CHUNK(l, chunk).l_array;
			memcpy(p, la->la_array, ZAP_LEAF_ARRAY_BYTES);
			p += ZAP_LEAF_ARRAY_BYTES;
			chunk = la->la_next;
		}
		return;
	}

	while (len > 0) {
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
			value = (value << 8) | la->la_array[i];
			byten++;
			if (byten == array_int_len) {
				stv(buf_int_len, p, value);
				byten = 0;
				len--;
				if (len == 0)
					return;
				p += buf_int_len;
			}
		}
		chunk = la->la_next;
	}
}

static boolean_t
zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn,
    uint_t chunk, int array_numints)
{
	int bseen = 0;

	if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) {
		uint64_t *thiskey =
		    kmem_alloc(array_numints * sizeof (*thiskey), KM_SLEEP);
		ASSERT(zn->zn_key_intlen == sizeof (*thiskey));

		zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints,
		    sizeof (*thiskey), array_numints, thiskey);
		boolean_t match = memcmp(thiskey, zn->zn_key_orig,
		    array_numints * sizeof (*thiskey)) == 0;
		kmem_free(thiskey, array_numints * sizeof (*thiskey));
		return (match);
	}

	ASSERT(zn->zn_key_intlen == 1);
	if (zn->zn_matchtype & MT_NORMALIZE) {
		char *thisname = kmem_alloc(array_numints, KM_SLEEP);

		zap_leaf_array_read(l, chunk, sizeof (char), array_numints,
		    sizeof (char), array_numints, thisname);
		boolean_t match = zap_match(zn, thisname);
		kmem_free(thisname, array_numints);
		return (match);
	}

	/*
	 * Fast path for exact matching.
	 * First check that the lengths match, so that we don't read
	 * past the end of the zn_key_orig array.
	 */
	if (array_numints != zn->zn_key_orig_numints)
		return (B_FALSE);
	while (bseen < array_numints) {
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES);
		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		if (memcmp(la->la_array, (char *)zn->zn_key_orig + bseen,
		    toread))
			break;
		chunk = la->la_next;
		bseen += toread;
	}
	return (bseen == array_numints);
}
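/*
 * Matching modes, summarized (see zap_leaf_array_match() above):
 *  - ZAP_FLAG_UINT64_KEY zaps copy out and memcmp() the full array
 *    of 64-bit key words.
 *  - MT_NORMALIZE lookups read the stored name out and defer to
 *    zap_match() for normalization-aware comparison.
 *  - Everything else takes the exact-match fast path, comparing the
 *    stored name chunk by chunk without copying it out.
 */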
/*
 * Routines which manipulate leaf entries.
 */

int
zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
{
	struct zap_leaf_entry *le;

	ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

	for (uint16_t *chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
	    *chunkp != CHAIN_END; chunkp = &le->le_next) {
		uint16_t chunk = *chunkp;
		le = ZAP_LEAF_ENTRY(l, chunk);

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

		if (le->le_hash != zn->zn_hash)
			continue;

		/*
		 * NB: the entry chain is always sorted by cd on
		 * normalized zap objects, so this will find the
		 * lowest-cd match for MT_NORMALIZE.
		 */
		ASSERT((zn->zn_matchtype == 0) ||
		    (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
		if (zap_leaf_array_match(l, zn, le->le_name_chunk,
		    le->le_name_numints)) {
			zeh->zeh_num_integers = le->le_value_numints;
			zeh->zeh_integer_size = le->le_value_intlen;
			zeh->zeh_cd = le->le_cd;
			zeh->zeh_hash = le->le_hash;
			zeh->zeh_chunkp = chunkp;
			zeh->zeh_leaf = l;
			return (0);
		}
	}

	return (SET_ERROR(ENOENT));
}

/* Return (h1,cd1 >= h2,cd2) */
#define	HCD_GTEQ(h1, cd1, h2, cd2) \
	((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))

int
zap_leaf_lookup_closest(zap_leaf_t *l,
    uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
{
	uint64_t besth = -1ULL;
	uint32_t bestcd = -1U;
	uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
	struct zap_leaf_entry *le;

	ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

	for (uint16_t lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
		for (uint16_t chunk = zap_leaf_phys(l)->l_hash[lh];
		    chunk != CHAIN_END; chunk = le->le_next) {
			le = ZAP_LEAF_ENTRY(l, chunk);

			ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
			ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

			if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
			    HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
				ASSERT3U(bestlh, >=, lh);
				bestlh = lh;
				besth = le->le_hash;
				bestcd = le->le_cd;

				zeh->zeh_num_integers = le->le_value_numints;
				zeh->zeh_integer_size = le->le_value_intlen;
				zeh->zeh_cd = le->le_cd;
				zeh->zeh_hash = le->le_hash;
				zeh->zeh_fakechunk = chunk;
				zeh->zeh_chunkp = &zeh->zeh_fakechunk;
				zeh->zeh_leaf = l;
			}
		}
	}

	return (bestcd == -1U ? SET_ERROR(ENOENT) : 0);
}

int
zap_entry_read(const zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, void *buf)
{
	struct zap_leaf_entry *le =
	    ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	if (le->le_value_intlen > integer_size)
		return (SET_ERROR(EINVAL));

	zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk,
	    le->le_value_intlen, le->le_value_numints,
	    integer_size, num_integers, buf);

	if (zeh->zeh_num_integers > num_integers)
		return (SET_ERROR(EOVERFLOW));
	return (0);
}

int
zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen,
    char *buf)
{
	struct zap_leaf_entry *le =
	    ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
		zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8,
		    le->le_name_numints, 8, buflen / 8, buf);
	} else {
		zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
		    le->le_name_numints, 1, buflen, buf);
	}
	if (le->le_name_numints > buflen)
		return (SET_ERROR(EOVERFLOW));
	return (0);
}
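/*
 * Note on the EOVERFLOW convention used by zap_entry_read() and
 * zap_entry_read_name() above: the value (or name) is copied out,
 * truncated to the caller's buffer, before the length check, so when
 * EOVERFLOW is returned the buffer still holds a valid prefix of the
 * data.
 */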
int
zap_entry_update(zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, const void *buf)
{
	zap_leaf_t *l = zeh->zeh_leaf;
	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);

	int delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
	    ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen);

	if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks)
		return (SET_ERROR(EAGAIN));

	zap_leaf_array_free(l, &le->le_value_chunk);
	le->le_value_chunk =
	    zap_leaf_array_create(l, buf, integer_size, num_integers);
	le->le_value_numints = num_integers;
	le->le_value_intlen = integer_size;
	return (0);
}

void
zap_entry_remove(zap_entry_handle_t *zeh)
{
	zap_leaf_t *l = zeh->zeh_leaf;

	ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);

	uint16_t entry_chunk = *zeh->zeh_chunkp;
	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry_chunk);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	zap_leaf_array_free(l, &le->le_name_chunk);
	zap_leaf_array_free(l, &le->le_value_chunk);

	*zeh->zeh_chunkp = le->le_next;
	zap_leaf_chunk_free(l, entry_chunk);

	zap_leaf_phys(l)->l_hdr.lh_nentries--;
}

int
zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
    uint8_t integer_size, uint64_t num_integers, const void *buf,
    zap_entry_handle_t *zeh)
{
	uint16_t chunk;
	struct zap_leaf_entry *le;
	uint64_t h = zn->zn_hash;

	uint64_t valuelen = integer_size * num_integers;

	uint_t numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints *
	    zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
	if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
		return (SET_ERROR(E2BIG));

	if (cd == ZAP_NEED_CD) {
		/* find the lowest unused cd */
		if (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
			cd = 0;

			for (chunk = *LEAF_HASH_ENTPTR(l, h);
			    chunk != CHAIN_END; chunk = le->le_next) {
				le = ZAP_LEAF_ENTRY(l, chunk);
				if (le->le_cd > cd)
					break;
				if (le->le_hash == h) {
					ASSERT3U(cd, ==, le->le_cd);
					cd++;
				}
			}
		} else {
			/* old unsorted format; do it the O(n^2) way */
			for (cd = 0; ; cd++) {
				for (chunk = *LEAF_HASH_ENTPTR(l, h);
				    chunk != CHAIN_END; chunk = le->le_next) {
					le = ZAP_LEAF_ENTRY(l, chunk);
					if (le->le_hash == h &&
					    le->le_cd == cd) {
						break;
					}
				}
				/* If this cd is not in use, we are good. */
				if (chunk == CHAIN_END)
					break;
			}
		}
		/*
		 * We would run out of space in a block before we could
		 * store enough entries to run out of CD values.
		 */
		ASSERT3U(cd, <, zap_maxcd(zn->zn_zap));
	}

	if (zap_leaf_phys(l)->l_hdr.lh_nfree < numchunks)
		return (SET_ERROR(EAGAIN));

	/* make the entry */
	chunk = zap_leaf_chunk_alloc(l);
	le = ZAP_LEAF_ENTRY(l, chunk);
	le->le_type = ZAP_CHUNK_ENTRY;
	le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig,
	    zn->zn_key_intlen, zn->zn_key_orig_numints);
	le->le_name_numints = zn->zn_key_orig_numints;
	le->le_value_chunk =
	    zap_leaf_array_create(l, buf, integer_size, num_integers);
	le->le_value_numints = num_integers;
	le->le_value_intlen = integer_size;
	le->le_hash = h;
	le->le_cd = cd;

	/* link it into the hash chain */
	/* XXX if we did the search above, we could just use that */
	uint16_t *chunkp = zap_leaf_rehash_entry(l, le, chunk);

	zap_leaf_phys(l)->l_hdr.lh_nentries++;

	zeh->zeh_leaf = l;
	zeh->zeh_num_integers = num_integers;
	zeh->zeh_integer_size = le->le_value_intlen;
	zeh->zeh_cd = le->le_cd;
	zeh->zeh_hash = le->le_hash;
	zeh->zeh_chunkp = chunkp;

	return (0);
}
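/*
 * CD allocation example (hypothetical hash chain, for illustration):
 * if the cd-sorted chain for hash bucket LEAF_HASH(l, h) holds
 * entries with (hash, cd) of (h, 0), (h, 1), and (h', 3), a
 * ZAP_NEED_CD create for hash h walks the chain in zap_entry_create()
 * above and settles on cd == 2, the lowest value not used by an
 * existing entry with the same hash.
 */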
/*
 * Determine if there is another entry with the same normalized form.
 * For performance purposes, either zn or name must be provided (the
 * other can be NULL).  Note, there usually won't be any hash
 * conflicts, in which case we don't need the concatenated/normalized
 * form of the name.  But all callers have one of these on hand anyway,
 * so might as well take advantage.  A cleaner but slower interface
 * would accept neither argument, and compute the normalized name as
 * needed (using zap_name_alloc_str(zap_entry_read_name(zeh))).
 */
boolean_t
zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
    const char *name, zap_t *zap)
{
	struct zap_leaf_entry *le;
	boolean_t allocdzn = B_FALSE;

	if (zap->zap_normflags == 0)
		return (B_FALSE);

	for (uint16_t chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
	    chunk != CHAIN_END; chunk = le->le_next) {
		le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
		if (le->le_hash != zeh->zeh_hash)
			continue;
		if (le->le_cd == zeh->zeh_cd)
			continue;

		if (zn == NULL) {
			zn = zap_name_alloc_str(zap, name, MT_NORMALIZE);
			allocdzn = B_TRUE;
		}
		if (zap_leaf_array_match(zeh->zeh_leaf, zn,
		    le->le_name_chunk, le->le_name_numints)) {
			if (allocdzn)
				zap_name_free(zn);
			return (B_TRUE);
		}
	}
	if (allocdzn)
		zap_name_free(zn);
	return (B_FALSE);
}

/*
 * Routines for transferring entries between leaves.
 */

static uint16_t *
zap_leaf_rehash_entry(zap_leaf_t *l, struct zap_leaf_entry *le, uint16_t entry)
{
	struct zap_leaf_entry *le2;
	uint16_t *chunkp;

	/*
	 * keep the entry chain sorted by cd
	 * NB: this will not cause problems for unsorted leaves, though
	 * it is unnecessary there.
	 */
	for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
	    *chunkp != CHAIN_END; chunkp = &le2->le_next) {
		le2 = ZAP_LEAF_ENTRY(l, *chunkp);
		if (le2->le_cd > le->le_cd)
			break;
	}

	le->le_next = *chunkp;
	*chunkp = entry;
	return (chunkp);
}

static uint16_t
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
	uint16_t new_chunk;
	uint16_t *nchunkp = &new_chunk;

	while (chunk != CHAIN_END) {
		uint16_t nchunk = zap_leaf_chunk_alloc(nl);
		struct zap_leaf_array *nla =
		    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
		struct zap_leaf_array *la =
		    &ZAP_LEAF_CHUNK(l, chunk).l_array;
		uint_t nextchunk = la->la_next;

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));

		*nla = *la; /* structure assignment */

		zap_leaf_chunk_free(l, chunk);
		chunk = nextchunk;
		*nchunkp = nchunk;
		nchunkp = &nla->la_next;
	}
	*nchunkp = CHAIN_END;
	return (new_chunk);
}

static void
zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)
{
	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	uint16_t chunk = zap_leaf_chunk_alloc(nl);
	struct zap_leaf_entry *nle = ZAP_LEAF_ENTRY(nl, chunk);
	*nle = *le; /* structure assignment */

	(void) zap_leaf_rehash_entry(nl, nle, chunk);

	nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
	nle->le_value_chunk =
	    zap_leaf_transfer_array(l, le->le_value_chunk, nl);

	zap_leaf_chunk_free(l, entry);

	zap_leaf_phys(l)->l_hdr.lh_nentries--;
	zap_leaf_phys(nl)->l_hdr.lh_nentries++;
}
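/*
 * Split example (prefix values assumed, for illustration): a leaf
 * with lh_prefix == 0b10 and lh_prefix_len == 2 splits into a leaf
 * with prefix 0b100 and a new leaf with prefix 0b101, both with
 * prefix_len 3.  The distinguishing bit is hash bit
 * (64 - 1 - old prefix_len), i.e. the highest hash bit not already
 * implied by the old prefix.
 */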
/*
 * Transfer the entries whose hash prefix ends in 1 to the new leaf.
 */
void
zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
{
	uint_t bit = 64 - 1 - zap_leaf_phys(l)->l_hdr.lh_prefix_len;

	/* set new prefix and prefix_len */
	zap_leaf_phys(l)->l_hdr.lh_prefix <<= 1;
	zap_leaf_phys(l)->l_hdr.lh_prefix_len++;
	zap_leaf_phys(nl)->l_hdr.lh_prefix =
	    zap_leaf_phys(l)->l_hdr.lh_prefix | 1;
	zap_leaf_phys(nl)->l_hdr.lh_prefix_len =
	    zap_leaf_phys(l)->l_hdr.lh_prefix_len;

	/* break existing hash chains */
	memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
	    2*ZAP_LEAF_HASH_NUMENTRIES(l));

	if (sort)
		zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;

	/*
	 * Transfer entries whose hash bit 'bit' is set to nl; rehash
	 * the remaining entries.
	 *
	 * NB: We could find entries via the hashtable instead. That
	 * would be O(hashents+numents) rather than O(numblks+numents),
	 * but this accesses memory more sequentially, and when we're
	 * called, the block is usually pretty full.
	 */
	for (uint_t i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
		struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
		if (le->le_type != ZAP_CHUNK_ENTRY)
			continue;

		if (le->le_hash & (1ULL << bit))
			zap_leaf_transfer_entry(l, i, nl);
		else
			(void) zap_leaf_rehash_entry(l, le, i);
	}
}

void
zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
{
	uint_t n = zap_f_phys(zap)->zap_ptrtbl.zt_shift -
	    zap_leaf_phys(l)->l_hdr.lh_prefix_len;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_leafs_with_2n_pointers[n]++;

	n = zap_leaf_phys(l)->l_hdr.lh_nentries/5;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_blocks_with_n5_entries[n]++;

	/* estimate how full the block is, in tenths */
	n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
	    zap_leaf_phys(l)->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
	    (1<<FZAP_BLOCK_SHIFT(zap));
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_blocks_n_tenths_full[n]++;

	for (uint_t i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
		uint_t nentries = 0;
		uint_t chunk = zap_leaf_phys(l)->l_hash[i];

		while (chunk != CHAIN_END) {
			struct zap_leaf_entry *le =
			    ZAP_LEAF_ENTRY(l, chunk);

			n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) +
			    ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints *
			    le->le_value_intlen);
			n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
			zs->zs_entries_using_n_chunks[n]++;

			chunk = le->le_next;
			nentries++;
		}

		n = nentries;
		n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
		zs->zs_buckets_with_n_entries[n]++;
	}
}