1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013 by Delphix. All rights reserved. 24 */ 25 26 #include <sys/zio.h> 27 #include <sys/spa.h> 28 #include <sys/dmu.h> 29 #include <sys/zfs_context.h> 30 #include <sys/zap.h> 31 #include <sys/refcount.h> 32 #include <sys/zap_impl.h> 33 #include <sys/zap_leaf.h> 34 #include <sys/avl.h> 35 #include <sys/arc.h> 36 37 #ifdef _KERNEL 38 #include <sys/sunddi.h> 39 #endif 40 41 static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); 42 43 uint64_t 44 zap_getflags(zap_t *zap) 45 { 46 if (zap->zap_ismicro) 47 return (0); 48 return (zap->zap_u.zap_fat.zap_phys->zap_flags); 49 } 50 51 int 52 zap_hashbits(zap_t *zap) 53 { 54 if (zap_getflags(zap) & ZAP_FLAG_HASH64) 55 return (48); 56 else 57 return (28); 58 } 59 60 uint32_t 61 zap_maxcd(zap_t *zap) 62 { 63 if (zap_getflags(zap) & ZAP_FLAG_HASH64) 64 return ((1<<16)-1); 65 else 66 return (-1U); 67 } 68 69 static uint64_t 70 zap_hash(zap_name_t *zn) 71 { 72 zap_t *zap = zn->zn_zap; 73 uint64_t h = 0; 74 75 if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { 76 ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); 77 h = *(uint64_t *)zn->zn_key_orig; 78 } else { 79 h = zap->zap_salt; 80 ASSERT(h != 0); 81 ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 82 83 if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { 84 int i; 85 const uint64_t *wp = zn->zn_key_norm; 86 87 ASSERT(zn->zn_key_intlen == 8); 88 for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { 89 int j; 90 uint64_t word = *wp; 91 92 for (j = 0; j < zn->zn_key_intlen; j++) { 93 h = (h >> 8) ^ 94 zfs_crc64_table[(h ^ word) & 0xFF]; 95 word >>= NBBY; 96 } 97 } 98 } else { 99 int i, len; 100 const uint8_t *cp = zn->zn_key_norm; 101 102 /* 103 * We previously stored the terminating null on 104 * disk, but didn't hash it, so we need to 105 * continue to not hash it. (The 106 * zn_key_*_numints includes the terminating 107 * null for non-binary keys.) 108 */ 109 len = zn->zn_key_norm_numints - 1; 110 111 ASSERT(zn->zn_key_intlen == 1); 112 for (i = 0; i < len; cp++, i++) { 113 h = (h >> 8) ^ 114 zfs_crc64_table[(h ^ *cp) & 0xFF]; 115 } 116 } 117 } 118 /* 119 * Don't use all 64 bits, since we need some in the cookie for 120 * the collision differentiator. We MUST use the high bits, 121 * since those are the ones that we first pay attention to when 122 * chosing the bucket. 123 */ 124 h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); 125 126 return (h); 127 } 128 129 static int 130 zap_normalize(zap_t *zap, const char *name, char *namenorm) 131 { 132 size_t inlen, outlen; 133 int err; 134 135 ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); 136 137 inlen = strlen(name) + 1; 138 outlen = ZAP_MAXNAMELEN; 139 140 err = 0; 141 (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, 142 zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | 143 U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); 144 145 return (err); 146 } 147 148 boolean_t 149 zap_match(zap_name_t *zn, const char *matchname) 150 { 151 ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); 152 153 if (zn->zn_matchtype == MT_FIRST) { 154 char norm[ZAP_MAXNAMELEN]; 155 156 if (zap_normalize(zn->zn_zap, matchname, norm) != 0) 157 return (B_FALSE); 158 159 return (strcmp(zn->zn_key_norm, norm) == 0); 160 } else { 161 /* MT_BEST or MT_EXACT */ 162 return (strcmp(zn->zn_key_orig, matchname) == 0); 163 } 164 } 165 166 void 167 zap_name_free(zap_name_t *zn) 168 { 169 kmem_free(zn, sizeof (zap_name_t)); 170 } 171 172 zap_name_t * 173 zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) 174 { 175 zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 176 177 zn->zn_zap = zap; 178 zn->zn_key_intlen = sizeof (*key); 179 zn->zn_key_orig = key; 180 zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; 181 zn->zn_matchtype = mt; 182 if (zap->zap_normflags) { 183 if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { 184 zap_name_free(zn); 185 return (NULL); 186 } 187 zn->zn_key_norm = zn->zn_normbuf; 188 zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; 189 } else { 190 if (mt != MT_EXACT) { 191 zap_name_free(zn); 192 return (NULL); 193 } 194 zn->zn_key_norm = zn->zn_key_orig; 195 zn->zn_key_norm_numints = zn->zn_key_orig_numints; 196 } 197 198 zn->zn_hash = zap_hash(zn); 199 return (zn); 200 } 201 202 zap_name_t * 203 zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) 204 { 205 zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 206 207 ASSERT(zap->zap_normflags == 0); 208 zn->zn_zap = zap; 209 zn->zn_key_intlen = sizeof (*key); 210 zn->zn_key_orig = zn->zn_key_norm = key; 211 zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; 212 zn->zn_matchtype = MT_EXACT; 213 214 zn->zn_hash = zap_hash(zn); 215 return (zn); 216 } 217 218 static void 219 mzap_byteswap(mzap_phys_t *buf, size_t size) 220 { 221 int i, max; 222 buf->mz_block_type = BSWAP_64(buf->mz_block_type); 223 buf->mz_salt = BSWAP_64(buf->mz_salt); 224 buf->mz_normflags = BSWAP_64(buf->mz_normflags); 225 max = (size / MZAP_ENT_LEN) - 1; 226 for (i = 0; i < max; i++) { 227 buf->mz_chunk[i].mze_value = 228 BSWAP_64(buf->mz_chunk[i].mze_value); 229 buf->mz_chunk[i].mze_cd = 230 BSWAP_32(buf->mz_chunk[i].mze_cd); 231 } 232 } 233 234 void 235 zap_byteswap(void *buf, size_t size) 236 { 237 uint64_t block_type; 238 239 block_type = *(uint64_t *)buf; 240 241 if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { 242 /* ASSERT(magic == ZAP_LEAF_MAGIC); */ 243 mzap_byteswap(buf, size); 244 } else { 245 fzap_byteswap(buf, size); 246 } 247 } 248 249 static int 250 mze_compare(const void *arg1, const void *arg2) 251 { 252 const mzap_ent_t *mze1 = arg1; 253 const mzap_ent_t *mze2 = arg2; 254 255 if (mze1->mze_hash > mze2->mze_hash) 256 return (+1); 257 if (mze1->mze_hash < mze2->mze_hash) 258 return (-1); 259 if (mze1->mze_cd > mze2->mze_cd) 260 return (+1); 261 if (mze1->mze_cd < mze2->mze_cd) 262 return (-1); 263 return (0); 264 } 265 266 static void 267 mze_insert(zap_t *zap, int chunkid, uint64_t hash) 268 { 269 mzap_ent_t *mze; 270 271 ASSERT(zap->zap_ismicro); 272 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 273 274 mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); 275 mze->mze_chunkid = chunkid; 276 mze->mze_hash = hash; 277 mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; 278 ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); 279 avl_add(&zap->zap_m.zap_avl, mze); 280 } 281 282 static mzap_ent_t * 283 mze_find(zap_name_t *zn) 284 { 285 mzap_ent_t mze_tofind; 286 mzap_ent_t *mze; 287 avl_index_t idx; 288 avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; 289 290 ASSERT(zn->zn_zap->zap_ismicro); 291 ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); 292 293 mze_tofind.mze_hash = zn->zn_hash; 294 mze_tofind.mze_cd = 0; 295 296 again: 297 mze = avl_find(avl, &mze_tofind, &idx); 298 if (mze == NULL) 299 mze = avl_nearest(avl, idx, AVL_AFTER); 300 for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { 301 ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); 302 if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) 303 return (mze); 304 } 305 if (zn->zn_matchtype == MT_BEST) { 306 zn->zn_matchtype = MT_FIRST; 307 goto again; 308 } 309 return (NULL); 310 } 311 312 static uint32_t 313 mze_find_unused_cd(zap_t *zap, uint64_t hash) 314 { 315 mzap_ent_t mze_tofind; 316 mzap_ent_t *mze; 317 avl_index_t idx; 318 avl_tree_t *avl = &zap->zap_m.zap_avl; 319 uint32_t cd; 320 321 ASSERT(zap->zap_ismicro); 322 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 323 324 mze_tofind.mze_hash = hash; 325 mze_tofind.mze_cd = 0; 326 327 cd = 0; 328 for (mze = avl_find(avl, &mze_tofind, &idx); 329 mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 330 if (mze->mze_cd != cd) 331 break; 332 cd++; 333 } 334 335 return (cd); 336 } 337 338 static void 339 mze_remove(zap_t *zap, mzap_ent_t *mze) 340 { 341 ASSERT(zap->zap_ismicro); 342 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 343 344 avl_remove(&zap->zap_m.zap_avl, mze); 345 kmem_free(mze, sizeof (mzap_ent_t)); 346 } 347 348 static void 349 mze_destroy(zap_t *zap) 350 { 351 mzap_ent_t *mze; 352 void *avlcookie = NULL; 353 354 while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)) 355 kmem_free(mze, sizeof (mzap_ent_t)); 356 avl_destroy(&zap->zap_m.zap_avl); 357 } 358 359 static zap_t * 360 mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) 361 { 362 zap_t *winner; 363 zap_t *zap; 364 int i; 365 366 ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); 367 368 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); 369 rw_init(&zap->zap_rwlock, 0, 0, 0); 370 rw_enter(&zap->zap_rwlock, RW_WRITER); 371 zap->zap_objset = os; 372 zap->zap_object = obj; 373 zap->zap_dbuf = db; 374 375 if (*(uint64_t *)db->db_data != ZBT_MICRO) { 376 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 377 zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; 378 } else { 379 zap->zap_ismicro = TRUE; 380 } 381 382 /* 383 * Make sure that zap_ismicro is set before we let others see 384 * it, because zap_lockdir() checks zap_ismicro without the lock 385 * held. 386 */ 387 winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); 388 389 if (winner != NULL) { 390 rw_exit(&zap->zap_rwlock); 391 rw_destroy(&zap->zap_rwlock); 392 if (!zap->zap_ismicro) 393 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 394 kmem_free(zap, sizeof (zap_t)); 395 return (winner); 396 } 397 398 if (zap->zap_ismicro) { 399 zap->zap_salt = zap->zap_m.zap_phys->mz_salt; 400 zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags; 401 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; 402 avl_create(&zap->zap_m.zap_avl, mze_compare, 403 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); 404 405 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 406 mzap_ent_phys_t *mze = 407 &zap->zap_m.zap_phys->mz_chunk[i]; 408 if (mze->mze_name[0]) { 409 zap_name_t *zn; 410 411 zap->zap_m.zap_num_entries++; 412 zn = zap_name_alloc(zap, mze->mze_name, 413 MT_EXACT); 414 mze_insert(zap, i, zn->zn_hash); 415 zap_name_free(zn); 416 } 417 } 418 } else { 419 zap->zap_salt = zap->zap_f.zap_phys->zap_salt; 420 zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags; 421 422 ASSERT3U(sizeof (struct zap_leaf_header), ==, 423 2*ZAP_LEAF_CHUNKSIZE); 424 425 /* 426 * The embedded pointer table should not overlap the 427 * other members. 428 */ 429 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, 430 &zap->zap_f.zap_phys->zap_salt); 431 432 /* 433 * The embedded pointer table should end at the end of 434 * the block 435 */ 436 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 437 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - 438 (uintptr_t)zap->zap_f.zap_phys, ==, 439 zap->zap_dbuf->db_size); 440 } 441 rw_exit(&zap->zap_rwlock); 442 return (zap); 443 } 444 445 int 446 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, 447 krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) 448 { 449 zap_t *zap; 450 dmu_buf_t *db; 451 krw_t lt; 452 int err; 453 454 *zapp = NULL; 455 456 err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); 457 if (err) 458 return (err); 459 460 #ifdef ZFS_DEBUG 461 { 462 dmu_object_info_t doi; 463 dmu_object_info_from_db(db, &doi); 464 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 465 } 466 #endif 467 468 zap = dmu_buf_get_user(db); 469 if (zap == NULL) 470 zap = mzap_open(os, obj, db); 471 472 /* 473 * We're checking zap_ismicro without the lock held, in order to 474 * tell what type of lock we want. Once we have some sort of 475 * lock, see if it really is the right type. In practice this 476 * can only be different if it was upgraded from micro to fat, 477 * and micro wanted WRITER but fat only needs READER. 478 */ 479 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; 480 rw_enter(&zap->zap_rwlock, lt); 481 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { 482 /* it was upgraded, now we only need reader */ 483 ASSERT(lt == RW_WRITER); 484 ASSERT(RW_READER == 485 (!zap->zap_ismicro && fatreader) ? RW_READER : lti); 486 rw_downgrade(&zap->zap_rwlock); 487 lt = RW_READER; 488 } 489 490 zap->zap_objset = os; 491 492 if (lt == RW_WRITER) 493 dmu_buf_will_dirty(db, tx); 494 495 ASSERT3P(zap->zap_dbuf, ==, db); 496 497 ASSERT(!zap->zap_ismicro || 498 zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); 499 if (zap->zap_ismicro && tx && adding && 500 zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { 501 uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; 502 if (newsz > MZAP_MAX_BLKSZ) { 503 dprintf("upgrading obj %llu: num_entries=%u\n", 504 obj, zap->zap_m.zap_num_entries); 505 *zapp = zap; 506 return (mzap_upgrade(zapp, tx, 0)); 507 } 508 err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); 509 ASSERT0(err); 510 zap->zap_m.zap_num_chunks = 511 db->db_size / MZAP_ENT_LEN - 1; 512 } 513 514 *zapp = zap; 515 return (0); 516 } 517 518 void 519 zap_unlockdir(zap_t *zap) 520 { 521 rw_exit(&zap->zap_rwlock); 522 dmu_buf_rele(zap->zap_dbuf, NULL); 523 } 524 525 static int 526 mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) 527 { 528 mzap_phys_t *mzp; 529 int i, sz, nchunks; 530 int err = 0; 531 zap_t *zap = *zapp; 532 533 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 534 535 sz = zap->zap_dbuf->db_size; 536 mzp = kmem_alloc(sz, KM_SLEEP); 537 bcopy(zap->zap_dbuf->db_data, mzp, sz); 538 nchunks = zap->zap_m.zap_num_chunks; 539 540 if (!flags) { 541 err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 542 1ULL << fzap_default_block_shift, 0, tx); 543 if (err) { 544 kmem_free(mzp, sz); 545 return (err); 546 } 547 } 548 549 dprintf("upgrading obj=%llu with %u chunks\n", 550 zap->zap_object, nchunks); 551 /* XXX destroy the avl later, so we can use the stored hash value */ 552 mze_destroy(zap); 553 554 fzap_upgrade(zap, tx, flags); 555 556 for (i = 0; i < nchunks; i++) { 557 mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; 558 zap_name_t *zn; 559 if (mze->mze_name[0] == 0) 560 continue; 561 dprintf("adding %s=%llu\n", 562 mze->mze_name, mze->mze_value); 563 zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); 564 err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx); 565 zap = zn->zn_zap; /* fzap_add_cd() may change zap */ 566 zap_name_free(zn); 567 if (err) 568 break; 569 } 570 kmem_free(mzp, sz); 571 *zapp = zap; 572 return (err); 573 } 574 575 void 576 mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, 577 dmu_tx_t *tx) 578 { 579 dmu_buf_t *db; 580 mzap_phys_t *zp; 581 582 VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); 583 584 #ifdef ZFS_DEBUG 585 { 586 dmu_object_info_t doi; 587 dmu_object_info_from_db(db, &doi); 588 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 589 } 590 #endif 591 592 dmu_buf_will_dirty(db, tx); 593 zp = db->db_data; 594 zp->mz_block_type = ZBT_MICRO; 595 zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; 596 zp->mz_normflags = normflags; 597 dmu_buf_rele(db, FTAG); 598 599 if (flags != 0) { 600 zap_t *zap; 601 /* Only fat zap supports flags; upgrade immediately. */ 602 VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, 603 B_FALSE, B_FALSE, &zap)); 604 VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); 605 zap_unlockdir(zap); 606 } 607 } 608 609 int 610 zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, 611 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 612 { 613 return (zap_create_claim_norm(os, obj, 614 0, ot, bonustype, bonuslen, tx)); 615 } 616 617 int 618 zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, 619 dmu_object_type_t ot, 620 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 621 { 622 int err; 623 624 err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); 625 if (err != 0) 626 return (err); 627 mzap_create_impl(os, obj, normflags, 0, tx); 628 return (0); 629 } 630 631 uint64_t 632 zap_create(objset_t *os, dmu_object_type_t ot, 633 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 634 { 635 return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); 636 } 637 638 uint64_t 639 zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, 640 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 641 { 642 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 643 644 mzap_create_impl(os, obj, normflags, 0, tx); 645 return (obj); 646 } 647 648 uint64_t 649 zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, 650 dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 651 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 652 { 653 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 654 655 ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && 656 leaf_blockshift <= SPA_MAXBLOCKSHIFT && 657 indirect_blockshift >= SPA_MINBLOCKSHIFT && 658 indirect_blockshift <= SPA_MAXBLOCKSHIFT); 659 660 VERIFY(dmu_object_set_blocksize(os, obj, 661 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); 662 663 mzap_create_impl(os, obj, normflags, flags, tx); 664 return (obj); 665 } 666 667 int 668 zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) 669 { 670 /* 671 * dmu_object_free will free the object number and free the 672 * data. Freeing the data will cause our pageout function to be 673 * called, which will destroy our data (zap_leaf_t's and zap_t). 674 */ 675 676 return (dmu_object_free(os, zapobj, tx)); 677 } 678 679 _NOTE(ARGSUSED(0)) 680 void 681 zap_evict(dmu_buf_t *db, void *vzap) 682 { 683 zap_t *zap = vzap; 684 685 rw_destroy(&zap->zap_rwlock); 686 687 if (zap->zap_ismicro) 688 mze_destroy(zap); 689 else 690 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 691 692 kmem_free(zap, sizeof (zap_t)); 693 } 694 695 int 696 zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) 697 { 698 zap_t *zap; 699 int err; 700 701 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 702 if (err) 703 return (err); 704 if (!zap->zap_ismicro) { 705 err = fzap_count(zap, count); 706 } else { 707 *count = zap->zap_m.zap_num_entries; 708 } 709 zap_unlockdir(zap); 710 return (err); 711 } 712 713 /* 714 * zn may be NULL; if not specified, it will be computed if needed. 715 * See also the comment above zap_entry_normalization_conflict(). 716 */ 717 static boolean_t 718 mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) 719 { 720 mzap_ent_t *other; 721 int direction = AVL_BEFORE; 722 boolean_t allocdzn = B_FALSE; 723 724 if (zap->zap_normflags == 0) 725 return (B_FALSE); 726 727 again: 728 for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); 729 other && other->mze_hash == mze->mze_hash; 730 other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { 731 732 if (zn == NULL) { 733 zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, 734 MT_FIRST); 735 allocdzn = B_TRUE; 736 } 737 if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { 738 if (allocdzn) 739 zap_name_free(zn); 740 return (B_TRUE); 741 } 742 } 743 744 if (direction == AVL_BEFORE) { 745 direction = AVL_AFTER; 746 goto again; 747 } 748 749 if (allocdzn) 750 zap_name_free(zn); 751 return (B_FALSE); 752 } 753 754 /* 755 * Routines for manipulating attributes. 756 */ 757 758 int 759 zap_lookup(objset_t *os, uint64_t zapobj, const char *name, 760 uint64_t integer_size, uint64_t num_integers, void *buf) 761 { 762 return (zap_lookup_norm(os, zapobj, name, integer_size, 763 num_integers, buf, MT_EXACT, NULL, 0, NULL)); 764 } 765 766 int 767 zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, 768 uint64_t integer_size, uint64_t num_integers, void *buf, 769 matchtype_t mt, char *realname, int rn_len, 770 boolean_t *ncp) 771 { 772 zap_t *zap; 773 int err; 774 mzap_ent_t *mze; 775 zap_name_t *zn; 776 777 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 778 if (err) 779 return (err); 780 zn = zap_name_alloc(zap, name, mt); 781 if (zn == NULL) { 782 zap_unlockdir(zap); 783 return (SET_ERROR(ENOTSUP)); 784 } 785 786 if (!zap->zap_ismicro) { 787 err = fzap_lookup(zn, integer_size, num_integers, buf, 788 realname, rn_len, ncp); 789 } else { 790 mze = mze_find(zn); 791 if (mze == NULL) { 792 err = SET_ERROR(ENOENT); 793 } else { 794 if (num_integers < 1) { 795 err = SET_ERROR(EOVERFLOW); 796 } else if (integer_size != 8) { 797 err = SET_ERROR(EINVAL); 798 } else { 799 *(uint64_t *)buf = 800 MZE_PHYS(zap, mze)->mze_value; 801 (void) strlcpy(realname, 802 MZE_PHYS(zap, mze)->mze_name, rn_len); 803 if (ncp) { 804 *ncp = mzap_normalization_conflict(zap, 805 zn, mze); 806 } 807 } 808 } 809 } 810 zap_name_free(zn); 811 zap_unlockdir(zap); 812 return (err); 813 } 814 815 int 816 zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 817 int key_numints) 818 { 819 zap_t *zap; 820 int err; 821 zap_name_t *zn; 822 823 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 824 if (err) 825 return (err); 826 zn = zap_name_alloc_uint64(zap, key, key_numints); 827 if (zn == NULL) { 828 zap_unlockdir(zap); 829 return (SET_ERROR(ENOTSUP)); 830 } 831 832 fzap_prefetch(zn); 833 zap_name_free(zn); 834 zap_unlockdir(zap); 835 return (err); 836 } 837 838 int 839 zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 840 int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) 841 { 842 zap_t *zap; 843 int err; 844 zap_name_t *zn; 845 846 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 847 if (err) 848 return (err); 849 zn = zap_name_alloc_uint64(zap, key, key_numints); 850 if (zn == NULL) { 851 zap_unlockdir(zap); 852 return (SET_ERROR(ENOTSUP)); 853 } 854 855 err = fzap_lookup(zn, integer_size, num_integers, buf, 856 NULL, 0, NULL); 857 zap_name_free(zn); 858 zap_unlockdir(zap); 859 return (err); 860 } 861 862 int 863 zap_contains(objset_t *os, uint64_t zapobj, const char *name) 864 { 865 int err = zap_lookup_norm(os, zapobj, name, 0, 866 0, NULL, MT_EXACT, NULL, 0, NULL); 867 if (err == EOVERFLOW || err == EINVAL) 868 err = 0; /* found, but skipped reading the value */ 869 return (err); 870 } 871 872 int 873 zap_length(objset_t *os, uint64_t zapobj, const char *name, 874 uint64_t *integer_size, uint64_t *num_integers) 875 { 876 zap_t *zap; 877 int err; 878 mzap_ent_t *mze; 879 zap_name_t *zn; 880 881 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 882 if (err) 883 return (err); 884 zn = zap_name_alloc(zap, name, MT_EXACT); 885 if (zn == NULL) { 886 zap_unlockdir(zap); 887 return (SET_ERROR(ENOTSUP)); 888 } 889 if (!zap->zap_ismicro) { 890 err = fzap_length(zn, integer_size, num_integers); 891 } else { 892 mze = mze_find(zn); 893 if (mze == NULL) { 894 err = SET_ERROR(ENOENT); 895 } else { 896 if (integer_size) 897 *integer_size = 8; 898 if (num_integers) 899 *num_integers = 1; 900 } 901 } 902 zap_name_free(zn); 903 zap_unlockdir(zap); 904 return (err); 905 } 906 907 int 908 zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 909 int key_numints, uint64_t *integer_size, uint64_t *num_integers) 910 { 911 zap_t *zap; 912 int err; 913 zap_name_t *zn; 914 915 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 916 if (err) 917 return (err); 918 zn = zap_name_alloc_uint64(zap, key, key_numints); 919 if (zn == NULL) { 920 zap_unlockdir(zap); 921 return (SET_ERROR(ENOTSUP)); 922 } 923 err = fzap_length(zn, integer_size, num_integers); 924 zap_name_free(zn); 925 zap_unlockdir(zap); 926 return (err); 927 } 928 929 static void 930 mzap_addent(zap_name_t *zn, uint64_t value) 931 { 932 int i; 933 zap_t *zap = zn->zn_zap; 934 int start = zap->zap_m.zap_alloc_next; 935 uint32_t cd; 936 937 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 938 939 #ifdef ZFS_DEBUG 940 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 941 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 942 ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); 943 } 944 #endif 945 946 cd = mze_find_unused_cd(zap, zn->zn_hash); 947 /* given the limited size of the microzap, this can't happen */ 948 ASSERT(cd < zap_maxcd(zap)); 949 950 again: 951 for (i = start; i < zap->zap_m.zap_num_chunks; i++) { 952 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 953 if (mze->mze_name[0] == 0) { 954 mze->mze_value = value; 955 mze->mze_cd = cd; 956 (void) strcpy(mze->mze_name, zn->zn_key_orig); 957 zap->zap_m.zap_num_entries++; 958 zap->zap_m.zap_alloc_next = i+1; 959 if (zap->zap_m.zap_alloc_next == 960 zap->zap_m.zap_num_chunks) 961 zap->zap_m.zap_alloc_next = 0; 962 mze_insert(zap, i, zn->zn_hash); 963 return; 964 } 965 } 966 if (start != 0) { 967 start = 0; 968 goto again; 969 } 970 ASSERT(!"out of entries!"); 971 } 972 973 int 974 zap_add(objset_t *os, uint64_t zapobj, const char *key, 975 int integer_size, uint64_t num_integers, 976 const void *val, dmu_tx_t *tx) 977 { 978 zap_t *zap; 979 int err; 980 mzap_ent_t *mze; 981 const uint64_t *intval = val; 982 zap_name_t *zn; 983 984 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 985 if (err) 986 return (err); 987 zn = zap_name_alloc(zap, key, MT_EXACT); 988 if (zn == NULL) { 989 zap_unlockdir(zap); 990 return (SET_ERROR(ENOTSUP)); 991 } 992 if (!zap->zap_ismicro) { 993 err = fzap_add(zn, integer_size, num_integers, val, tx); 994 zap = zn->zn_zap; /* fzap_add() may change zap */ 995 } else if (integer_size != 8 || num_integers != 1 || 996 strlen(key) >= MZAP_NAME_LEN) { 997 err = mzap_upgrade(&zn->zn_zap, tx, 0); 998 if (err == 0) 999 err = fzap_add(zn, integer_size, num_integers, val, tx); 1000 zap = zn->zn_zap; /* fzap_add() may change zap */ 1001 } else { 1002 mze = mze_find(zn); 1003 if (mze != NULL) { 1004 err = SET_ERROR(EEXIST); 1005 } else { 1006 mzap_addent(zn, *intval); 1007 } 1008 } 1009 ASSERT(zap == zn->zn_zap); 1010 zap_name_free(zn); 1011 if (zap != NULL) /* may be NULL if fzap_add() failed */ 1012 zap_unlockdir(zap); 1013 return (err); 1014 } 1015 1016 int 1017 zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1018 int key_numints, int integer_size, uint64_t num_integers, 1019 const void *val, dmu_tx_t *tx) 1020 { 1021 zap_t *zap; 1022 int err; 1023 zap_name_t *zn; 1024 1025 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 1026 if (err) 1027 return (err); 1028 zn = zap_name_alloc_uint64(zap, key, key_numints); 1029 if (zn == NULL) { 1030 zap_unlockdir(zap); 1031 return (SET_ERROR(ENOTSUP)); 1032 } 1033 err = fzap_add(zn, integer_size, num_integers, val, tx); 1034 zap = zn->zn_zap; /* fzap_add() may change zap */ 1035 zap_name_free(zn); 1036 if (zap != NULL) /* may be NULL if fzap_add() failed */ 1037 zap_unlockdir(zap); 1038 return (err); 1039 } 1040 1041 int 1042 zap_update(objset_t *os, uint64_t zapobj, const char *name, 1043 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1044 { 1045 zap_t *zap; 1046 mzap_ent_t *mze; 1047 uint64_t oldval; 1048 const uint64_t *intval = val; 1049 zap_name_t *zn; 1050 int err; 1051 1052 #ifdef ZFS_DEBUG 1053 /* 1054 * If there is an old value, it shouldn't change across the 1055 * lockdir (eg, due to bprewrite's xlation). 1056 */ 1057 if (integer_size == 8 && num_integers == 1) 1058 (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); 1059 #endif 1060 1061 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 1062 if (err) 1063 return (err); 1064 zn = zap_name_alloc(zap, name, MT_EXACT); 1065 if (zn == NULL) { 1066 zap_unlockdir(zap); 1067 return (SET_ERROR(ENOTSUP)); 1068 } 1069 if (!zap->zap_ismicro) { 1070 err = fzap_update(zn, integer_size, num_integers, val, tx); 1071 zap = zn->zn_zap; /* fzap_update() may change zap */ 1072 } else if (integer_size != 8 || num_integers != 1 || 1073 strlen(name) >= MZAP_NAME_LEN) { 1074 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 1075 zapobj, integer_size, num_integers, name); 1076 err = mzap_upgrade(&zn->zn_zap, tx, 0); 1077 if (err == 0) 1078 err = fzap_update(zn, integer_size, num_integers, 1079 val, tx); 1080 zap = zn->zn_zap; /* fzap_update() may change zap */ 1081 } else { 1082 mze = mze_find(zn); 1083 if (mze != NULL) { 1084 ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); 1085 MZE_PHYS(zap, mze)->mze_value = *intval; 1086 } else { 1087 mzap_addent(zn, *intval); 1088 } 1089 } 1090 ASSERT(zap == zn->zn_zap); 1091 zap_name_free(zn); 1092 if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1093 zap_unlockdir(zap); 1094 return (err); 1095 } 1096 1097 int 1098 zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1099 int key_numints, 1100 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1101 { 1102 zap_t *zap; 1103 zap_name_t *zn; 1104 int err; 1105 1106 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 1107 if (err) 1108 return (err); 1109 zn = zap_name_alloc_uint64(zap, key, key_numints); 1110 if (zn == NULL) { 1111 zap_unlockdir(zap); 1112 return (SET_ERROR(ENOTSUP)); 1113 } 1114 err = fzap_update(zn, integer_size, num_integers, val, tx); 1115 zap = zn->zn_zap; /* fzap_update() may change zap */ 1116 zap_name_free(zn); 1117 if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1118 zap_unlockdir(zap); 1119 return (err); 1120 } 1121 1122 int 1123 zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) 1124 { 1125 return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); 1126 } 1127 1128 int 1129 zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, 1130 matchtype_t mt, dmu_tx_t *tx) 1131 { 1132 zap_t *zap; 1133 int err; 1134 mzap_ent_t *mze; 1135 zap_name_t *zn; 1136 1137 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); 1138 if (err) 1139 return (err); 1140 zn = zap_name_alloc(zap, name, mt); 1141 if (zn == NULL) { 1142 zap_unlockdir(zap); 1143 return (SET_ERROR(ENOTSUP)); 1144 } 1145 if (!zap->zap_ismicro) { 1146 err = fzap_remove(zn, tx); 1147 } else { 1148 mze = mze_find(zn); 1149 if (mze == NULL) { 1150 err = SET_ERROR(ENOENT); 1151 } else { 1152 zap->zap_m.zap_num_entries--; 1153 bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], 1154 sizeof (mzap_ent_phys_t)); 1155 mze_remove(zap, mze); 1156 } 1157 } 1158 zap_name_free(zn); 1159 zap_unlockdir(zap); 1160 return (err); 1161 } 1162 1163 int 1164 zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1165 int key_numints, dmu_tx_t *tx) 1166 { 1167 zap_t *zap; 1168 int err; 1169 zap_name_t *zn; 1170 1171 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); 1172 if (err) 1173 return (err); 1174 zn = zap_name_alloc_uint64(zap, key, key_numints); 1175 if (zn == NULL) { 1176 zap_unlockdir(zap); 1177 return (SET_ERROR(ENOTSUP)); 1178 } 1179 err = fzap_remove(zn, tx); 1180 zap_name_free(zn); 1181 zap_unlockdir(zap); 1182 return (err); 1183 } 1184 1185 /* 1186 * Routines for iterating over the attributes. 1187 */ 1188 1189 void 1190 zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, 1191 uint64_t serialized) 1192 { 1193 zc->zc_objset = os; 1194 zc->zc_zap = NULL; 1195 zc->zc_leaf = NULL; 1196 zc->zc_zapobj = zapobj; 1197 zc->zc_serialized = serialized; 1198 zc->zc_hash = 0; 1199 zc->zc_cd = 0; 1200 } 1201 1202 void 1203 zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) 1204 { 1205 zap_cursor_init_serialized(zc, os, zapobj, 0); 1206 } 1207 1208 void 1209 zap_cursor_fini(zap_cursor_t *zc) 1210 { 1211 if (zc->zc_zap) { 1212 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1213 zap_unlockdir(zc->zc_zap); 1214 zc->zc_zap = NULL; 1215 } 1216 if (zc->zc_leaf) { 1217 rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 1218 zap_put_leaf(zc->zc_leaf); 1219 zc->zc_leaf = NULL; 1220 } 1221 zc->zc_objset = NULL; 1222 } 1223 1224 uint64_t 1225 zap_cursor_serialize(zap_cursor_t *zc) 1226 { 1227 if (zc->zc_hash == -1ULL) 1228 return (-1ULL); 1229 if (zc->zc_zap == NULL) 1230 return (zc->zc_serialized); 1231 ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); 1232 ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); 1233 1234 /* 1235 * We want to keep the high 32 bits of the cursor zero if we can, so 1236 * that 32-bit programs can access this. So usually use a small 1237 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits 1238 * of the cursor. 1239 * 1240 * [ collision differentiator | zap_hashbits()-bit hash value ] 1241 */ 1242 return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | 1243 ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); 1244 } 1245 1246 int 1247 zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) 1248 { 1249 int err; 1250 avl_index_t idx; 1251 mzap_ent_t mze_tofind; 1252 mzap_ent_t *mze; 1253 1254 if (zc->zc_hash == -1ULL) 1255 return (SET_ERROR(ENOENT)); 1256 1257 if (zc->zc_zap == NULL) { 1258 int hb; 1259 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 1260 RW_READER, TRUE, FALSE, &zc->zc_zap); 1261 if (err) 1262 return (err); 1263 1264 /* 1265 * To support zap_cursor_init_serialized, advance, retrieve, 1266 * we must add to the existing zc_cd, which may already 1267 * be 1 due to the zap_cursor_advance. 1268 */ 1269 ASSERT(zc->zc_hash == 0); 1270 hb = zap_hashbits(zc->zc_zap); 1271 zc->zc_hash = zc->zc_serialized << (64 - hb); 1272 zc->zc_cd += zc->zc_serialized >> hb; 1273 if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ 1274 zc->zc_cd = 0; 1275 } else { 1276 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1277 } 1278 if (!zc->zc_zap->zap_ismicro) { 1279 err = fzap_cursor_retrieve(zc->zc_zap, zc, za); 1280 } else { 1281 mze_tofind.mze_hash = zc->zc_hash; 1282 mze_tofind.mze_cd = zc->zc_cd; 1283 1284 mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); 1285 if (mze == NULL) { 1286 mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, 1287 idx, AVL_AFTER); 1288 } 1289 if (mze) { 1290 mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); 1291 ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); 1292 za->za_normalization_conflict = 1293 mzap_normalization_conflict(zc->zc_zap, NULL, mze); 1294 za->za_integer_length = 8; 1295 za->za_num_integers = 1; 1296 za->za_first_integer = mzep->mze_value; 1297 (void) strcpy(za->za_name, mzep->mze_name); 1298 zc->zc_hash = mze->mze_hash; 1299 zc->zc_cd = mze->mze_cd; 1300 err = 0; 1301 } else { 1302 zc->zc_hash = -1ULL; 1303 err = SET_ERROR(ENOENT); 1304 } 1305 } 1306 rw_exit(&zc->zc_zap->zap_rwlock); 1307 return (err); 1308 } 1309 1310 void 1311 zap_cursor_advance(zap_cursor_t *zc) 1312 { 1313 if (zc->zc_hash == -1ULL) 1314 return; 1315 zc->zc_cd++; 1316 } 1317 1318 int 1319 zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) 1320 { 1321 int err; 1322 zap_t *zap; 1323 1324 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 1325 if (err) 1326 return (err); 1327 1328 bzero(zs, sizeof (zap_stats_t)); 1329 1330 if (zap->zap_ismicro) { 1331 zs->zs_blocksize = zap->zap_dbuf->db_size; 1332 zs->zs_num_entries = zap->zap_m.zap_num_entries; 1333 zs->zs_num_blocks = 1; 1334 } else { 1335 fzap_get_stats(zap, zs); 1336 } 1337 zap_unlockdir(zap); 1338 return (0); 1339 } 1340 1341 int 1342 zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, 1343 uint64_t *towrite, uint64_t *tooverwrite) 1344 { 1345 zap_t *zap; 1346 int err = 0; 1347 1348 1349 /* 1350 * Since, we don't have a name, we cannot figure out which blocks will 1351 * be affected in this operation. So, account for the worst case : 1352 * - 3 blocks overwritten: target leaf, ptrtbl block, header block 1353 * - 4 new blocks written if adding: 1354 * - 2 blocks for possibly split leaves, 1355 * - 2 grown ptrtbl blocks 1356 * 1357 * This also accomodates the case where an add operation to a fairly 1358 * large microzap results in a promotion to fatzap. 1359 */ 1360 if (name == NULL) { 1361 *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; 1362 return (err); 1363 } 1364 1365 /* 1366 * We lock the zap with adding == FALSE. Because, if we pass 1367 * the actual value of add, it could trigger a mzap_upgrade(). 1368 * At present we are just evaluating the possibility of this operation 1369 * and hence we donot want to trigger an upgrade. 1370 */ 1371 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 1372 if (err) 1373 return (err); 1374 1375 if (!zap->zap_ismicro) { 1376 zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); 1377 if (zn) { 1378 err = fzap_count_write(zn, add, towrite, 1379 tooverwrite); 1380 zap_name_free(zn); 1381 } else { 1382 /* 1383 * We treat this case as similar to (name == NULL) 1384 */ 1385 *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; 1386 } 1387 } else { 1388 /* 1389 * We are here if (name != NULL) and this is a micro-zap. 1390 * We account for the header block depending on whether it 1391 * is freeable. 1392 * 1393 * Incase of an add-operation it is hard to find out 1394 * if this add will promote this microzap to fatzap. 1395 * Hence, we consider the worst case and account for the 1396 * blocks assuming this microzap would be promoted to a 1397 * fatzap. 1398 * 1399 * 1 block overwritten : header block 1400 * 4 new blocks written : 2 new split leaf, 2 grown 1401 * ptrtbl blocks 1402 */ 1403 if (dmu_buf_freeable(zap->zap_dbuf)) 1404 *tooverwrite += SPA_MAXBLOCKSIZE; 1405 else 1406 *towrite += SPA_MAXBLOCKSIZE; 1407 1408 if (add) { 1409 *towrite += 4 * SPA_MAXBLOCKSIZE; 1410 } 1411 } 1412 1413 zap_unlockdir(zap); 1414 return (err); 1415 } 1416