/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zio.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/zfs_context.h>
#include <sys/zap.h>
#include <sys/refcount.h>
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
#include <sys/avl.h>

#ifdef _KERNEL
#include <sys/sunddi.h>
#endif

static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);

/*
 * Return the fat-zap flags, or 0 for a microzap (which has no flags word).
 */
uint64_t
zap_getflags(zap_t *zap)
{
	if (zap->zap_ismicro)
		return (0);
	return (zap->zap_u.zap_fat.zap_phys->zap_flags);
}

/*
 * Number of hash bits stored for each entry; the remaining low bits of
 * the 64-bit hash hold the collision differentiator (cd).
 */
int
zap_hashbits(zap_t *zap)
{
	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
		return (48);
	else
		return (28);
}

/*
 * Maximum collision differentiator value for this zap.
 */
uint32_t
zap_maxcd(zap_t *zap)
{
	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
		return ((1<<16)-1);
	else
		return (-1U);
}

/*
 * Compute the hash for a (already-normalized) zap name.  For
 * pre-hashed keys the first uint64 of the key IS the hash; otherwise a
 * salted CRC-64 over the key bytes is used.  Only the high
 * zap_hashbits() bits of the result are kept.
 */
static uint64_t
zap_hash(zap_name_t *zn)
{
	zap_t *zap = zn->zn_zap;
	uint64_t h = 0;

	if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
		ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
		h = *(uint64_t *)zn->zn_key_orig;
	} else {
		h = zap->zap_salt;
		ASSERT(h != 0);
		ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);

		if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
			int i;
			const uint64_t *wp = zn->zn_key_norm;

			ASSERT(zn->zn_key_intlen == 8);
			for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) {
				int j;
				uint64_t word = *wp;

				for (j = 0; j < zn->zn_key_intlen; j++) {
					h = (h >> 8) ^
					    zfs_crc64_table[(h ^ word) & 0xFF];
					word >>= NBBY;
				}
			}
		} else {
			int i, len;
			const uint8_t *cp = zn->zn_key_norm;

			/*
			 * We previously stored the terminating null on
			 * disk, but didn't hash it, so we need to
			 * continue to not hash it.  (The
			 * zn_key_*_numints includes the terminating
			 * null for non-binary keys.)
			 */
			len = zn->zn_key_norm_numints - 1;

			ASSERT(zn->zn_key_intlen == 1);
			for (i = 0; i < len; cp++, i++) {
				h = (h >> 8) ^
				    zfs_crc64_table[(h ^ *cp) & 0xFF];
			}
		}
	}
	/*
	 * Don't use all 64 bits, since we need some in the cookie for
	 * the collision differentiator.  We MUST use the high bits,
	 * since those are the ones that we first pay attention to when
	 * chosing the bucket.
	 */
	h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);

	return (h);
}

/*
 * Unicode-normalize 'name' into 'namenorm' (ZAP_MAXNAMELEN bytes)
 * using the zap's stored normalization flags.  Returns nonzero on
 * u8_textprep_str() failure.
 */
static int
zap_normalize(zap_t *zap, const char *name, char *namenorm)
{
	size_t inlen, outlen;
	int err;

	ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));

	inlen = strlen(name) + 1;
	outlen = ZAP_MAXNAMELEN;

	err = 0;
	(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
	    zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL |
	    U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err);

	return (err);
}

/*
 * Does 'matchname' match zn's key under zn's match type?  MT_FIRST
 * compares normalized forms; MT_BEST/MT_EXACT compare the original
 * (un-normalized) strings exactly.
 */
boolean_t
zap_match(zap_name_t *zn, const char *matchname)
{
	ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));

	if (zn->zn_matchtype == MT_FIRST) {
		char norm[ZAP_MAXNAMELEN];

		if (zap_normalize(zn->zn_zap, matchname, norm) != 0)
			return (B_FALSE);

		return (strcmp(zn->zn_key_norm, norm) == 0);
	} else {
		/* MT_BEST or MT_EXACT */
		return (strcmp(zn->zn_key_orig, matchname) == 0);
	}
}

void
zap_name_free(zap_name_t *zn)
{
	kmem_free(zn, sizeof (zap_name_t));
}

/*
 * Allocate a zap_name_t for string key 'key' with match type 'mt' and
 * precompute its hash.  Returns NULL if normalization fails, or if a
 * non-exact match type is requested on a zap with no normalization.
 * Caller frees with zap_name_free().
 */
zap_name_t *
zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
{
	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);

	zn->zn_zap = zap;
	zn->zn_key_intlen = sizeof (*key);
	zn->zn_key_orig = key;
	zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
	zn->zn_matchtype = mt;
	if (zap->zap_normflags) {
		if (zap_normalize(zap, key, zn->zn_normbuf) != 0) {
			zap_name_free(zn);
			return (NULL);
		}
		zn->zn_key_norm = zn->zn_normbuf;
		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
	} else {
		if (mt != MT_EXACT) {
			zap_name_free(zn);
			return (NULL);
		}
		zn->zn_key_norm = zn->zn_key_orig;
		zn->zn_key_norm_numints = zn->zn_key_orig_numints;
	}

	zn->zn_hash = zap_hash(zn);
	return (zn);
}

/*
 * Like zap_name_alloc(), but for uint64 (binary) keys; no
 * normalization applies.  Always succeeds (KM_SLEEP).
 */
zap_name_t *
zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
{
	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);

	ASSERT(zap->zap_normflags == 0);
	zn->zn_zap = zap;
	zn->zn_key_intlen = sizeof (*key);
	zn->zn_key_orig = zn->zn_key_norm = key;
	zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
	zn->zn_matchtype = MT_EXACT;

	zn->zn_hash = zap_hash(zn);
	return (zn);
}

/*
 * Byteswap an on-disk microzap block in place.  Entry names are byte
 * arrays and need no swapping.
 */
static void
mzap_byteswap(mzap_phys_t *buf, size_t size)
{
	int i, max;
	buf->mz_block_type = BSWAP_64(buf->mz_block_type);
	buf->mz_salt = BSWAP_64(buf->mz_salt);
	buf->mz_normflags = BSWAP_64(buf->mz_normflags);
	max = (size / MZAP_ENT_LEN) - 1;
	for (i = 0; i < max; i++) {
		buf->mz_chunk[i].mze_value =
		    BSWAP_64(buf->mz_chunk[i].mze_value);
		buf->mz_chunk[i].mze_cd =
		    BSWAP_32(buf->mz_chunk[i].mze_cd);
	}
}

/*
 * Byteswap any zap block, dispatching on the leading block-type word
 * (which may itself be in either byte order).
 */
void
zap_byteswap(void *buf, size_t size)
{
	uint64_t block_type;

	block_type = *(uint64_t *)buf;

	if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
		/* ASSERT(magic == ZAP_LEAF_MAGIC); */
		mzap_byteswap(buf, size);
	} else {
		fzap_byteswap(buf, size);
	}
}

/*
 * AVL comparator for in-core microzap entries: ordered by hash, then
 * by collision differentiator.
 */
static int
mze_compare(const void *arg1, const void *arg2)
{
	const mzap_ent_t *mze1 = arg1;
	const mzap_ent_t *mze2 = arg2;

	if (mze1->mze_hash > mze2->mze_hash)
		return (+1);
	if (mze1->mze_hash < mze2->mze_hash)
		return (-1);
	if (mze1->mze_phys.mze_cd > mze2->mze_phys.mze_cd)
		return (+1);
	if (mze1->mze_phys.mze_cd < mze2->mze_phys.mze_cd)
		return (-1);
	return (0);
}

/*
 * Insert an in-core shadow of on-disk chunk 'mzep' into the microzap's
 * AVL tree.  Caller holds zap_rwlock as writer.
 */
static void
mze_insert(zap_t *zap, int chunkid, uint64_t hash, mzap_ent_phys_t *mzep)
{
	mzap_ent_t *mze;

	ASSERT(zap->zap_ismicro);
	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	ASSERT(mzep->mze_cd < zap_maxcd(zap));

	mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
	mze->mze_chunkid = chunkid;
	mze->mze_hash = hash;
	mze->mze_phys = *mzep;
	avl_add(&zap->zap_m.zap_avl, mze);
}

/*
 * Find the entry matching 'zn' among all entries with zn's hash.  For
 * MT_BEST, retry the scan as MT_FIRST (normalized match) if no exact
 * match is found.  Returns NULL if not found.
 */
static mzap_ent_t *
mze_find(zap_name_t *zn)
{
	mzap_ent_t mze_tofind;
	mzap_ent_t *mze;
	avl_index_t idx;
	avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;

	ASSERT(zn->zn_zap->zap_ismicro);
	ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));

	mze_tofind.mze_hash = zn->zn_hash;
	mze_tofind.mze_phys.mze_cd = 0;

again:
	mze = avl_find(avl, &mze_tofind, &idx);
	if (mze == NULL)
		mze = avl_nearest(avl, idx, AVL_AFTER);
	for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
		if (zap_match(zn, mze->mze_phys.mze_name))
			return (mze);
	}
	if (zn->zn_matchtype == MT_BEST) {
		zn->zn_matchtype = MT_FIRST;
		goto again;
	}
	return (NULL);
}

/*
 * Return the lowest collision differentiator not yet used by any entry
 * with the given hash.  Relies on the AVL ordering (hash, then cd).
 */
static uint32_t
mze_find_unused_cd(zap_t *zap, uint64_t hash)
{
	mzap_ent_t mze_tofind;
	mzap_ent_t *mze;
	avl_index_t idx;
	avl_tree_t *avl = &zap->zap_m.zap_avl;
	uint32_t cd;

	ASSERT(zap->zap_ismicro);
	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));

	mze_tofind.mze_hash = hash;
	mze_tofind.mze_phys.mze_cd = 0;

	cd = 0;
	for (mze = avl_find(avl, &mze_tofind, &idx);
	    mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
		if (mze->mze_phys.mze_cd != cd)
			break;
		cd++;
	}

	return (cd);
}

static void
mze_remove(zap_t *zap, mzap_ent_t *mze)
{
	ASSERT(zap->zap_ismicro);
	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));

	avl_remove(&zap->zap_m.zap_avl, mze);
	kmem_free(mze, sizeof (mzap_ent_t));
}

/*
 * Free every in-core entry and tear down the AVL tree.
 */
static void
mze_destroy(zap_t *zap)
{
	mzap_ent_t *mze;
	void *avlcookie = NULL;

	while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
		kmem_free(mze, sizeof (mzap_ent_t));
	avl_destroy(&zap->zap_m.zap_avl);
}

/*
 * Construct the in-core zap_t for object 'obj' from its header dbuf,
 * and attach it as the dbuf's user.  If another thread raced us and
 * attached first, free ours and return the winner's.
 */
static zap_t *
mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
{
	zap_t *winner;
	zap_t *zap;
	int i;

	ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));

	zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
	rw_init(&zap->zap_rwlock, 0, 0, 0);
	rw_enter(&zap->zap_rwlock, RW_WRITER);
	zap->zap_objset = os;
	zap->zap_object = obj;
	zap->zap_dbuf = db;

	if (*(uint64_t *)db->db_data != ZBT_MICRO) {
		mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
		zap->zap_f.zap_block_shift = highbit(db->db_size) - 1;
	} else {
		zap->zap_ismicro = TRUE;
	}

	/*
	 * Make sure that zap_ismicro is set before we let others see
	 * it, because zap_lockdir() checks zap_ismicro without the lock
	 * held.
	 */
	winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict);

	if (winner != NULL) {
		rw_exit(&zap->zap_rwlock);
		rw_destroy(&zap->zap_rwlock);
		if (!zap->zap_ismicro)
			mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
		kmem_free(zap, sizeof (zap_t));
		return (winner);
	}

	if (zap->zap_ismicro) {
		zap->zap_salt = zap->zap_m.zap_phys->mz_salt;
		zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags;
		zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
		avl_create(&zap->zap_m.zap_avl, mze_compare,
		    sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));

		for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
			mzap_ent_phys_t *mze =
			    &zap->zap_m.zap_phys->mz_chunk[i];
			if (mze->mze_name[0]) {
				zap_name_t *zn;

				zap->zap_m.zap_num_entries++;
				zn = zap_name_alloc(zap, mze->mze_name,
				    MT_EXACT);
				mze_insert(zap, i, zn->zn_hash, mze);
				zap_name_free(zn);
			}
		}
	} else {
		zap->zap_salt = zap->zap_f.zap_phys->zap_salt;
		zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags;

		ASSERT3U(sizeof (struct zap_leaf_header), ==,
		    2*ZAP_LEAF_CHUNKSIZE);

		/*
		 * The embedded pointer table should not overlap the
		 * other members.
		 */
		ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
		    &zap->zap_f.zap_phys->zap_salt);

		/*
		 * The embedded pointer table should end at the end of
		 * the block
		 */
		ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
		    1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
		    (uintptr_t)zap->zap_f.zap_phys, ==,
		    zap->zap_dbuf->db_size);
	}
	rw_exit(&zap->zap_rwlock);
	return (zap);
}

/*
 * Look up (or instantiate) the zap for 'obj' and return it locked in
 * *zapp.  'lti' is the lock type wanted; a fat zap with 'fatreader'
 * set only needs RW_READER.  If 'adding' and the microzap is full,
 * grow its block or upgrade it to a fat zap.  Pair with
 * zap_unlockdir().
 */
int
zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
    krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
{
	zap_t *zap;
	dmu_buf_t *db;
	krw_t lt;
	int err;

	*zapp = NULL;

	err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
	if (err)
		return (err);

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(db, &doi);
		ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
	}
#endif

	zap = dmu_buf_get_user(db);
	if (zap == NULL)
		zap = mzap_open(os, obj, db);

	/*
	 * We're checking zap_ismicro without the lock held, in order to
	 * tell what type of lock we want.  Once we have some sort of
	 * lock, see if it really is the right type.  In practice this
	 * can only be different if it was upgraded from micro to fat,
	 * and micro wanted WRITER but fat only needs READER.
	 */
	lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
	rw_enter(&zap->zap_rwlock, lt);
	if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
		/* it was upgraded, now we only need reader */
		ASSERT(lt == RW_WRITER);
		/*
		 * NOTE(review): because == binds tighter than ?:, this
		 * ASSERT parses as ((RW_READER == (...)) ? RW_READER :
		 * lti) and so does not check what it appears to --
		 * verify intent before relying on it.
		 */
		ASSERT(RW_READER ==
		    (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
		rw_downgrade(&zap->zap_rwlock);
		lt = RW_READER;
	}

	zap->zap_objset = os;

	if (lt == RW_WRITER)
		dmu_buf_will_dirty(db, tx);

	ASSERT3P(zap->zap_dbuf, ==, db);

	ASSERT(!zap->zap_ismicro ||
	    zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
	if (zap->zap_ismicro && tx && adding &&
	    zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
		uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
		if (newsz > MZAP_MAX_BLKSZ) {
			dprintf("upgrading obj %llu: num_entries=%u\n",
			    obj, zap->zap_m.zap_num_entries);
			*zapp = zap;
			return (mzap_upgrade(zapp, tx, 0));
		}
		err = dmu_object_set_blocksize(os, obj, newsz, 0, tx);
		ASSERT3U(err, ==, 0);
		zap->zap_m.zap_num_chunks =
		    db->db_size / MZAP_ENT_LEN - 1;
	}

	*zapp = zap;
	return (0);
}

void
zap_unlockdir(zap_t *zap)
{
	rw_exit(&zap->zap_rwlock);
	dmu_buf_rele(zap->zap_dbuf, NULL);
}

/*
 * Convert a microzap into a fat zap, re-adding every entry.  On
 * success *zapp is updated (fzap_add_cd() may replace the zap_t).
 * Caller holds zap_rwlock as writer.
 */
static int
mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
{
	mzap_phys_t *mzp;
	int i, sz, nchunks;
	int err = 0;
	zap_t *zap = *zapp;

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));

	/* snapshot the microzap contents before fzap_upgrade() overwrites */
	sz = zap->zap_dbuf->db_size;
	mzp = kmem_alloc(sz, KM_SLEEP);
	bcopy(zap->zap_dbuf->db_data, mzp, sz);
	nchunks = zap->zap_m.zap_num_chunks;

	if (!flags) {
		err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
		    1ULL << fzap_default_block_shift, 0, tx);
		if (err) {
			kmem_free(mzp, sz);
			return (err);
		}
	}

	dprintf("upgrading obj=%llu with %u chunks\n",
	    zap->zap_object, nchunks);
	/* XXX destroy the avl later, so we can use the stored hash value */
	mze_destroy(zap);

	fzap_upgrade(zap, tx, flags);

	for (i = 0; i < nchunks; i++) {
		mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
		zap_name_t *zn;
		if (mze->mze_name[0] == 0)
			continue;
		dprintf("adding %s=%llu\n",
		    mze->mze_name, mze->mze_value);
		zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
		err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx);
		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
		zap_name_free(zn);
		if (err)
			break;
	}
	kmem_free(mzp, sz);
	*zapp = zap;
	return (err);
}

/*
 * Initialize a freshly-allocated object's header block as a microzap.
 * If 'flags' is nonzero the zap is immediately upgraded to fat (only
 * fat zaps carry flags).
 */
static void
mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
    dmu_tx_t *tx)
{
	dmu_buf_t *db;
	mzap_phys_t *zp;

	VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH));

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(db, &doi);
		ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
	}
#endif

	dmu_buf_will_dirty(db, tx);
	zp = db->db_data;
	zp->mz_block_type = ZBT_MICRO;
	/* salt must be nonzero; see the ASSERT in zap_hash() */
	zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL;
	zp->mz_normflags = normflags;
	dmu_buf_rele(db, FTAG);

	if (flags != 0) {
		zap_t *zap;
		/* Only fat zap supports flags; upgrade immediately. */
		VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER,
		    B_FALSE, B_FALSE, &zap));
		VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags));
		zap_unlockdir(zap);
	}
}

int
zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (zap_create_claim_norm(os, obj,
	    0, ot, bonustype, bonuslen, tx));
}

int
zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
    dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	int err;

	err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
	if (err != 0)
		return (err);
	mzap_create_impl(os, obj, normflags, 0, tx);
	return (0);
}

uint64_t
zap_create(objset_t *os, dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
}

uint64_t
zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);

	mzap_create_impl(os, obj, normflags, 0, tx);
	return (obj);
}

/*
 * Create a zap with explicit flags and block sizes; nonzero flags
 * force an immediate upgrade to fat zap in mzap_create_impl().
 */
uint64_t
zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);

	ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT &&
	    leaf_blockshift <= SPA_MAXBLOCKSHIFT &&
	    indirect_blockshift >= SPA_MINBLOCKSHIFT &&
	    indirect_blockshift <= SPA_MAXBLOCKSHIFT);

	VERIFY(dmu_object_set_blocksize(os, obj,
	    1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);

	mzap_create_impl(os, obj, normflags, flags, tx);
	return (obj);
}

int
zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
{
	/*
	 * dmu_object_free will free the object number and free the
	 * data.  Freeing the data will cause our pageout function to be
	 * called, which will destroy our data (zap_leaf_t's and zap_t).
	 */

	return (dmu_object_free(os, zapobj, tx));
}

/*
 * dbuf-user eviction callback: tear down the in-core zap_t when its
 * header dbuf is evicted.
 */
_NOTE(ARGSUSED(0))
void
zap_evict(dmu_buf_t *db, void *vzap)
{
	zap_t *zap = vzap;

	rw_destroy(&zap->zap_rwlock);

	if (zap->zap_ismicro)
		mze_destroy(zap);
	else
		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);

	kmem_free(zap, sizeof (zap_t));
}

int
zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
{
	zap_t *zap;
	int err;

	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
	if (err)
		return (err);
	if (!zap->zap_ismicro) {
		err = fzap_count(zap, count);
	} else {
		*count = zap->zap_m.zap_num_entries;
	}
	zap_unlockdir(zap);
	return (err);
}

/*
 * zn may be NULL; if not specified, it will be computed if needed.
 * See also the comment above zap_entry_normalization_conflict().
 */
static boolean_t
mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
{
	mzap_ent_t *other;
	int direction = AVL_BEFORE;
	boolean_t allocdzn = B_FALSE;

	if (zap->zap_normflags == 0)
		return (B_FALSE);

again:
	/* walk same-hash neighbors in one direction, then the other */
	for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
	    other && other->mze_hash == mze->mze_hash;
	    other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {

		if (zn == NULL) {
			zn = zap_name_alloc(zap, mze->mze_phys.mze_name,
			    MT_FIRST);
			allocdzn = B_TRUE;
		}
		if (zap_match(zn, other->mze_phys.mze_name)) {
			if (allocdzn)
				zap_name_free(zn);
			return (B_TRUE);
		}
	}

	if (direction == AVL_BEFORE) {
		direction = AVL_AFTER;
		goto again;
	}

	if (allocdzn)
		zap_name_free(zn);
	return (B_FALSE);
}

/*
 * Routines for manipulating attributes.
 */

int
zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
    uint64_t integer_size, uint64_t num_integers, void *buf)
{
	return (zap_lookup_norm(os, zapobj, name, integer_size,
	    num_integers, buf, MT_EXACT, NULL, 0, NULL));
}

/*
 * Look up 'name' with normalization-aware matching.  On success in the
 * microzap case, the value (always a single uint64) is copied to buf,
 * the stored name to realname, and *ncp reports whether a
 * normalization conflict exists.
 */
int
zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
    uint64_t integer_size, uint64_t num_integers, void *buf,
    matchtype_t mt, char *realname, int rn_len,
    boolean_t *ncp)
{
	zap_t *zap;
	int err;
	mzap_ent_t *mze;
	zap_name_t *zn;

	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc(zap, name, mt);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}

	if (!zap->zap_ismicro) {
		err = fzap_lookup(zn, integer_size, num_integers, buf,
		    realname, rn_len, ncp);
	} else {
		mze = mze_find(zn);
		if (mze == NULL) {
			err = ENOENT;
		} else {
			if (num_integers < 1) {
				err = EOVERFLOW;
			} else if (integer_size != 8) {
				err = EINVAL;
			} else {
				*(uint64_t *)buf = mze->mze_phys.mze_value;
				(void) strlcpy(realname,
				    mze->mze_phys.mze_name, rn_len);
				if (ncp) {
					*ncp = mzap_normalization_conflict(zap,
					    zn, mze);
				}
			}
		}
	}
	zap_name_free(zn);
	zap_unlockdir(zap);
	return (err);
}

int
zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
{
	zap_t *zap;
	int err;
	zap_name_t *zn;

	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}

	err = fzap_lookup(zn, integer_size, num_integers, buf,
	    NULL, 0, NULL);
	zap_name_free(zn);
	zap_unlockdir(zap);
	return (err);
}

int
zap_contains(objset_t *os, uint64_t zapobj, const char *name)
{
	int err = (zap_lookup_norm(os, zapobj, name, 0,
	    0, NULL, MT_EXACT, NULL, 0, NULL));
	if (err == EOVERFLOW || err == EINVAL)
		err = 0; /* found, but skipped reading the value */
	return (err);
}

int
zap_length(objset_t *os, uint64_t zapobj, const char *name,
    uint64_t *integer_size, uint64_t *num_integers)
{
	zap_t *zap;
	int err;
	mzap_ent_t *mze;
	zap_name_t *zn;

	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc(zap, name, MT_EXACT);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}
	if (!zap->zap_ismicro) {
		err = fzap_length(zn, integer_size, num_integers);
	} else {
		mze = mze_find(zn);
		if (mze == NULL) {
			err = ENOENT;
		} else {
			/* microzap values are always a single uint64 */
			if (integer_size)
				*integer_size = 8;
			if (num_integers)
				*num_integers = 1;
		}
	}
	zap_name_free(zn);
	zap_unlockdir(zap);
	return (err);
}

int
zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints, uint64_t *integer_size, uint64_t *num_integers)
{
	zap_t *zap;
	int err;
	zap_name_t *zn;

	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}
	err = fzap_length(zn, integer_size, num_integers);
	zap_name_free(zn);
	zap_unlockdir(zap);
	return (err);
}

/*
 * Store a new entry in the first free chunk, scanning circularly from
 * zap_alloc_next.  Caller has verified capacity (zap_lockdir with
 * adding) and that the name is not already present.
 */
static void
mzap_addent(zap_name_t *zn, uint64_t value)
{
	int i;
	zap_t *zap = zn->zn_zap;
	int start = zap->zap_m.zap_alloc_next;
	uint32_t cd;

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));

#ifdef ZFS_DEBUG
	for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
		mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
		ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
	}
#endif

	cd = mze_find_unused_cd(zap, zn->zn_hash);
	/* given the limited size of the microzap, this can't happen */
	ASSERT(cd < zap_maxcd(zap));

again:
	for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
		mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
		if (mze->mze_name[0] == 0) {
			mze->mze_value = value;
			mze->mze_cd = cd;
			(void) strcpy(mze->mze_name, zn->zn_key_orig);
			zap->zap_m.zap_num_entries++;
			zap->zap_m.zap_alloc_next = i+1;
			if (zap->zap_m.zap_alloc_next ==
			    zap->zap_m.zap_num_chunks)
				zap->zap_m.zap_alloc_next = 0;
			mze_insert(zap, i, zn->zn_hash, mze);
			return;
		}
	}
	if (start != 0) {
		start = 0;
		goto again;
	}
	ASSERT(!"out of entries!");
}

/*
 * Add a new entry.  If the value or name doesn't fit the microzap
 * format (8-byte single integer, short name), upgrade to fat zap
 * first.  Returns EEXIST if the name is already present.
 */
int
zap_add(objset_t *os, uint64_t zapobj, const char *key,
    int integer_size, uint64_t num_integers,
    const void *val, dmu_tx_t *tx)
{
	zap_t *zap;
	int err;
	mzap_ent_t *mze;
	const uint64_t *intval = val;
	zap_name_t *zn;

	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc(zap, key, MT_EXACT);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}
	if (!zap->zap_ismicro) {
		err = fzap_add(zn, integer_size, num_integers, val, tx);
		zap = zn->zn_zap;	/* fzap_add() may change zap */
	} else if (integer_size != 8 || num_integers != 1 ||
	    strlen(key) >= MZAP_NAME_LEN) {
		err = mzap_upgrade(&zn->zn_zap, tx, 0);
		if (err == 0)
			err = fzap_add(zn, integer_size, num_integers, val, tx);
		zap = zn->zn_zap;	/* fzap_add() may change zap */
	} else {
		mze = mze_find(zn);
		if (mze != NULL) {
			err = EEXIST;
		} else {
			mzap_addent(zn, *intval);
		}
	}
	ASSERT(zap == zn->zn_zap);
	zap_name_free(zn);
	if (zap != NULL)	/* may be NULL if fzap_add() failed */
		zap_unlockdir(zap);
	return (err);
}

int
zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints, int integer_size, uint64_t num_integers,
    const void *val, dmu_tx_t *tx)
{
	zap_t *zap;
	int err;
	zap_name_t *zn;

	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}
	err = fzap_add(zn, integer_size, num_integers, val, tx);
	zap = zn->zn_zap;	/* fzap_add() may change zap */
	zap_name_free(zn);
	if (zap != NULL)	/* may be NULL if fzap_add() failed */
		zap_unlockdir(zap);
	return (err);
}

/*
 * Set 'name' to 'val', adding the entry if absent.  Like zap_add(),
 * values that don't fit the microzap format trigger an upgrade.
 */
int
zap_update(objset_t *os, uint64_t zapobj, const char *name,
    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
{
	zap_t *zap;
	mzap_ent_t *mze;
	const uint64_t *intval = val;
	zap_name_t *zn;
	int err;

	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc(zap, name, MT_EXACT);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}
	if (!zap->zap_ismicro) {
		err = fzap_update(zn, integer_size, num_integers, val, tx);
		zap = zn->zn_zap;	/* fzap_update() may change zap */
	} else if (integer_size != 8 || num_integers != 1 ||
	    strlen(name) >= MZAP_NAME_LEN) {
		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
		    zapobj, integer_size, num_integers, name);
		err = mzap_upgrade(&zn->zn_zap, tx, 0);
		if (err == 0)
			err = fzap_update(zn, integer_size, num_integers,
			    val, tx);
		zap = zn->zn_zap;	/* fzap_update() may change zap */
	} else {
		mze = mze_find(zn);
		if (mze != NULL) {
			/* update both the in-core copy and on-disk chunk */
			mze->mze_phys.mze_value = *intval;
			zap->zap_m.zap_phys->mz_chunk
			    [mze->mze_chunkid].mze_value = *intval;
		} else {
			mzap_addent(zn, *intval);
		}
	}
	ASSERT(zap == zn->zn_zap);
	zap_name_free(zn);
	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
		zap_unlockdir(zap);
	return (err);
}

int
zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints,
    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
{
	zap_t *zap;
	zap_name_t *zn;
	int err;

	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}
	err = fzap_update(zn, integer_size, num_integers, val, tx);
	zap = zn->zn_zap;	/* fzap_update() may change zap */
	zap_name_free(zn);
	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
		zap_unlockdir(zap);
	return (err);
}

int
zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
{
	return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx));
}

int
zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
    matchtype_t mt, dmu_tx_t *tx)
{
	zap_t *zap;
	int err;
	mzap_ent_t *mze;
	zap_name_t *zn;

	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc(zap, name, mt);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}
	if (!zap->zap_ismicro) {
		err = fzap_remove(zn, tx);
	} else {
		mze = mze_find(zn);
		if (mze == NULL) {
			err = ENOENT;
		} else {
			zap->zap_m.zap_num_entries--;
			bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid],
			    sizeof (mzap_ent_phys_t));
			mze_remove(zap, mze);
		}
	}
	zap_name_free(zn);
	zap_unlockdir(zap);
	return (err);
}

int
zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints, dmu_tx_t *tx)
{
	zap_t *zap;
	int err;
	zap_name_t *zn;

	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap);
		return (ENOTSUP);
	}
	err = fzap_remove(zn, tx);
	zap_name_free(zn);
	zap_unlockdir(zap);
	return (err);
}

/*
 * Routines for iterating over the attributes.
 */

void
zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
    uint64_t serialized)
{
	zc->zc_objset = os;
	zc->zc_zap = NULL;
	zc->zc_leaf = NULL;
	zc->zc_zapobj = zapobj;
	zc->zc_serialized = serialized;
	zc->zc_hash = 0;
	zc->zc_cd = 0;
}

void
zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
{
	zap_cursor_init_serialized(zc, os, zapobj, 0);
}

void
zap_cursor_fini(zap_cursor_t *zc)
{
	if (zc->zc_zap) {
		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
		zap_unlockdir(zc->zc_zap);
		zc->zc_zap = NULL;
	}
	if (zc->zc_leaf) {
		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
		zap_put_leaf(zc->zc_leaf);
		zc->zc_leaf = NULL;
	}
	zc->zc_objset = NULL;
}

/*
 * Pack the cursor position (hash + cd) into a single uint64 suitable
 * for storing/restoring with zap_cursor_init_serialized().
 */
uint64_t
zap_cursor_serialize(zap_cursor_t *zc)
{
	if (zc->zc_hash == -1ULL)
		return (-1ULL);
	if (zc->zc_zap == NULL)
		return (zc->zc_serialized);
	ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
	ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));

	/*
	 * We want to keep the high 32 bits of the cursor zero if we can, so
	 * that 32-bit programs can access this.  So usually use a small
	 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
	 * of the cursor.
	 *
	 * [ collision differentiator | zap_hashbits()-bit hash value ]
	 */
	return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
	    ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
}

/*
 * Fetch the attribute at the cursor's current position into *za
 * without advancing.  Returns ENOENT when iteration is exhausted.
 */
int
zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
{
	int err;
	avl_index_t idx;
	mzap_ent_t mze_tofind;
	mzap_ent_t *mze;

	if (zc->zc_hash == -1ULL)
		return (ENOENT);

	if (zc->zc_zap == NULL) {
		int hb;
		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
		    RW_READER, TRUE, FALSE, &zc->zc_zap);
		if (err)
			return (err);

		/*
		 * To support zap_cursor_init_serialized, advance, retrieve,
		 * we must add to the existing zc_cd, which may already
		 * be 1 due to the zap_cursor_advance.
		 */
		ASSERT(zc->zc_hash == 0);
		hb = zap_hashbits(zc->zc_zap);
		zc->zc_hash = zc->zc_serialized << (64 - hb);
		zc->zc_cd += zc->zc_serialized >> hb;
		if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
			zc->zc_cd = 0;
	} else {
		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
	}
	if (!zc->zc_zap->zap_ismicro) {
		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
	} else {
		err = ENOENT;

		mze_tofind.mze_hash = zc->zc_hash;
		mze_tofind.mze_phys.mze_cd = zc->zc_cd;

		mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
		if (mze == NULL) {
			mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
			    idx, AVL_AFTER);
		}
		if (mze) {
			ASSERT(0 == bcmp(&mze->mze_phys,
			    &zc->zc_zap->zap_m.zap_phys->mz_chunk
			    [mze->mze_chunkid], sizeof (mze->mze_phys)));

			za->za_normalization_conflict =
			    mzap_normalization_conflict(zc->zc_zap, NULL, mze);
			za->za_integer_length = 8;
			za->za_num_integers = 1;
			za->za_first_integer = mze->mze_phys.mze_value;
			(void) strcpy(za->za_name, mze->mze_phys.mze_name);
			zc->zc_hash = mze->mze_hash;
			zc->zc_cd = mze->mze_phys.mze_cd;
			err = 0;
		} else {
			zc->zc_hash = -1ULL;
		}
	}
	rw_exit(&zc->zc_zap->zap_rwlock);
	return (err);
}

void
zap_cursor_advance(zap_cursor_t *zc)
{
	if (zc->zc_hash == -1ULL)
		return;
	/* bumping cd past the last entry makes retrieve move to next hash */
	zc->zc_cd++;
}

int
zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt)
{
	int err = 0;
	mzap_ent_t *mze;
	zap_name_t *zn;

	if (zc->zc_zap == NULL) {
		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
		    RW_READER, TRUE, FALSE, &zc->zc_zap);
		if (err)
			return (err);
	} else {
		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
	}

	zn = zap_name_alloc(zc->zc_zap, name, mt);
	if (zn == NULL) {
		rw_exit(&zc->zc_zap->zap_rwlock);
		return (ENOTSUP);
	}

	if (!zc->zc_zap->zap_ismicro) {
		err = fzap_cursor_move_to_key(zc, zn);
	} else {
		mze = mze_find(zn);
		if (mze == NULL) {
			err = ENOENT;
			goto out;
		}
		zc->zc_hash = mze->mze_hash;
		zc->zc_cd = mze->mze_phys.mze_cd;
	}

out:
	zap_name_free(zn);
	rw_exit(&zc->zc_zap->zap_rwlock);
	return (err);
}

int
zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
{
	int err;
	zap_t *zap;

	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
	if (err)
		return (err);

	bzero(zs, sizeof (zap_stats_t));

	if (zap->zap_ismicro) {
		zs->zs_blocksize = zap->zap_dbuf->db_size;
		zs->zs_num_entries = zap->zap_m.zap_num_entries;
		zs->zs_num_blocks = 1;
	} else {
		fzap_get_stats(zap, zs);
	}
	zap_unlockdir(zap);
	return (0);
}

int
zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
    uint64_t *towrite, uint64_t *tooverwrite)
{
	zap_t *zap;
	int err = 0;


	/*
	 * Since, we don't have a name, we cannot figure out which blocks will
	 * be affected in this operation. So, account for the worst case :
	 * - 3 blocks overwritten: target leaf, ptrtbl block, header block
	 * - 4 new blocks written if adding:
	 * 	- 2 blocks for possibly split leaves,
	 * 	- 2 grown ptrtbl blocks
	 *
	 * This also accomodates the case where an add operation to a fairly
	 * large microzap results in a promotion to fatzap.
	 */
	if (name == NULL) {
		*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
		return (err);
	}

	/*
	 * We lock the zap with adding ==  FALSE. Because, if we pass
	 * the actual value of add, it could trigger a mzap_upgrade().
	 * At present we are just evaluating the possibility of this operation
	 * and hence we donot want to trigger an upgrade.
	 */
	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
	if (err)
		return (err);

	if (!zap->zap_ismicro) {
		zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
		if (zn) {
			err = fzap_count_write(zn, add, towrite,
			    tooverwrite);
			zap_name_free(zn);
		} else {
			/*
			 * We treat this case as similar to (name == NULL)
			 */
			*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
		}
	} else {
		/*
		 * We are here if (name != NULL) and this is a micro-zap.
		 * We account for the header block depending on whether it
		 * is freeable.
		 *
		 * Incase of an add-operation it is hard to find out
		 * if this add will promote this microzap to fatzap.
		 * Hence, we consider the worst case and account for the
		 * blocks assuming this microzap would be promoted to a
		 * fatzap.
1405 * 1406 * 1 block overwritten : header block 1407 * 4 new blocks written : 2 new split leaf, 2 grown 1408 * ptrtbl blocks 1409 */ 1410 if (dmu_buf_freeable(zap->zap_dbuf)) 1411 *tooverwrite += SPA_MAXBLOCKSIZE; 1412 else 1413 *towrite += SPA_MAXBLOCKSIZE; 1414 1415 if (add) { 1416 *towrite += 4 * SPA_MAXBLOCKSIZE; 1417 } 1418 } 1419 1420 zap_unlockdir(zap); 1421 return (err); 1422 } 1423