1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/spa.h> 29 #include <sys/dmu.h> 30 #include <sys/zfs_context.h> 31 #include <sys/zap.h> 32 #include <sys/refcount.h> 33 #include <sys/zap_impl.h> 34 #include <sys/zap_leaf.h> 35 #include <sys/avl.h> 36 #include <sys/zfs_i18n.h> 37 38 static void mzap_upgrade(zap_t *zap, dmu_tx_t *tx); 39 40 41 static uint64_t 42 zap_hash(zap_t *zap, const char *normname) 43 { 44 const uint8_t *cp; 45 uint8_t c; 46 uint64_t crc = zap->zap_salt; 47 48 /* NB: name must already be normalized, if necessary */ 49 50 ASSERT(crc != 0); 51 ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 52 for (cp = (const uint8_t *)normname; (c = *cp) != '\0'; cp++) { 53 crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ c) & 0xFF]; 54 } 55 56 /* 57 * Only use 28 bits, since we need 4 bits in the cookie for the 58 * collision differentiator. We MUST use the high bits, since 59 * those are the ones that we first pay attention to when 60 * chosing the bucket. 61 */ 62 crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); 63 64 return (crc); 65 } 66 67 static int 68 zap_normalize(zap_t *zap, const char *name, char *namenorm) 69 { 70 size_t inlen, outlen; 71 int err; 72 73 inlen = strlen(name) + 1; 74 outlen = ZAP_MAXNAMELEN; 75 76 err = 0; 77 (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, 78 zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL, U8_UNICODE_LATEST, 79 &err); 80 81 return (err); 82 } 83 84 boolean_t 85 zap_match(zap_name_t *zn, const char *matchname) 86 { 87 if (zn->zn_matchtype == MT_FIRST) { 88 char norm[ZAP_MAXNAMELEN]; 89 90 if (zap_normalize(zn->zn_zap, matchname, norm) != 0) 91 return (B_FALSE); 92 93 return (strcmp(zn->zn_name_norm, norm) == 0); 94 } else { 95 /* MT_BEST or MT_EXACT */ 96 return (strcmp(zn->zn_name_orij, matchname) == 0); 97 } 98 } 99 100 void 101 zap_name_free(zap_name_t *zn) 102 { 103 kmem_free(zn, sizeof (zap_name_t)); 104 } 105 106 /* XXX combine this with zap_lockdir()? */ 107 zap_name_t * 108 zap_name_alloc(zap_t *zap, const char *name, matchtype_t mt) 109 { 110 zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 111 112 zn->zn_zap = zap; 113 zn->zn_name_orij = name; 114 zn->zn_matchtype = mt; 115 if (zap->zap_normflags) { 116 if (zap_normalize(zap, name, zn->zn_normbuf) != 0) { 117 zap_name_free(zn); 118 return (NULL); 119 } 120 zn->zn_name_norm = zn->zn_normbuf; 121 } else { 122 if (mt != MT_EXACT) { 123 zap_name_free(zn); 124 return (NULL); 125 } 126 zn->zn_name_norm = zn->zn_name_orij; 127 } 128 129 zn->zn_hash = zap_hash(zap, zn->zn_name_norm); 130 return (zn); 131 } 132 133 static void 134 mzap_byteswap(mzap_phys_t *buf, size_t size) 135 { 136 int i, max; 137 buf->mz_block_type = BSWAP_64(buf->mz_block_type); 138 buf->mz_salt = BSWAP_64(buf->mz_salt); 139 max = (size / MZAP_ENT_LEN) - 1; 140 for (i = 0; i < max; i++) { 141 buf->mz_chunk[i].mze_value = 142 BSWAP_64(buf->mz_chunk[i].mze_value); 143 buf->mz_chunk[i].mze_cd = 144 BSWAP_32(buf->mz_chunk[i].mze_cd); 145 } 146 } 147 148 void 149 zap_byteswap(void *buf, size_t size) 150 { 151 uint64_t block_type; 152 153 block_type = *(uint64_t *)buf; 154 155 if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { 156 /* ASSERT(magic == ZAP_LEAF_MAGIC); */ 157 mzap_byteswap(buf, size); 158 } else { 159 fzap_byteswap(buf, size); 160 } 161 } 162 163 static int 164 mze_compare(const void *arg1, const void *arg2) 165 { 166 const mzap_ent_t *mze1 = arg1; 167 const mzap_ent_t *mze2 = arg2; 168 169 if (mze1->mze_hash > mze2->mze_hash) 170 return (+1); 171 if (mze1->mze_hash < mze2->mze_hash) 172 return (-1); 173 if (mze1->mze_phys.mze_cd > mze2->mze_phys.mze_cd) 174 return (+1); 175 if (mze1->mze_phys.mze_cd < mze2->mze_phys.mze_cd) 176 return (-1); 177 return (0); 178 } 179 180 static void 181 mze_insert(zap_t *zap, int chunkid, uint64_t hash, mzap_ent_phys_t *mzep) 182 { 183 mzap_ent_t *mze; 184 185 ASSERT(zap->zap_ismicro); 186 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 187 ASSERT(mzep->mze_cd < ZAP_MAXCD); 188 189 mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); 190 mze->mze_chunkid = chunkid; 191 mze->mze_hash = hash; 192 mze->mze_phys = *mzep; 193 avl_add(&zap->zap_m.zap_avl, mze); 194 } 195 196 static mzap_ent_t * 197 mze_find(zap_name_t *zn) 198 { 199 mzap_ent_t mze_tofind; 200 mzap_ent_t *mze; 201 avl_index_t idx; 202 avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; 203 204 ASSERT(zn->zn_zap->zap_ismicro); 205 ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); 206 207 if (strlen(zn->zn_name_norm) >= sizeof (mze_tofind.mze_phys.mze_name)) 208 return (NULL); 209 210 mze_tofind.mze_hash = zn->zn_hash; 211 mze_tofind.mze_phys.mze_cd = 0; 212 213 again: 214 mze = avl_find(avl, &mze_tofind, &idx); 215 if (mze == NULL) 216 mze = avl_nearest(avl, idx, AVL_AFTER); 217 for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { 218 if (zap_match(zn, mze->mze_phys.mze_name)) 219 return (mze); 220 } 221 if (zn->zn_matchtype == MT_BEST) { 222 zn->zn_matchtype = MT_FIRST; 223 goto again; 224 } 225 return (NULL); 226 } 227 228 static uint32_t 229 mze_find_unused_cd(zap_t *zap, uint64_t hash) 230 { 231 mzap_ent_t mze_tofind; 232 mzap_ent_t *mze; 233 avl_index_t idx; 234 avl_tree_t *avl = &zap->zap_m.zap_avl; 235 uint32_t cd; 236 237 ASSERT(zap->zap_ismicro); 238 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 239 240 mze_tofind.mze_hash = hash; 241 mze_tofind.mze_phys.mze_cd = 0; 242 243 cd = 0; 244 for (mze = avl_find(avl, &mze_tofind, &idx); 245 mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 246 if (mze->mze_phys.mze_cd != cd) 247 break; 248 cd++; 249 } 250 251 return (cd); 252 } 253 254 static void 255 mze_remove(zap_t *zap, mzap_ent_t *mze) 256 { 257 ASSERT(zap->zap_ismicro); 258 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 259 260 avl_remove(&zap->zap_m.zap_avl, mze); 261 kmem_free(mze, sizeof (mzap_ent_t)); 262 } 263 264 static void 265 mze_destroy(zap_t *zap) 266 { 267 mzap_ent_t *mze; 268 void *avlcookie = NULL; 269 270 while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)) 271 kmem_free(mze, sizeof (mzap_ent_t)); 272 avl_destroy(&zap->zap_m.zap_avl); 273 } 274 275 static zap_t * 276 mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) 277 { 278 zap_t *winner; 279 zap_t *zap; 280 int i; 281 282 ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); 283 284 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); 285 rw_init(&zap->zap_rwlock, 0, 0, 0); 286 rw_enter(&zap->zap_rwlock, RW_WRITER); 287 zap->zap_objset = os; 288 zap->zap_object = obj; 289 zap->zap_dbuf = db; 290 291 if (*(uint64_t *)db->db_data != ZBT_MICRO) { 292 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 293 zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; 294 } else { 295 zap->zap_ismicro = TRUE; 296 } 297 298 /* 299 * Make sure that zap_ismicro is set before we let others see 300 * it, because zap_lockdir() checks zap_ismicro without the lock 301 * held. 302 */ 303 winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); 304 305 if (winner != NULL) { 306 rw_exit(&zap->zap_rwlock); 307 rw_destroy(&zap->zap_rwlock); 308 if (!zap->zap_ismicro) 309 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 310 kmem_free(zap, sizeof (zap_t)); 311 return (winner); 312 } 313 314 if (zap->zap_ismicro) { 315 zap->zap_salt = zap->zap_m.zap_phys->mz_salt; 316 zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags; 317 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; 318 avl_create(&zap->zap_m.zap_avl, mze_compare, 319 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); 320 321 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 322 mzap_ent_phys_t *mze = 323 &zap->zap_m.zap_phys->mz_chunk[i]; 324 if (mze->mze_name[0]) { 325 zap_name_t *zn; 326 327 zap->zap_m.zap_num_entries++; 328 zn = zap_name_alloc(zap, mze->mze_name, 329 MT_EXACT); 330 mze_insert(zap, i, zn->zn_hash, mze); 331 zap_name_free(zn); 332 } 333 } 334 } else { 335 zap->zap_salt = zap->zap_f.zap_phys->zap_salt; 336 zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags; 337 338 ASSERT3U(sizeof (struct zap_leaf_header), ==, 339 2*ZAP_LEAF_CHUNKSIZE); 340 341 /* 342 * The embedded pointer table should not overlap the 343 * other members. 344 */ 345 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, 346 &zap->zap_f.zap_phys->zap_salt); 347 348 /* 349 * The embedded pointer table should end at the end of 350 * the block 351 */ 352 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 353 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - 354 (uintptr_t)zap->zap_f.zap_phys, ==, 355 zap->zap_dbuf->db_size); 356 } 357 rw_exit(&zap->zap_rwlock); 358 return (zap); 359 } 360 361 int 362 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, 363 krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) 364 { 365 zap_t *zap; 366 dmu_buf_t *db; 367 krw_t lt; 368 int err; 369 370 *zapp = NULL; 371 372 err = dmu_buf_hold(os, obj, 0, NULL, &db); 373 if (err) 374 return (err); 375 376 #ifdef ZFS_DEBUG 377 { 378 dmu_object_info_t doi; 379 dmu_object_info_from_db(db, &doi); 380 ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); 381 } 382 #endif 383 384 zap = dmu_buf_get_user(db); 385 if (zap == NULL) 386 zap = mzap_open(os, obj, db); 387 388 /* 389 * We're checking zap_ismicro without the lock held, in order to 390 * tell what type of lock we want. Once we have some sort of 391 * lock, see if it really is the right type. In practice this 392 * can only be different if it was upgraded from micro to fat, 393 * and micro wanted WRITER but fat only needs READER. 394 */ 395 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; 396 rw_enter(&zap->zap_rwlock, lt); 397 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { 398 /* it was upgraded, now we only need reader */ 399 ASSERT(lt == RW_WRITER); 400 ASSERT(RW_READER == 401 (!zap->zap_ismicro && fatreader) ? RW_READER : lti); 402 rw_downgrade(&zap->zap_rwlock); 403 lt = RW_READER; 404 } 405 406 zap->zap_objset = os; 407 408 if (lt == RW_WRITER) 409 dmu_buf_will_dirty(db, tx); 410 411 ASSERT3P(zap->zap_dbuf, ==, db); 412 413 ASSERT(!zap->zap_ismicro || 414 zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); 415 if (zap->zap_ismicro && tx && adding && 416 zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { 417 uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; 418 if (newsz > MZAP_MAX_BLKSZ) { 419 dprintf("upgrading obj %llu: num_entries=%u\n", 420 obj, zap->zap_m.zap_num_entries); 421 mzap_upgrade(zap, tx); 422 *zapp = zap; 423 return (0); 424 } 425 err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); 426 ASSERT3U(err, ==, 0); 427 zap->zap_m.zap_num_chunks = 428 db->db_size / MZAP_ENT_LEN - 1; 429 } 430 431 *zapp = zap; 432 return (0); 433 } 434 435 void 436 zap_unlockdir(zap_t *zap) 437 { 438 rw_exit(&zap->zap_rwlock); 439 dmu_buf_rele(zap->zap_dbuf, NULL); 440 } 441 442 static void 443 mzap_upgrade(zap_t *zap, dmu_tx_t *tx) 444 { 445 mzap_phys_t *mzp; 446 int i, sz, nchunks, err; 447 448 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 449 450 sz = zap->zap_dbuf->db_size; 451 mzp = kmem_alloc(sz, KM_SLEEP); 452 bcopy(zap->zap_dbuf->db_data, mzp, sz); 453 nchunks = zap->zap_m.zap_num_chunks; 454 455 err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 456 1ULL << fzap_default_block_shift, 0, tx); 457 ASSERT(err == 0); 458 459 dprintf("upgrading obj=%llu with %u chunks\n", 460 zap->zap_object, nchunks); 461 /* XXX destroy the avl later, so we can use the stored hash value */ 462 mze_destroy(zap); 463 464 fzap_upgrade(zap, tx); 465 466 for (i = 0; i < nchunks; i++) { 467 int err; 468 mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; 469 zap_name_t *zn; 470 if (mze->mze_name[0] == 0) 471 continue; 472 dprintf("adding %s=%llu\n", 473 mze->mze_name, mze->mze_value); 474 zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); 475 err = fzap_add_cd(zn, 8, 1, &mze->mze_value, 476 mze->mze_cd, tx); 477 zap_name_free(zn); 478 ASSERT3U(err, ==, 0); 479 } 480 kmem_free(mzp, sz); 481 } 482 483 static void 484 mzap_create_impl(objset_t *os, uint64_t obj, int normflags, dmu_tx_t *tx) 485 { 486 dmu_buf_t *db; 487 mzap_phys_t *zp; 488 489 ASSERT(normflags == 0 || 490 spa_version(dmu_objset_spa(os)) >= SPA_VERSION_NORMALIZATION); 491 VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db)); 492 493 #ifdef ZFS_DEBUG 494 { 495 dmu_object_info_t doi; 496 dmu_object_info_from_db(db, &doi); 497 ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); 498 } 499 #endif 500 501 dmu_buf_will_dirty(db, tx); 502 zp = db->db_data; 503 zp->mz_block_type = ZBT_MICRO; 504 zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; 505 zp->mz_normflags = normflags; 506 dmu_buf_rele(db, FTAG); 507 } 508 509 int 510 zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, 511 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 512 { 513 return (zap_create_claim_norm(os, obj, 514 0, ot, bonustype, bonuslen, tx)); 515 } 516 517 int 518 zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, 519 dmu_object_type_t ot, 520 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 521 { 522 int err; 523 524 err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); 525 if (err != 0) 526 return (err); 527 mzap_create_impl(os, obj, normflags, tx); 528 return (0); 529 } 530 531 uint64_t 532 zap_create(objset_t *os, dmu_object_type_t ot, 533 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 534 { 535 return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); 536 } 537 538 uint64_t 539 zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, 540 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 541 { 542 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 543 544 mzap_create_impl(os, obj, normflags, tx); 545 return (obj); 546 } 547 548 int 549 zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) 550 { 551 /* 552 * dmu_object_free will free the object number and free the 553 * data. Freeing the data will cause our pageout function to be 554 * called, which will destroy our data (zap_leaf_t's and zap_t). 555 */ 556 557 return (dmu_object_free(os, zapobj, tx)); 558 } 559 560 _NOTE(ARGSUSED(0)) 561 void 562 zap_evict(dmu_buf_t *db, void *vzap) 563 { 564 zap_t *zap = vzap; 565 566 rw_destroy(&zap->zap_rwlock); 567 568 if (zap->zap_ismicro) 569 mze_destroy(zap); 570 else 571 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 572 573 kmem_free(zap, sizeof (zap_t)); 574 } 575 576 int 577 zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) 578 { 579 zap_t *zap; 580 int err; 581 582 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 583 if (err) 584 return (err); 585 if (!zap->zap_ismicro) { 586 err = fzap_count(zap, count); 587 } else { 588 *count = zap->zap_m.zap_num_entries; 589 } 590 zap_unlockdir(zap); 591 return (err); 592 } 593 594 /* 595 * zn may be NULL; if not specified, it will be computed if needed. 596 * See also the comment above zap_entry_normalization_conflict(). 597 */ 598 static boolean_t 599 mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) 600 { 601 mzap_ent_t *other; 602 int direction = AVL_BEFORE; 603 boolean_t allocdzn = B_FALSE; 604 605 if (zap->zap_normflags == 0) 606 return (B_FALSE); 607 608 again: 609 for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); 610 other && other->mze_hash == mze->mze_hash; 611 other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { 612 613 if (zn == NULL) { 614 zn = zap_name_alloc(zap, mze->mze_phys.mze_name, 615 MT_FIRST); 616 allocdzn = B_TRUE; 617 } 618 if (zap_match(zn, other->mze_phys.mze_name)) { 619 if (allocdzn) 620 zap_name_free(zn); 621 return (B_TRUE); 622 } 623 } 624 625 if (direction == AVL_BEFORE) { 626 direction = AVL_AFTER; 627 goto again; 628 } 629 630 if (allocdzn) 631 zap_name_free(zn); 632 return (B_FALSE); 633 } 634 635 /* 636 * Routines for manipulating attributes. 637 */ 638 639 int 640 zap_lookup(objset_t *os, uint64_t zapobj, const char *name, 641 uint64_t integer_size, uint64_t num_integers, void *buf) 642 { 643 return (zap_lookup_norm(os, zapobj, name, integer_size, 644 num_integers, buf, MT_EXACT, NULL, 0, NULL)); 645 } 646 647 int 648 zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, 649 uint64_t integer_size, uint64_t num_integers, void *buf, 650 matchtype_t mt, char *realname, int rn_len, 651 boolean_t *ncp) 652 { 653 zap_t *zap; 654 int err; 655 mzap_ent_t *mze; 656 zap_name_t *zn; 657 658 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 659 if (err) 660 return (err); 661 zn = zap_name_alloc(zap, name, mt); 662 if (zn == NULL) { 663 zap_unlockdir(zap); 664 return (ENOTSUP); 665 } 666 667 if (!zap->zap_ismicro) { 668 err = fzap_lookup(zn, integer_size, num_integers, buf, 669 realname, rn_len, ncp); 670 } else { 671 mze = mze_find(zn); 672 if (mze == NULL) { 673 err = ENOENT; 674 } else { 675 if (num_integers < 1) { 676 err = EOVERFLOW; 677 } else if (integer_size != 8) { 678 err = EINVAL; 679 } else { 680 *(uint64_t *)buf = mze->mze_phys.mze_value; 681 (void) strlcpy(realname, 682 mze->mze_phys.mze_name, rn_len); 683 if (ncp) { 684 *ncp = mzap_normalization_conflict(zap, 685 zn, mze); 686 } 687 } 688 } 689 } 690 zap_name_free(zn); 691 zap_unlockdir(zap); 692 return (err); 693 } 694 695 int 696 zap_length(objset_t *os, uint64_t zapobj, const char *name, 697 uint64_t *integer_size, uint64_t *num_integers) 698 { 699 zap_t *zap; 700 int err; 701 mzap_ent_t *mze; 702 zap_name_t *zn; 703 704 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 705 if (err) 706 return (err); 707 zn = zap_name_alloc(zap, name, MT_EXACT); 708 if (zn == NULL) { 709 zap_unlockdir(zap); 710 return (ENOTSUP); 711 } 712 if (!zap->zap_ismicro) { 713 err = fzap_length(zn, integer_size, num_integers); 714 } else { 715 mze = mze_find(zn); 716 if (mze == NULL) { 717 err = ENOENT; 718 } else { 719 if (integer_size) 720 *integer_size = 8; 721 if (num_integers) 722 *num_integers = 1; 723 } 724 } 725 zap_name_free(zn); 726 zap_unlockdir(zap); 727 return (err); 728 } 729 730 static void 731 mzap_addent(zap_name_t *zn, uint64_t value) 732 { 733 int i; 734 zap_t *zap = zn->zn_zap; 735 int start = zap->zap_m.zap_alloc_next; 736 uint32_t cd; 737 738 dprintf("obj=%llu %s=%llu\n", zap->zap_object, 739 zn->zn_name_orij, value); 740 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 741 742 #ifdef ZFS_DEBUG 743 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 744 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 745 ASSERT(strcmp(zn->zn_name_orij, mze->mze_name) != 0); 746 } 747 #endif 748 749 cd = mze_find_unused_cd(zap, zn->zn_hash); 750 /* given the limited size of the microzap, this can't happen */ 751 ASSERT(cd != ZAP_MAXCD); 752 753 again: 754 for (i = start; i < zap->zap_m.zap_num_chunks; i++) { 755 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 756 if (mze->mze_name[0] == 0) { 757 mze->mze_value = value; 758 mze->mze_cd = cd; 759 (void) strcpy(mze->mze_name, zn->zn_name_orij); 760 zap->zap_m.zap_num_entries++; 761 zap->zap_m.zap_alloc_next = i+1; 762 if (zap->zap_m.zap_alloc_next == 763 zap->zap_m.zap_num_chunks) 764 zap->zap_m.zap_alloc_next = 0; 765 mze_insert(zap, i, zn->zn_hash, mze); 766 return; 767 } 768 } 769 if (start != 0) { 770 start = 0; 771 goto again; 772 } 773 ASSERT(!"out of entries!"); 774 } 775 776 int 777 zap_add(objset_t *os, uint64_t zapobj, const char *name, 778 int integer_size, uint64_t num_integers, 779 const void *val, dmu_tx_t *tx) 780 { 781 zap_t *zap; 782 int err; 783 mzap_ent_t *mze; 784 const uint64_t *intval = val; 785 zap_name_t *zn; 786 787 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 788 if (err) 789 return (err); 790 zn = zap_name_alloc(zap, name, MT_EXACT); 791 if (zn == NULL) { 792 zap_unlockdir(zap); 793 return (ENOTSUP); 794 } 795 if (!zap->zap_ismicro) { 796 err = fzap_add(zn, integer_size, num_integers, val, tx); 797 } else if (integer_size != 8 || num_integers != 1 || 798 strlen(name) >= MZAP_NAME_LEN) { 799 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 800 zapobj, integer_size, num_integers, name); 801 mzap_upgrade(zap, tx); 802 err = fzap_add(zn, integer_size, num_integers, val, tx); 803 } else { 804 mze = mze_find(zn); 805 if (mze != NULL) { 806 err = EEXIST; 807 } else { 808 mzap_addent(zn, *intval); 809 } 810 } 811 zap_name_free(zn); 812 zap_unlockdir(zap); 813 return (err); 814 } 815 816 int 817 zap_update(objset_t *os, uint64_t zapobj, const char *name, 818 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 819 { 820 zap_t *zap; 821 mzap_ent_t *mze; 822 const uint64_t *intval = val; 823 zap_name_t *zn; 824 int err; 825 826 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 827 if (err) 828 return (err); 829 zn = zap_name_alloc(zap, name, MT_EXACT); 830 if (zn == NULL) { 831 zap_unlockdir(zap); 832 return (ENOTSUP); 833 } 834 if (!zap->zap_ismicro) { 835 err = fzap_update(zn, integer_size, num_integers, val, tx); 836 } else if (integer_size != 8 || num_integers != 1 || 837 strlen(name) >= MZAP_NAME_LEN) { 838 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 839 zapobj, integer_size, num_integers, name); 840 mzap_upgrade(zap, tx); 841 err = fzap_update(zn, integer_size, num_integers, val, tx); 842 } else { 843 mze = mze_find(zn); 844 if (mze != NULL) { 845 mze->mze_phys.mze_value = *intval; 846 zap->zap_m.zap_phys->mz_chunk 847 [mze->mze_chunkid].mze_value = *intval; 848 } else { 849 mzap_addent(zn, *intval); 850 } 851 } 852 zap_name_free(zn); 853 zap_unlockdir(zap); 854 return (err); 855 } 856 857 int 858 zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) 859 { 860 return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); 861 } 862 863 int 864 zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, 865 matchtype_t mt, dmu_tx_t *tx) 866 { 867 zap_t *zap; 868 int err; 869 mzap_ent_t *mze; 870 zap_name_t *zn; 871 872 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); 873 if (err) 874 return (err); 875 zn = zap_name_alloc(zap, name, mt); 876 if (zn == NULL) { 877 zap_unlockdir(zap); 878 return (ENOTSUP); 879 } 880 if (!zap->zap_ismicro) { 881 err = fzap_remove(zn, tx); 882 } else { 883 mze = mze_find(zn); 884 if (mze == NULL) { 885 err = ENOENT; 886 } else { 887 zap->zap_m.zap_num_entries--; 888 bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], 889 sizeof (mzap_ent_phys_t)); 890 mze_remove(zap, mze); 891 } 892 } 893 zap_name_free(zn); 894 zap_unlockdir(zap); 895 return (err); 896 } 897 898 /* 899 * Routines for iterating over the attributes. 900 */ 901 902 /* 903 * We want to keep the high 32 bits of the cursor zero if we can, so 904 * that 32-bit programs can access this. So use a small hash value so 905 * we can fit 4 bits of cd into the 32-bit cursor. 906 * 907 * [ 4 zero bits | 32-bit collision differentiator | 28-bit hash value ] 908 */ 909 void 910 zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, 911 uint64_t serialized) 912 { 913 zc->zc_objset = os; 914 zc->zc_zap = NULL; 915 zc->zc_leaf = NULL; 916 zc->zc_zapobj = zapobj; 917 if (serialized == -1ULL) { 918 zc->zc_hash = -1ULL; 919 zc->zc_cd = 0; 920 } else { 921 zc->zc_hash = serialized << (64-ZAP_HASHBITS); 922 zc->zc_cd = serialized >> ZAP_HASHBITS; 923 if (zc->zc_cd >= ZAP_MAXCD) /* corrupt serialized */ 924 zc->zc_cd = 0; 925 } 926 } 927 928 void 929 zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) 930 { 931 zap_cursor_init_serialized(zc, os, zapobj, 0); 932 } 933 934 void 935 zap_cursor_fini(zap_cursor_t *zc) 936 { 937 if (zc->zc_zap) { 938 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 939 zap_unlockdir(zc->zc_zap); 940 zc->zc_zap = NULL; 941 } 942 if (zc->zc_leaf) { 943 rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 944 zap_put_leaf(zc->zc_leaf); 945 zc->zc_leaf = NULL; 946 } 947 zc->zc_objset = NULL; 948 } 949 950 uint64_t 951 zap_cursor_serialize(zap_cursor_t *zc) 952 { 953 if (zc->zc_hash == -1ULL) 954 return (-1ULL); 955 ASSERT((zc->zc_hash & (ZAP_MAXCD-1)) == 0); 956 ASSERT(zc->zc_cd < ZAP_MAXCD); 957 return ((zc->zc_hash >> (64-ZAP_HASHBITS)) | 958 ((uint64_t)zc->zc_cd << ZAP_HASHBITS)); 959 } 960 961 int 962 zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) 963 { 964 int err; 965 avl_index_t idx; 966 mzap_ent_t mze_tofind; 967 mzap_ent_t *mze; 968 969 if (zc->zc_hash == -1ULL) 970 return (ENOENT); 971 972 if (zc->zc_zap == NULL) { 973 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 974 RW_READER, TRUE, FALSE, &zc->zc_zap); 975 if (err) 976 return (err); 977 } else { 978 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 979 } 980 if (!zc->zc_zap->zap_ismicro) { 981 err = fzap_cursor_retrieve(zc->zc_zap, zc, za); 982 } else { 983 err = ENOENT; 984 985 mze_tofind.mze_hash = zc->zc_hash; 986 mze_tofind.mze_phys.mze_cd = zc->zc_cd; 987 988 mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); 989 if (mze == NULL) { 990 mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, 991 idx, AVL_AFTER); 992 } 993 if (mze) { 994 ASSERT(0 == bcmp(&mze->mze_phys, 995 &zc->zc_zap->zap_m.zap_phys->mz_chunk 996 [mze->mze_chunkid], sizeof (mze->mze_phys))); 997 998 za->za_normalization_conflict = 999 mzap_normalization_conflict(zc->zc_zap, NULL, mze); 1000 za->za_integer_length = 8; 1001 za->za_num_integers = 1; 1002 za->za_first_integer = mze->mze_phys.mze_value; 1003 (void) strcpy(za->za_name, mze->mze_phys.mze_name); 1004 zc->zc_hash = mze->mze_hash; 1005 zc->zc_cd = mze->mze_phys.mze_cd; 1006 err = 0; 1007 } else { 1008 zc->zc_hash = -1ULL; 1009 } 1010 } 1011 rw_exit(&zc->zc_zap->zap_rwlock); 1012 return (err); 1013 } 1014 1015 void 1016 zap_cursor_advance(zap_cursor_t *zc) 1017 { 1018 if (zc->zc_hash == -1ULL) 1019 return; 1020 zc->zc_cd++; 1021 if (zc->zc_cd >= ZAP_MAXCD) { 1022 zc->zc_cd = 0; 1023 zc->zc_hash += 1ULL<<(64-ZAP_HASHBITS); 1024 if (zc->zc_hash == 0) /* EOF */ 1025 zc->zc_hash = -1ULL; 1026 } 1027 } 1028 1029 int 1030 zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) 1031 { 1032 int err; 1033 zap_t *zap; 1034 1035 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 1036 if (err) 1037 return (err); 1038 1039 bzero(zs, sizeof (zap_stats_t)); 1040 1041 if (zap->zap_ismicro) { 1042 zs->zs_blocksize = zap->zap_dbuf->db_size; 1043 zs->zs_num_entries = zap->zap_m.zap_num_entries; 1044 zs->zs_num_blocks = 1; 1045 } else { 1046 fzap_get_stats(zap, zs); 1047 } 1048 zap_unlockdir(zap); 1049 return (0); 1050 } 1051