/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h> /* for dsl_dataset_block_freeable() */
#include <sys/dsl_dir.h> /* for dsl_dir_tempreserve_*() */
#include <sys/dsl_pool.h>
#include <sys/zap_impl.h> /* for fzap_default_block_shift */
#include <sys/spa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/zfs_context.h>
#include <sys/varargs.h>

typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
    uint64_t arg1, uint64_t arg2);


dmu_tx_t *
dmu_tx_create_dd(dsl_dir_t *dd)
{
	dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
	tx->tx_dir = dd;
	if (dd)
		tx->tx_pool = dd->dd_pool;
	list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
	    offsetof(dmu_tx_hold_t, txh_node));
	list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t),
	    offsetof(dmu_tx_callback_t, dcb_node));
#ifdef ZFS_DEBUG
	refcount_create(&tx->tx_space_written);
	refcount_create(&tx->tx_space_freed);
#endif
	return (tx);
}

dmu_tx_t *
dmu_tx_create(objset_t *os)
{
	dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir);
	tx->tx_objset = os;
	tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset);
	return (tx);
}

dmu_tx_t *
dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg)
{
	dmu_tx_t *tx = dmu_tx_create_dd(NULL);

	ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
	tx->tx_pool = dp;
	tx->tx_txg = txg;
	tx->tx_anyobj = TRUE;

	return (tx);
}

int
dmu_tx_is_syncing(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}

int
dmu_tx_private_ok(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}

static dmu_tx_hold_t *
dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
    enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn = NULL;
	int err;

	if (object != DMU_NEW_OBJECT) {
		err = dnode_hold(os, object, tx, &dn);
		if (err) {
			tx->tx_err = err;
			return (NULL);
		}

		if (err == 0 && tx->tx_txg != 0) {
			mutex_enter(&dn->dn_mtx);
			/*
			 * dn->dn_assigned_txg == tx->tx_txg doesn't pose a
			 * problem, but there's no way for it to happen (for
			 * now, at least).
			 */
			ASSERT(dn->dn_assigned_txg == 0);
			dn->dn_assigned_txg = tx->tx_txg;
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
	}

	txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
	txh->txh_tx = tx;
	txh->txh_dnode = dn;
#ifdef ZFS_DEBUG
	txh->txh_type = type;
	txh->txh_arg1 = arg1;
	txh->txh_arg2 = arg2;
#endif
	list_insert_tail(&tx->tx_holds, txh);

	return (txh);
}

void
dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
{
	/*
	 * If we're syncing, they can manipulate any object anyhow, and
	 * the hold on the dnode_t can cause problems.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		(void) dmu_tx_hold_object_impl(tx, os,
		    object, THT_NEWOBJECT, 0, 0);
	}
}

static int
dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
{
	int err;
	dmu_buf_impl_t *db;

	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	db = dbuf_hold_level(dn, level, blkid, FTAG);
	rw_exit(&dn->dn_struct_rwlock);
	if (db == NULL)
		return (EIO);
	err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH);
	dbuf_rele(db, FTAG);
	return (err);
}

static void
dmu_tx_count_twig(dmu_tx_hold_t *txh, dnode_t *dn, dmu_buf_impl_t *db,
    int level, uint64_t blkid, boolean_t freeable, uint64_t *history)
{
	objset_t *os = dn->dn_objset;
	dsl_dataset_t *ds = os->os_dsl_dataset;
	int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	dmu_buf_impl_t *parent = NULL;
	blkptr_t *bp = NULL;
	uint64_t space;

	if (level >= dn->dn_nlevels || history[level] == blkid)
		return;

	history[level] = blkid;

	space = (level == 0) ? dn->dn_datablksz : (1ULL << dn->dn_indblkshift);

	if (db == NULL || db == dn->dn_dbuf) {
		ASSERT(level != 0);
		db = NULL;
	} else {
		ASSERT(db->db_dnode == dn);
		ASSERT(db->db_level == level);
		ASSERT(db->db.db_size == space);
		ASSERT(db->db_blkid == blkid);
		bp = db->db_blkptr;
		parent = db->db_parent;
	}

	freeable = (bp && (freeable ||
	    dsl_dataset_block_freeable(ds, bp, bp->blk_birth)));

	if (freeable)
		txh->txh_space_tooverwrite += space;
	else
		txh->txh_space_towrite += space;
	if (bp)
		txh->txh_space_tounref += bp_get_dsize(os->os_spa, bp);

	dmu_tx_count_twig(txh, dn, parent, level + 1,
	    blkid >> epbs, freeable, history);
}

/* ARGSUSED */
static void
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	dnode_t *dn = txh->txh_dnode;
	uint64_t start, end, i;
	int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;
	int err = 0;

	if (len == 0)
		return;

	min_bs = SPA_MINBLOCKSHIFT;
	max_bs = SPA_MAXBLOCKSHIFT;
	min_ibs = DN_MIN_INDBLKSHIFT;
	max_ibs = DN_MAX_INDBLKSHIFT;

	if (dn) {
		uint64_t history[DN_MAX_LEVELS];
		int nlvls = dn->dn_nlevels;
		int delta;

		/*
		 * For i/o error checking, read the first and last level-0
		 * blocks (if they are not aligned), and all the level-1 blocks.
		 */
		if (dn->dn_maxblkid == 0) {
			delta = dn->dn_datablksz;
			start = (off < dn->dn_datablksz) ? 0 : 1;
			end = (off+len <= dn->dn_datablksz) ? 0 : 1;
			if (start == 0 && (off > 0 || len < dn->dn_datablksz)) {
				err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
				if (err)
					goto out;
				delta -= off;
			}
		} else {
			zio_t *zio = zio_root(dn->dn_objset->os_spa,
			    NULL, NULL, ZIO_FLAG_CANFAIL);

			/* first level-0 block */
			start = off >> dn->dn_datablkshift;
			if (P2PHASE(off, dn->dn_datablksz) ||
			    len < dn->dn_datablksz) {
				err = dmu_tx_check_ioerr(zio, dn, 0, start);
				if (err)
					goto out;
			}

			/* last level-0 block */
			end = (off+len-1) >> dn->dn_datablkshift;
			if (end != start && end <= dn->dn_maxblkid &&
			    P2PHASE(off+len, dn->dn_datablksz)) {
				err = dmu_tx_check_ioerr(zio, dn, 0, end);
				if (err)
					goto out;
			}

			/* level-1 blocks */
			if (nlvls > 1) {
				int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
				for (i = (start>>shft)+1; i < end>>shft; i++) {
					err = dmu_tx_check_ioerr(zio, dn, 1, i);
					if (err)
						goto out;
				}
			}

			err = zio_wait(zio);
			if (err)
				goto out;
			delta = P2NPHASE(off, dn->dn_datablksz);
		}

		if (dn->dn_maxblkid > 0) {
			/*
			 * The blocksize can't change,
			 * so we can make a more precise estimate.
			 */
			ASSERT(dn->dn_datablkshift != 0);
			min_bs = max_bs = dn->dn_datablkshift;
			min_ibs = max_ibs = dn->dn_indblkshift;
		} else if (dn->dn_indblkshift > max_ibs) {
			/*
			 * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
			 * the code will still work correctly on older pools.
			 */
			min_ibs = max_ibs = dn->dn_indblkshift;
		}

		/*
		 * If this write is not off the end of the file
		 * we need to account for overwrites/unref.
		 */
		if (start <= dn->dn_maxblkid) {
			for (int l = 0; l < DN_MAX_LEVELS; l++)
				history[l] = -1ULL;
		}
		while (start <= dn->dn_maxblkid) {
			dmu_buf_impl_t *db;

			rw_enter(&dn->dn_struct_rwlock, RW_READER);
			err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db);
			rw_exit(&dn->dn_struct_rwlock);

			if (err) {
				txh->txh_tx->tx_err = err;
				return;
			}

			dmu_tx_count_twig(txh, dn, db, 0, start, B_FALSE,
			    history);
			dbuf_rele(db, FTAG);
			if (++start > end) {
				/*
				 * Account for new indirects appearing
				 * before this IO gets assigned into a txg.
				 */
				bits = 64 - min_bs;
				epbs = min_ibs - SPA_BLKPTRSHIFT;
				for (bits -= epbs * (nlvls - 1);
				    bits >= 0; bits -= epbs)
					txh->txh_fudge += 1ULL << max_ibs;
				goto out;
			}
			off += delta;
			if (len >= delta)
				len -= delta;
			delta = dn->dn_datablksz;
		}
	}

	/*
	 * 'end' is the last thing we will access, not one past.
	 * This way we won't overflow when accessing the last byte.
	 */
	start = P2ALIGN(off, 1ULL << max_bs);
	end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1;
	txh->txh_space_towrite += end - start + 1;

	start >>= min_bs;
	end >>= min_bs;

	epbs = min_ibs - SPA_BLKPTRSHIFT;

	/*
	 * The object contains at most 2^(64 - min_bs) blocks,
	 * and each indirect level maps 2^epbs.
	 */
	for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
		start >>= epbs;
		end >>= epbs;
		ASSERT3U(end, >=, start);
		txh->txh_space_towrite += (end - start + 1) << max_ibs;
		if (start != 0) {
			/*
			 * We also need a new blkid=0 indirect block
			 * to reference any existing file data.
			 */
			txh->txh_space_towrite += 1ULL << max_ibs;
		}
	}

out:
	if (txh->txh_space_towrite + txh->txh_space_tooverwrite >
	    2 * DMU_MAX_ACCESS)
		err = EFBIG;

	if (err)
		txh->txh_tx->tx_err = err;
}

static void
dmu_tx_count_dnode(dmu_tx_hold_t *txh)
{
	dnode_t *dn = txh->txh_dnode;
	dnode_t *mdn = txh->txh_tx->tx_objset->os_meta_dnode;
	uint64_t space = mdn->dn_datablksz +
	    ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);

	if (dn && dn->dn_dbuf->db_blkptr &&
	    dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
	    dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) {
		txh->txh_space_tooverwrite += space;
		txh->txh_space_tounref += space;
	} else {
		txh->txh_space_towrite += space;
		if (dn && dn->dn_dbuf->db_blkptr)
			txh->txh_space_tounref += space;
	}
}

void
dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);
	ASSERT(len < DMU_MAX_ACCESS);
	ASSERT(len == 0 || UINT64_MAX - off >= len - 1);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_WRITE, off, len);
	if (txh == NULL)
		return;

	dmu_tx_count_write(txh, off, len);
	dmu_tx_count_dnode(txh);
}
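
/*
 * Illustrative sketch (not part of the original sources): a typical
 * consumer of dmu_tx_hold_write() follows the create -> hold -> assign ->
 * modify -> commit pattern.  The object number, offsets, and buffer below
 * are placeholders.
 *
 *	dmu_tx_t *tx = dmu_tx_create(os);
 *	dmu_tx_hold_bonus(tx, object);
 *	dmu_tx_hold_write(tx, object, off, len);
 *	error = dmu_tx_assign(tx, TXG_WAIT);
 *	if (error != 0) {
 *		dmu_tx_abort(tx);
 *		return (error);
 *	}
 *	dmu_write(os, object, off, len, buf, tx);
 *	dmu_tx_commit(tx);
 */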

static void
dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	uint64_t blkid, nblks, lastblk;
	uint64_t space = 0, unref = 0, skipped = 0;
	dnode_t *dn = txh->txh_dnode;
	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
	spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
	int epbs;

	if (dn->dn_nlevels == 0)
		return;

	/*
	 * The struct_rwlock protects us against dn_nlevels
	 * changing, in case (against all odds) we manage to dirty &
	 * sync out the changes after we check for being dirty.
	 * Also, dbuf_hold_impl() wants us to have the struct_rwlock.
	 */
	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	if (dn->dn_maxblkid == 0) {
		if (off == 0 && len >= dn->dn_datablksz) {
			blkid = 0;
			nblks = 1;
		} else {
			rw_exit(&dn->dn_struct_rwlock);
			return;
		}
	} else {
		blkid = off >> dn->dn_datablkshift;
		nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift;

		if (blkid >= dn->dn_maxblkid) {
			rw_exit(&dn->dn_struct_rwlock);
			return;
		}
		if (blkid + nblks > dn->dn_maxblkid)
			nblks = dn->dn_maxblkid - blkid;

	}
	if (dn->dn_nlevels == 1) {
		int i;
		for (i = 0; i < nblks; i++) {
			blkptr_t *bp = dn->dn_phys->dn_blkptr;
			ASSERT3U(blkid + i, <, dn->dn_nblkptr);
			bp += blkid + i;
			if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) {
				dprintf_bp(bp, "can free old%s", "");
				space += bp_get_dsize(spa, bp);
			}
			unref += BP_GET_ASIZE(bp);
		}
		nblks = 0;
	}

	/*
	 * Add in memory requirements of higher-level indirects.
	 * This assumes a worst-possible scenario for dn_nlevels.
	 */
	{
		uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs);
		int level = (dn->dn_nlevels > 1) ? 2 : 1;

		while (level++ < DN_MAX_LEVELS) {
			txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift;
			blkcnt = 1 + (blkcnt >> epbs);
		}
		ASSERT(blkcnt <= dn->dn_nblkptr);
	}

	lastblk = blkid + nblks - 1;
	while (nblks) {
		dmu_buf_impl_t *dbuf;
		uint64_t ibyte, new_blkid;
		int epb = 1 << epbs;
		int err, i, blkoff, tochk;
		blkptr_t *bp;

		ibyte = blkid << dn->dn_datablkshift;
		err = dnode_next_offset(dn,
		    DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0);
		new_blkid = ibyte >> dn->dn_datablkshift;
		if (err == ESRCH) {
			skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
			break;
		}
		if (err) {
			txh->txh_tx->tx_err = err;
			break;
		}
		if (new_blkid > lastblk) {
			skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
			break;
		}

		if (new_blkid > blkid) {
			ASSERT((new_blkid >> epbs) > (blkid >> epbs));
			skipped += (new_blkid >> epbs) - (blkid >> epbs) - 1;
			nblks -= new_blkid - blkid;
			blkid = new_blkid;
		}
		blkoff = P2PHASE(blkid, epb);
		tochk = MIN(epb - blkoff, nblks);

		err = dbuf_hold_impl(dn, 1, blkid >> epbs, FALSE, FTAG, &dbuf);
		if (err) {
			txh->txh_tx->tx_err = err;
			break;
		}

		txh->txh_memory_tohold += dbuf->db.db_size;

		/*
		 * We don't check memory_tohold against DMU_MAX_ACCESS because
		 * memory_tohold is an over-estimation (especially the >L1
		 * indirect blocks), so it could fail.  Callers should have
		 * already verified that they will not be holding too much
		 * memory.
		 */

		err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
		if (err != 0) {
			txh->txh_tx->tx_err = err;
			dbuf_rele(dbuf, FTAG);
			break;
		}

		bp = dbuf->db.db_data;
		bp += blkoff;

		for (i = 0; i < tochk; i++) {
			if (dsl_dataset_block_freeable(ds, &bp[i],
			    bp[i].blk_birth)) {
				dprintf_bp(&bp[i], "can free old%s", "");
				space += bp_get_dsize(spa, &bp[i]);
			}
			unref += BP_GET_ASIZE(bp);
		}
		dbuf_rele(dbuf, FTAG);

		blkid += tochk;
		nblks -= tochk;
	}
	rw_exit(&dn->dn_struct_rwlock);

	/* account for new level 1 indirect blocks that might show up */
	if (skipped > 0) {
		txh->txh_fudge += skipped << dn->dn_indblkshift;
		skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs);
		txh->txh_memory_tohold += skipped << dn->dn_indblkshift;
	}
	txh->txh_space_tofree += space;
	txh->txh_space_tounref += unref;
}

void
dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn;
	uint64_t start, end, i;
	int err, shift;
	zio_t *zio;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_FREE, off, len);
	if (txh == NULL)
		return;
	dn = txh->txh_dnode;

	/* first block */
	if (off != 0)
		dmu_tx_count_write(txh, off, 1);
	/* last block */
	if (len != DMU_OBJECT_END)
		dmu_tx_count_write(txh, off+len, 1);

	dmu_tx_count_dnode(txh);

	if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
		return;
	if (len == DMU_OBJECT_END)
		len = (dn->dn_maxblkid+1) * dn->dn_datablksz - off;

	/*
	 * For i/o error checking, read the first and last level-0
	 * blocks, and all the level-1 blocks.  The above count_write's
	 * have already taken care of the level-0 blocks.
	 */
	if (dn->dn_nlevels > 1) {
		shift = dn->dn_datablkshift + dn->dn_indblkshift -
		    SPA_BLKPTRSHIFT;
		start = off >> shift;
		end = dn->dn_datablkshift ? ((off+len) >> shift) : 0;

		zio = zio_root(tx->tx_pool->dp_spa,
		    NULL, NULL, ZIO_FLAG_CANFAIL);
		for (i = start; i <= end; i++) {
			uint64_t ibyte = i << shift;
			err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0);
			i = ibyte >> shift;
			if (err == ESRCH)
				break;
			if (err) {
				tx->tx_err = err;
				return;
			}

			err = dmu_tx_check_ioerr(zio, dn, 1, i);
			if (err) {
				tx->tx_err = err;
				return;
			}
		}
		err = zio_wait(zio);
		if (err) {
			tx->tx_err = err;
			return;
		}
	}

	dmu_tx_count_free(txh, off, len);
}
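
/*
 * Illustrative sketch (not part of the original sources): freeing a
 * range, or the entire object, holds the range before the free.
 * DMU_OBJECT_END covers everything from 'off' to the end of the object.
 *
 *	dmu_tx_t *tx = dmu_tx_create(os);
 *	dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
 *	error = dmu_tx_assign(tx, TXG_WAIT);
 *	if (error != 0) {
 *		dmu_tx_abort(tx);
 *		return (error);
 *	}
 *	error = dmu_free_range(os, object, 0, DMU_OBJECT_END, tx);
 *	dmu_tx_commit(tx);
 */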

void
dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn;
	uint64_t nblocks;
	int epbs, err;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_ZAP, add, (uintptr_t)name);
	if (txh == NULL)
		return;
	dn = txh->txh_dnode;

	dmu_tx_count_dnode(txh);

	if (dn == NULL) {
		/*
		 * We will be able to fit a new object's entries into one leaf
		 * block.  So there will be at most 2 blocks total,
		 * including the header block.
		 */
		dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift);
		return;
	}

	ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);

	if (dn->dn_maxblkid == 0 && !add) {
		/*
		 * If there is only one block (i.e. this is a micro-zap)
		 * and we are not adding anything, the accounting is simple.
		 */
		err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
		if (err) {
			tx->tx_err = err;
			return;
		}

		/*
		 * Use max block size here, since we don't know how much
		 * the size will change between now and the dbuf dirty call.
		 */
		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
		    &dn->dn_phys->dn_blkptr[0],
		    dn->dn_phys->dn_blkptr[0].blk_birth)) {
			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
		} else {
			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
		}
		if (dn->dn_phys->dn_blkptr[0].blk_birth)
			txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
		return;
	}

	if (dn->dn_maxblkid > 0 && name) {
		/*
		 * access the name in this fat-zap so that we'll check
		 * for i/o errors to the leaf blocks, etc.
		 */
		err = zap_lookup(dn->dn_objset, dn->dn_object, name,
		    8, 0, NULL);
		if (err == EIO) {
			tx->tx_err = err;
			return;
		}
	}

	err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
	    &txh->txh_space_towrite, &txh->txh_space_tooverwrite);

	/*
	 * If the modified blocks are scattered to the four winds,
	 * we'll have to modify an indirect twig for each.
	 */
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
		if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj)
			txh->txh_space_towrite += 3 << dn->dn_indblkshift;
		else
			txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift;
}

void
dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_BONUS, 0, 0);
	if (txh)
		dmu_tx_count_dnode(txh);
}
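
/*
 * Illustrative sketch (not part of the original sources): creating a new
 * object typically combines a bonus hold on DMU_NEW_OBJECT with a ZAP
 * hold on the directory that will name it.  'parent_obj' and 'name' are
 * placeholders.
 *
 *	dmu_tx_t *tx = dmu_tx_create(os);
 *	dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
 *	dmu_tx_hold_zap(tx, parent_obj, TRUE, name);
 *	error = dmu_tx_assign(tx, TXG_WAIT);
 *	...
 */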

void
dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
{
	dmu_tx_hold_t *txh;
	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    DMU_NEW_OBJECT, THT_SPACE, space, 0);

	txh->txh_space_towrite += space;
}

int
dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
{
	dmu_tx_hold_t *txh;
	int holds = 0;

	/*
	 * By asserting that the tx is assigned, we're counting the
	 * number of dn_tx_holds, which is the same as the number of
	 * dn_holds.  Otherwise, we'd be counting dn_holds, but
	 * dn_tx_holds could be 0.
	 */
	ASSERT(tx->tx_txg != 0);

	/* if (tx->tx_anyobj == TRUE) */
		/* return (0); */

	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		if (txh->txh_dnode && txh->txh_dnode->dn_object == object)
			holds++;
	}

	return (holds);
}

#ifdef ZFS_DEBUG
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
	dmu_tx_hold_t *txh;
	int match_object = FALSE, match_offset = FALSE;
	dnode_t *dn = db->db_dnode;

	ASSERT(tx->tx_txg != 0);
	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
	ASSERT3U(dn->dn_object, ==, db->db.db_object);

	if (tx->tx_anyobj)
		return;

	/* XXX No checking on the meta dnode for now */
	if (db->db.db_object == DMU_META_DNODE_OBJECT)
		return;

	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg);
		if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
			match_object = TRUE;
		if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
			int datablkshift = dn->dn_datablkshift ?
			    dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
			int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
			int shift = datablkshift + epbs * db->db_level;
			uint64_t beginblk = shift >= 64 ? 0 :
			    (txh->txh_arg1 >> shift);
			uint64_t endblk = shift >= 64 ? 0 :
			    ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift);
			uint64_t blkid = db->db_blkid;

			/* XXX txh_arg2 better not be zero... */

			dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
			    txh->txh_type, beginblk, endblk);

			switch (txh->txh_type) {
			case THT_WRITE:
				if (blkid >= beginblk && blkid <= endblk)
					match_offset = TRUE;
				/*
				 * We will let this hold work for the bonus
				 * or spill buffer so that we don't need to
				 * hold it when creating a new object.
				 */
				if (blkid == DMU_BONUS_BLKID ||
				    blkid == DMU_SPILL_BLKID)
					match_offset = TRUE;
				/*
				 * They might have to increase nlevels,
				 * thus dirtying the new TLIBs.  Or they
				 * might have to change the block size,
				 * thus dirtying the new lvl=0 blk=0.
				 */
				if (blkid == 0)
					match_offset = TRUE;
				break;
			case THT_FREE:
				/*
				 * We will dirty all the level 1 blocks in
				 * the free range and perhaps the first and
				 * last level 0 block.
				 */
				if (blkid >= beginblk && (blkid <= endblk ||
				    txh->txh_arg2 == DMU_OBJECT_END))
					match_offset = TRUE;
				break;
			case THT_SPILL:
				if (blkid == DMU_SPILL_BLKID)
					match_offset = TRUE;
				break;
			case THT_BONUS:
				if (blkid == DMU_BONUS_BLKID)
					match_offset = TRUE;
				break;
			case THT_ZAP:
				match_offset = TRUE;
				break;
			case THT_NEWOBJECT:
				match_object = TRUE;
				break;
			default:
				ASSERT(!"bad txh_type");
			}
		}
		if (match_object && match_offset)
			return;
	}
	panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n",
	    (u_longlong_t)db->db.db_object, db->db_level,
	    (u_longlong_t)db->db_blkid);
}
#endif

static int
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	dmu_tx_hold_t *txh;
	spa_t *spa = tx->tx_pool->dp_spa;
	uint64_t memory, asize, fsize, usize;
	uint64_t towrite, tofree, tooverwrite, tounref, tohold, fudge;

	ASSERT3U(tx->tx_txg, ==, 0);

	if (tx->tx_err)
		return (tx->tx_err);

	if (spa_suspended(spa)) {
		/*
		 * If the user has indicated a blocking failure mode
		 * then return ERESTART which will block in dmu_tx_wait().
		 * Otherwise, return EIO so that an error can get
		 * propagated back to the VOP calls.
		 *
		 * Note that we always honor the txg_how flag regardless
		 * of the failuremode setting.
		 */
		if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
		    txg_how != TXG_WAIT)
			return (EIO);

		return (ERESTART);
	}

	tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
	tx->tx_needassign_txh = NULL;

	/*
	 * NB: No error returns are allowed after txg_hold_open, but
	 * before processing the dnode holds, due to the
	 * dmu_tx_unassign() logic.
	 */

	towrite = tofree = tooverwrite = tounref = tohold = fudge = 0;
	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		dnode_t *dn = txh->txh_dnode;
		if (dn != NULL) {
			mutex_enter(&dn->dn_mtx);
			if (dn->dn_assigned_txg == tx->tx_txg - 1) {
				mutex_exit(&dn->dn_mtx);
				tx->tx_needassign_txh = txh;
				return (ERESTART);
			}
			if (dn->dn_assigned_txg == 0)
				dn->dn_assigned_txg = tx->tx_txg;
			ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
		towrite += txh->txh_space_towrite;
		tofree += txh->txh_space_tofree;
		tooverwrite += txh->txh_space_tooverwrite;
		tounref += txh->txh_space_tounref;
		tohold += txh->txh_memory_tohold;
		fudge += txh->txh_fudge;
	}

	/*
	 * NB: This check must be after we've held the dnodes, so that
	 * the dmu_tx_unassign() logic will work properly
	 */
	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
		return (ERESTART);

	/*
	 * If a snapshot has been taken since we made our estimates,
	 * assume that we won't be able to free or overwrite anything.
	 */
	if (tx->tx_objset &&
	    dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) >
	    tx->tx_lastsnap_txg) {
		towrite += tooverwrite;
		tooverwrite = tofree = 0;
	}

	/* needed allocation: worst-case estimate of write space */
	asize = spa_get_asize(tx->tx_pool->dp_spa, towrite + tooverwrite);
	/* freed space estimate: worst-case overwrite + free estimate */
	fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree;
	/* convert unrefd space to worst-case estimate */
	usize = spa_get_asize(tx->tx_pool->dp_spa, tounref);
	/* calculate memory footprint estimate */
	memory = towrite + tooverwrite + tohold;

#ifdef ZFS_DEBUG
	/*
	 * Add in 'tohold' to account for our dirty holds on this memory
	 * XXX - the "fudge" factor is to account for skipped blocks that
	 * we missed because dnode_next_offset() misses in-core-only blocks.
	 */
	tx->tx_space_towrite = asize +
	    spa_get_asize(tx->tx_pool->dp_spa, tohold + fudge);
	tx->tx_space_tofree = tofree;
	tx->tx_space_tooverwrite = tooverwrite;
	tx->tx_space_tounref = tounref;
#endif

	if (tx->tx_dir && asize != 0) {
		int err = dsl_dir_tempreserve_space(tx->tx_dir, memory,
		    asize, fsize, usize, &tx->tx_tempreserve_cookie, tx);
		if (err)
			return (err);
	}

	return (0);
}

static void
dmu_tx_unassign(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	if (tx->tx_txg == 0)
		return;

	txg_rele_to_quiesce(&tx->tx_txgh);

	for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		dnode_t *dn = txh->txh_dnode;

		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			dn->dn_assigned_txg = 0;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
	}

	txg_rele_to_sync(&tx->tx_txgh);

	tx->tx_lasttried_txg = tx->tx_txg;
	tx->tx_txg = 0;
}

/*
 * Assign tx to a transaction group.  txg_how can be one of:
 *
 * (1)	TXG_WAIT.  If the current open txg is full, waits until there's
 *	a new one.  This should be used when you're not holding locks.
 *	It will only fail if we're truly out of space (or over quota).
 *
 * (2)	TXG_NOWAIT.  If we can't assign into the current open txg without
 *	blocking, returns immediately with ERESTART.  This should be used
 *	whenever you're holding locks.  On an ERESTART error, the caller
 *	should drop locks, do a dmu_tx_wait(tx), and try again.
 *
 * (3)	A specific txg.  Use this if you need to ensure that multiple
 *	transactions all sync in the same txg.  Like TXG_NOWAIT, it
 *	returns ERESTART if it can't assign you into the requested txg.
 */
int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	int err;

	ASSERT(tx->tx_txg == 0);
	ASSERT(txg_how != 0);
	ASSERT(!dsl_pool_sync_context(tx->tx_pool));

	while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
		dmu_tx_unassign(tx);

		if (err != ERESTART || txg_how != TXG_WAIT)
			return (err);

		dmu_tx_wait(tx);
	}

	txg_rele_to_quiesce(&tx->tx_txgh);

	return (0);
}
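
/*
 * Illustrative sketch (not part of the original sources) of the
 * TXG_NOWAIT retry pattern described above: on ERESTART the caller drops
 * its locks, waits, and retries from the top.
 *
 * top:
 *	tx = dmu_tx_create(os);
 *	dmu_tx_hold_write(tx, object, off, len);
 *	err = dmu_tx_assign(tx, TXG_NOWAIT);
 *	if (err != 0) {
 *		if (err == ERESTART) {
 *			dmu_tx_wait(tx);
 *			dmu_tx_abort(tx);
 *			goto top;
 *		}
 *		dmu_tx_abort(tx);
 *		return (err);
 *	}
 *	... perform the modifications covered by the holds ...
 *	dmu_tx_commit(tx);
 */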

void
dmu_tx_wait(dmu_tx_t *tx)
{
	spa_t *spa = tx->tx_pool->dp_spa;

	ASSERT(tx->tx_txg == 0);

	/*
	 * It's possible that the pool has become active after this thread
	 * has tried to obtain a tx.  If that's the case then its
	 * tx_lasttried_txg would not have been assigned.
	 */
	if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
		txg_wait_synced(tx->tx_pool, spa_last_synced_txg(spa) + 1);
	} else if (tx->tx_needassign_txh) {
		dnode_t *dn = tx->tx_needassign_txh->txh_dnode;

		mutex_enter(&dn->dn_mtx);
		while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
			cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
		mutex_exit(&dn->dn_mtx);
		tx->tx_needassign_txh = NULL;
	} else {
		txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
	}
}

void
dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
{
#ifdef ZFS_DEBUG
	if (tx->tx_dir == NULL || delta == 0)
		return;

	if (delta > 0) {
		ASSERT3U(refcount_count(&tx->tx_space_written) + delta, <=,
		    tx->tx_space_towrite);
		(void) refcount_add_many(&tx->tx_space_written, delta, NULL);
	} else {
		(void) refcount_add_many(&tx->tx_space_freed, -delta, NULL);
	}
#endif
}

void
dmu_tx_commit(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg != 0);

	while (txh = list_head(&tx->tx_holds)) {
		dnode_t *dn = txh->txh_dnode;

		list_remove(&tx->tx_holds, txh);
		kmem_free(txh, sizeof (dmu_tx_hold_t));
		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			dn->dn_assigned_txg = 0;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
		dnode_rele(dn, tx);
	}

	if (tx->tx_tempreserve_cookie)
		dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);

	if (!list_is_empty(&tx->tx_callbacks))
		txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks);

	if (tx->tx_anyobj == FALSE)
		txg_rele_to_sync(&tx->tx_txgh);

	list_destroy(&tx->tx_callbacks);
	list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
	dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
	    tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
	    tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
	refcount_destroy_many(&tx->tx_space_written,
	    refcount_count(&tx->tx_space_written));
	refcount_destroy_many(&tx->tx_space_freed,
	    refcount_count(&tx->tx_space_freed));
#endif
	kmem_free(tx, sizeof (dmu_tx_t));
}

void
dmu_tx_abort(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);

	while (txh = list_head(&tx->tx_holds)) {
		dnode_t *dn = txh->txh_dnode;

		list_remove(&tx->tx_holds, txh);
		kmem_free(txh, sizeof (dmu_tx_hold_t));
		if (dn != NULL)
			dnode_rele(dn, tx);
	}

	/*
	 * Call any registered callbacks with an error code.
	 */
	if (!list_is_empty(&tx->tx_callbacks))
		dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED);

	list_destroy(&tx->tx_callbacks);
	list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
	refcount_destroy_many(&tx->tx_space_written,
	    refcount_count(&tx->tx_space_written));
	refcount_destroy_many(&tx->tx_space_freed,
	    refcount_count(&tx->tx_space_freed));
#endif
	kmem_free(tx, sizeof (dmu_tx_t));
}

uint64_t
dmu_tx_get_txg(dmu_tx_t *tx)
{
	ASSERT(tx->tx_txg != 0);
	return (tx->tx_txg);
}

void
dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
{
	dmu_tx_callback_t *dcb;

	dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP);

	dcb->dcb_func = func;
	dcb->dcb_data = data;

	list_insert_tail(&tx->tx_callbacks, dcb);
}

/*
 * Call all the commit callbacks on a list, with a given error code.
 */
void
dmu_tx_do_callbacks(list_t *cb_list, int error)
{
	dmu_tx_callback_t *dcb;

	while (dcb = list_head(cb_list)) {
		list_remove(cb_list, dcb);
		dcb->dcb_func(dcb->dcb_data, error);
		kmem_free(dcb, sizeof (dmu_tx_callback_t));
	}
}
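
/*
 * Illustrative sketch (not part of the original sources): a commit
 * callback registered on an assigned tx is invoked after the transaction
 * group syncs (error 0 in the common case) or when the tx is aborted
 * (ECANCELED, as in dmu_tx_abort() above).  'my_cb_state_t' and
 * 'my_commit_cb' are placeholder names.
 *
 *	static void
 *	my_commit_cb(void *arg, int error)
 *	{
 *		my_cb_state_t *cbs = arg;
 *
 *		if (error == 0)
 *			cbs->done = B_TRUE;
 *		kmem_free(cbs, sizeof (my_cb_state_t));
 *	}
 *
 *	...
 *	dmu_tx_callback_register(tx, my_commit_cb, cbs);
 *	dmu_tx_commit(tx);
 */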

/*
 * Interface to hold a bunch of attributes.
 * Used for creating new files.
 * attrsize is the total size of all attributes
 * to be added during object creation
 *
 * For updating/adding a single attribute dmu_tx_hold_sa() should be used.
 */

/*
 * Hold the necessary attribute name for attribute registration.
 * This should be a very rare case where this is needed.  If it does
 * happen it would only happen on the first write to the file system.
 */
static void
dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx)
{
	int i;

	if (!sa->sa_need_attr_registration)
		return;

	for (i = 0; i != sa->sa_num_attrs; i++) {
		if (!sa->sa_attr_table[i].sa_registered) {
			if (sa->sa_reg_attr_obj)
				dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj,
				    B_TRUE, sa->sa_attr_table[i].sa_name);
			else
				dmu_tx_hold_zap(tx, DMU_NEW_OBJECT,
				    B_TRUE, sa->sa_attr_table[i].sa_name);
		}
	}
}


void
dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
{
	dnode_t *dn;
	dmu_tx_hold_t *txh;
	blkptr_t *bp;

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
	    THT_SPILL, 0, 0);

	dn = txh->txh_dnode;

	if (dn == NULL)
		return;

	/* If blkptr doesn't exist then add space to towrite */
	bp = &dn->dn_phys->dn_spill;
	if (BP_IS_HOLE(bp)) {
		txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
		txh->txh_space_tounref = 0;
	} else {
		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
		    bp, bp->blk_birth))
			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
		else
			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
		if (bp->blk_birth)
			txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
	}
}

void
dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize)
{
	sa_os_t *sa = tx->tx_objset->os_sa;

	dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);

	if (tx->tx_objset->os_sa->sa_master_obj == 0)
		return;

	if (tx->tx_objset->os_sa->sa_layout_attr_obj)
		dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
	else {
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
	}

	dmu_tx_sa_registration_hold(sa, tx);

	if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill)
		return;

	(void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT,
	    THT_SPILL, 0, 0);
}
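
/*
 * Illustrative sketch (not part of the original sources): when creating a
 * file whose initial system attributes may not fit in the bonus buffer,
 * the create-time SA hold is taken alongside the usual directory holds.
 * 'parent_obj', 'name', and 'sa_size' are placeholders.
 *
 *	dmu_tx_t *tx = dmu_tx_create(os);
 *	dmu_tx_hold_sa_create(tx, sa_size);
 *	dmu_tx_hold_zap(tx, parent_obj, TRUE, name);
 *	error = dmu_tx_assign(tx, TXG_NOWAIT);
 *	...
 */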

/*
 * Hold SA attribute
 *
 * dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *, attribute, add, size)
 *
 * variable_size is the total size of all variable sized attributes
 * passed to this function.  It is not the total size of all
 * variable size attributes that *may* exist on this object.
 */
void
dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
{
	uint64_t object;
	sa_os_t *sa = tx->tx_objset->os_sa;

	ASSERT(hdl != NULL);

	object = sa_handle_object(hdl);

	dmu_tx_hold_bonus(tx, object);

	if (tx->tx_objset->os_sa->sa_master_obj == 0)
		return;

	if (tx->tx_objset->os_sa->sa_reg_attr_obj == 0 ||
	    tx->tx_objset->os_sa->sa_layout_attr_obj == 0) {
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
	}

	dmu_tx_sa_registration_hold(sa, tx);

	if (may_grow && tx->tx_objset->os_sa->sa_layout_attr_obj)
		dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);

	if (sa->sa_force_spill || may_grow || hdl->sa_spill ||
	    ((dmu_buf_impl_t *)hdl->sa_bonus)->db_dnode->dn_have_spill) {
		ASSERT(tx->tx_txg == 0);
		dmu_tx_hold_spill(tx, object);
	}
}
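
/*
 * Illustrative sketch (not part of the original sources): updating an
 * existing file's attributes through an SA handle.  'hdl', 'attr', and
 * 'value' are placeholders; the handle is assumed to come from
 * sa_handle_get() or an equivalent lookup.
 *
 *	dmu_tx_t *tx = dmu_tx_create(os);
 *	dmu_tx_hold_sa(tx, hdl, B_FALSE);
 *	error = dmu_tx_assign(tx, TXG_WAIT);
 *	if (error != 0) {
 *		dmu_tx_abort(tx);
 *		return (error);
 *	}
 *	(void) sa_update(hdl, attr, &value, sizeof (value), tx);
 *	dmu_tx_commit(tx);
 */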