/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h> /* for dsl_dataset_block_freeable() */
#include <sys/dsl_dir.h> /* for dsl_dir_tempreserve_*() */
#include <sys/dsl_pool.h>
#include <sys/zap_impl.h> /* for fzap_default_block_shift */
#include <sys/spa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/zfs_context.h>
#include <sys/varargs.h>

typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
    uint64_t arg1, uint64_t arg2);


dmu_tx_t *
dmu_tx_create_dd(dsl_dir_t *dd)
{
	dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
	tx->tx_dir = dd;
	if (dd)
		tx->tx_pool = dd->dd_pool;
	list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
	    offsetof(dmu_tx_hold_t, txh_node));
	list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t),
	    offsetof(dmu_tx_callback_t, dcb_node));
#ifdef ZFS_DEBUG
	refcount_create(&tx->tx_space_written);
	refcount_create(&tx->tx_space_freed);
#endif
	return (tx);
}

dmu_tx_t *
dmu_tx_create(objset_t *os)
{
	dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir);
	tx->tx_objset = os;
	tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset);
	return (tx);
}

dmu_tx_t *
dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg)
{
	dmu_tx_t *tx = dmu_tx_create_dd(NULL);

	ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
	tx->tx_pool = dp;
	tx->tx_txg = txg;
	tx->tx_anyobj = TRUE;

	return (tx);
}

int
dmu_tx_is_syncing(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}

int
dmu_tx_private_ok(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}

static dmu_tx_hold_t *
dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
    enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn = NULL;
	int err;

	if (object != DMU_NEW_OBJECT) {
		err = dnode_hold(os, object, tx, &dn);
		if (err) {
			tx->tx_err = err;
			return (NULL);
		}

		if (err == 0 && tx->tx_txg != 0) {
			mutex_enter(&dn->dn_mtx);
			/*
			 * dn->dn_assigned_txg == tx->tx_txg doesn't pose a
			 * problem, but there's no way for it to happen (for
			 * now, at least).
			 */
			ASSERT(dn->dn_assigned_txg == 0);
			dn->dn_assigned_txg = tx->tx_txg;
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
	}

	txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
	txh->txh_tx = tx;
	txh->txh_dnode = dn;
#ifdef ZFS_DEBUG
	txh->txh_type = type;
	txh->txh_arg1 = arg1;
	txh->txh_arg2 = arg2;
#endif
	list_insert_tail(&tx->tx_holds, txh);

	return (txh);
}

void
dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
{
	/*
	 * If we're syncing, they can manipulate any object anyhow, and
	 * the hold on the dnode_t can cause problems.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		(void) dmu_tx_hold_object_impl(tx, os,
		    object, THT_NEWOBJECT, 0, 0);
	}
}

static int
dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
{
	int err;
	dmu_buf_impl_t *db;

	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	db = dbuf_hold_level(dn, level, blkid, FTAG);
	rw_exit(&dn->dn_struct_rwlock);
	if (db == NULL)
		return (EIO);
	err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH);
	dbuf_rele(db, FTAG);
	return (err);
}

static void
dmu_tx_count_twig(dmu_tx_hold_t *txh, dnode_t *dn, dmu_buf_impl_t *db,
    int level, uint64_t blkid, boolean_t freeable, uint64_t *history)
{
	objset_t *os = dn->dn_objset;
	dsl_dataset_t *ds = os->os_dsl_dataset;
	int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	dmu_buf_impl_t *parent = NULL;
	blkptr_t *bp = NULL;
	uint64_t space;

	if (level >= dn->dn_nlevels || history[level] == blkid)
		return;

	history[level] = blkid;

	space = (level == 0) ? dn->dn_datablksz : (1ULL << dn->dn_indblkshift);

	if (db == NULL || db == dn->dn_dbuf) {
		ASSERT(level != 0);
		db = NULL;
	} else {
		ASSERT(db->db_dnode == dn);
		ASSERT(db->db_level == level);
		ASSERT(db->db.db_size == space);
		ASSERT(db->db_blkid == blkid);
		bp = db->db_blkptr;
		parent = db->db_parent;
	}

	freeable = (bp && (freeable ||
	    dsl_dataset_block_freeable(ds, bp->blk_birth)));

	if (freeable)
		txh->txh_space_tooverwrite += space;
	else
		txh->txh_space_towrite += space;
	if (bp)
		txh->txh_space_tounref += bp_get_dsize(os->os_spa, bp);

	dmu_tx_count_twig(txh, dn, parent, level + 1,
	    blkid >> epbs, freeable, history);
}

/* ARGSUSED */
static void
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	dnode_t *dn = txh->txh_dnode;
	uint64_t start, end, i;
	int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;
	int err = 0;

	if (len == 0)
		return;

	min_bs = SPA_MINBLOCKSHIFT;
	max_bs = SPA_MAXBLOCKSHIFT;
	min_ibs = DN_MIN_INDBLKSHIFT;
	max_ibs = DN_MAX_INDBLKSHIFT;

	if (dn) {
		uint64_t history[DN_MAX_LEVELS];
		int nlvls = dn->dn_nlevels;
		int delta;

		/*
		 * For i/o error checking, read the first and last level-0
		 * blocks (if they are not aligned), and all the level-1 blocks.
		 */
		if (dn->dn_maxblkid == 0) {
			delta = dn->dn_datablksz;
			start = (off < dn->dn_datablksz) ? 0 : 1;
			end = (off+len <= dn->dn_datablksz) ?
			    0 : 1;
			if (start == 0 && (off > 0 || len < dn->dn_datablksz)) {
				err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
				if (err)
					goto out;
				delta -= off;
			}
		} else {
			zio_t *zio = zio_root(dn->dn_objset->os_spa,
			    NULL, NULL, ZIO_FLAG_CANFAIL);

			/* first level-0 block */
			start = off >> dn->dn_datablkshift;
			if (P2PHASE(off, dn->dn_datablksz) ||
			    len < dn->dn_datablksz) {
				err = dmu_tx_check_ioerr(zio, dn, 0, start);
				if (err)
					goto out;
			}

			/* last level-0 block */
			end = (off+len-1) >> dn->dn_datablkshift;
			if (end != start && end <= dn->dn_maxblkid &&
			    P2PHASE(off+len, dn->dn_datablksz)) {
				err = dmu_tx_check_ioerr(zio, dn, 0, end);
				if (err)
					goto out;
			}

			/* level-1 blocks */
			if (nlvls > 1) {
				int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
				for (i = (start>>shft)+1; i < end>>shft; i++) {
					err = dmu_tx_check_ioerr(zio, dn, 1, i);
					if (err)
						goto out;
				}
			}

			err = zio_wait(zio);
			if (err)
				goto out;
			delta = P2NPHASE(off, dn->dn_datablksz);
		}

		if (dn->dn_maxblkid > 0) {
			/*
			 * The blocksize can't change,
			 * so we can make a more precise estimate.
			 */
			ASSERT(dn->dn_datablkshift != 0);
			min_bs = max_bs = dn->dn_datablkshift;
			min_ibs = max_ibs = dn->dn_indblkshift;
		} else if (dn->dn_indblkshift > max_ibs) {
			/*
			 * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
			 * the code will still work correctly on older pools.
			 */
			min_ibs = max_ibs = dn->dn_indblkshift;
		}

		/*
		 * If this write is not off the end of the file
		 * we need to account for overwrites/unref.
		 */
		if (start <= dn->dn_maxblkid) {
			for (int l = 0; l < DN_MAX_LEVELS; l++)
				history[l] = -1ULL;
		}
		while (start <= dn->dn_maxblkid) {
			dmu_buf_impl_t *db;

			rw_enter(&dn->dn_struct_rwlock, RW_READER);
			err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db);
			rw_exit(&dn->dn_struct_rwlock);

			if (err) {
				txh->txh_tx->tx_err = err;
				return;
			}

			dmu_tx_count_twig(txh, dn, db, 0, start, B_FALSE,
			    history);
			dbuf_rele(db, FTAG);
			if (++start > end) {
				/*
				 * Account for new indirects appearing
				 * before this IO gets assigned into a txg.
				 */
				bits = 64 - min_bs;
				epbs = min_ibs - SPA_BLKPTRSHIFT;
				for (bits -= epbs * (nlvls - 1);
				    bits >= 0; bits -= epbs)
					txh->txh_fudge += 1ULL << max_ibs;
				goto out;
			}
			off += delta;
			if (len >= delta)
				len -= delta;
			delta = dn->dn_datablksz;
		}
	}

	/*
	 * 'end' is the last thing we will access, not one past.
	 * This way we won't overflow when accessing the last byte.
	 */
	start = P2ALIGN(off, 1ULL << max_bs);
	end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1;
	txh->txh_space_towrite += end - start + 1;

	start >>= min_bs;
	end >>= min_bs;

	epbs = min_ibs - SPA_BLKPTRSHIFT;

	/*
	 * The object contains at most 2^(64 - min_bs) blocks,
	 * and each indirect level maps 2^epbs.
	 */
	for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
		start >>= epbs;
		end >>= epbs;
		ASSERT3U(end, >=, start);
		txh->txh_space_towrite += (end - start + 1) << max_ibs;
		if (start != 0) {
			/*
			 * We also need a new blkid=0 indirect block
			 * to reference any existing file data.
			 */
			txh->txh_space_towrite += 1ULL << max_ibs;
		}
	}

out:
	if (txh->txh_space_towrite + txh->txh_space_tooverwrite >
	    2 * DMU_MAX_ACCESS)
		err = EFBIG;

	if (err)
		txh->txh_tx->tx_err = err;
}

static void
dmu_tx_count_dnode(dmu_tx_hold_t *txh)
{
	dnode_t *dn = txh->txh_dnode;
	dnode_t *mdn = txh->txh_tx->tx_objset->os_meta_dnode;
	uint64_t space = mdn->dn_datablksz +
	    ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);

	if (dn && dn->dn_dbuf->db_blkptr &&
	    dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
	    dn->dn_dbuf->db_blkptr->blk_birth)) {
		txh->txh_space_tooverwrite += space;
		txh->txh_space_tounref += space;
	} else {
		txh->txh_space_towrite += space;
		if (dn && dn->dn_dbuf->db_blkptr)
			txh->txh_space_tounref += space;
	}
}

void
dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);
	ASSERT(len < DMU_MAX_ACCESS);
	ASSERT(len == 0 || UINT64_MAX - off >= len - 1);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_WRITE, off, len);
	if (txh == NULL)
		return;

	dmu_tx_count_write(txh, off, len);
	dmu_tx_count_dnode(txh);
}

static void
dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	uint64_t blkid, nblks, lastblk;
	uint64_t space = 0, unref = 0, skipped = 0;
	dnode_t *dn = txh->txh_dnode;
	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
	spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
	int epbs;

	if (dn->dn_nlevels == 0)
		return;

	/*
	 * The struct_rwlock protects us against dn_nlevels
	 * changing, in case (against all odds) we manage to dirty &
	 * sync out the changes after we check for being dirty.
	 * Also, dbuf_hold_impl() wants us to have the struct_rwlock.
	 */
	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	if (dn->dn_maxblkid == 0) {
		if (off == 0 && len >= dn->dn_datablksz) {
			blkid = 0;
			nblks = 1;
		} else {
			rw_exit(&dn->dn_struct_rwlock);
			return;
		}
	} else {
		blkid = off >> dn->dn_datablkshift;
		nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift;

		if (blkid >= dn->dn_maxblkid) {
			rw_exit(&dn->dn_struct_rwlock);
			return;
		}
		if (blkid + nblks > dn->dn_maxblkid)
			nblks = dn->dn_maxblkid - blkid;

	}
	if (dn->dn_nlevels == 1) {
		int i;
		for (i = 0; i < nblks; i++) {
			blkptr_t *bp = dn->dn_phys->dn_blkptr;
			ASSERT3U(blkid + i, <, dn->dn_nblkptr);
			bp += blkid + i;
			if (dsl_dataset_block_freeable(ds, bp->blk_birth)) {
				dprintf_bp(bp, "can free old%s", "");
				space += bp_get_dsize(spa, bp);
			}
			unref += BP_GET_ASIZE(bp);
		}
		nblks = 0;
	}

	/*
	 * Add in memory requirements of higher-level indirects.
	 * This assumes a worst-possible scenario for dn_nlevels.
	 */
	{
		uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs);
		int level = (dn->dn_nlevels > 1) ?
		    2 : 1;

		while (level++ < DN_MAX_LEVELS) {
			txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift;
			blkcnt = 1 + (blkcnt >> epbs);
		}
		ASSERT(blkcnt <= dn->dn_nblkptr);
	}

	lastblk = blkid + nblks - 1;
	while (nblks) {
		dmu_buf_impl_t *dbuf;
		uint64_t ibyte, new_blkid;
		int epb = 1 << epbs;
		int err, i, blkoff, tochk;
		blkptr_t *bp;

		ibyte = blkid << dn->dn_datablkshift;
		err = dnode_next_offset(dn,
		    DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0);
		new_blkid = ibyte >> dn->dn_datablkshift;
		if (err == ESRCH) {
			skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
			break;
		}
		if (err) {
			txh->txh_tx->tx_err = err;
			break;
		}
		if (new_blkid > lastblk) {
			skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
			break;
		}

		if (new_blkid > blkid) {
			ASSERT((new_blkid >> epbs) > (blkid >> epbs));
			skipped += (new_blkid >> epbs) - (blkid >> epbs) - 1;
			nblks -= new_blkid - blkid;
			blkid = new_blkid;
		}
		blkoff = P2PHASE(blkid, epb);
		tochk = MIN(epb - blkoff, nblks);

		err = dbuf_hold_impl(dn, 1, blkid >> epbs, FALSE, FTAG, &dbuf);
		if (err) {
			txh->txh_tx->tx_err = err;
			break;
		}

		txh->txh_memory_tohold += dbuf->db.db_size;

		/*
		 * We don't check memory_tohold against DMU_MAX_ACCESS because
		 * memory_tohold is an over-estimation (especially the >L1
		 * indirect blocks), so it could fail.  Callers should have
		 * already verified that they will not be holding too much
		 * memory.
		 */

		err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
		if (err != 0) {
			txh->txh_tx->tx_err = err;
			dbuf_rele(dbuf, FTAG);
			break;
		}

		bp = dbuf->db.db_data;
		bp += blkoff;

		for (i = 0; i < tochk; i++) {
			if (dsl_dataset_block_freeable(ds, bp[i].blk_birth)) {
				dprintf_bp(&bp[i], "can free old%s", "");
				space += bp_get_dsize(spa, &bp[i]);
			}
			unref += BP_GET_ASIZE(bp);
		}
		dbuf_rele(dbuf, FTAG);

		blkid += tochk;
		nblks -= tochk;
	}
	rw_exit(&dn->dn_struct_rwlock);

	/* account for new level 1 indirect blocks that might show up */
	if (skipped > 0) {
		txh->txh_fudge += skipped << dn->dn_indblkshift;
		skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs);
		txh->txh_memory_tohold += skipped << dn->dn_indblkshift;
	}
	txh->txh_space_tofree += space;
	txh->txh_space_tounref += unref;
}

void
dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn;
	uint64_t start, end, i;
	int err, shift;
	zio_t *zio;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_FREE, off, len);
	if (txh == NULL)
		return;
	dn = txh->txh_dnode;

	/* first block */
	if (off != 0)
		dmu_tx_count_write(txh, off, 1);
	/* last block */
	if (len != DMU_OBJECT_END)
		dmu_tx_count_write(txh, off+len, 1);

	dmu_tx_count_dnode(txh);

	if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
		return;
	if (len == DMU_OBJECT_END)
		len = (dn->dn_maxblkid+1) * dn->dn_datablksz - off;

	/*
	 * For i/o error checking, read the first and last level-0
	 * blocks, and all the level-1 blocks.  The above count_write's
	 * have already taken care of the level-0 blocks.
	 */
	if (dn->dn_nlevels > 1) {
		shift = dn->dn_datablkshift + dn->dn_indblkshift -
		    SPA_BLKPTRSHIFT;
		start = off >> shift;
		end = dn->dn_datablkshift ? ((off+len) >> shift) : 0;

		zio = zio_root(tx->tx_pool->dp_spa,
		    NULL, NULL, ZIO_FLAG_CANFAIL);
		for (i = start; i <= end; i++) {
			uint64_t ibyte = i << shift;
			err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0);
			i = ibyte >> shift;
			if (err == ESRCH)
				break;
			if (err) {
				tx->tx_err = err;
				return;
			}

			err = dmu_tx_check_ioerr(zio, dn, 1, i);
			if (err) {
				tx->tx_err = err;
				return;
			}
		}
		err = zio_wait(zio);
		if (err) {
			tx->tx_err = err;
			return;
		}
	}

	dmu_tx_count_free(txh, off, len);
}

void
dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn;
	uint64_t nblocks;
	int epbs, err;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_ZAP, add, (uintptr_t)name);
	if (txh == NULL)
		return;
	dn = txh->txh_dnode;

	dmu_tx_count_dnode(txh);

	if (dn == NULL) {
		/*
		 * We will be able to fit a new object's entries into one leaf
		 * block.  So there will be at most 2 blocks total,
		 * including the header block.
		 */
		dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift);
		return;
	}

	ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);

	if (dn->dn_maxblkid == 0 && !add) {
		/*
		 * If there is only one block (i.e. this is a micro-zap)
		 * and we are not adding anything, the accounting is simple.
		 */
		err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
		if (err) {
			tx->tx_err = err;
			return;
		}

		/*
		 * Use max block size here, since we don't know how much
		 * the size will change between now and the dbuf dirty call.
		 */
		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
		    dn->dn_phys->dn_blkptr[0].blk_birth)) {
			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
		} else {
			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
		}
		if (dn->dn_phys->dn_blkptr[0].blk_birth)
			txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
		return;
	}

	if (dn->dn_maxblkid > 0 && name) {
		/*
		 * access the name in this fat-zap so that we'll check
		 * for i/o errors to the leaf blocks, etc.
		 */
		err = zap_lookup(dn->dn_objset, dn->dn_object, name,
		    8, 0, NULL);
		if (err == EIO) {
			tx->tx_err = err;
			return;
		}
	}

	err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
	    &txh->txh_space_towrite, &txh->txh_space_tooverwrite);

	/*
	 * If the modified blocks are scattered to the four winds,
	 * we'll have to modify an indirect twig for each.
	 */
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
		if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj)
			txh->txh_space_towrite += 3 << dn->dn_indblkshift;
		else
			txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift;
}

void
dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_BONUS, 0, 0);
	if (txh)
		dmu_tx_count_dnode(txh);
}

void
dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
{
	dmu_tx_hold_t *txh;
	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    DMU_NEW_OBJECT, THT_SPACE, space, 0);

	txh->txh_space_towrite += space;
}

int
dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
{
	dmu_tx_hold_t *txh;
	int holds = 0;

	/*
	 * By asserting that the tx is assigned, we're counting the
	 * number of dn_tx_holds, which is the same as the number of
	 * dn_holds.  Otherwise, we'd be counting dn_holds, but
	 * dn_tx_holds could be 0.
	 */
	ASSERT(tx->tx_txg != 0);

	/* if (tx->tx_anyobj == TRUE) */
		/* return (0); */

	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		if (txh->txh_dnode && txh->txh_dnode->dn_object == object)
			holds++;
	}

	return (holds);
}

#ifdef ZFS_DEBUG
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
	dmu_tx_hold_t *txh;
	int match_object = FALSE, match_offset = FALSE;
	dnode_t *dn = db->db_dnode;

	ASSERT(tx->tx_txg != 0);
	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
	ASSERT3U(dn->dn_object, ==, db->db.db_object);

	if (tx->tx_anyobj)
		return;

	/* XXX No checking on the meta dnode for now */
	if (db->db.db_object == DMU_META_DNODE_OBJECT)
		return;

	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg);
		if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
			match_object = TRUE;
		if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
			int datablkshift = dn->dn_datablkshift ?
			    dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
			int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
			int shift = datablkshift + epbs * db->db_level;
			uint64_t beginblk = shift >= 64 ? 0 :
			    (txh->txh_arg1 >> shift);
			uint64_t endblk = shift >= 64 ? 0 :
			    ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift);
			uint64_t blkid = db->db_blkid;

			/* XXX txh_arg2 better not be zero... */

			dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
			    txh->txh_type, beginblk, endblk);

			switch (txh->txh_type) {
			case THT_WRITE:
				if (blkid >= beginblk && blkid <= endblk)
					match_offset = TRUE;
				/*
				 * We will let this hold work for the bonus
				 * or spill buffer so that we don't need to
				 * hold it when creating a new object.
				 */
				if (blkid == DMU_BONUS_BLKID ||
				    blkid == DMU_SPILL_BLKID)
					match_offset = TRUE;
				/*
				 * They might have to increase nlevels,
				 * thus dirtying the new TLIBs.  Or they
				 * might have to change the block size,
				 * thus dirtying the new lvl=0 blk=0.
				 */
				if (blkid == 0)
					match_offset = TRUE;
				break;
			case THT_FREE:
				/*
				 * We will dirty all the level 1 blocks in
				 * the free range and perhaps the first and
				 * last level 0 block.
				 */
				if (blkid >= beginblk && (blkid <= endblk ||
				    txh->txh_arg2 == DMU_OBJECT_END))
					match_offset = TRUE;
				break;
			case THT_SPILL:
				if (blkid == DMU_SPILL_BLKID)
					match_offset = TRUE;
				break;
			case THT_BONUS:
				if (blkid == DMU_BONUS_BLKID)
					match_offset = TRUE;
				break;
			case THT_ZAP:
				match_offset = TRUE;
				break;
			case THT_NEWOBJECT:
				match_object = TRUE;
				break;
			default:
				ASSERT(!"bad txh_type");
			}
		}
		if (match_object && match_offset)
			return;
	}
	panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n",
	    (u_longlong_t)db->db.db_object, db->db_level,
	    (u_longlong_t)db->db_blkid);
}
#endif

static int
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	dmu_tx_hold_t *txh;
	spa_t *spa = tx->tx_pool->dp_spa;
	uint64_t memory, asize, fsize, usize;
	uint64_t towrite, tofree, tooverwrite, tounref, tohold, fudge;

	ASSERT3U(tx->tx_txg, ==, 0);

	if (tx->tx_err)
		return (tx->tx_err);

	if (spa_suspended(spa)) {
		/*
		 * If the user has indicated a blocking failure mode
		 * then return ERESTART which will block in dmu_tx_wait().
		 * Otherwise, return EIO so that an error can get
		 * propagated back to the VOP calls.
		 *
		 * Note that we always honor the txg_how flag regardless
		 * of the failuremode setting.
		 */
		if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
		    txg_how != TXG_WAIT)
			return (EIO);

		return (ERESTART);
	}

	tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
	tx->tx_needassign_txh = NULL;

	/*
	 * NB: No error returns are allowed after txg_hold_open, but
	 * before processing the dnode holds, due to the
	 * dmu_tx_unassign() logic.
	 */

	towrite = tofree = tooverwrite = tounref = tohold = fudge = 0;
	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		dnode_t *dn = txh->txh_dnode;
		if (dn != NULL) {
			mutex_enter(&dn->dn_mtx);
			if (dn->dn_assigned_txg == tx->tx_txg - 1) {
				mutex_exit(&dn->dn_mtx);
				tx->tx_needassign_txh = txh;
				return (ERESTART);
			}
			if (dn->dn_assigned_txg == 0)
				dn->dn_assigned_txg = tx->tx_txg;
			ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
		towrite += txh->txh_space_towrite;
		tofree += txh->txh_space_tofree;
		tooverwrite += txh->txh_space_tooverwrite;
		tounref += txh->txh_space_tounref;
		tohold += txh->txh_memory_tohold;
		fudge += txh->txh_fudge;
	}

	/*
	 * NB: This check must be after we've held the dnodes, so that
	 * the dmu_tx_unassign() logic will work properly
	 */
	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
		return (ERESTART);

	/*
	 * If a snapshot has been taken since we made our estimates,
	 * assume that we won't be able to free or overwrite anything.
	 */
	if (tx->tx_objset &&
	    dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) >
	    tx->tx_lastsnap_txg) {
		towrite += tooverwrite;
		tooverwrite = tofree = 0;
	}

	/* needed allocation: worst-case estimate of write space */
	asize = spa_get_asize(tx->tx_pool->dp_spa, towrite + tooverwrite);
	/* freed space estimate: worst-case overwrite + free estimate */
	fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree;
	/* convert unrefd space to worst-case estimate */
	usize = spa_get_asize(tx->tx_pool->dp_spa, tounref);
	/* calculate memory footprint estimate */
	memory = towrite + tooverwrite + tohold;

#ifdef ZFS_DEBUG
	/*
	 * Add in 'tohold' to account for our dirty holds on this memory
	 * XXX - the "fudge" factor is to account for skipped blocks that
	 * we missed because dnode_next_offset() misses in-core-only blocks.
	 */
	tx->tx_space_towrite = asize +
	    spa_get_asize(tx->tx_pool->dp_spa, tohold + fudge);
	tx->tx_space_tofree = tofree;
	tx->tx_space_tooverwrite = tooverwrite;
	tx->tx_space_tounref = tounref;
#endif

	if (tx->tx_dir && asize != 0) {
		int err = dsl_dir_tempreserve_space(tx->tx_dir, memory,
		    asize, fsize, usize, &tx->tx_tempreserve_cookie, tx);
		if (err)
			return (err);
	}

	return (0);
}

static void
dmu_tx_unassign(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	if (tx->tx_txg == 0)
		return;

	txg_rele_to_quiesce(&tx->tx_txgh);

	for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		dnode_t *dn = txh->txh_dnode;

		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			dn->dn_assigned_txg = 0;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
	}

	txg_rele_to_sync(&tx->tx_txgh);

	tx->tx_lasttried_txg = tx->tx_txg;
	tx->tx_txg = 0;
}

/*
 * Assign tx to a transaction group.  txg_how can be one of:
 *
 * (1)	TXG_WAIT.  If the current open txg is full, waits until there's
 *	a new one.  This should be used when you're not holding locks.
 *	It will only fail if we're truly out of space (or over quota).
 *
 * (2)	TXG_NOWAIT.  If we can't assign into the current open txg without
 *	blocking, returns immediately with ERESTART.  This should be used
 *	whenever you're holding locks.  On an ERESTART error, the caller
 *	should drop locks, do a dmu_tx_wait(tx), and try again.
 *
 * (3)	A specific txg.  Use this if you need to ensure that multiple
 *	transactions all sync in the same txg.  Like TXG_NOWAIT, it
 *	returns ERESTART if it can't assign you into the requested txg.
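 *
 * Purely as an illustrative sketch (not part of this file's logic), the
 * TXG_NOWAIT convention described in (2) is typically used in a retry
 * loop like the one below; "os", "object", "off", "len" and "buf" are
 * hypothetical caller state:
 *
 *	top:
 *		tx = dmu_tx_create(os);
 *		dmu_tx_hold_write(tx, object, off, len);
 *		error = dmu_tx_assign(tx, TXG_NOWAIT);
 *		if (error == ERESTART) {
 *			... drop any locks held across the assignment ...
 *			dmu_tx_wait(tx);
 *			dmu_tx_abort(tx);
 *			goto top;
 *		} else if (error) {
 *			dmu_tx_abort(tx);
 *			return (error);
 *		}
 *		dmu_write(os, object, off, len, buf, tx);
 *		dmu_tx_commit(tx);
 *
 * Note that dmu_tx_wait() must be called before dmu_tx_abort(), since
 * abort frees the tx.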
 */
int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	int err;

	ASSERT(tx->tx_txg == 0);
	ASSERT(txg_how != 0);
	ASSERT(!dsl_pool_sync_context(tx->tx_pool));

	while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
		dmu_tx_unassign(tx);

		if (err != ERESTART || txg_how != TXG_WAIT)
			return (err);

		dmu_tx_wait(tx);
	}

	txg_rele_to_quiesce(&tx->tx_txgh);

	return (0);
}

void
dmu_tx_wait(dmu_tx_t *tx)
{
	spa_t *spa = tx->tx_pool->dp_spa;

	ASSERT(tx->tx_txg == 0);

	/*
	 * It's possible that the pool has become active after this thread
	 * has tried to obtain a tx.  If that's the case then its
	 * tx_lasttried_txg would not have been assigned.
	 */
	if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
		txg_wait_synced(tx->tx_pool, spa_last_synced_txg(spa) + 1);
	} else if (tx->tx_needassign_txh) {
		dnode_t *dn = tx->tx_needassign_txh->txh_dnode;

		mutex_enter(&dn->dn_mtx);
		while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
			cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
		mutex_exit(&dn->dn_mtx);
		tx->tx_needassign_txh = NULL;
	} else {
		txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
	}
}

void
dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
{
#ifdef ZFS_DEBUG
	if (tx->tx_dir == NULL || delta == 0)
		return;

	if (delta > 0) {
		ASSERT3U(refcount_count(&tx->tx_space_written) + delta, <=,
		    tx->tx_space_towrite);
		(void) refcount_add_many(&tx->tx_space_written, delta, NULL);
	} else {
		(void) refcount_add_many(&tx->tx_space_freed, -delta, NULL);
	}
#endif
}

void
dmu_tx_commit(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg != 0);

	while (txh = list_head(&tx->tx_holds)) {
		dnode_t *dn = txh->txh_dnode;

		list_remove(&tx->tx_holds, txh);
		kmem_free(txh, sizeof (dmu_tx_hold_t));
		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			dn->dn_assigned_txg = 0;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
		dnode_rele(dn, tx);
	}

	if (tx->tx_tempreserve_cookie)
		dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);

	if (!list_is_empty(&tx->tx_callbacks))
		txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks);

	if (tx->tx_anyobj == FALSE)
		txg_rele_to_sync(&tx->tx_txgh);

	list_destroy(&tx->tx_callbacks);
	list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
	dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
	    tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
	    tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
	refcount_destroy_many(&tx->tx_space_written,
	    refcount_count(&tx->tx_space_written));
	refcount_destroy_many(&tx->tx_space_freed,
	    refcount_count(&tx->tx_space_freed));
#endif
	kmem_free(tx, sizeof (dmu_tx_t));
}

void
dmu_tx_abort(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);

	while (txh = list_head(&tx->tx_holds)) {
		dnode_t *dn = txh->txh_dnode;

		list_remove(&tx->tx_holds, txh);
		kmem_free(txh, sizeof (dmu_tx_hold_t));
		if (dn != NULL)
			dnode_rele(dn, tx);
	}

	/*
	 * Call any registered callbacks with an error code.
	 */
	if (!list_is_empty(&tx->tx_callbacks))
		dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED);

	list_destroy(&tx->tx_callbacks);
	list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
	refcount_destroy_many(&tx->tx_space_written,
	    refcount_count(&tx->tx_space_written));
	refcount_destroy_many(&tx->tx_space_freed,
	    refcount_count(&tx->tx_space_freed));
#endif
	kmem_free(tx, sizeof (dmu_tx_t));
}

uint64_t
dmu_tx_get_txg(dmu_tx_t *tx)
{
	ASSERT(tx->tx_txg != 0);
	return (tx->tx_txg);
}

void
dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
{
	dmu_tx_callback_t *dcb;

	dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP);

	dcb->dcb_func = func;
	dcb->dcb_data = data;

	list_insert_tail(&tx->tx_callbacks, dcb);
}

/*
 * Call all the commit callbacks on a list, with a given error code.
 */
void
dmu_tx_do_callbacks(list_t *cb_list, int error)
{
	dmu_tx_callback_t *dcb;

	while (dcb = list_head(cb_list)) {
		list_remove(cb_list, dcb);
		dcb->dcb_func(dcb->dcb_data, error);
		kmem_free(dcb, sizeof (dmu_tx_callback_t));
	}
}

/*
 * Interface to hold a bunch of attributes.
 * Used for creating new files.
 * attrsize is the total size of all attributes
 * to be added during object creation.
 *
 * For updating/adding a single attribute dmu_tx_hold_sa() should be used.
 */

/*
 * Hold the necessary attribute name for attribute registration.
 * This should be a very rare case where it is needed.  If it does
 * happen it would only happen on the first write to the file system.
 */
static void
dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx)
{
	int i;

	if (!sa->sa_need_attr_registration)
		return;

	for (i = 0; i != sa->sa_num_attrs; i++) {
		if (!sa->sa_attr_table[i].sa_registered) {
			if (sa->sa_reg_attr_obj)
				dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj,
				    B_TRUE, sa->sa_attr_table[i].sa_name);
			else
				dmu_tx_hold_zap(tx, DMU_NEW_OBJECT,
				    B_TRUE, sa->sa_attr_table[i].sa_name);
		}
	}
}


void
dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
{
	dnode_t *dn;
	dmu_tx_hold_t *txh;
	blkptr_t *bp;

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
	    THT_SPILL, 0, 0);

	dn = txh->txh_dnode;

	if (dn == NULL)
		return;

	/* If blkptr doesn't exist then add space to towrite */
	bp = &dn->dn_phys->dn_spill;
	if (BP_IS_HOLE(bp)) {
		txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
		txh->txh_space_tounref = 0;
	} else {
		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
		    bp->blk_birth))
			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
		else
			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
		if (bp->blk_birth)
			txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
	}
}

void
dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize)
{
	sa_os_t *sa = tx->tx_objset->os_sa;

	dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);

	if (tx->tx_objset->os_sa->sa_master_obj == 0)
		return;

	if (tx->tx_objset->os_sa->sa_layout_attr_obj)
		dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
	else {
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
	}

	dmu_tx_sa_registration_hold(sa, tx);

	if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill)
		return;

	(void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT,
	    THT_SPILL, 0, 0);
}

/*
 * Hold SA attribute
 *
 * dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
 *
 * variable_size is the total size of all variable sized attributes
 * passed to this function.  It is not the total size of all
 * variable size attributes that *may* exist on this object.
1327 */ 1328 void 1329 dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) 1330 { 1331 uint64_t object; 1332 sa_os_t *sa = tx->tx_objset->os_sa; 1333 1334 ASSERT(hdl != NULL); 1335 1336 object = sa_handle_object(hdl); 1337 1338 dmu_tx_hold_bonus(tx, object); 1339 1340 if (tx->tx_objset->os_sa->sa_master_obj == 0) 1341 return; 1342 1343 if (tx->tx_objset->os_sa->sa_reg_attr_obj == 0 || 1344 tx->tx_objset->os_sa->sa_layout_attr_obj == 0) { 1345 dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS); 1346 dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY); 1347 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); 1348 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); 1349 } 1350 1351 dmu_tx_sa_registration_hold(sa, tx); 1352 1353 if (may_grow && tx->tx_objset->os_sa->sa_layout_attr_obj) 1354 dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); 1355 1356 if (sa->sa_force_spill || may_grow || hdl->sa_spill || 1357 ((dmu_buf_impl_t *)hdl->sa_bonus)->db_dnode->dn_have_spill) { 1358 ASSERT(tx->tx_txg == 0); 1359 dmu_tx_hold_spill(tx, object); 1360 } 1361 } 1362