/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
#include <sys/sunddi.h>
#include <sys/zvol.h>

static char *dsl_reaper = "the grim reaper";

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_syncfunc_t dsl_dataset_set_reservation_sync;

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

#define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)


/*
 * Figure out how much of this delta should be propagated to the dsl_dir
 * layer.  If there's a refreservation, that space has already been
 * partially accounted for in our ancestors.
 */
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}

void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}

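/*
 * Free the given block pointer if it was born after the most recent
 * snapshot; otherwise enqueue it on this dataset's deadlist.  Updates
 * the dataset's and dsl_dir's space accounting and returns the number
 * of bytes involved (or 0 for a hole).
 */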
int
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
    dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(pio != NULL);
	ASSERT(dmu_tx_is_syncing(tx));
	/* No block pointer => nothing to free */
	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(used > 0);
	if (ds == NULL) {
		int err;
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int err;
		int64_t delta;

		dprintf_bp(bp, "freeing: %s", "");
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		mutex_enter(&ds->ds_dir->dd_lock);
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
		mutex_exit(&ds->ds_dir->dd_lock);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}

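/*
 * Return the txg of this dataset's most recent snapshot, folding in a
 * possibly pending snapshot (ds_trysnap_txg).  As the comment in the
 * body explains, the result is only a conservative guess.
 */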
uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

boolean_t
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}

/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;

	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));

	unique_remove(ds->ds_fsid_guid);

	if (ds->ds_objset != NULL)
		dmu_objset_evict(ds->ds_objset);

	if (ds->ds_prev) {
		dsl_dataset_drop_ref(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	if (ds->ds_dir)
		dsl_dir_close(ds->ds_dir, ds);

	ASSERT(!list_link_active(&ds->ds_synced_link));

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_recvlock);
	mutex_destroy(&ds->ds_opening_lock);
	mutex_destroy(&ds->ds_deadlist.bpl_lock);
	rw_destroy(&ds->ds_rwlock);
	cv_destroy(&ds->ds_exclusive_cv);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

static int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
	    value, mt, NULL, 0, NULL);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_lookup(mos, snapobj, name, 8, 1, value);
	return (err);
}

static int
dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_remove_norm(mos, snapobj, name, mt, tx);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_remove(mos, snapobj, name, tx);
	return (err);
}

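/*
 * Find or construct the in-core dsl_dataset_t for the given object
 * number and add a reference to it.  The first opener initializes the
 * in-core state (locks, deadlist, dsl_dir, refreservation/refquota)
 * and attaches it to the bonus buffer; a racing opener is resolved by
 * dmu_buf_set_user_ie().
 */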
static int
dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
		    NULL);
		rw_init(&ds->ds_rwlock, 0, 0, 0);
		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the bplist if we
			 * just opened it.
			 */
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_recvlock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds, &ds->ds_prev);
			}

			if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) {
				dsl_dataset_t *origin;

				err = dsl_dataset_hold_obj(dp,
				    ds->ds_dir->dd_phys->dd_origin_obj,
				    FTAG, &origin);
				if (err == 0) {
					ds->ds_origin_txg =
					    origin->ds_phys->ds_creation_txg;
					dsl_dataset_rele(origin, FTAG);
				}
			}
		} else {
			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
				err = dsl_dataset_get_snapname(ds);
			if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
				err = zap_count(
				    ds->ds_dir->dd_pool->dp_meta_objset,
				    ds->ds_phys->ds_userrefs_obj,
				    &ds->ds_userrefs);
			}
		}

		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
			/*
			 * In sync context, we're called with either no lock
			 * or with the write lock.  If we're not syncing,
			 * we're always called with the read lock held.
			 */
			boolean_t need_lock =
			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
			    dsl_pool_sync_context(dp);

			if (need_lock)
				rw_enter(&dp->dp_config_rwlock, RW_READER);

			err = dsl_prop_get_ds(ds,
			    "refreservation", sizeof (uint64_t), 1,
			    &ds->ds_reserved, NULL);
			if (err == 0) {
				err = dsl_prop_get_ds(ds,
				    "refquota", sizeof (uint64_t), 1,
				    &ds->ds_quota, NULL);
			}

			if (need_lock)
				rw_exit(&dp->dp_config_rwlock);
		} else {
			ds->ds_reserved = ds->ds_quota = 0;
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev)
				dsl_dataset_drop_ref(ds->ds_prev, ds);
			dsl_dir_close(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_recvlock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			ds->ds_fsid_guid =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
	mutex_enter(&ds->ds_lock);
	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
		mutex_exit(&ds->ds_lock);
		dmu_buf_rele(ds->ds_dbuf, tag);
		return (ENOENT);
	}
	mutex_exit(&ds->ds_lock);
	*dsp = ds;
	return (0);
}

static int
dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/*
	 * In syncing context we don't want to hold the rwlock: there
	 * may be an existing writer waiting for sync phase to
	 * finish.  We don't need to worry about such writers, since
	 * sync phase is single-threaded, so the writer can't be
	 * doing anything while we are active.
	 */
	if (dsl_pool_sync_context(dp)) {
		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
		return (0);
	}

	/*
	 * Normal users will hold the ds_rwlock as a READER until they
	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
	 * drop their READER lock after they set the ds_owner field.
	 *
	 * If the dataset is being destroyed, the destroy thread will
	 * obtain a WRITER lock for exclusive access after it's done its
	 * open-context work and then change the ds_owner to
	 * dsl_reaper once destruction is assured.  So threads
	 * may block here temporarily, until the "destructibility" of
	 * the dataset is determined.
	 */
	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
	mutex_enter(&ds->ds_lock);
	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
		rw_exit(&dp->dp_config_rwlock);
		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
		if (DSL_DATASET_IS_DESTROYED(ds)) {
			mutex_exit(&ds->ds_lock);
			dsl_dataset_drop_ref(ds, tag);
			rw_enter(&dp->dp_config_rwlock, RW_READER);
			return (ENOENT);
		}
		rw_enter(&dp->dp_config_rwlock, RW_READER);
	}
	mutex_exit(&ds->ds_lock);
	return (0);
}

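/*
 * Look up a dataset by object number: dsl_dataset_hold_obj() takes a
 * reference; dsl_dataset_own_obj() additionally makes the caller the
 * exclusive owner, or fails with EBUSY.
 */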
int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);

	if (err)
		return (err);
	return (dsl_dataset_hold_ref(*dsp, tag));
}

int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok,
    void *tag, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
	if (err)
		return (err);
	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
		dsl_dataset_rele(*dsp, tag);
		*dsp = NULL;
		return (EBUSY);
	}
	return (0);
}

int
dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *snapname;
	uint64_t obj;
	int err = 0;

	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj)
		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
	else
		err = ENOENT;
	if (err)
		goto out;

	err = dsl_dataset_hold_ref(*dsp, tag);

	/* we may be looking for a snapshot */
	if (err == 0 && snapname != NULL) {
		dsl_dataset_t *ds = NULL;

		if (*snapname++ != '@') {
			dsl_dataset_rele(*dsp, tag);
			err = ENOENT;
			goto out;
		}

		dprintf("looking for snapshot '%s'\n", snapname);
		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
		if (err == 0)
			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
		dsl_dataset_rele(*dsp, tag);

		ASSERT3U((err == 0), ==, (ds != NULL));

		if (ds) {
			mutex_enter(&ds->ds_lock);
			if (ds->ds_snapname[0] == 0)
				(void) strlcpy(ds->ds_snapname, snapname,
				    sizeof (ds->ds_snapname));
			mutex_exit(&ds->ds_lock);
			err = dsl_dataset_hold_ref(ds, tag);
			*dsp = err ? NULL : ds;
		}
	}
out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);
	return (err);
}

int
dsl_dataset_own(const char *name, boolean_t inconsistentok,
    void *tag, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold(name, tag, dsp);
	if (err)
		return (err);
	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
		dsl_dataset_rele(*dsp, tag);
		return (EBUSY);
	}
	return (0);
}

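/*
 * Write this dataset's full name ("pool/fs[@snap]", or "mos" for the
 * meta-objset) into the caller's buffer, which must be able to hold a
 * maximum-length dataset name (MAXNAMELEN bytes).
 */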
void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			/*
			 * We use a "recursive" mutex so that we
			 * can call dprintf_ds() with ds_lock held.
			 */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

static int
dsl_dataset_namelen(dsl_dataset_t *ds)
{
	int result;

	if (ds == NULL) {
		result = 3;	/* "mos" */
	} else {
		result = dsl_dir_namelen(ds->ds_dir);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			++result;	/* adding one for the @-sign */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				result += strlen(ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				result += strlen(ds->ds_snapname);
			}
		}
	}

	return (result);
}

void
dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
{
	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
		rw_exit(&ds->ds_rwlock);
	}
	dsl_dataset_drop_ref(ds, tag);
}

void
dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
{
	ASSERT((ds->ds_owner == tag && ds->ds_dbuf) ||
	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));

	mutex_enter(&ds->ds_lock);
	ds->ds_owner = NULL;
	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
		rw_exit(&ds->ds_rwlock);
		cv_broadcast(&ds->ds_exclusive_cv);
	}
	mutex_exit(&ds->ds_lock);
	if (ds->ds_dbuf)
		dsl_dataset_drop_ref(ds, tag);
	else
		dsl_dataset_evict(ds->ds_dbuf, ds);
}

boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag)
{
	boolean_t gotit = FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_owner == NULL &&
	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
		ds->ds_owner = tag;
		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
			rw_exit(&ds->ds_rwlock);
		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
	return (gotit);
}

void
dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
{
	ASSERT3P(owner, ==, ds->ds_owner);
	if (!RW_WRITE_HELD(&ds->ds_rwlock))
		rw_enter(&ds->ds_rwlock, RW_WRITER);
}

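/*
 * Allocate and initialize the on-disk dsl_dataset_phys_t for a new
 * head dataset in the given (empty) dsl_dir, cloning from 'origin' if
 * one is supplied (or from dp_origin_snap by default).  Runs in
 * syncing context and returns the new dataset's object number.
 */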
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);

	if (origin) {
		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_used_bytes =
		    origin->ds_phys->ds_used_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;
		dsphys->ds_flags |= origin->ds_phys->ds_flags;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY(0 == zap_add_int(mos,
			    origin->ds_phys->ds_next_clones_obj,
			    dsobj, tx));
		}

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_origin_obj = origin->ds_object;
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;

	return (dsobj);
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	uint64_t dsobj, ddobj;
	dsl_dir_t *dd;

	ASSERT(lastname[0] != '@');

	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);

	dsl_deleg_set_create_perms(dd, tx, cr);

	dsl_dir_close(dd, FTAG);

	return (dsobj);
}

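/*
 * State shared between dsl_snapshots_destroy() and its per-filesystem
 * callback, dsl_snapshot_destroy_one().
 */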
struct destroyarg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	char *failed;
	boolean_t defer;
};

static int
dsl_snapshot_destroy_one(char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	int err;
	char *dsname;

	dsname = kmem_asprintf("%s@%s", name, da->snapname);
	err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds);
	strfree(dsname);
	if (err == 0) {
		struct dsl_ds_destroyarg *dsda;

		dsl_dataset_make_exclusive(ds, da->dstg);
		if (ds->ds_objset != NULL) {
			dmu_objset_evict(ds->ds_objset);
			ds->ds_objset = NULL;
		}
		dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
		dsda->ds = ds;
		dsda->defer = da->defer;
		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
		    dsl_dataset_destroy_sync, dsda, da->dstg, 0);
	} else if (err == ENOENT) {
		err = 0;
	} else {
		(void) strcpy(da->failed, name);
	}
	return (err);
}

/*
 * Destroy 'snapname' in all descendants of 'fsname'.
 */
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
{
	int err;
	struct destroyarg da;
	dsl_sync_task_t *dst;
	spa_t *spa;

	err = spa_open(fsname, &spa, FTAG);
	if (err)
		return (err);
	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	da.snapname = snapname;
	da.failed = fsname;
	da.defer = defer;

	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);

	if (err == 0)
		err = dsl_sync_task_group_wait(da.dstg);

	for (dst = list_head(&da.dstg->dstg_tasks); dst;
	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
		struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
		dsl_dataset_t *ds = dsda->ds;

		/*
		 * Return the file system name that triggered the error
		 */
		if (dst->dst_err) {
			dsl_dataset_name(ds, fsname);
			*strchr(fsname, '@') = '\0';
		}
		ASSERT3P(dsda->rm_origin, ==, NULL);
		dsl_dataset_disown(ds, da.dstg);
		kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
	}

	dsl_sync_task_group_destroy(da.dstg);
	spa_close(spa, FTAG);
	return (err);
}

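/*
 * Returns TRUE if destroying this clone should also destroy its origin
 * snapshot: the origin is down to its last clone (ds_num_children == 2),
 * has no user holds, and has been marked for deferred destruction.
 */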
static boolean_t
dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
{
	boolean_t might_destroy = B_FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
	    DS_IS_DEFER_DESTROY(ds))
		might_destroy = B_TRUE;
	mutex_exit(&ds->ds_lock);

	return (might_destroy);
}

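/*
 * If the dataset is a zvol, remove its minor node before the dataset
 * is destroyed (kernel context only).
 */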
#ifdef _KERNEL
static int
dsl_dataset_zvol_cleanup(dsl_dataset_t *ds, const char *name)
{
	int error;
	objset_t *os;

	error = dmu_objset_from_ds(ds, &os);
	if (error)
		return (error);

	if (dmu_objset_type(os) == DMU_OST_ZVOL)
		error = zvol_remove_minor(name);

	return (error);
}
#endif

/*
 * If we're removing a clone, and these three conditions are true:
 *	1) the clone's origin has no other children
 *	2) the clone's origin has no user references
 *	3) the clone's origin has been marked for deferred destruction
 * Then, prepare to remove the origin as part of this sync task group.
 */
static int
dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
{
	dsl_dataset_t *ds = dsda->ds;
	dsl_dataset_t *origin = ds->ds_prev;

	if (dsl_dataset_might_destroy_origin(origin)) {
		char *name;
		int namelen;
		int error;

		namelen = dsl_dataset_namelen(origin) + 1;
		name = kmem_alloc(namelen, KM_SLEEP);
		dsl_dataset_name(origin, name);
#ifdef _KERNEL
		error = zfs_unmount_snap(name, NULL);
		if (error) {
			kmem_free(name, namelen);
			return (error);
		}
		error = dsl_dataset_zvol_cleanup(origin, name);
		if (error) {
			kmem_free(name, namelen);
			return (error);
		}
#endif
		error = dsl_dataset_own(name, B_TRUE, tag, &origin);
		kmem_free(name, namelen);
		if (error)
			return (error);
		dsda->rm_origin = origin;
		dsl_dataset_make_exclusive(origin, tag);

		if (origin->ds_objset != NULL) {
			dmu_objset_evict(origin->ds_objset);
			origin->ds_objset = NULL;
		}
	}

	return (0);
}

/*
 * ds must be opened as OWNER.  On return (whether successful or not),
 * ds will be closed and caller can no longer dereference it.
 */
int
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dir_t *dd;
	uint64_t obj;
	struct dsl_ds_destroyarg dsda = {0};

	dsda.ds = ds;

	if (dsl_dataset_is_snapshot(ds)) {
		/* Destroying a snapshot is simpler */
		dsl_dataset_make_exclusive(ds, tag);

		if (ds->ds_objset != NULL) {
			dmu_objset_evict(ds->ds_objset);
			ds->ds_objset = NULL;
		}
		/* NOTE: defer is always B_FALSE for non-snapshots */
		dsda.defer = defer;
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    &dsda, tag, 0);
		ASSERT3P(dsda.rm_origin, ==, NULL);
		goto out;
	}

	dd = ds->ds_dir;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err)
		goto out;

	err = dmu_objset_from_ds(ds, &os);
	if (err)
		goto out;

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
	    ds->ds_phys->ds_prev_snap_txg)) {
		/*
		 * Ignore errors, if there is not enough disk space
		 * we will deal with it in dsl_dataset_destroy_sync().
		 */
		(void) dmu_free_object(os, obj);
	}

	/*
	 * We need to sync out all in-flight IO before we try to evict
	 * (the dataset evict func is trying to clear the cached entries
	 * for this dataset in the ARC).
	 */
	txg_wait_synced(dd->dd_pool, 0);

	/*
	 * If we managed to free all the objects in open
	 * context, the user space accounting should be zero.
	 */
	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
	    dmu_objset_userused_enabled(os)) {
		uint64_t count;

		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
		    count == 0);
		ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
		    count == 0);
	}

	if (err != ESRCH)
		goto out;

	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
	rw_exit(&dd->dd_pool->dp_config_rwlock);

	if (err)
		goto out;

	if (ds->ds_objset) {
		/*
		 * We need to sync out all in-flight IO before we try
		 * to evict (the dataset evict func is trying to clear
		 * the cached entries for this dataset in the ARC).
		 */
		txg_wait_synced(dd->dd_pool, 0);
	}

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dsl_dataset_make_exclusive(ds, tag);
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/*
	 * If we're removing a clone, we might also need to remove its
	 * origin.
	 */
	do {
		dsda.need_prep = B_FALSE;
		if (dsl_dir_is_clone(dd)) {
			err = dsl_dataset_origin_rm_prep(&dsda, tag);
			if (err) {
				dsl_dir_close(dd, FTAG);
				goto out;
			}
		}

		dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
		dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
		    dsl_dataset_destroy_sync, &dsda, tag, 0);
		dsl_sync_task_create(dstg, dsl_dir_destroy_check,
		    dsl_dir_destroy_sync, dd, FTAG, 0);
		err = dsl_sync_task_group_wait(dstg);
		dsl_sync_task_group_destroy(dstg);

		/*
		 * We could be racing against 'zfs release' or 'zfs destroy -d'
		 * on the origin snap, in which case we can get EBUSY if we
		 * needed to destroy the origin snap but were not ready to
		 * do so.
		 */
		if (dsda.need_prep) {
			ASSERT(err == EBUSY);
			ASSERT(dsl_dir_is_clone(dd));
			ASSERT(dsda.rm_origin == NULL);
		}
	} while (dsda.need_prep);

	if (dsda.rm_origin != NULL)
		dsl_dataset_disown(dsda.rm_origin, tag);

	/* if it is successful, dsl_dir_destroy_sync will close the dd */
	if (err)
		dsl_dir_close(dd, FTAG);
out:
	dsl_dataset_disown(ds, tag);
	return (err);
}

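/*
 * dsl_dataset_get_blkptr() returns the dataset's root block pointer;
 * dsl_dataset_set_blkptr() updates it (or dp_meta_rootbp when ds is
 * NULL, i.e. for the meta-objset).
 */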
blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_objset != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space used in the most recent snapshot, that is still being used
 * in this file system, from the space currently in use.  To figure out
 * the space in the most recent snapshot still in use, we need to take
 * the total space used in the snapshot and subtract out the space that
 * has been freed up since the snapshot was taken.
 */
static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
	else
		mrs_used = 0;

	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
	    &dluncomp));

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);

	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

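/*
 * Return the amount of space referenced only by this dataset,
 * recomputing it first for head datasets whose on-disk value is not
 * marked accurate (pools predating SPA_VERSION_UNIQUE_ACCURATE).
 */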
static uint64_t
dsl_dataset_unique(dsl_dataset_t *ds)
{
	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
		dsl_dataset_recalc_head_uniq(ds);

	return (ds->ds_phys->ds_unique_bytes);
}

struct killarg {
	dsl_dataset_t *ds;
	zio_t *zio;
	dmu_tx_t *tx;
};

/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
    const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;

	if (bp == NULL)
		return (0);

	if ((zb->zb_level == -1ULL && zb->zb_blkid != 0) ||
	    (zb->zb_object != 0 && dnp == NULL)) {
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		VERIFY3U(0, ==, dsl_free(ka->zio, ka->tx->tx_pool,
		    ka->tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT));
	} else {
		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
	}

	return (0);
}

/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * This is really a dsl_dir thing, but check it here so that
	 * we'll be less likely to leave this dataset inconsistent &
	 * nearly destroyed.
	 */
	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
	if (err)
		return (err);
	if (count != 0)
		return (EEXIST);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
}

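/*
 * Decide whether destroying this clone also requires destroying its
 * origin snapshot, and if so run the destroy checks against the
 * origin too.  If the origin was not prepared for removal in open
 * context, fail with EBUSY and set dsda->need_prep so the caller
 * retries after preparing it.
 */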
static int
dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
    dmu_tx_t *tx)
{
	dsl_dataset_t *ds = dsda->ds;
	dsl_dataset_t *ds_prev = ds->ds_prev;

	if (dsl_dataset_might_destroy_origin(ds_prev)) {
		struct dsl_ds_destroyarg ndsda = {0};

		/*
		 * If we're not prepared to remove the origin, don't remove
		 * the clone either.
		 */
		if (dsda->rm_origin == NULL) {
			dsda->need_prep = B_TRUE;
			return (EBUSY);
		}

		ndsda.ds = ds_prev;
		ndsda.is_origin_rm = B_TRUE;
		return (dsl_dataset_destroy_check(&ndsda, tag, tx));
	}

	/*
	 * If we're not going to remove the origin after all,
	 * undo the open context setup.
	 */
	if (dsda->rm_origin != NULL) {
		dsl_dataset_disown(dsda->rm_origin, tag);
		dsda->rm_origin = NULL;
	}

	return (0);
}

/* ARGSUSED */
int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	struct dsl_ds_destroyarg *dsda = arg1;
	dsl_dataset_t *ds = dsda->ds;

	/* we have an owner hold, so no one else can destroy us */
	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));

	/*
	 * Only allow deferred destroy on pools that support it.
	 * NOTE: deferred destroy is only supported on snapshots.
	 */
	if (dsda->defer) {
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS)
			return (ENOTSUP);
		ASSERT(dsl_dataset_is_snapshot(ds));
		return (0);
	}

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	if (dsl_dataset_is_snapshot(ds)) {
		/*
		 * If this snapshot has an elevated user reference count,
		 * we can't destroy it yet.
		 */
		if (ds->ds_userrefs > 0 && !dsda->releasing)
			return (EBUSY);

		mutex_enter(&ds->ds_lock);
		/*
		 * Can't delete a branch point.  However, if we're destroying
		 * a clone and removing its origin due to it having a user
		 * hold count of 0 and having been marked for deferred destroy,
		 * it's OK for the origin to have a single clone.
		 */
		if (ds->ds_phys->ds_num_children >
		    (dsda->is_origin_rm ? 2 : 1)) {
			mutex_exit(&ds->ds_lock);
			return (EEXIST);
		}
		mutex_exit(&ds->ds_lock);
	} else if (dsl_dir_is_clone(ds->ds_dir)) {
		return (dsl_dataset_origin_check(dsda, arg2, tx));
	}

	/* XXX we should do some i/o error checking... */
	return (0);
}

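/*
 * Machinery for waiting until the last hold on a dsl_dataset_t is
 * released during destroy: dsl_dataset_refs_gone() is installed as the
 * bonus buffer's eviction callback, and dsl_dataset_drain_refs()
 * blocks until it fires.
 */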
struct refsarg {
	kmutex_t lock;
	boolean_t gone;
	kcondvar_t cv;
};

/* ARGSUSED */
static void
dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
{
	struct refsarg *arg = argv;

	mutex_enter(&arg->lock);
	arg->gone = TRUE;
	cv_signal(&arg->cv);
	mutex_exit(&arg->lock);
}

static void
dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
{
	struct refsarg arg;

	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
	arg.gone = FALSE;
	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
	    dsl_dataset_refs_gone);
	dmu_buf_rele(ds->ds_dbuf, tag);
	mutex_enter(&arg.lock);
	while (!arg.gone)
		cv_wait(&arg.cv, &arg.lock);
	ASSERT(arg.gone);
	mutex_exit(&arg.lock);
	ds->ds_dbuf = NULL;
	ds->ds_phys = NULL;
	mutex_destroy(&arg.lock);
	cv_destroy(&arg.cv);
}

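/*
 * Sync task that actually destroys the dataset: unlink it from the
 * snapshot chain (or from its dsl_dir, for a head), fix up the
 * neighboring snapshots' deadlists and unique space, free or reassign
 * its blocks, and finally free the dataset object itself.
 */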
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
	struct dsl_ds_destroyarg *dsda = arg1;
	dsl_dataset_t *ds = dsda->ds;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(ds->ds_owner);
	ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	if (dsda->defer) {
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		if (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1) {
			dmu_buf_will_dirty(ds->ds_dbuf, tx);
			ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
			return;
		}
	}

	/* signal any waiters that this dataset is going away */
	mutex_enter(&ds->ds_lock);
	ds->ds_owner = dsl_reaper;
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		uint64_t val = 0;
		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
		ASSERT3U(ds->ds_reserved, ==, 0);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_pool_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		}
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
			    ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY(0 == zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;

			/*
			 * If the clone's origin has no other clones, no
			 * user holds, and has been marked for deferred
			 * deletion, then we should have done the necessary
			 * destroy setup for it.
			 */
			if (ds_prev->ds_phys->ds_num_children == 1 &&
			    ds_prev->ds_userrefs == 0 &&
			    DS_IS_DEFER_DESTROY(ds_prev)) {
				ASSERT3P(dsda->rm_origin, !=, NULL);
			} else {
				ASSERT3P(dsda->rm_origin, ==, NULL);
			}
		} else if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

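	/*
	 * All block frees issued below hang off this root zio; we wait
	 * for them to complete (see the zio_wait() below) before tearing
	 * down the rest of the dataset's state.
	 */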
	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;
		uint64_t old_unique;
		int64_t used = 0, compressed = 0, uncompressed = 0;

		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		old_unique = dsl_dataset_unique(ds_next);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) dsl_free(zio, dp, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);

		/* change snapused */
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -compressed, -uncompressed, tx);

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		bplist_close(&ds->ds_deadlist);
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;
			uint64_t space;

			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj,
			    FTAG, &ds_after_next));

			VERIFY(0 ==
			    bplist_space_birthrange(&ds_after_next->ds_deadlist,
			    ds->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_creation_txg, &space));
			ds_next->ds_phys->ds_unique_bytes += space;

			dsl_dataset_rele(ds_after_next, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
			ds_next->ds_prev = NULL;
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds_next, &ds_next->ds_prev));
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir,
				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
			}
		}
		dsl_dataset_rele(ds_next, FTAG);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * NB: this should be very quick, because we already
		 * freed all the objects in open context.
		 */
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
		    ds->ds_phys->ds_unique_bytes == 0);

		if (ds->ds_prev != NULL) {
			dsl_dataset_rele(ds->ds_prev, ds);
			ds->ds_prev = ds_prev = NULL;
		}
	}

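	/*
	 * Wait for all of the asynchronous frees issued above to finish
	 * before we update the namespace and free the dataset object.
	 */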
	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dir */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			uint64_t val;

			err = dsl_dataset_snap_lookup(ds_head,
			    ds->ds_snapname, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_rele(ds_head, FTAG);
	}

	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		uint64_t count;
		ASSERT(0 == zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
		VERIFY(0 == dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	if (ds->ds_phys->ds_userrefs_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
	dsl_dir_close(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dsl_dataset_drain_refs(ds, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));

	if (dsda->rm_origin) {
		/*
		 * Remove the origin of the clone we just destroyed.
		 */
		struct dsl_ds_destroyarg ndsda = {0};

		ndsda.ds = dsda->rm_origin;
		dsl_dataset_destroy_sync(&ndsda, tag, cr, tx);
	}
}

static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
		return (ENOSPC);

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

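/*
 * Check phase of the snapshot sync task: verify that the snapshot name
 * is available and fits, and that any refreservation can still be
 * honored once the snapshot takes ownership of the unique blocks.
 */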
/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	/*
	 * Check that the dataset's name is not too long.  Name consists
	 * of the dataset's length + 1 for the @-sign + snapshot name's length
	 */
	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
		return (ENAMETOOLONG);

	err = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (err)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

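/*
 * Sync phase of the snapshot sync task: allocate the snapshot's
 * dsl_dataset_phys_t, splice it into the snapshot chain ahead of the
 * head dataset, hand the old deadlist to the snapshot and give the
 * head a fresh one, and reset the head's unique space to zero.
 */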
void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
			    next_clones_obj, dsphys->ds_next_snap_obj, tx));
			VERIFY3U(0, ==, zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    add, 0, 0, tx);
	}

	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = crtxg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	if (ds->ds_prev)
		dsl_dataset_drop_ref(ds->ds_prev, ds);
	VERIFY(0 == dsl_dataset_get_ref(dp,
	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_pool_ds_snapshotted(ds, tx);

	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
	    "dataset = %llu", dsobj);
}

void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_objset != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

	dsl_dir_dirty(ds->ds_dir, tx);
	dmu_objset_sync(ds->ds_objset, zio, tx);
}

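/*
 * Fill in an nvlist of this dataset's properties for reporting.  For
 * snapshots, the space used and compression ratio are overridden with
 * the snapshot's own values rather than the dsl_dir's.
 */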
100 : 2027 (ds->ds_phys->ds_uncompressed_bytes * 100 / 2028 ds->ds_phys->ds_compressed_bytes)); 2029 } 2030 } 2031 2032 void 2033 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 2034 { 2035 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 2036 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 2037 stat->dds_guid = ds->ds_phys->ds_guid; 2038 if (ds->ds_phys->ds_next_snap_obj) { 2039 stat->dds_is_snapshot = B_TRUE; 2040 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 2041 } else { 2042 stat->dds_is_snapshot = B_FALSE; 2043 stat->dds_num_clones = 0; 2044 } 2045 2046 /* clone origin is really a dsl_dir thing... */ 2047 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 2048 if (dsl_dir_is_clone(ds->ds_dir)) { 2049 dsl_dataset_t *ods; 2050 2051 VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, 2052 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 2053 dsl_dataset_name(ods, stat->dds_origin); 2054 dsl_dataset_drop_ref(ods, FTAG); 2055 } else { 2056 stat->dds_origin[0] = '\0'; 2057 } 2058 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 2059 } 2060 2061 uint64_t 2062 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 2063 { 2064 return (ds->ds_fsid_guid); 2065 } 2066 2067 void 2068 dsl_dataset_space(dsl_dataset_t *ds, 2069 uint64_t *refdbytesp, uint64_t *availbytesp, 2070 uint64_t *usedobjsp, uint64_t *availobjsp) 2071 { 2072 *refdbytesp = ds->ds_phys->ds_used_bytes; 2073 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 2074 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 2075 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 2076 if (ds->ds_quota != 0) { 2077 /* 2078 * Adjust available bytes according to refquota 2079 */ 2080 if (*refdbytesp < ds->ds_quota) 2081 *availbytesp = MIN(*availbytesp, 2082 ds->ds_quota - *refdbytesp); 2083 else 2084 *availbytesp = 0; 2085 } 2086 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 2087 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 2088 } 2089 2090 boolean_t 2091 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 2092 { 2093 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2094 2095 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 2096 dsl_pool_sync_context(dp)); 2097 if (ds->ds_prev == NULL) 2098 return (B_FALSE); 2099 if (ds->ds_phys->ds_bp.blk_birth > 2100 ds->ds_prev->ds_phys->ds_creation_txg) 2101 return (B_TRUE); 2102 return (B_FALSE); 2103 } 2104 2105 /* ARGSUSED */ 2106 static int 2107 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 2108 { 2109 dsl_dataset_t *ds = arg1; 2110 char *newsnapname = arg2; 2111 dsl_dir_t *dd = ds->ds_dir; 2112 dsl_dataset_t *hds; 2113 uint64_t val; 2114 int err; 2115 2116 err = dsl_dataset_hold_obj(dd->dd_pool, 2117 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); 2118 if (err) 2119 return (err); 2120 2121 /* new name better not be in use */ 2122 err = dsl_dataset_snap_lookup(hds, newsnapname, &val); 2123 dsl_dataset_rele(hds, FTAG); 2124 2125 if (err == 0) 2126 err = EEXIST; 2127 else if (err == ENOENT) 2128 err = 0; 2129 2130 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2131 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 2132 err = ENAMETOOLONG; 2133 2134 return (err); 2135 } 2136 2137 static void 2138 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 2139 cred_t *cr, dmu_tx_t *tx) 2140 { 2141 dsl_dataset_t *ds = arg1; 2142 const char *newsnapname = arg2; 2143 dsl_dir_t *dd = ds->ds_dir; 2144 objset_t *mos = dd->dd_pool->dp_meta_objset; 2145 dsl_dataset_t *hds; 2146 int 
err; 2147 2148 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 2149 2150 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, 2151 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); 2152 2153 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2154 err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); 2155 ASSERT3U(err, ==, 0); 2156 mutex_enter(&ds->ds_lock); 2157 (void) strcpy(ds->ds_snapname, newsnapname); 2158 mutex_exit(&ds->ds_lock); 2159 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 2160 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 2161 ASSERT3U(err, ==, 0); 2162 2163 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 2164 cr, "dataset = %llu", ds->ds_object); 2165 dsl_dataset_rele(hds, FTAG); 2166 } 2167 2168 struct renamesnaparg { 2169 dsl_sync_task_group_t *dstg; 2170 char failed[MAXPATHLEN]; 2171 char *oldsnap; 2172 char *newsnap; 2173 }; 2174 2175 static int 2176 dsl_snapshot_rename_one(char *name, void *arg) 2177 { 2178 struct renamesnaparg *ra = arg; 2179 dsl_dataset_t *ds = NULL; 2180 char *cp; 2181 int err; 2182 2183 cp = name + strlen(name); 2184 *cp = '@'; 2185 (void) strcpy(cp + 1, ra->oldsnap); 2186 2187 /* 2188 * For recursive snapshot renames the parent won't be changing, 2189 * so we just pass name for both the to/from arguments. 2190 */ 2191 err = zfs_secpolicy_rename_perms(name, name, CRED()); 2192 if (err == ENOENT) { 2193 return (0); 2194 } else if (err) { 2195 (void) strcpy(ra->failed, name); 2196 return (err); 2197 } 2198 2199 #ifdef _KERNEL 2200 /* 2201 * Each filesystem undergoing rename needs to be unmounted first. 2202 */ 2203 (void) zfs_unmount_snap(name, NULL); 2204 #endif 2205 err = dsl_dataset_hold(name, ra->dstg, &ds); 2206 *cp = '\0'; 2207 if (err == ENOENT) { 2208 return (0); 2209 } else if (err) { 2210 (void) strcpy(ra->failed, name); 2211 return (err); 2212 } 2213 2214 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 2215 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 2216 2217 return (0); 2218 } 2219 2220 static int 2221 dsl_recursive_rename(char *oldname, const char *newname) 2222 { 2223 int err; 2224 struct renamesnaparg *ra; 2225 dsl_sync_task_t *dst; 2226 spa_t *spa; 2227 char *cp, *fsname = spa_strdup(oldname); 2228 int len = strlen(oldname); 2229 2230 /* truncate the snapshot name to get the fsname */ 2231 cp = strchr(fsname, '@'); 2232 *cp = '\0'; 2233 2234 err = spa_open(fsname, &spa, FTAG); 2235 if (err) { 2236 kmem_free(fsname, len + 1); 2237 return (err); 2238 } 2239 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 2240 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 2241 2242 ra->oldsnap = strchr(oldname, '@') + 1; 2243 ra->newsnap = strchr(newname, '@') + 1; 2244 *ra->failed = '\0'; 2245 2246 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 2247 DS_FIND_CHILDREN); 2248 kmem_free(fsname, len + 1); 2249 2250 if (err == 0) { 2251 err = dsl_sync_task_group_wait(ra->dstg); 2252 } 2253 2254 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 2255 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 2256 dsl_dataset_t *ds = dst->dst_arg1; 2257 if (dst->dst_err) { 2258 dsl_dir_name(ds->ds_dir, ra->failed); 2259 (void) strcat(ra->failed, "@"); 2260 (void) strcat(ra->failed, ra->newsnap); 2261 } 2262 dsl_dataset_rele(ds, ra->dstg); 2263 } 2264 2265 if (err) 2266 (void) strcpy(oldname, ra->failed); 2267 2268 dsl_sync_task_group_destroy(ra->dstg); 2269 kmem_free(ra, sizeof (struct renamesnaparg)); 2270 spa_close(spa, FTAG); 2271 return (err); 2272 } 2273 2274 static int 2275 dsl_valid_rename(char
*oldname, void *arg) 2276 { 2277 int delta = *(int *)arg; 2278 2279 if (strlen(oldname) + delta >= MAXNAMELEN) 2280 return (ENAMETOOLONG); 2281 2282 return (0); 2283 } 2284 2285 #pragma weak dmu_objset_rename = dsl_dataset_rename 2286 int 2287 dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) 2288 { 2289 dsl_dir_t *dd; 2290 dsl_dataset_t *ds; 2291 const char *tail; 2292 int err; 2293 2294 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2295 if (err) 2296 return (err); 2297 /* 2298 * If there are more than 2 references there may be holds 2299 * hanging around that haven't been cleared out yet. 2300 */ 2301 if (dmu_buf_refcount(dd->dd_dbuf) > 2) 2302 txg_wait_synced(dd->dd_pool, 0); 2303 if (tail == NULL) { 2304 int delta = strlen(newname) - strlen(oldname); 2305 2306 /* if we're growing, validate child name lengths */ 2307 if (delta > 0) 2308 err = dmu_objset_find(oldname, dsl_valid_rename, 2309 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 2310 2311 if (!err) 2312 err = dsl_dir_rename(dd, newname); 2313 dsl_dir_close(dd, FTAG); 2314 return (err); 2315 } 2316 if (tail[0] != '@') { 2317 /* the name ended in a nonexistent component */ 2318 dsl_dir_close(dd, FTAG); 2319 return (ENOENT); 2320 } 2321 2322 dsl_dir_close(dd, FTAG); 2323 2324 /* new name must be a snapshot in the same filesystem */ 2325 tail = strchr(newname, '@'); 2326 if (tail == NULL) 2327 return (EINVAL); 2328 tail++; 2329 if (strncmp(oldname, newname, tail - newname) != 0) 2330 return (EXDEV); 2331 2332 if (recursive) { 2333 err = dsl_recursive_rename(oldname, newname); 2334 } else { 2335 err = dsl_dataset_hold(oldname, FTAG, &ds); 2336 if (err) 2337 return (err); 2338 2339 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2340 dsl_dataset_snapshot_rename_check, 2341 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2342 2343 dsl_dataset_rele(ds, FTAG); 2344 } 2345 2346 return (err); 2347 } 2348 2349 struct promotenode { 2350 list_node_t link; 2351 dsl_dataset_t *ds; 2352 }; 2353 2354 struct promotearg { 2355 list_t shared_snaps, origin_snaps, clone_snaps; 2356 dsl_dataset_t *origin_origin, *origin_head; 2357 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2358 }; 2359 2360 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2361 2362 /* ARGSUSED */ 2363 static int 2364 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 2365 { 2366 dsl_dataset_t *hds = arg1; 2367 struct promotearg *pa = arg2; 2368 struct promotenode *snap = list_head(&pa->shared_snaps); 2369 dsl_dataset_t *origin_ds = snap->ds; 2370 int err; 2371 2372 /* Check that it is a real clone */ 2373 if (!dsl_dir_is_clone(hds->ds_dir)) 2374 return (EINVAL); 2375 2376 /* Since this is so expensive, don't do the preliminary check in open context */ 2377 if (!dmu_tx_is_syncing(tx)) 2378 return (0); 2379 2380 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) 2381 return (EXDEV); 2382 2383 /* compute origin's new unique space */ 2384 snap = list_tail(&pa->clone_snaps); 2385 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2386 err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2387 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique); 2388 if (err) 2389 return (err); 2390 2391 /* 2392 * Walk the snapshots that we are moving 2393 * 2394 * Compute space to transfer.
Consider the incremental changes 2395 * to used for each snapshot: 2396 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2397 * So each snapshot gave birth to: 2398 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2399 * So a sequence would look like: 2400 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2401 * Which simplifies to: 2402 * uN + kN + k(N-1) + ... + k1 + k0 2403 * Note however, if we stop before we reach the ORIGIN we get: 2404 * uN + kN + k(N-1) + ... + kM - u(M-1) 2405 */ 2406 pa->used = origin_ds->ds_phys->ds_used_bytes; 2407 pa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2408 pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2409 for (snap = list_head(&pa->shared_snaps); snap; 2410 snap = list_next(&pa->shared_snaps, snap)) { 2411 uint64_t val, dlused, dlcomp, dluncomp; 2412 dsl_dataset_t *ds = snap->ds; 2413 2414 /* Check that the snapshot name does not conflict */ 2415 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2416 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2417 if (err == 0) 2418 return (EEXIST); 2419 if (err != ENOENT) 2420 return (err); 2421 2422 /* The very first snapshot does not have a deadlist */ 2423 if (ds->ds_phys->ds_prev_snap_obj == 0) 2424 continue; 2425 2426 if (err = bplist_space(&ds->ds_deadlist, 2427 &dlused, &dlcomp, &dluncomp)) 2428 return (err); 2429 pa->used += dlused; 2430 pa->comp += dlcomp; 2431 pa->uncomp += dluncomp; 2432 } 2433 2434 /* 2435 * If we are a clone of a clone then we never reached ORIGIN, 2436 * so we need to subtract out the clone origin's used space. 2437 */ 2438 if (pa->origin_origin) { 2439 pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; 2440 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; 2441 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; 2442 } 2443 2444 /* Check that there is enough space here */ 2445 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2446 pa->used); 2447 if (err) 2448 return (err); 2449 2450 /* 2451 * Compute the amounts of space that will be used by snapshots 2452 * after the promotion (for both origin and clone). For each, 2453 * it is the amount of space that will be on all of their 2454 * deadlists (that was not born before their new origin). 2455 */ 2456 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2457 uint64_t space; 2458 2459 /* 2460 * Note, typically this will not be a clone of a clone, 2461 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so 2462 * these snaplist_space() -> bplist_space_birthrange() 2463 * calls will be fast because they do not have to 2464 * iterate over all bps.
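 * (With a mintxg below TXG_INITIAL and a maxtxg of UINT64_MAX,
 * bplist_space_birthrange() can take a shortcut and use the
 * deadlist's stored totals instead of walking each entry.)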
2465 */ 2466 snap = list_head(&pa->origin_snaps); 2467 err = snaplist_space(&pa->shared_snaps, 2468 snap->ds->ds_origin_txg, &pa->cloneusedsnap); 2469 if (err) 2470 return (err); 2471 2472 err = snaplist_space(&pa->clone_snaps, 2473 snap->ds->ds_origin_txg, &space); 2474 if (err) 2475 return (err); 2476 pa->cloneusedsnap += space; 2477 } 2478 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2479 err = snaplist_space(&pa->origin_snaps, 2480 origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); 2481 if (err) 2482 return (err); 2483 } 2484 2485 return (0); 2486 } 2487 2488 static void 2489 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2490 { 2491 dsl_dataset_t *hds = arg1; 2492 struct promotearg *pa = arg2; 2493 struct promotenode *snap = list_head(&pa->shared_snaps); 2494 dsl_dataset_t *origin_ds = snap->ds; 2495 dsl_dataset_t *origin_head; 2496 dsl_dir_t *dd = hds->ds_dir; 2497 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2498 dsl_dir_t *odd = NULL; 2499 uint64_t oldnext_obj; 2500 int64_t delta; 2501 2502 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 2503 2504 snap = list_head(&pa->origin_snaps); 2505 origin_head = snap->ds; 2506 2507 /* 2508 * We need to explicitly open odd, since origin_ds's dd will be 2509 * changing. 2510 */ 2511 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2512 NULL, FTAG, &odd)); 2513 2514 /* change origin's next snap */ 2515 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2516 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2517 snap = list_tail(&pa->clone_snaps); 2518 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2519 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2520 2521 /* change the origin's next clone */ 2522 if (origin_ds->ds_phys->ds_next_clones_obj) { 2523 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, 2524 origin_ds->ds_phys->ds_next_clones_obj, 2525 origin_ds->ds_phys->ds_next_snap_obj, tx)); 2526 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 2527 origin_ds->ds_phys->ds_next_clones_obj, 2528 oldnext_obj, tx)); 2529 } 2530 2531 /* change origin */ 2532 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2533 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2534 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2535 hds->ds_origin_txg = origin_head->ds_origin_txg; 2536 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2537 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2538 origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg; 2539 2540 /* move snapshots to this dir */ 2541 for (snap = list_head(&pa->shared_snaps); snap; 2542 snap = list_next(&pa->shared_snaps, snap)) { 2543 dsl_dataset_t *ds = snap->ds; 2544 2545 /* unregister props as dsl_dir is changing */ 2546 if (ds->ds_objset) { 2547 dmu_objset_evict(ds->ds_objset); 2548 ds->ds_objset = NULL; 2549 } 2550 /* move snap name entry */ 2551 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2552 VERIFY(0 == dsl_dataset_snap_remove(origin_head, 2553 ds->ds_snapname, tx)); 2554 VERIFY(0 == zap_add(dp->dp_meta_objset, 2555 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2556 8, 1, &ds->ds_object, tx)); 2557 /* change containing dsl_dir */ 2558 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2559 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2560 ds->ds_phys->ds_dir_obj = dd->dd_object; 2561 ASSERT3P(ds->ds_dir, ==, odd); 2562 dsl_dir_close(ds->ds_dir, ds); 2563 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2564 NULL, ds, &ds->ds_dir)); 2565 2566 ASSERT3U(dsl_prop_numcb(ds), ==, 
0); 2567 } 2568 2569 /* 2570 * Change space accounting. 2571 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2572 * both be valid, or both be 0 (resulting in delta == 0). This 2573 * is true for each of {clone,origin} independently. 2574 */ 2575 2576 delta = pa->cloneusedsnap - 2577 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2578 ASSERT3S(delta, >=, 0); 2579 ASSERT3U(pa->used, >=, delta); 2580 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2581 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2582 pa->used - delta, pa->comp, pa->uncomp, tx); 2583 2584 delta = pa->originusedsnap - 2585 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2586 ASSERT3S(delta, <=, 0); 2587 ASSERT3U(pa->used, >=, -delta); 2588 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2589 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2590 -pa->used - delta, -pa->comp, -pa->uncomp, tx); 2591 2592 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2593 2594 /* log history record */ 2595 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2596 cr, "dataset = %llu", hds->ds_object); 2597 2598 dsl_dir_close(odd, FTAG); 2599 } 2600 2601 static char *snaplist_tag = "snaplist"; 2602 /* 2603 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2604 * (exclusive) and last_obj (inclusive). The list will be in reverse 2605 * order (last_obj will be the list_head()). If first_obj == 0, do all 2606 * snapshots back to this dataset's origin. 2607 */ 2608 static int 2609 snaplist_make(dsl_pool_t *dp, boolean_t own, 2610 uint64_t first_obj, uint64_t last_obj, list_t *l) 2611 { 2612 uint64_t obj = last_obj; 2613 2614 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 2615 2616 list_create(l, sizeof (struct promotenode), 2617 offsetof(struct promotenode, link)); 2618 2619 while (obj != first_obj) { 2620 dsl_dataset_t *ds; 2621 struct promotenode *snap; 2622 int err; 2623 2624 if (own) { 2625 err = dsl_dataset_own_obj(dp, obj, 2626 0, snaplist_tag, &ds); 2627 if (err == 0) 2628 dsl_dataset_make_exclusive(ds, snaplist_tag); 2629 } else { 2630 err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 2631 } 2632 if (err == ENOENT) { 2633 /* lost race with snapshot destroy */ 2634 struct promotenode *last = list_tail(l); 2635 ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 2636 obj = last->ds->ds_phys->ds_prev_snap_obj; 2637 continue; 2638 } else if (err) { 2639 return (err); 2640 } 2641 2642 if (first_obj == 0) 2643 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2644 2645 snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 2646 snap->ds = ds; 2647 list_insert_tail(l, snap); 2648 obj = ds->ds_phys->ds_prev_snap_obj; 2649 } 2650 2651 return (0); 2652 } 2653 2654 static int 2655 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2656 { 2657 struct promotenode *snap; 2658 2659 *spacep = 0; 2660 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2661 uint64_t used; 2662 int err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2663 mintxg, UINT64_MAX, &used); 2664 if (err) 2665 return (err); 2666 *spacep += used; 2667 } 2668 return (0); 2669 } 2670 2671 static void 2672 snaplist_destroy(list_t *l, boolean_t own) 2673 { 2674 struct promotenode *snap; 2675 2676 if (!l || !list_link_active(&l->list_head)) 2677 return; 2678 2679 while ((snap = list_tail(l)) != NULL) { 2680 list_remove(l, snap); 2681 if (own) 2682 dsl_dataset_disown(snap->ds, snaplist_tag); 2683 else 2684 dsl_dataset_rele(snap->ds, snaplist_tag); 2685 kmem_free(snap, sizeof (struct promotenode)); 2686 } 2687 
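	/* the nodes were all freed above; tear down the now-empty list itself */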
list_destroy(l); 2688 } 2689 2690 /* 2691 * Promote a clone. Nomenclature note: 2692 * "clone" or "cds": the original clone which is being promoted 2693 * "origin" or "ods": the snapshot which is originally the clone's origin 2694 * "origin head" or "ohds": the dataset which is the head 2695 * (filesystem/volume) for the origin 2696 * "origin origin": the origin of the origin's filesystem (typically 2697 * NULL, indicating that the clone is not a clone of a clone). 2698 */ 2699 int 2700 dsl_dataset_promote(const char *name) 2701 { 2702 dsl_dataset_t *ds; 2703 dsl_dir_t *dd; 2704 dsl_pool_t *dp; 2705 dmu_object_info_t doi; 2706 struct promotearg pa = { 0 }; 2707 struct promotenode *snap; 2708 int err; 2709 2710 err = dsl_dataset_hold(name, FTAG, &ds); 2711 if (err) 2712 return (err); 2713 dd = ds->ds_dir; 2714 dp = dd->dd_pool; 2715 2716 err = dmu_object_info(dp->dp_meta_objset, 2717 ds->ds_phys->ds_snapnames_zapobj, &doi); 2718 if (err) { 2719 dsl_dataset_rele(ds, FTAG); 2720 return (err); 2721 } 2722 2723 if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 2724 dsl_dataset_rele(ds, FTAG); 2725 return (EINVAL); 2726 } 2727 2728 /* 2729 * We are going to inherit all the snapshots taken before our 2730 * origin (i.e., our new origin will be our parent's origin). 2731 * Take ownership of them so that we can rename them into our 2732 * namespace. 2733 */ 2734 rw_enter(&dp->dp_config_rwlock, RW_READER); 2735 2736 err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 2737 &pa.shared_snaps); 2738 if (err != 0) 2739 goto out; 2740 2741 err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 2742 if (err != 0) 2743 goto out; 2744 2745 snap = list_head(&pa.shared_snaps); 2746 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2747 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 2748 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 2749 if (err != 0) 2750 goto out; 2751 2752 if (dsl_dir_is_clone(snap->ds->ds_dir)) { 2753 err = dsl_dataset_own_obj(dp, 2754 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2755 0, FTAG, &pa.origin_origin); 2756 if (err != 0) 2757 goto out; 2758 } 2759 2760 out: 2761 rw_exit(&dp->dp_config_rwlock); 2762 2763 /* 2764 * Add in 128x the snapnames zapobj size, since we will be moving 2765 * a bunch of snapnames to the promoted ds, and dirtying their 2766 * bonus buffers.
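 * (The block count passed to dsl_sync_task_do() below,
 * 2 + 2 * doi.doi_physical_blks, is sized for that: room to
 * rewrite each block of the snapnames zapobj, plus some slop.)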
2767 */ 2768 if (err == 0) { 2769 err = dsl_sync_task_do(dp, dsl_dataset_promote_check, 2770 dsl_dataset_promote_sync, ds, &pa, 2771 2 + 2 * doi.doi_physical_blks); 2772 } 2773 2774 snaplist_destroy(&pa.shared_snaps, B_TRUE); 2775 snaplist_destroy(&pa.clone_snaps, B_FALSE); 2776 snaplist_destroy(&pa.origin_snaps, B_FALSE); 2777 if (pa.origin_origin) 2778 dsl_dataset_disown(pa.origin_origin, FTAG); 2779 dsl_dataset_rele(ds, FTAG); 2780 return (err); 2781 } 2782 2783 struct cloneswaparg { 2784 dsl_dataset_t *cds; /* clone dataset */ 2785 dsl_dataset_t *ohds; /* origin's head dataset */ 2786 boolean_t force; 2787 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 2788 }; 2789 2790 /* ARGSUSED */ 2791 static int 2792 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2793 { 2794 struct cloneswaparg *csa = arg1; 2795 2796 /* they should both be heads */ 2797 if (dsl_dataset_is_snapshot(csa->cds) || 2798 dsl_dataset_is_snapshot(csa->ohds)) 2799 return (EINVAL); 2800 2801 /* the branch point should be just before them */ 2802 if (csa->cds->ds_prev != csa->ohds->ds_prev) 2803 return (EINVAL); 2804 2805 /* cds should be the clone (unless they are unrelated) */ 2806 if (csa->cds->ds_prev != NULL && 2807 csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && 2808 csa->ohds->ds_object != 2809 csa->cds->ds_prev->ds_phys->ds_next_snap_obj) 2810 return (EINVAL); 2811 2812 /* the clone should be a child of the origin */ 2813 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 2814 return (EINVAL); 2815 2816 /* ohds shouldn't be modified unless 'force' */ 2817 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 2818 return (ETXTBSY); 2819 2820 /* adjust amount of any unconsumed refreservation */ 2821 csa->unused_refres_delta = 2822 (int64_t)MIN(csa->ohds->ds_reserved, 2823 csa->ohds->ds_phys->ds_unique_bytes) - 2824 (int64_t)MIN(csa->ohds->ds_reserved, 2825 csa->cds->ds_phys->ds_unique_bytes); 2826 2827 if (csa->unused_refres_delta > 0 && 2828 csa->unused_refres_delta > 2829 dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 2830 return (ENOSPC); 2831 2832 return (0); 2833 } 2834 2835 /* ARGSUSED */ 2836 static void 2837 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2838 { 2839 struct cloneswaparg *csa = arg1; 2840 dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; 2841 2842 ASSERT(csa->cds->ds_reserved == 0); 2843 ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); 2844 2845 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 2846 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 2847 2848 if (csa->cds->ds_objset != NULL) { 2849 dmu_objset_evict(csa->cds->ds_objset); 2850 csa->cds->ds_objset = NULL; 2851 } 2852 2853 if (csa->ohds->ds_objset != NULL) { 2854 dmu_objset_evict(csa->ohds->ds_objset); 2855 csa->ohds->ds_objset = NULL; 2856 } 2857 2858 /* 2859 * Reset origin's unique bytes, if it exists. 
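 * The origin's new unique space is whatever is on the clone's
 * deadlist that was born after the origin's previous snapshot;
 * bplist_space_birthrange() computes exactly that below.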
2860 */ 2861 if (csa->cds->ds_prev) { 2862 dsl_dataset_t *origin = csa->cds->ds_prev; 2863 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2864 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2865 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2866 &origin->ds_phys->ds_unique_bytes)); 2867 } 2868 2869 /* swap blkptrs */ 2870 { 2871 blkptr_t tmp; 2872 tmp = csa->ohds->ds_phys->ds_bp; 2873 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 2874 csa->cds->ds_phys->ds_bp = tmp; 2875 } 2876 2877 /* set dd_*_bytes */ 2878 { 2879 int64_t dused, dcomp, duncomp; 2880 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2881 uint64_t odl_used, odl_comp, odl_uncomp; 2882 2883 ASSERT3U(csa->cds->ds_dir->dd_phys-> 2884 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2885 2886 VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2887 &cdl_comp, &cdl_uncomp)); 2888 VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2889 &odl_comp, &odl_uncomp)); 2890 2891 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 2892 (csa->ohds->ds_phys->ds_used_bytes + odl_used); 2893 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 2894 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 2895 duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 2896 cdl_uncomp - 2897 (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2898 2899 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, 2900 dused, dcomp, duncomp, tx); 2901 dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, 2902 -dused, -dcomp, -duncomp, tx); 2903 2904 /* 2905 * The change in the space used by snapshots comes entirely 2906 * from the head's deadlist, since that is the only thing 2907 * changing here that affects snapused. 2908 */ 2909 2910 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2911 csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used)); 2912 VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist, 2913 csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used)); 2914 dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, 2915 DD_USED_HEAD, DD_USED_SNAP, tx); 2916 } 2917 2918 #define SWITCH64(x, y) \ 2919 { \ 2920 uint64_t __tmp = (x); \ 2921 (x) = (y); \ 2922 (y) = __tmp; \ 2923 } 2924 2925 /* swap ds_*_bytes */ 2926 SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 2927 csa->cds->ds_phys->ds_used_bytes); 2928 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 2929 csa->cds->ds_phys->ds_compressed_bytes); 2930 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 2931 csa->cds->ds_phys->ds_uncompressed_bytes); 2932 SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 2933 csa->cds->ds_phys->ds_unique_bytes); 2934 2935 /* apply any parent delta for change in unconsumed refreservation */ 2936 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, 2937 csa->unused_refres_delta, 0, 0, tx); 2938 2939 /* swap deadlists */ 2940 bplist_close(&csa->cds->ds_deadlist); 2941 bplist_close(&csa->ohds->ds_deadlist); 2942 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 2943 csa->cds->ds_phys->ds_deadlist_obj); 2944 VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 2945 csa->cds->ds_phys->ds_deadlist_obj)); 2946 VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 2947 csa->ohds->ds_phys->ds_deadlist_obj)); 2948 2949 dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx); 2950 } 2951 2952 /* 2953 * Swap 'clone' with its origin head dataset. Used at the end of "zfs 2954 * recv" into an existing fs to swizzle the file system to the new 2955 * version, and by "zfs rollback".
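 * The caller must already own both datasets (note the ASSERTs below).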
Can also be used to swap two 2956 * independent head datasets if neither has any snapshots. 2957 */ 2958 int 2959 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 2960 boolean_t force) 2961 { 2962 struct cloneswaparg csa; 2963 int error; 2964 2965 ASSERT(clone->ds_owner); 2966 ASSERT(origin_head->ds_owner); 2967 retry: 2968 /* Need exclusive access for the swap */ 2969 rw_enter(&clone->ds_rwlock, RW_WRITER); 2970 if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { 2971 rw_exit(&clone->ds_rwlock); 2972 rw_enter(&origin_head->ds_rwlock, RW_WRITER); 2973 if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { 2974 rw_exit(&origin_head->ds_rwlock); 2975 goto retry; 2976 } 2977 } 2978 csa.cds = clone; 2979 csa.ohds = origin_head; 2980 csa.force = force; 2981 error = dsl_sync_task_do(clone->ds_dir->dd_pool, 2982 dsl_dataset_clone_swap_check, 2983 dsl_dataset_clone_swap_sync, &csa, NULL, 9); 2984 return (error); 2985 } 2986 2987 /* 2988 * Given a pool name and a dataset object number in that pool, 2989 * return the name of that dataset. 2990 */ 2991 int 2992 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2993 { 2994 spa_t *spa; 2995 dsl_pool_t *dp; 2996 dsl_dataset_t *ds; 2997 int error; 2998 2999 if ((error = spa_open(pname, &spa, FTAG)) != 0) 3000 return (error); 3001 dp = spa_get_dsl(spa); 3002 rw_enter(&dp->dp_config_rwlock, RW_READER); 3003 if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { 3004 dsl_dataset_name(ds, buf); 3005 dsl_dataset_rele(ds, FTAG); 3006 } 3007 rw_exit(&dp->dp_config_rwlock); 3008 spa_close(spa, FTAG); 3009 3010 return (error); 3011 } 3012 3013 int 3014 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 3015 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 3016 { 3017 int error = 0; 3018 3019 ASSERT3S(asize, >, 0); 3020 3021 /* 3022 * *ref_rsrv is the portion of asize that will come from any 3023 * unconsumed refreservation space. 3024 */ 3025 *ref_rsrv = 0; 3026 3027 mutex_enter(&ds->ds_lock); 3028 /* 3029 * Make a space adjustment for reserved bytes. 3030 */ 3031 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 3032 ASSERT3U(*used, >=, 3033 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3034 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3035 *ref_rsrv = 3036 asize - MIN(asize, parent_delta(ds, asize + inflight)); 3037 } 3038 3039 if (!check_quota || ds->ds_quota == 0) { 3040 mutex_exit(&ds->ds_lock); 3041 return (0); 3042 } 3043 /* 3044 * If they are requesting more space, and our current estimate 3045 * is over quota, they get to try again unless the actual 3046 * on-disk usage is over quota and there are no pending changes 3047 * (which may free up space for us).
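 * For example (hypothetical numbers): with a 10G refquota, 9.5G
 * already on disk, and a 1G write in flight, the estimate (10.5G)
 * is over quota, but a pending free may still let the write
 * succeed, so we return ERESTART rather than EDQUOT.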
3048 */ 3049 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 3050 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 3051 error = ERESTART; 3052 else 3053 error = EDQUOT; 3054 } 3055 mutex_exit(&ds->ds_lock); 3056 3057 return (error); 3058 } 3059 3060 /* ARGSUSED */ 3061 static int 3062 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 3063 { 3064 dsl_dataset_t *ds = arg1; 3065 uint64_t *quotap = arg2; 3066 uint64_t new_quota = *quotap; 3067 3068 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 3069 return (ENOTSUP); 3070 3071 if (new_quota == 0) 3072 return (0); 3073 3074 if (new_quota < ds->ds_phys->ds_used_bytes || 3075 new_quota < ds->ds_reserved) 3076 return (ENOSPC); 3077 3078 return (0); 3079 } 3080 3081 /* ARGSUSED */ 3082 void 3083 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3084 { 3085 dsl_dataset_t *ds = arg1; 3086 uint64_t *quotap = arg2; 3087 uint64_t new_quota = *quotap; 3088 3089 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3090 3091 ds->ds_quota = new_quota; 3092 3093 dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 3094 3095 spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 3096 tx, cr, "%lld dataset = %llu ", 3097 (longlong_t)new_quota, ds->ds_object); 3098 } 3099 3100 int 3101 dsl_dataset_set_quota(const char *dsname, uint64_t quota) 3102 { 3103 dsl_dataset_t *ds; 3104 int err; 3105 3106 err = dsl_dataset_hold(dsname, FTAG, &ds); 3107 if (err) 3108 return (err); 3109 3110 if (quota != ds->ds_quota) { 3111 /* 3112 * If someone removes a file, then tries to set the quota, we 3113 * want to make sure the file freeing takes effect. 3114 */ 3115 txg_wait_open(ds->ds_dir->dd_pool, 0); 3116 3117 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3118 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 3119 ds, &quota, 0); 3120 } 3121 dsl_dataset_rele(ds, FTAG); 3122 return (err); 3123 } 3124 3125 static int 3126 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 3127 { 3128 dsl_dataset_t *ds = arg1; 3129 uint64_t *reservationp = arg2; 3130 uint64_t new_reservation = *reservationp; 3131 uint64_t unique; 3132 3133 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 3134 SPA_VERSION_REFRESERVATION) 3135 return (ENOTSUP); 3136 3137 if (dsl_dataset_is_snapshot(ds)) 3138 return (EINVAL); 3139 3140 /* 3141 * If we are doing the preliminary check in open context, the 3142 * space estimates may be inaccurate.
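 * (ds_unique_bytes is only settled in syncing context, so the
 * space checks below are only enforced when the sync task runs.)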
3143 */ 3144 if (!dmu_tx_is_syncing(tx)) 3145 return (0); 3146 3147 mutex_enter(&ds->ds_lock); 3148 unique = dsl_dataset_unique(ds); 3149 mutex_exit(&ds->ds_lock); 3150 3151 if (MAX(unique, new_reservation) > MAX(unique, ds->ds_reserved)) { 3152 uint64_t delta = MAX(unique, new_reservation) - 3153 MAX(unique, ds->ds_reserved); 3154 3155 if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 3156 return (ENOSPC); 3157 if (ds->ds_quota > 0 && 3158 new_reservation > ds->ds_quota) 3159 return (ENOSPC); 3160 } 3161 3162 return (0); 3163 } 3164 3165 /* ARGSUSED */ 3166 static void 3167 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 3168 dmu_tx_t *tx) 3169 { 3170 dsl_dataset_t *ds = arg1; 3171 uint64_t *reservationp = arg2; 3172 uint64_t new_reservation = *reservationp; 3173 uint64_t unique; 3174 int64_t delta; 3175 3176 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3177 3178 mutex_enter(&ds->ds_dir->dd_lock); 3179 mutex_enter(&ds->ds_lock); 3180 unique = dsl_dataset_unique(ds); 3181 delta = MAX(0, (int64_t)(new_reservation - unique)) - 3182 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3183 ds->ds_reserved = new_reservation; 3184 mutex_exit(&ds->ds_lock); 3185 3186 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3187 mutex_exit(&ds->ds_dir->dd_lock); 3188 dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refreservation", 3189 new_reservation, cr, tx); 3190 3191 spa_history_internal_log(LOG_DS_REFRESERV, 3192 ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 3193 (longlong_t)new_reservation, ds->ds_object); 3194 } 3195 3196 int 3197 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 3198 { 3199 dsl_dataset_t *ds; 3200 int err; 3201 3202 err = dsl_dataset_hold(dsname, FTAG, &ds); 3203 if (err) 3204 return (err); 3205 3206 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3207 dsl_dataset_set_reservation_check, 3208 dsl_dataset_set_reservation_sync, ds, &reservation, 0); 3209 dsl_dataset_rele(ds, FTAG); 3210 return (err); 3211 } 3212 3213 struct dsl_ds_holdarg { 3214 dsl_sync_task_group_t *dstg; 3215 char *htag; 3216 char *snapname; 3217 boolean_t recursive; 3218 boolean_t gotone; 3219 boolean_t temphold; 3220 char failed[MAXPATHLEN]; 3221 }; 3222 3223 static int 3224 dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) 3225 { 3226 dsl_dataset_t *ds = arg1; 3227 struct dsl_ds_holdarg *ha = arg2; 3228 char *htag = ha->htag; 3229 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3230 int error = 0; 3231 3232 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3233 return (ENOTSUP); 3234 3235 if (!dsl_dataset_is_snapshot(ds)) 3236 return (EINVAL); 3237 3238 /* tags must be unique */ 3239 mutex_enter(&ds->ds_lock); 3240 if (ds->ds_phys->ds_userrefs_obj) { 3241 error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, 3242 8, 1, tx); 3243 if (error == 0) 3244 error = EEXIST; 3245 else if (error == ENOENT) 3246 error = 0; 3247 } 3248 mutex_exit(&ds->ds_lock); 3249 3250 return (error); 3251 } 3252 3253 static void 3254 dsl_dataset_user_hold_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3255 { 3256 dsl_dataset_t *ds = arg1; 3257 struct dsl_ds_holdarg *ha = arg2; 3258 char *htag = ha->htag; 3259 dsl_pool_t *dp = ds->ds_dir->dd_pool; 3260 objset_t *mos = dp->dp_meta_objset; 3261 time_t now = gethrestime_sec(); 3262 uint64_t zapobj; 3263 3264 mutex_enter(&ds->ds_lock); 3265 if (ds->ds_phys->ds_userrefs_obj == 0) { 3266 /* 3267 * This is the first user hold for this dataset. 
Create 3268 * the userrefs zap object. 3269 */ 3270 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3271 zapobj = ds->ds_phys->ds_userrefs_obj = 3272 zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); 3273 } else { 3274 zapobj = ds->ds_phys->ds_userrefs_obj; 3275 } 3276 ds->ds_userrefs++; 3277 mutex_exit(&ds->ds_lock); 3278 3279 VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); 3280 3281 if (ha->temphold) { 3282 VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object, 3283 htag, &now, tx)); 3284 } 3285 3286 spa_history_internal_log(LOG_DS_USER_HOLD, 3287 dp->dp_spa, tx, cr, "<%s> temp = %d dataset = %llu", htag, 3288 (int)ha->temphold, ds->ds_object); 3289 } 3290 3291 static int 3292 dsl_dataset_user_hold_one(char *dsname, void *arg) 3293 { 3294 struct dsl_ds_holdarg *ha = arg; 3295 dsl_dataset_t *ds; 3296 int error; 3297 char *name; 3298 3299 /* alloc a buffer to hold dsname@snapname plus terminating NULL */ 3300 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3301 error = dsl_dataset_hold(name, ha->dstg, &ds); 3302 strfree(name); 3303 if (error == 0) { 3304 ha->gotone = B_TRUE; 3305 dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, 3306 dsl_dataset_user_hold_sync, ds, ha, 0); 3307 } else if (error == ENOENT && ha->recursive) { 3308 error = 0; 3309 } else { 3310 (void) strcpy(ha->failed, dsname); 3311 } 3312 return (error); 3313 } 3314 3315 int 3316 dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, 3317 boolean_t recursive, boolean_t temphold) 3318 { 3319 struct dsl_ds_holdarg *ha; 3320 dsl_sync_task_t *dst; 3321 spa_t *spa; 3322 int error; 3323 3324 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3325 3326 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3327 3328 error = spa_open(dsname, &spa, FTAG); 3329 if (error) { 3330 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3331 return (error); 3332 } 3333 3334 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3335 ha->htag = htag; 3336 ha->snapname = snapname; 3337 ha->recursive = recursive; 3338 ha->temphold = temphold; 3339 if (recursive) { 3340 error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, 3341 ha, DS_FIND_CHILDREN); 3342 } else { 3343 error = dsl_dataset_user_hold_one(dsname, ha); 3344 } 3345 if (error == 0) 3346 error = dsl_sync_task_group_wait(ha->dstg); 3347 3348 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 3349 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 3350 dsl_dataset_t *ds = dst->dst_arg1; 3351 3352 if (dst->dst_err) { 3353 dsl_dataset_name(ds, ha->failed); 3354 *strchr(ha->failed, '@') = '\0'; 3355 } 3356 dsl_dataset_rele(ds, ha->dstg); 3357 } 3358 3359 if (error == 0 && recursive && !ha->gotone) 3360 error = ENOENT; 3361 3362 if (error) 3363 (void) strcpy(dsname, ha->failed); 3364 3365 dsl_sync_task_group_destroy(ha->dstg); 3366 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3367 spa_close(spa, FTAG); 3368 return (error); 3369 } 3370 3371 struct dsl_ds_releasearg { 3372 dsl_dataset_t *ds; 3373 const char *htag; 3374 boolean_t own; /* do we own or just hold ds? 
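(we must own it if the release might also destroy the snapshot)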
*/ 3375 }; 3376 3377 static int 3378 dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, 3379 boolean_t *might_destroy) 3380 { 3381 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3382 uint64_t zapobj; 3383 uint64_t tmp; 3384 int error; 3385 3386 *might_destroy = B_FALSE; 3387 3388 mutex_enter(&ds->ds_lock); 3389 zapobj = ds->ds_phys->ds_userrefs_obj; 3390 if (zapobj == 0) { 3391 /* The tag can't possibly exist */ 3392 mutex_exit(&ds->ds_lock); 3393 return (ESRCH); 3394 } 3395 3396 /* Make sure the tag exists */ 3397 error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); 3398 if (error) { 3399 mutex_exit(&ds->ds_lock); 3400 if (error == ENOENT) 3401 error = ESRCH; 3402 return (error); 3403 } 3404 3405 if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && 3406 DS_IS_DEFER_DESTROY(ds)) 3407 *might_destroy = B_TRUE; 3408 3409 mutex_exit(&ds->ds_lock); 3410 return (0); 3411 } 3412 3413 static int 3414 dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) 3415 { 3416 struct dsl_ds_releasearg *ra = arg1; 3417 dsl_dataset_t *ds = ra->ds; 3418 boolean_t might_destroy; 3419 int error; 3420 3421 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3422 return (ENOTSUP); 3423 3424 error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); 3425 if (error) 3426 return (error); 3427 3428 if (might_destroy) { 3429 struct dsl_ds_destroyarg dsda = {0}; 3430 3431 if (dmu_tx_is_syncing(tx)) { 3432 /* 3433 * If we're not prepared to remove the snapshot, 3434 * we can't allow the release to happen right now. 3435 */ 3436 if (!ra->own) 3437 return (EBUSY); 3438 if (ds->ds_objset) { 3439 dmu_objset_evict(ds->ds_objset); 3440 ds->ds_objset = NULL; 3441 } 3442 } 3443 dsda.ds = ds; 3444 dsda.releasing = B_TRUE; 3445 return (dsl_dataset_destroy_check(&dsda, tag, tx)); 3446 } 3447 3448 return (0); 3449 } 3450 3451 static void 3452 dsl_dataset_user_release_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 3453 { 3454 struct dsl_ds_releasearg *ra = arg1; 3455 dsl_dataset_t *ds = ra->ds; 3456 dsl_pool_t *dp = ds->ds_dir->dd_pool; 3457 objset_t *mos = dp->dp_meta_objset; 3458 uint64_t zapobj; 3459 uint64_t dsobj = ds->ds_object; 3460 uint64_t refs; 3461 int error; 3462 3463 mutex_enter(&ds->ds_lock); 3464 ds->ds_userrefs--; 3465 refs = ds->ds_userrefs; 3466 mutex_exit(&ds->ds_lock); 3467 error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx); 3468 VERIFY(error == 0 || error == ENOENT); 3469 zapobj = ds->ds_phys->ds_userrefs_obj; 3470 VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); 3471 if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && 3472 DS_IS_DEFER_DESTROY(ds)) { 3473 struct dsl_ds_destroyarg dsda = {0}; 3474 3475 ASSERT(ra->own); 3476 dsda.ds = ds; 3477 dsda.releasing = B_TRUE; 3478 /* We already did the destroy_check */ 3479 dsl_dataset_destroy_sync(&dsda, tag, cr, tx); 3480 } 3481 3482 spa_history_internal_log(LOG_DS_USER_RELEASE, 3483 dp->dp_spa, tx, cr, "<%s> %lld dataset = %llu", 3484 ra->htag, (longlong_t)refs, dsobj); 3485 } 3486 3487 static int 3488 dsl_dataset_user_release_one(char *dsname, void *arg) 3489 { 3490 struct dsl_ds_holdarg *ha = arg; 3491 struct dsl_ds_releasearg *ra; 3492 dsl_dataset_t *ds; 3493 int error; 3494 void *dtag = ha->dstg; 3495 char *name; 3496 boolean_t own = B_FALSE; 3497 boolean_t might_destroy; 3498 3499 /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ 3500 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3501 error = dsl_dataset_hold(name, dtag, 
&ds); 3502 /* 3503 * name is still needed by the unmount path below, so free it 3504 * on every exit path instead of immediately after the hold. 3505 */ if (error == ENOENT && ha->recursive) { strfree(name); return (0); } (void) strcpy(ha->failed, dsname); 3506 if (error) { strfree(name); return (error); } 3508 3509 ha->gotone = B_TRUE; 3510 3511 ASSERT(dsl_dataset_is_snapshot(ds)); 3512 3513 error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); 3514 if (error) { strfree(name); 3515 dsl_dataset_rele(ds, dtag); 3516 return (error); 3517 } 3518 3519 if (might_destroy) { 3520 #ifdef _KERNEL 3521 error = zfs_unmount_snap(name, NULL); 3522 if (error) { strfree(name); 3523 dsl_dataset_rele(ds, dtag); 3524 return (error); 3525 } 3526 error = dsl_dataset_zvol_cleanup(ds, name); 3527 if (error) { strfree(name); 3528 dsl_dataset_rele(ds, dtag); 3529 return (error); 3530 } 3531 #endif 3532 if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { strfree(name); 3533 dsl_dataset_rele(ds, dtag); 3534 return (EBUSY); 3535 } else { 3536 own = B_TRUE; 3537 dsl_dataset_make_exclusive(ds, dtag); 3538 } 3539 } 3540 strfree(name); 3541 ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); 3542 ra->ds = ds; 3543 ra->htag = ha->htag; 3544 ra->own = own; 3545 dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, 3546 dsl_dataset_user_release_sync, ra, dtag, 0); 3547 3548 return (0); 3549 } 3550 3551 int 3552 dsl_dataset_user_release(char *dsname, char *snapname, char *htag, 3553 boolean_t recursive) 3554 { 3555 struct dsl_ds_holdarg *ha; 3556 dsl_sync_task_t *dst; 3557 spa_t *spa; 3558 int error; 3559 3560 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3561 3562 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3563 3564 error = spa_open(dsname, &spa, FTAG); 3565 if (error) { 3566 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3567 return (error); 3568 } 3569 3570 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3571 ha->htag = htag; 3572 ha->snapname = snapname; 3573 ha->recursive = recursive; 3574 if (recursive) { 3575 error = dmu_objset_find(dsname, dsl_dataset_user_release_one, 3576 ha, DS_FIND_CHILDREN); 3577 } else { 3578 error = dsl_dataset_user_release_one(dsname, ha); 3579 } 3580 if (error == 0) 3581 error = dsl_sync_task_group_wait(ha->dstg); 3582 3583 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 3584 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 3585 struct dsl_ds_releasearg *ra = dst->dst_arg1; 3586 dsl_dataset_t *ds = ra->ds; 3587 3588 if (dst->dst_err) 3589 dsl_dataset_name(ds, ha->failed); 3590 3591 if (ra->own) 3592 dsl_dataset_disown(ds, ha->dstg); 3593 else 3594 dsl_dataset_rele(ds, ha->dstg); 3595 3596 kmem_free(ra, sizeof (struct dsl_ds_releasearg)); 3597 } 3598 3599 if (error == 0 && recursive && !ha->gotone) 3600 error = ENOENT; 3601 3602 if (error) 3603 (void) strcpy(dsname, ha->failed); 3604 3605 dsl_sync_task_group_destroy(ha->dstg); 3606 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3607 spa_close(spa, FTAG); 3608 return (error); 3609 } 3610 3611 /* 3612 * Called at spa_load time to release a stale temporary user hold.
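 * Temporary holds are recorded pool-wide via dsl_pool_user_hold()
 * (see dsl_dataset_user_hold_sync() above), which is how holds that
 * survive a crash can be found and released here.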
3613 */ 3614 int 3615 dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag) 3616 { 3617 dsl_dataset_t *ds; 3618 char *snap; 3619 char *name; 3620 int namelen; 3621 int error; 3622 3623 rw_enter(&dp->dp_config_rwlock, RW_READER); 3624 error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); 3625 rw_exit(&dp->dp_config_rwlock); 3626 if (error) 3627 return (error); 3628 namelen = dsl_dataset_namelen(ds)+1; 3629 name = kmem_alloc(namelen, KM_SLEEP); 3630 dsl_dataset_name(ds, name); 3631 dsl_dataset_rele(ds, FTAG); 3632 3633 snap = strchr(name, '@'); 3634 *snap = '\0'; 3635 ++snap; 3636 error = dsl_dataset_user_release(name, snap, htag, B_FALSE); /* free our copy of the name now that the release is done */ kmem_free(name, namelen); return (error); 3637 } 3638 3639 int 3640 dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) 3641 { 3642 dsl_dataset_t *ds; 3643 int err; 3644 3645 err = dsl_dataset_hold(dsname, FTAG, &ds); 3646 if (err) 3647 return (err); 3648 3649 VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); 3650 if (ds->ds_phys->ds_userrefs_obj != 0) { 3651 zap_attribute_t *za; 3652 zap_cursor_t zc; 3653 3654 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 3655 for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, 3656 ds->ds_phys->ds_userrefs_obj); 3657 zap_cursor_retrieve(&zc, za) == 0; 3658 zap_cursor_advance(&zc)) { 3659 VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, 3660 za->za_first_integer)); 3661 } 3662 zap_cursor_fini(&zc); 3663 kmem_free(za, sizeof (zap_attribute_t)); 3664 } 3665 dsl_dataset_rele(ds, FTAG); 3666 return (0); 3667 } 3668 3669 /* 3670 * Note, this function is used as the callback for dmu_objset_find(). We 3671 * always return 0 so that we will continue to find and process 3672 * inconsistent datasets, even if we encounter an error trying to 3673 * process one of them. 3674 */ 3675 /* ARGSUSED */ 3676 int 3677 dsl_destroy_inconsistent(char *dsname, void *arg) 3678 { 3679 dsl_dataset_t *ds; 3680 3681 if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { 3682 if (DS_IS_INCONSISTENT(ds)) 3683 (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); 3684 else 3685 dsl_dataset_disown(ds, FTAG); 3686 } 3687 return (0); 3688 } 3689