/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
#include <sys/sunddi.h>
#include <sys/zvol.h>

static char *dsl_reaper = "the grim reaper";

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_syncfunc_t dsl_dataset_set_reservation_sync;

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

#define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)


/*
 * Figure out how much of this delta should be propagated to the dsl_dir
 * layer.  If there's a refreservation, that space has already been
 * partially accounted for in our ancestors.  For example, with a
 * refreservation of 100 and 80 unique bytes, a delta of +30 only moves
 * MAX(unique, reserved) from 100 to 110, so just 10 is propagated.
 */
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}

void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
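		 * (The MOS has no dsl_dataset_t of its own; its space is
		 * charged to the pool's $MOS dsl_dir instead.)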
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}

int
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
    dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(pio != NULL);
	ASSERT(dmu_tx_is_syncing(tx));
	/* No block pointer => nothing to free */
	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(used > 0);
	if (ds == NULL) {
		int err;
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int err;
		int64_t delta;

		dprintf_bp(bp, "freeing: %s", "");
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		mutex_enter(&ds->ds_dir->dd_lock);
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
		mutex_exit(&ds->ds_dir->dd_lock);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

boolean_t
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}

/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;

	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));

	unique_remove(ds->ds_fsid_guid);

	if (ds->ds_user_ptr != NULL)
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);

	if (ds->ds_prev) {
		dsl_dataset_drop_ref(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	if (ds->ds_dir)
		dsl_dir_close(ds->ds_dir, ds);

	ASSERT(!list_link_active(&ds->ds_synced_link));

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_recvlock);
	mutex_destroy(&ds->ds_opening_lock);
	mutex_destroy(&ds->ds_deadlist.bpl_lock);
	rw_destroy(&ds->ds_rwlock);
	cv_destroy(&ds->ds_exclusive_cv);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

static int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
	    value, mt, NULL, 0, NULL);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_lookup(mos, snapobj, name, 8, 1, value);
	return (err);
}

static int
dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_remove_norm(mos, snapobj, name, mt, tx);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_remove(mos, snapobj, name, tx);
	return (err);
}

static int
dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
		    NULL);
		rw_init(&ds->ds_rwlock, 0, 0, 0);
		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the bplist if we
			 * just opened it.
			 */
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_recvlock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds, &ds->ds_prev);
			}

			if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) {
				dsl_dataset_t *origin;

				err = dsl_dataset_hold_obj(dp,
				    ds->ds_dir->dd_phys->dd_origin_obj,
				    FTAG, &origin);
				if (err == 0) {
					ds->ds_origin_txg =
					    origin->ds_phys->ds_creation_txg;
					dsl_dataset_rele(origin, FTAG);
				}
			}
		} else {
			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
				err = dsl_dataset_get_snapname(ds);
			if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
				err = zap_count(
				    ds->ds_dir->dd_pool->dp_meta_objset,
				    ds->ds_phys->ds_userrefs_obj,
				    &ds->ds_userrefs);
			}
		}

		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
			/*
			 * In sync context, we're called with either no lock
			 * or with the write lock.  If we're not syncing,
			 * we're always called with the read lock held.
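			 * (That is, we only need to take the read lock
			 * ourselves when we're in syncing context and the
			 * write lock isn't already held.)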
			 */
			boolean_t need_lock =
			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
			    dsl_pool_sync_context(dp);

			if (need_lock)
				rw_enter(&dp->dp_config_rwlock, RW_READER);

			err = dsl_prop_get_ds(ds,
			    "refreservation", sizeof (uint64_t), 1,
			    &ds->ds_reserved, NULL);
			if (err == 0) {
				err = dsl_prop_get_ds(ds,
				    "refquota", sizeof (uint64_t), 1,
				    &ds->ds_quota, NULL);
			}

			if (need_lock)
				rw_exit(&dp->dp_config_rwlock);
		} else {
			ds->ds_reserved = ds->ds_quota = 0;
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev)
				dsl_dataset_drop_ref(ds->ds_prev, ds);
			dsl_dir_close(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_recvlock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			ds->ds_fsid_guid =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
	mutex_enter(&ds->ds_lock);
	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
		mutex_exit(&ds->ds_lock);
		dmu_buf_rele(ds->ds_dbuf, tag);
		return (ENOENT);
	}
	mutex_exit(&ds->ds_lock);
	*dsp = ds;
	return (0);
}

static int
dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/*
	 * In syncing context we don't want to grab the rwlock: there
	 * may be an existing writer waiting for sync phase to
	 * finish.  We don't need to worry about such writers, since
	 * sync phase is single-threaded, so the writer can't be
	 * doing anything while we are active.
	 */
	if (dsl_pool_sync_context(dp)) {
		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
		return (0);
	}

	/*
	 * Normal users will hold the ds_rwlock as a READER until they
	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
	 * drop their READER lock after they set the ds_owner field.
	 *
	 * If the dataset is being destroyed, the destroy thread will
	 * obtain a WRITER lock for exclusive access after it's done its
	 * open-context work and then change the ds_owner to
	 * dsl_reaper once destruction is assured.  So threads
	 * may block here temporarily, until the "destructibility" of
	 * the dataset is determined.
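	 *
	 * In short: readers keep ds_rwlock held as READER for the life
	 * of their hold, while a destroyer takes it as WRITER and sets
	 * ds_owner to dsl_reaper; blocked threads below then notice the
	 * reaper and give up with ENOENT.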
	 */
	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
	mutex_enter(&ds->ds_lock);
	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
		rw_exit(&dp->dp_config_rwlock);
		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
		if (DSL_DATASET_IS_DESTROYED(ds)) {
			mutex_exit(&ds->ds_lock);
			dsl_dataset_drop_ref(ds, tag);
			rw_enter(&dp->dp_config_rwlock, RW_READER);
			return (ENOENT);
		}
		rw_enter(&dp->dp_config_rwlock, RW_READER);
	}
	mutex_exit(&ds->ds_lock);
	return (0);
}

int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);

	if (err)
		return (err);
	return (dsl_dataset_hold_ref(*dsp, tag));
}

int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);

	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);

	if (err)
		return (err);
	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
		dsl_dataset_rele(*dsp, owner);
		*dsp = NULL;
		return (EBUSY);
	}
	return (0);
}

int
dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *snapname;
	uint64_t obj;
	int err = 0;

	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj)
		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
	else
		err = ENOENT;
	if (err)
		goto out;

	err = dsl_dataset_hold_ref(*dsp, tag);

	/* we may be looking for a snapshot */
	if (err == 0 && snapname != NULL) {
		dsl_dataset_t *ds = NULL;

		if (*snapname++ != '@') {
			dsl_dataset_rele(*dsp, tag);
			err = ENOENT;
			goto out;
		}

		dprintf("looking for snapshot '%s'\n", snapname);
		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
		if (err == 0)
			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
		dsl_dataset_rele(*dsp, tag);

		ASSERT3U((err == 0), ==, (ds != NULL));

		if (ds) {
			mutex_enter(&ds->ds_lock);
			if (ds->ds_snapname[0] == 0)
				(void) strlcpy(ds->ds_snapname, snapname,
				    sizeof (ds->ds_snapname));
			mutex_exit(&ds->ds_lock);
			err = dsl_dataset_hold_ref(ds, tag);
			*dsp = err ? NULL : ds;
		}
	}
out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);
	return (err);
}

int
dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold(name, owner, dsp);
	if (err)
		return (err);
	if ((*dsp)->ds_phys->ds_num_children > 0 &&
	    !DS_MODE_IS_READONLY(flags)) {
		dsl_dataset_rele(*dsp, owner);
		return (EROFS);
	}
	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
		dsl_dataset_rele(*dsp, owner);
		return (EBUSY);
	}
	return (0);
}

void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			/*
			 * We use a "recursive" mutex so that we
			 * can call dprintf_ds() with ds_lock held.
			 */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

static int
dsl_dataset_namelen(dsl_dataset_t *ds)
{
	int result;

	if (ds == NULL) {
		result = 3;	/* "mos" */
	} else {
		result = dsl_dir_namelen(ds->ds_dir);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			++result;	/* adding one for the @-sign */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				result += strlen(ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				result += strlen(ds->ds_snapname);
			}
		}
	}

	return (result);
}

void
dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
{
	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
		rw_exit(&ds->ds_rwlock);
	}
	dsl_dataset_drop_ref(ds, tag);
}

void
dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
{
	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));

	mutex_enter(&ds->ds_lock);
	ds->ds_owner = NULL;
	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
		rw_exit(&ds->ds_rwlock);
		cv_broadcast(&ds->ds_exclusive_cv);
	}
	mutex_exit(&ds->ds_lock);
	if (ds->ds_dbuf)
		dsl_dataset_drop_ref(ds, owner);
	else
		dsl_dataset_evict(ds->ds_dbuf, ds);
}

boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
{
	boolean_t gotit = FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_owner == NULL &&
	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
		ds->ds_owner = owner;
		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
			rw_exit(&ds->ds_rwlock);
		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
	return (gotit);
}

void
dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
{
	ASSERT3P(owner, ==, ds->ds_owner);
	if (!RW_WRITE_HELD(&ds->ds_rwlock))
		rw_enter(&ds->ds_rwlock, RW_WRITER);
}

uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
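	/*
	 * Anything created in the pool's initial txg gets creation_txg 1;
	 * dsl_dataset_snapshot_sync() likewise requires the origin's
	 * ds_creation_txg to be < TXG_INITIAL.
	 */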
	dsphys->ds_creation_txg =
	    tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);

	if (origin) {
		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_used_bytes =
		    origin->ds_phys->ds_used_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;
		dsphys->ds_flags |= origin->ds_phys->ds_flags;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY(0 == zap_add_int(mos,
			    origin->ds_phys->ds_next_clones_obj,
			    dsobj, tx));
		}

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_origin_obj = origin->ds_object;
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;

	return (dsobj);
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	uint64_t dsobj, ddobj;
	dsl_dir_t *dd;

	ASSERT(lastname[0] != '@');

	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);

	dsl_deleg_set_create_perms(dd, tx, cr);

	dsl_dir_close(dd, FTAG);

	return (dsobj);
}

struct destroyarg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	char *failed;
	boolean_t defer;
};

static int
dsl_snapshot_destroy_one(char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	int err;
	char *dsname;

	dsname = kmem_asprintf("%s@%s", name, da->snapname);
	err = dsl_dataset_own(dsname, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    da->dstg, &ds);
	strfree(dsname);
	if (err == 0) {
		struct dsl_ds_destroyarg *dsda;

		dsl_dataset_make_exclusive(ds, da->dstg);
		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
		dsda->ds = ds;
		dsda->defer = da->defer;
		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
		    dsl_dataset_destroy_sync, dsda, da->dstg, 0);
	} else if (err == ENOENT) {
		err = 0;
	} else {
		(void) strcpy(da->failed, name);
	}
	return (err);
}

/*
 * Destroy 'snapname' in all descendants of 'fsname'.
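 *
 * Descendants that lack the named snapshot are skipped (see
 * dsl_snapshot_destroy_one(), which turns its ENOENT into success),
 * and all of the destroys are submitted as one sync task group so
 * that they commit in a single txg.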
 */
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
{
	int err;
	struct destroyarg da;
	dsl_sync_task_t *dst;
	spa_t *spa;

	err = spa_open(fsname, &spa, FTAG);
	if (err)
		return (err);
	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	da.snapname = snapname;
	da.failed = fsname;
	da.defer = defer;

	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);

	if (err == 0)
		err = dsl_sync_task_group_wait(da.dstg);

	for (dst = list_head(&da.dstg->dstg_tasks); dst;
	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
		struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
		dsl_dataset_t *ds = dsda->ds;

		/*
		 * Return the file system name that triggered the error
		 */
		if (dst->dst_err) {
			dsl_dataset_name(ds, fsname);
			*strchr(fsname, '@') = '\0';
		}
		ASSERT3P(dsda->rm_origin, ==, NULL);
		dsl_dataset_disown(ds, da.dstg);
		kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
	}

	dsl_sync_task_group_destroy(da.dstg);
	spa_close(spa, FTAG);
	return (err);
}

static boolean_t
dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
{
	boolean_t might_destroy = B_FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
	    DS_IS_DEFER_DESTROY(ds))
		might_destroy = B_TRUE;
	mutex_exit(&ds->ds_lock);

	return (might_destroy);
}

#ifdef _KERNEL
static int
dsl_dataset_zvol_cleanup(dsl_dataset_t *ds, const char *name)
{
	int error;
	objset_t *os;

	error = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
	if (error)
		return (error);

	if (dmu_objset_type(os) == DMU_OST_ZVOL)
		error = zvol_remove_minor(name);
	dmu_objset_close(os);

	return (error);
}
#endif

/*
 * If we're removing a clone, and these three conditions are true:
 *	1) the clone's origin has no other children
 *	2) the clone's origin has no user references
 *	3) the clone's origin has been marked for deferred destruction
 * Then, prepare to remove the origin as part of this sync task group.
 */
static int
dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
{
	dsl_dataset_t *ds = dsda->ds;
	dsl_dataset_t *origin = ds->ds_prev;

	if (dsl_dataset_might_destroy_origin(origin)) {
		char *name;
		int namelen;
		int error;

		namelen = dsl_dataset_namelen(origin) + 1;
		name = kmem_alloc(namelen, KM_SLEEP);
		dsl_dataset_name(origin, name);
#ifdef _KERNEL
		error = zfs_unmount_snap(name, NULL);
		if (error) {
			kmem_free(name, namelen);
			return (error);
		}
		error = dsl_dataset_zvol_cleanup(origin, name);
		if (error) {
			kmem_free(name, namelen);
			return (error);
		}
#endif
		error = dsl_dataset_own(name,
		    DS_MODE_READONLY | DS_MODE_INCONSISTENT,
		    tag, &origin);
		kmem_free(name, namelen);
		if (error)
			return (error);
		dsda->rm_origin = origin;
		dsl_dataset_make_exclusive(origin, tag);
	}

	return (0);
}

/*
 * ds must be opened as OWNER.  On return (whether successful or not),
 * ds will be closed and caller can no longer dereference it.
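 *
 * For a head dataset, destruction is staged: a first sync task marks
 * the dataset inconsistent, the bulk of the objects are then freed in
 * open context, and a final sync task group destroys the dataset and
 * its dsl_dir (along with a deferred-destroy origin snapshot, if that
 * turns out to be necessary).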
 */
int
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dir_t *dd;
	uint64_t obj;
	struct dsl_ds_destroyarg dsda = {0};

	dsda.ds = ds;

	if (dsl_dataset_is_snapshot(ds)) {
		/* Destroying a snapshot is simpler */
		dsl_dataset_make_exclusive(ds, tag);

		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		/* NOTE: defer is always B_FALSE for non-snapshots */
		dsda.defer = defer;
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    &dsda, tag, 0);
		ASSERT3P(dsda.rm_origin, ==, NULL);
		goto out;
	}

	dd = ds->ds_dir;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err)
		goto out;

	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
	if (err)
		goto out;

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
	    ds->ds_phys->ds_prev_snap_txg)) {
		/*
		 * Ignore errors, if there is not enough disk space
		 * we will deal with it in dsl_dataset_destroy_sync().
		 */
		(void) dmu_free_object(os, obj);
	}

	/*
	 * We need to sync out all in-flight IO before we try to evict
	 * (the dataset evict func is trying to clear the cached entries
	 * for this dataset in the ARC).
	 */
	txg_wait_synced(dd->dd_pool, 0);

	/*
	 * If we managed to free all the objects in open
	 * context, the user space accounting should be zero.
	 */
	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
	    dmu_objset_userused_enabled(os->os)) {
		uint64_t count;

		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
		    count == 0);
		ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
		    count == 0);
	}

	dmu_objset_close(os);
	if (err != ESRCH)
		goto out;

	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
	rw_exit(&dd->dd_pool->dp_config_rwlock);

	if (err)
		goto out;

	if (ds->ds_user_ptr) {
		/*
		 * We need to sync out all in-flight IO before we try
		 * to evict (the dataset evict func is trying to clear
		 * the cached entries for this dataset in the ARC).
		 */
		txg_wait_synced(dd->dd_pool, 0);
	}

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dsl_dataset_make_exclusive(ds, tag);
	if (ds->ds_user_ptr) {
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}

	/*
	 * If we're removing a clone, we might also need to remove its
	 * origin.
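	 * Removing the origin can race with 'zfs release' and
	 * 'zfs destroy -d' on that snapshot, so the sync task group
	 * below may need to be retried; see the need_prep handling
	 * at the bottom of the loop.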
	 */
	do {
		dsda.need_prep = B_FALSE;
		if (dsl_dir_is_clone(dd)) {
			err = dsl_dataset_origin_rm_prep(&dsda, tag);
			if (err) {
				dsl_dir_close(dd, FTAG);
				goto out;
			}
		}

		dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
		dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
		    dsl_dataset_destroy_sync, &dsda, tag, 0);
		dsl_sync_task_create(dstg, dsl_dir_destroy_check,
		    dsl_dir_destroy_sync, dd, FTAG, 0);
		err = dsl_sync_task_group_wait(dstg);
		dsl_sync_task_group_destroy(dstg);

		/*
		 * We could be racing against 'zfs release' or 'zfs destroy -d'
		 * on the origin snap, in which case we can get EBUSY if we
		 * needed to destroy the origin snap but were not ready to
		 * do so.
		 */
		if (dsda.need_prep) {
			ASSERT(err == EBUSY);
			ASSERT(dsl_dir_is_clone(dd));
			ASSERT(dsda.rm_origin == NULL);
		}
	} while (dsda.need_prep);

	if (dsda.rm_origin != NULL)
		dsl_dataset_disown(dsda.rm_origin, tag);

	/* if it is successful, dsl_dir_destroy_sync will close the dd */
	if (err)
		dsl_dir_close(dd, FTAG);
out:
	dsl_dataset_disown(ds, tag);
	return (err);
}

void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}

blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_user_ptr != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space used in the most recent snapshot that is still being used
 * in this file system, from the space currently in use.  To figure out
 * the space in the most recent snapshot still in use, we need to take
 * the total space used in the snapshot and subtract out the space that
 * has been freed up since the snapshot was taken.
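 *
 * That is, unique = used_bytes - (mrs_used - deadlist_used).  For
 * example, if the head uses 100M, the most recent snapshot used 80M,
 * and 30M of the snapshot's blocks have since been freed (and so sit
 * on the head's deadlist), the head still shares 50M with the
 * snapshot, leaving 50M unique to the head.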
 */
static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
	else
		mrs_used = 0;

	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
	    &dluncomp));

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);

	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

static uint64_t
dsl_dataset_unique(dsl_dataset_t *ds)
{
	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
		dsl_dataset_recalc_head_uniq(ds);

	return (ds->ds_phys->ds_unique_bytes);
}

struct killarg {
	dsl_dataset_t *ds;
	zio_t *zio;
	dmu_tx_t *tx;
};

/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
    const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;

	if (bp == NULL)
		return (0);

	if ((zb->zb_level == -1ULL && zb->zb_blkid != 0) ||
	    (zb->zb_object != 0 && dnp == NULL)) {
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		VERIFY3U(0, ==, dsl_free(ka->zio, ka->tx->tx_pool,
		    ka->tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT));
	} else {
		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
	}

	return (0);
}

/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * This is really a dsl_dir thing, but check it here so that
	 * we'll be less likely to leave this dataset inconsistent &
	 * nearly destroyed.
	 */
	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
	if (err)
		return (err);
	if (count != 0)
		return (EEXIST);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
}

static int
dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
    dmu_tx_t *tx)
{
	dsl_dataset_t *ds = dsda->ds;
	dsl_dataset_t *ds_prev = ds->ds_prev;

	if (dsl_dataset_might_destroy_origin(ds_prev)) {
		struct dsl_ds_destroyarg ndsda = {0};

		/*
		 * If we're not prepared to remove the origin, don't remove
		 * the clone either.
		 */
		if (dsda->rm_origin == NULL) {
			dsda->need_prep = B_TRUE;
			return (EBUSY);
		}

		ndsda.ds = ds_prev;
		ndsda.is_origin_rm = B_TRUE;
		return (dsl_dataset_destroy_check(&ndsda, tag, tx));
	}

	/*
	 * If we're not going to remove the origin after all,
	 * undo the open context setup.
	 */
	if (dsda->rm_origin != NULL) {
		dsl_dataset_disown(dsda->rm_origin, tag);
		dsda->rm_origin = NULL;
	}

	return (0);
}

/* ARGSUSED */
int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	struct dsl_ds_destroyarg *dsda = arg1;
	dsl_dataset_t *ds = dsda->ds;

	/* we have an owner hold, so no one else can destroy us */
	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));

	/*
	 * Only allow deferred destroy on pools that support it.
	 * NOTE: deferred destroy is only supported on snapshots.
	 */
	if (dsda->defer) {
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS)
			return (ENOTSUP);
		ASSERT(dsl_dataset_is_snapshot(ds));
		return (0);
	}

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	if (dsl_dataset_is_snapshot(ds)) {
		/*
		 * If this snapshot has an elevated user reference count,
		 * we can't destroy it yet.
		 */
		if (ds->ds_userrefs > 0 && !dsda->releasing)
			return (EBUSY);

		mutex_enter(&ds->ds_lock);
		/*
		 * Can't delete a branch point.  However, if we're destroying
		 * a clone and removing its origin due to it having a user
		 * hold count of 0 and having been marked for deferred destroy,
		 * it's OK for the origin to have a single clone.
		 */
		if (ds->ds_phys->ds_num_children >
		    (dsda->is_origin_rm ? 2 : 1)) {
			mutex_exit(&ds->ds_lock);
			return (EEXIST);
		}
		mutex_exit(&ds->ds_lock);
	} else if (dsl_dir_is_clone(ds->ds_dir)) {
		return (dsl_dataset_origin_check(dsda, arg2, tx));
	}

	/* XXX we should do some i/o error checking... */
	return (0);
}

struct refsarg {
	kmutex_t lock;
	boolean_t gone;
	kcondvar_t cv;
};

/* ARGSUSED */
static void
dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
{
	struct refsarg *arg = argv;

	mutex_enter(&arg->lock);
	arg->gone = TRUE;
	cv_signal(&arg->cv);
	mutex_exit(&arg->lock);
}

static void
dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
{
	struct refsarg arg;

	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
	arg.gone = FALSE;
	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
	    dsl_dataset_refs_gone);
	dmu_buf_rele(ds->ds_dbuf, tag);
	mutex_enter(&arg.lock);
	while (!arg.gone)
		cv_wait(&arg.cv, &arg.lock);
	ASSERT(arg.gone);
	mutex_exit(&arg.lock);
	ds->ds_dbuf = NULL;
	ds->ds_phys = NULL;
	mutex_destroy(&arg.lock);
	cv_destroy(&arg.cv);
}

void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
	struct dsl_ds_destroyarg *dsda = arg1;
	dsl_dataset_t *ds = dsda->ds;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(ds->ds_owner);
	ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	if (dsda->defer) {
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		if (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1) {
			dmu_buf_will_dirty(ds->ds_dbuf, tx);
			ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
			return;
		}
	}

	/* signal any waiters that this dataset is going away */
	mutex_enter(&ds->ds_lock);
	ds->ds_owner = dsl_reaper;
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		uint64_t val = 0;
		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
		ASSERT3U(ds->ds_reserved, ==, 0);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_pool_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		}
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
			    ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY(0 == zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;

			/*
			 * If the clone's origin has no other clones, no
			 * user holds, and has been marked for deferred
			 * deletion, then we should have done the necessary
			 * destroy setup for it.
			 */
			if (ds_prev->ds_phys->ds_num_children == 1 &&
			    ds_prev->ds_userrefs == 0 &&
			    DS_IS_DEFER_DESTROY(ds_prev)) {
				ASSERT3P(dsda->rm_origin, !=, NULL);
			} else {
				ASSERT3P(dsda->rm_origin, ==, NULL);
			}
		} else if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;
		uint64_t old_unique;
		int64_t used = 0, compressed = 0, uncompressed = 0;

		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		old_unique = dsl_dataset_unique(ds_next);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) dsl_free(zio, dp, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);

		/* change snapused */
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -compressed, -uncompressed, tx);

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		bplist_close(&ds->ds_deadlist);
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (i.e. be on the snap after next's
			 * deadlist).
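			 *
			 * Concretely, with snapshots A < B (this ds) < C
			 * (ds_next) < D: a block born in (A, B] that died
			 * in (C, D] is on D's deadlist, was shared by B
			 * and C, and becomes unique to C once B is gone.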
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;
			uint64_t space;

			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj,
			    FTAG, &ds_after_next));

			VERIFY(0 ==
			    bplist_space_birthrange(&ds_after_next->ds_deadlist,
			    ds->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_creation_txg, &space));
			ds_next->ds_phys->ds_unique_bytes += space;

			dsl_dataset_rele(ds_after_next, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
			ds_next->ds_prev = NULL;
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds_next, &ds_next->ds_prev));
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir,
				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
			}
		}
		dsl_dataset_rele(ds_next, FTAG);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * NB: this should be very quick, because we already
		 * freed all the objects in open context.
		 */
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
		    ds->ds_phys->ds_unique_bytes == 0);
	}

	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dir */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			uint64_t val;

			err = dsl_dataset_snap_lookup(ds_head,
			    ds->ds_snapname, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_rele(ds_head, FTAG);
	}

	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		uint64_t count;
		ASSERT(0 == zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
		VERIFY(0 == dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	if (ds->ds_phys->ds_userrefs_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
	dsl_dir_close(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dsl_dataset_drain_refs(ds, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));

	if (dsda->rm_origin) {
		/*
		 * Remove the origin of the clone we just destroyed.
		 */
		dsl_dataset_t *origin = ds->ds_prev;
		struct dsl_ds_destroyarg ndsda = {0};

		ASSERT3P(origin, ==, dsda->rm_origin);
		if (origin->ds_user_ptr) {
			origin->ds_user_evict_func(origin, origin->ds_user_ptr);
			origin->ds_user_ptr = NULL;
		}

		dsl_dataset_rele(origin, ds);
		ds->ds_prev = NULL;

		ndsda.ds = origin;
		dsl_dataset_destroy_sync(&ndsda, tag, cr, tx);
	}
}

static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
		return (ENOSPC);

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
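	 * (dsl_dir_willuse_space() records the pending usage so that
	 * later space checks in this sync group take it into account.)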
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots in the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	/*
	 * Check that the dataset's name is not too long.  Name consists
	 * of the dataset's length + 1 for the @-sign + snapshot name's length
	 */
	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
		return (ENAMETOOLONG);

	err = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (err)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
next_clones_obj, dsphys->ds_next_snap_obj, tx)); 1964 VERIFY3U(0, ==, zap_add_int(mos, 1965 next_clones_obj, dsobj, tx)); 1966 } 1967 } 1968 1969 /* 1970 * If we have a reference-reservation on this dataset, we will 1971 * need to increase the amount of refreservation being charged 1972 * since our unique space is going to zero. 1973 */ 1974 if (ds->ds_reserved) { 1975 int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1976 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1977 add, 0, 0, tx); 1978 } 1979 1980 bplist_close(&ds->ds_deadlist); 1981 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1982 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 1983 ds->ds_phys->ds_prev_snap_obj = dsobj; 1984 ds->ds_phys->ds_prev_snap_txg = crtxg; 1985 ds->ds_phys->ds_unique_bytes = 0; 1986 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1987 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1988 ds->ds_phys->ds_deadlist_obj = 1989 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1990 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1991 ds->ds_phys->ds_deadlist_obj)); 1992 1993 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1994 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1995 snapname, 8, 1, &dsobj, tx); 1996 ASSERT(err == 0); 1997 1998 if (ds->ds_prev) 1999 dsl_dataset_drop_ref(ds->ds_prev, ds); 2000 VERIFY(0 == dsl_dataset_get_ref(dp, 2001 ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 2002 2003 dsl_pool_ds_snapshotted(ds, tx); 2004 2005 spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, 2006 "dataset = %llu", dsobj); 2007 } 2008 2009 void 2010 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 2011 { 2012 ASSERT(dmu_tx_is_syncing(tx)); 2013 ASSERT(ds->ds_user_ptr != NULL); 2014 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 2015 2016 /* 2017 * in case we had to change ds_fsid_guid when we opened it, 2018 * sync it out now. 2019 */ 2020 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2021 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 2022 2023 dsl_dir_dirty(ds->ds_dir, tx); 2024 dmu_objset_sync(ds->ds_user_ptr, zio, tx); 2025 } 2026 2027 void 2028 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 2029 { 2030 uint64_t refd, avail, uobjs, aobjs; 2031 2032 dsl_dir_stats(ds->ds_dir, nv); 2033 2034 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 2035 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 2036 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 2037 2038 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 2039 ds->ds_phys->ds_creation_time); 2040 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 2041 ds->ds_phys->ds_creation_txg); 2042 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 2043 ds->ds_quota); 2044 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 2045 ds->ds_reserved); 2046 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 2047 ds->ds_phys->ds_guid); 2048 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, ds->ds_userrefs); 2049 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 2050 DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 2051 2052 if (ds->ds_phys->ds_next_snap_obj) { 2053 /* 2054 * This is a snapshot; override the dd's space used with 2055 * our unique space and compression ratio. 2056 */ 2057 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 2058 ds->ds_phys->ds_unique_bytes); 2059 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 2060 ds->ds_phys->ds_compressed_bytes == 0 ? 
100 : 2061 (ds->ds_phys->ds_uncompressed_bytes * 100 / 2062 ds->ds_phys->ds_compressed_bytes)); 2063 } 2064 } 2065 2066 void 2067 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 2068 { 2069 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 2070 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 2071 stat->dds_guid = ds->ds_phys->ds_guid; 2072 if (ds->ds_phys->ds_next_snap_obj) { 2073 stat->dds_is_snapshot = B_TRUE; 2074 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 2075 } else { 2076 stat->dds_is_snapshot = B_FALSE; 2077 stat->dds_num_clones = 0; 2078 } 2079 2080 /* clone origin is really a dsl_dir thing... */ 2081 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 2082 if (dsl_dir_is_clone(ds->ds_dir)) { 2083 dsl_dataset_t *ods; 2084 2085 VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, 2086 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 2087 dsl_dataset_name(ods, stat->dds_origin); 2088 dsl_dataset_drop_ref(ods, FTAG); 2089 } else { 2090 stat->dds_origin[0] = '\0'; 2091 } 2092 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 2093 } 2094 2095 uint64_t 2096 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 2097 { 2098 return (ds->ds_fsid_guid); 2099 } 2100 2101 void 2102 dsl_dataset_space(dsl_dataset_t *ds, 2103 uint64_t *refdbytesp, uint64_t *availbytesp, 2104 uint64_t *usedobjsp, uint64_t *availobjsp) 2105 { 2106 *refdbytesp = ds->ds_phys->ds_used_bytes; 2107 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 2108 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 2109 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 2110 if (ds->ds_quota != 0) { 2111 /* 2112 * Adjust available bytes according to refquota 2113 */ 2114 if (*refdbytesp < ds->ds_quota) 2115 *availbytesp = MIN(*availbytesp, 2116 ds->ds_quota - *refdbytesp); 2117 else 2118 *availbytesp = 0; 2119 } 2120 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 2121 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 2122 } 2123 2124 boolean_t 2125 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 2126 { 2127 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2128 2129 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 2130 dsl_pool_sync_context(dp)); 2131 if (ds->ds_prev == NULL) 2132 return (B_FALSE); 2133 if (ds->ds_phys->ds_bp.blk_birth > 2134 ds->ds_prev->ds_phys->ds_creation_txg) 2135 return (B_TRUE); 2136 return (B_FALSE); 2137 } 2138 2139 /* ARGSUSED */ 2140 static int 2141 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 2142 { 2143 dsl_dataset_t *ds = arg1; 2144 char *newsnapname = arg2; 2145 dsl_dir_t *dd = ds->ds_dir; 2146 dsl_dataset_t *hds; 2147 uint64_t val; 2148 int err; 2149 2150 err = dsl_dataset_hold_obj(dd->dd_pool, 2151 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); 2152 if (err) 2153 return (err); 2154 2155 /* new name better not be in use */ 2156 err = dsl_dataset_snap_lookup(hds, newsnapname, &val); 2157 dsl_dataset_rele(hds, FTAG); 2158 2159 if (err == 0) 2160 err = EEXIST; 2161 else if (err == ENOENT) 2162 err = 0; 2163 2164 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2165 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 2166 err = ENAMETOOLONG; 2167 2168 return (err); 2169 } 2170 2171 static void 2172 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 2173 cred_t *cr, dmu_tx_t *tx) 2174 { 2175 dsl_dataset_t *ds = arg1; 2176 const char *newsnapname = arg2; 2177 dsl_dir_t *dd = ds->ds_dir; 2178 objset_t *mos = dd->dd_pool->dp_meta_objset; 2179 dsl_dataset_t *hds; 2180 int 
err; 2181 2182 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 2183 2184 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, 2185 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); 2186 2187 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2188 err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); 2189 ASSERT3U(err, ==, 0); 2190 mutex_enter(&ds->ds_lock); 2191 (void) strcpy(ds->ds_snapname, newsnapname); 2192 mutex_exit(&ds->ds_lock); 2193 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 2194 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 2195 ASSERT3U(err, ==, 0); 2196 2197 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 2198 cr, "dataset = %llu", ds->ds_object); 2199 dsl_dataset_rele(hds, FTAG); 2200 } 2201 2202 struct renamesnaparg { 2203 dsl_sync_task_group_t *dstg; 2204 char failed[MAXPATHLEN]; 2205 char *oldsnap; 2206 char *newsnap; 2207 }; 2208 2209 static int 2210 dsl_snapshot_rename_one(char *name, void *arg) 2211 { 2212 struct renamesnaparg *ra = arg; 2213 dsl_dataset_t *ds = NULL; 2214 char *cp; 2215 int err; 2216 2217 cp = name + strlen(name); 2218 *cp = '@'; 2219 (void) strcpy(cp + 1, ra->oldsnap); 2220 2221 /* 2222 * For recursive snapshot renames the parent won't be changing, 2223 * so we just pass name for both the to/from arguments. 2224 */ 2225 err = zfs_secpolicy_rename_perms(name, name, CRED()); 2226 if (err == ENOENT) { 2227 return (0); 2228 } else if (err) { 2229 (void) strcpy(ra->failed, name); 2230 return (err); 2231 } 2232 2233 #ifdef _KERNEL 2234 /* 2235 * Each filesystem undergoing rename needs to be unmounted. 2236 */ 2237 (void) zfs_unmount_snap(name, NULL); 2238 #endif 2239 err = dsl_dataset_hold(name, ra->dstg, &ds); 2240 *cp = '\0'; 2241 if (err == ENOENT) { 2242 return (0); 2243 } else if (err) { 2244 (void) strcpy(ra->failed, name); 2245 return (err); 2246 } 2247 2248 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 2249 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 2250 2251 return (0); 2252 } 2253 2254 static int 2255 dsl_recursive_rename(char *oldname, const char *newname) 2256 { 2257 int err; 2258 struct renamesnaparg *ra; 2259 dsl_sync_task_t *dst; 2260 spa_t *spa; 2261 char *cp, *fsname = spa_strdup(oldname); 2262 int len = strlen(oldname); 2263 2264 /* truncate the snapshot name to get the fsname */ 2265 cp = strchr(fsname, '@'); 2266 *cp = '\0'; 2267 2268 err = spa_open(fsname, &spa, FTAG); 2269 if (err) { 2270 kmem_free(fsname, len + 1); 2271 return (err); 2272 } 2273 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 2274 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 2275 2276 ra->oldsnap = strchr(oldname, '@') + 1; 2277 ra->newsnap = strchr(newname, '@') + 1; 2278 *ra->failed = '\0'; 2279 2280 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 2281 DS_FIND_CHILDREN); 2282 kmem_free(fsname, len + 1); 2283 2284 if (err == 0) { 2285 err = dsl_sync_task_group_wait(ra->dstg); 2286 } 2287 2288 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 2289 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 2290 dsl_dataset_t *ds = dst->dst_arg1; 2291 if (dst->dst_err) { 2292 dsl_dir_name(ds->ds_dir, ra->failed); 2293 (void) strcat(ra->failed, "@"); 2294 (void) strcat(ra->failed, ra->newsnap); 2295 } 2296 dsl_dataset_rele(ds, ra->dstg); 2297 } 2298 2299 if (err) 2300 (void) strcpy(oldname, ra->failed); 2301 2302 dsl_sync_task_group_destroy(ra->dstg); 2303 kmem_free(ra, sizeof (struct renamesnaparg)); 2304 spa_close(spa, FTAG); 2305 return (err); 2306 } 2307 2308 static int 2309 dsl_valid_rename(char
*oldname, void *arg) 2310 { 2311 int delta = *(int *)arg; 2312 2313 if (strlen(oldname) + delta >= MAXNAMELEN) 2314 return (ENAMETOOLONG); 2315 2316 return (0); 2317 } 2318 2319 #pragma weak dmu_objset_rename = dsl_dataset_rename 2320 int 2321 dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) 2322 { 2323 dsl_dir_t *dd; 2324 dsl_dataset_t *ds; 2325 const char *tail; 2326 int err; 2327 2328 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2329 if (err) 2330 return (err); 2331 /* 2332 * If there are more than 2 references there may be holds 2333 * hanging around that haven't been cleared out yet. 2334 */ 2335 if (dmu_buf_refcount(dd->dd_dbuf) > 2) 2336 txg_wait_synced(dd->dd_pool, 0); 2337 if (tail == NULL) { 2338 int delta = strlen(newname) - strlen(oldname); 2339 2340 /* if we're growing, validate child name lengths */ 2341 if (delta > 0) 2342 err = dmu_objset_find(oldname, dsl_valid_rename, 2343 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 2344 2345 if (!err) 2346 err = dsl_dir_rename(dd, newname); 2347 dsl_dir_close(dd, FTAG); 2348 return (err); 2349 } 2350 if (tail[0] != '@') { 2351 /* the name ended in a nonexistent component */ 2352 dsl_dir_close(dd, FTAG); 2353 return (ENOENT); 2354 } 2355 2356 dsl_dir_close(dd, FTAG); 2357 2358 /* the new name must be a snapshot in the same filesystem */ 2359 tail = strchr(newname, '@'); 2360 if (tail == NULL) 2361 return (EINVAL); 2362 tail++; 2363 if (strncmp(oldname, newname, tail - newname) != 0) 2364 return (EXDEV); 2365 2366 if (recursive) { 2367 err = dsl_recursive_rename(oldname, newname); 2368 } else { 2369 err = dsl_dataset_hold(oldname, FTAG, &ds); 2370 if (err) 2371 return (err); 2372 2373 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2374 dsl_dataset_snapshot_rename_check, 2375 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2376 2377 dsl_dataset_rele(ds, FTAG); 2378 } 2379 2380 return (err); 2381 } 2382 2383 struct promotenode { 2384 list_node_t link; 2385 dsl_dataset_t *ds; 2386 }; 2387 2388 struct promotearg { 2389 list_t shared_snaps, origin_snaps, clone_snaps; 2390 dsl_dataset_t *origin_origin, *origin_head; 2391 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2392 }; 2393 2394 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2395 2396 /* ARGSUSED */ 2397 static int 2398 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 2399 { 2400 dsl_dataset_t *hds = arg1; 2401 struct promotearg *pa = arg2; 2402 struct promotenode *snap = list_head(&pa->shared_snaps); 2403 dsl_dataset_t *origin_ds = snap->ds; 2404 int err; 2405 2406 /* Check that it is a real clone */ 2407 if (!dsl_dir_is_clone(hds->ds_dir)) 2408 return (EINVAL); 2409 2410 /* Since this is so expensive, don't do the preliminary check */ 2411 if (!dmu_tx_is_syncing(tx)) 2412 return (0); 2413 2414 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) 2415 return (EXDEV); 2416 2417 /* compute origin's new unique space */ 2418 snap = list_tail(&pa->clone_snaps); 2419 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2420 err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2421 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique); 2422 if (err) 2423 return (err); 2424 2425 /* 2426 * Walk the snapshots that we are moving 2427 * 2428 * Compute space to transfer.
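 * (A hypothetical worked example of the derivation that follows: for
 * snapshots s0..s2 with used u0=1G, u1=3G, u2=4G and deadlist ("killed")
 * space k0=0, k1=1G, k2=2G, the per-snapshot births
 * (4-3+2) + (3-1+1) + (1-0+0) sum to 7G, matching the telescoped form
 * u2 + k2 + k1 + k0 = 7G.)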
Consider the incremental changes 2429 * to used for each snapshot: 2430 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2431 * So each snapshot gave birth to: 2432 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2433 * So a sequence would look like: 2434 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2435 * Which simplifies to: 2436 * uN + kN + kN-1 + ... + k1 + k0 2437 * Note however, if we stop before we reach the ORIGIN we get: 2438 * uN + kN + kN-1 + ... + kM - uM-1 2439 */ 2440 pa->used = origin_ds->ds_phys->ds_used_bytes; 2441 pa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2442 pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2443 for (snap = list_head(&pa->shared_snaps); snap; 2444 snap = list_next(&pa->shared_snaps, snap)) { 2445 uint64_t val, dlused, dlcomp, dluncomp; 2446 dsl_dataset_t *ds = snap->ds; 2447 2448 /* Check that the snapshot name does not conflict */ 2449 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2450 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2451 if (err == 0) 2452 return (EEXIST); 2453 if (err != ENOENT) 2454 return (err); 2455 2456 /* The very first snapshot does not have a deadlist */ 2457 if (ds->ds_phys->ds_prev_snap_obj == 0) 2458 continue; 2459 2460 if (err = bplist_space(&ds->ds_deadlist, 2461 &dlused, &dlcomp, &dluncomp)) 2462 return (err); 2463 pa->used += dlused; 2464 pa->comp += dlcomp; 2465 pa->uncomp += dluncomp; 2466 } 2467 2468 /* 2469 * If we are a clone of a clone then we never reached ORIGIN, 2470 * so we need to subtract out the clone origin's used space. 2471 */ 2472 if (pa->origin_origin) { 2473 pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; 2474 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; 2475 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; 2476 } 2477 2478 /* Check that there is enough space here */ 2479 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2480 pa->used); 2481 if (err) 2482 return (err); 2483 2484 /* 2485 * Compute the amounts of space that will be used by snapshots 2486 * after the promotion (for both origin and clone). For each, 2487 * it is the amount of space that will be on all of their 2488 * deadlists (that was not born before their new origin). 2489 */ 2490 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2491 uint64_t space; 2492 2493 /* 2494 * Note, typically this will not be a clone of a clone, 2495 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so 2496 * these snaplist_space() -> bplist_space_birthrange() 2497 * calls will be fast because they do not have to 2498 * iterate over all bps. 
2499 */ 2500 snap = list_head(&pa->origin_snaps); 2501 err = snaplist_space(&pa->shared_snaps, 2502 snap->ds->ds_origin_txg, &pa->cloneusedsnap); 2503 if (err) 2504 return (err); 2505 2506 err = snaplist_space(&pa->clone_snaps, 2507 snap->ds->ds_origin_txg, &space); 2508 if (err) 2509 return (err); 2510 pa->cloneusedsnap += space; 2511 } 2512 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2513 err = snaplist_space(&pa->origin_snaps, 2514 origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); 2515 if (err) 2516 return (err); 2517 } 2518 2519 return (0); 2520 } 2521 2522 static void 2523 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2524 { 2525 dsl_dataset_t *hds = arg1; 2526 struct promotearg *pa = arg2; 2527 struct promotenode *snap = list_head(&pa->shared_snaps); 2528 dsl_dataset_t *origin_ds = snap->ds; 2529 dsl_dataset_t *origin_head; 2530 dsl_dir_t *dd = hds->ds_dir; 2531 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2532 dsl_dir_t *odd = NULL; 2533 uint64_t oldnext_obj; 2534 int64_t delta; 2535 2536 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 2537 2538 snap = list_head(&pa->origin_snaps); 2539 origin_head = snap->ds; 2540 2541 /* 2542 * We need to explicitly open odd, since origin_ds's dd will be 2543 * changing. 2544 */ 2545 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2546 NULL, FTAG, &odd)); 2547 2548 /* change origin's next snap */ 2549 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2550 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2551 snap = list_tail(&pa->clone_snaps); 2552 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2553 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2554 2555 /* change the origin's next clone */ 2556 if (origin_ds->ds_phys->ds_next_clones_obj) { 2557 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, 2558 origin_ds->ds_phys->ds_next_clones_obj, 2559 origin_ds->ds_phys->ds_next_snap_obj, tx)); 2560 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 2561 origin_ds->ds_phys->ds_next_clones_obj, 2562 oldnext_obj, tx)); 2563 } 2564 2565 /* change origin */ 2566 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2567 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2568 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2569 hds->ds_origin_txg = origin_head->ds_origin_txg; 2570 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2571 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2572 origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg; 2573 2574 /* move snapshots to this dir */ 2575 for (snap = list_head(&pa->shared_snaps); snap; 2576 snap = list_next(&pa->shared_snaps, snap)) { 2577 dsl_dataset_t *ds = snap->ds; 2578 2579 /* unregister props as dsl_dir is changing */ 2580 if (ds->ds_user_ptr) { 2581 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 2582 ds->ds_user_ptr = NULL; 2583 } 2584 /* move snap name entry */ 2585 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2586 VERIFY(0 == dsl_dataset_snap_remove(origin_head, 2587 ds->ds_snapname, tx)); 2588 VERIFY(0 == zap_add(dp->dp_meta_objset, 2589 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2590 8, 1, &ds->ds_object, tx)); 2591 /* change containing dsl_dir */ 2592 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2593 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2594 ds->ds_phys->ds_dir_obj = dd->dd_object; 2595 ASSERT3P(ds->ds_dir, ==, odd); 2596 dsl_dir_close(ds->ds_dir, ds); 2597 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2598 NULL, ds, &ds->ds_dir)); 2599 2600 
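		/*
		 * All property callbacks on this snapshot should have been
		 * unregistered by the ds_user_evict_func() call above,
		 * before its dsl_dir was swapped.
		 */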
ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2601 } 2602 2603 /* 2604 * Change space accounting. 2605 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2606 * both be valid, or both be 0 (resulting in delta == 0). This 2607 * is true for each of {clone,origin} independently. 2608 */ 2609 2610 delta = pa->cloneusedsnap - 2611 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2612 ASSERT3S(delta, >=, 0); 2613 ASSERT3U(pa->used, >=, delta); 2614 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2615 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2616 pa->used - delta, pa->comp, pa->uncomp, tx); 2617 2618 delta = pa->originusedsnap - 2619 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2620 ASSERT3S(delta, <=, 0); 2621 ASSERT3U(pa->used, >=, -delta); 2622 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2623 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2624 -pa->used - delta, -pa->comp, -pa->uncomp, tx); 2625 2626 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2627 2628 /* log history record */ 2629 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2630 cr, "dataset = %llu", hds->ds_object); 2631 2632 dsl_dir_close(odd, FTAG); 2633 } 2634 2635 static char *snaplist_tag = "snaplist"; 2636 /* 2637 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2638 * (exclusive) and last_obj (inclusive). The list will be in reverse 2639 * order (last_obj will be the list_head()). If first_obj == 0, do all 2640 * snapshots back to this dataset's origin. 2641 */ 2642 static int 2643 snaplist_make(dsl_pool_t *dp, boolean_t own, 2644 uint64_t first_obj, uint64_t last_obj, list_t *l) 2645 { 2646 uint64_t obj = last_obj; 2647 2648 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 2649 2650 list_create(l, sizeof (struct promotenode), 2651 offsetof(struct promotenode, link)); 2652 2653 while (obj != first_obj) { 2654 dsl_dataset_t *ds; 2655 struct promotenode *snap; 2656 int err; 2657 2658 if (own) { 2659 err = dsl_dataset_own_obj(dp, obj, 2660 0, snaplist_tag, &ds); 2661 if (err == 0) 2662 dsl_dataset_make_exclusive(ds, snaplist_tag); 2663 } else { 2664 err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 2665 } 2666 if (err == ENOENT) { 2667 /* lost race with snapshot destroy */ 2668 struct promotenode *last = list_tail(l); 2669 ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 2670 obj = last->ds->ds_phys->ds_prev_snap_obj; 2671 continue; 2672 } else if (err) { 2673 return (err); 2674 } 2675 2676 if (first_obj == 0) 2677 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2678 2679 snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 2680 snap->ds = ds; 2681 list_insert_tail(l, snap); 2682 obj = ds->ds_phys->ds_prev_snap_obj; 2683 } 2684 2685 return (0); 2686 } 2687 2688 static int 2689 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2690 { 2691 struct promotenode *snap; 2692 2693 *spacep = 0; 2694 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2695 uint64_t used; 2696 int err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2697 mintxg, UINT64_MAX, &used); 2698 if (err) 2699 return (err); 2700 *spacep += used; 2701 } 2702 return (0); 2703 } 2704 2705 static void 2706 snaplist_destroy(list_t *l, boolean_t own) 2707 { 2708 struct promotenode *snap; 2709 2710 if (!l || !list_link_active(&l->list_head)) 2711 return; 2712 2713 while ((snap = list_tail(l)) != NULL) { 2714 list_remove(l, snap); 2715 if (own) 2716 dsl_dataset_disown(snap->ds, snaplist_tag); 2717 else 2718 dsl_dataset_rele(snap->ds, snaplist_tag); 2719 kmem_free(snap, sizeof (struct 
promotenode)); 2720 } 2721 list_destroy(l); 2722 } 2723 2724 /* 2725 * Promote a clone. Nomenclature note: 2726 * "clone" or "cds": the original clone which is being promoted 2727 * "origin" or "ods": the snapshot which is originally clone's origin 2728 * "origin head" or "ohds": the dataset which is the head 2729 * (filesystem/volume) for the origin 2730 * "origin origin": the origin of the origin's filesystem (typically 2731 * NULL, indicating that the clone is not a clone of a clone). 2732 */ 2733 int 2734 dsl_dataset_promote(const char *name) 2735 { 2736 dsl_dataset_t *ds; 2737 dsl_dir_t *dd; 2738 dsl_pool_t *dp; 2739 dmu_object_info_t doi; 2740 struct promotearg pa = { 0 }; 2741 struct promotenode *snap; 2742 int err; 2743 2744 err = dsl_dataset_hold(name, FTAG, &ds); 2745 if (err) 2746 return (err); 2747 dd = ds->ds_dir; 2748 dp = dd->dd_pool; 2749 2750 err = dmu_object_info(dp->dp_meta_objset, 2751 ds->ds_phys->ds_snapnames_zapobj, &doi); 2752 if (err) { 2753 dsl_dataset_rele(ds, FTAG); 2754 return (err); 2755 } 2756 2757 if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 2758 dsl_dataset_rele(ds, FTAG); 2759 return (EINVAL); 2760 } 2761 2762 /* 2763 * We are going to inherit all the snapshots taken before our 2764 * origin (i.e., our new origin will be our parent's origin). 2765 * Take ownership of them so that we can rename them into our 2766 * namespace. 2767 */ 2768 rw_enter(&dp->dp_config_rwlock, RW_READER); 2769 2770 err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 2771 &pa.shared_snaps); 2772 if (err != 0) 2773 goto out; 2774 2775 err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 2776 if (err != 0) 2777 goto out; 2778 2779 snap = list_head(&pa.shared_snaps); 2780 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2781 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 2782 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 2783 if (err != 0) 2784 goto out; 2785 2786 if (dsl_dir_is_clone(snap->ds->ds_dir)) { 2787 err = dsl_dataset_own_obj(dp, 2788 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2789 0, FTAG, &pa.origin_origin); 2790 if (err != 0) 2791 goto out; 2792 } 2793 2794 out: 2795 rw_exit(&dp->dp_config_rwlock); 2796 2797 /* 2798 * Add in 128x the snapnames zapobj size, since we will be moving 2799 * a bunch of snapnames to the promoted ds, and dirtying their 2800 * bonus buffers. 
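 * (The final argument to dsl_sync_task_do() below,
 * "2 + 2 * doi.doi_physical_blks", is the sync task's blocks-modified
 * estimate, which feeds the sync-task space check.)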
2801 */ 2802 if (err == 0) { 2803 err = dsl_sync_task_do(dp, dsl_dataset_promote_check, 2804 dsl_dataset_promote_sync, ds, &pa, 2805 2 + 2 * doi.doi_physical_blks); 2806 } 2807 2808 snaplist_destroy(&pa.shared_snaps, B_TRUE); 2809 snaplist_destroy(&pa.clone_snaps, B_FALSE); 2810 snaplist_destroy(&pa.origin_snaps, B_FALSE); 2811 if (pa.origin_origin) 2812 dsl_dataset_disown(pa.origin_origin, FTAG); 2813 dsl_dataset_rele(ds, FTAG); 2814 return (err); 2815 } 2816 2817 struct cloneswaparg { 2818 dsl_dataset_t *cds; /* clone dataset */ 2819 dsl_dataset_t *ohds; /* origin's head dataset */ 2820 boolean_t force; 2821 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 2822 }; 2823 2824 /* ARGSUSED */ 2825 static int 2826 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2827 { 2828 struct cloneswaparg *csa = arg1; 2829 2830 /* they should both be heads */ 2831 if (dsl_dataset_is_snapshot(csa->cds) || 2832 dsl_dataset_is_snapshot(csa->ohds)) 2833 return (EINVAL); 2834 2835 /* the branch point should be just before them */ 2836 if (csa->cds->ds_prev != csa->ohds->ds_prev) 2837 return (EINVAL); 2838 2839 /* cds should be the clone (unless they are unrelated) */ 2840 if (csa->cds->ds_prev != NULL && 2841 csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && 2842 csa->ohds->ds_object != 2843 csa->cds->ds_prev->ds_phys->ds_next_snap_obj) 2844 return (EINVAL); 2845 2846 /* the clone should be a child of the origin */ 2847 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 2848 return (EINVAL); 2849 2850 /* ohds shouldn't be modified unless 'force' */ 2851 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 2852 return (ETXTBSY); 2853 2854 /* adjust amount of any unconsumed refreservation */ 2855 csa->unused_refres_delta = 2856 (int64_t)MIN(csa->ohds->ds_reserved, 2857 csa->ohds->ds_phys->ds_unique_bytes) - 2858 (int64_t)MIN(csa->ohds->ds_reserved, 2859 csa->cds->ds_phys->ds_unique_bytes); 2860 2861 if (csa->unused_refres_delta > 0 && 2862 csa->unused_refres_delta > 2863 dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 2864 return (ENOSPC); 2865 2866 return (0); 2867 } 2868 2869 /* ARGSUSED */ 2870 static void 2871 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2872 { 2873 struct cloneswaparg *csa = arg1; 2874 dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; 2875 2876 ASSERT(csa->cds->ds_reserved == 0); 2877 ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); 2878 2879 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 2880 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 2881 2882 if (csa->cds->ds_user_ptr != NULL) { 2883 csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); 2884 csa->cds->ds_user_ptr = NULL; 2885 } 2886 2887 if (csa->ohds->ds_user_ptr != NULL) { 2888 csa->ohds->ds_user_evict_func(csa->ohds, 2889 csa->ohds->ds_user_ptr); 2890 csa->ohds->ds_user_ptr = NULL; 2891 } 2892 2893 /* 2894 * Reset origin's unique bytes, if it exists. 
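 * After the swap, the origin's unique space is recomputed as the
 * clone-deadlist space born after the origin's previous snapshot, which
 * is what the bplist_space_birthrange() call below stores into
 * ds_unique_bytes.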
2895 */ 2896 if (csa->cds->ds_prev) { 2897 dsl_dataset_t *origin = csa->cds->ds_prev; 2898 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2899 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2900 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2901 &origin->ds_phys->ds_unique_bytes)); 2902 } 2903 2904 /* swap blkptrs */ 2905 { 2906 blkptr_t tmp; 2907 tmp = csa->ohds->ds_phys->ds_bp; 2908 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 2909 csa->cds->ds_phys->ds_bp = tmp; 2910 } 2911 2912 /* set dd_*_bytes */ 2913 { 2914 int64_t dused, dcomp, duncomp; 2915 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2916 uint64_t odl_used, odl_comp, odl_uncomp; 2917 2918 ASSERT3U(csa->cds->ds_dir->dd_phys-> 2919 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2920 2921 VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2922 &cdl_comp, &cdl_uncomp)); 2923 VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2924 &odl_comp, &odl_uncomp)); 2925 2926 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 2927 (csa->ohds->ds_phys->ds_used_bytes + odl_used); 2928 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 2929 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 2930 duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 2931 cdl_uncomp - 2932 (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2933 2934 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, 2935 dused, dcomp, duncomp, tx); 2936 dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, 2937 -dused, -dcomp, -duncomp, tx); 2938 2939 /* 2940 * The difference in the space used by snapshots is the 2941 * difference in snapshot space due to the head's 2942 * deadlist (since that's the only thing that's 2943 * changing that affects the snapused). 2944 */ 2945 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2946 csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used)); 2947 VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist, 2948 csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used)); 2949 dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, 2950 DD_USED_HEAD, DD_USED_SNAP, tx); 2951 } 2952 2953 #define SWITCH64(x, y) \ 2954 { \ 2955 uint64_t __tmp = (x); \ 2956 (x) = (y); \ 2957 (y) = __tmp; \ 2958 } 2959 2960 /* swap ds_*_bytes */ 2961 SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 2962 csa->cds->ds_phys->ds_used_bytes); 2963 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 2964 csa->cds->ds_phys->ds_compressed_bytes); 2965 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 2966 csa->cds->ds_phys->ds_uncompressed_bytes); 2967 SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 2968 csa->cds->ds_phys->ds_unique_bytes); 2969 2970 /* apply any parent delta for change in unconsumed refreservation */ 2971 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, 2972 csa->unused_refres_delta, 0, 0, tx); 2973 2974 /* swap deadlists */ 2975 bplist_close(&csa->cds->ds_deadlist); 2976 bplist_close(&csa->ohds->ds_deadlist); 2977 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 2978 csa->cds->ds_phys->ds_deadlist_obj); 2979 VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 2980 csa->cds->ds_phys->ds_deadlist_obj)); 2981 VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 2982 csa->ohds->ds_phys->ds_deadlist_obj)); 2983 2984 dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx); 2985 } 2986 2987 /* 2988 * Swap 'clone' with its origin head dataset. Used at the end of "zfs 2989 * recv" into an existing fs to swizzle the file system to the new 2990 * version, and by "zfs rollback".
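 * (Both datasets must already be owned by the caller; the retry loop
 * below then takes both ds_rwlocks as writer without deadlocking against
 * a concurrent swap.)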
Can also be used to swap two 2991 * independent head datasets if neither has any snapshots. 2992 */ 2993 int 2994 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 2995 boolean_t force) 2996 { 2997 struct cloneswaparg csa; 2998 int error; 2999 3000 ASSERT(clone->ds_owner); 3001 ASSERT(origin_head->ds_owner); 3002 retry: 3003 /* Need exclusive access for the swap */ 3004 rw_enter(&clone->ds_rwlock, RW_WRITER); 3005 if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { 3006 rw_exit(&clone->ds_rwlock); 3007 rw_enter(&origin_head->ds_rwlock, RW_WRITER); 3008 if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { 3009 rw_exit(&origin_head->ds_rwlock); 3010 goto retry; 3011 } 3012 } 3013 csa.cds = clone; 3014 csa.ohds = origin_head; 3015 csa.force = force; 3016 error = dsl_sync_task_do(clone->ds_dir->dd_pool, 3017 dsl_dataset_clone_swap_check, 3018 dsl_dataset_clone_swap_sync, &csa, NULL, 9); 3019 return (error); 3020 } 3021 3022 /* 3023 * Given a pool name and a dataset object number in that pool, 3024 * return the name of that dataset. 3025 */ 3026 int 3027 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 3028 { 3029 spa_t *spa; 3030 dsl_pool_t *dp; 3031 dsl_dataset_t *ds; 3032 int error; 3033 3034 if ((error = spa_open(pname, &spa, FTAG)) != 0) 3035 return (error); 3036 dp = spa_get_dsl(spa); 3037 rw_enter(&dp->dp_config_rwlock, RW_READER); 3038 if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { 3039 dsl_dataset_name(ds, buf); 3040 dsl_dataset_rele(ds, FTAG); 3041 } 3042 rw_exit(&dp->dp_config_rwlock); 3043 spa_close(spa, FTAG); 3044 3045 return (error); 3046 } 3047 3048 int 3049 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 3050 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 3051 { 3052 int error = 0; 3053 3054 ASSERT3S(asize, >, 0); 3055 3056 /* 3057 * *ref_rsrv is the portion of asize that will come from any 3058 * unconsumed refreservation space. 3059 */ 3060 *ref_rsrv = 0; 3061 3062 mutex_enter(&ds->ds_lock); 3063 /* 3064 * Make a space adjustment for reserved bytes. 3065 */ 3066 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 3067 ASSERT3U(*used, >=, 3068 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3069 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3070 *ref_rsrv = 3071 asize - MIN(asize, parent_delta(ds, asize + inflight)); 3072 } 3073 3074 if (!check_quota || ds->ds_quota == 0) { 3075 mutex_exit(&ds->ds_lock); 3076 return (0); 3077 } 3078 /* 3079 * If they are requesting more space, and our current estimate 3080 * is over quota, they get to try again unless the actual 3081 * on-disk is over quota and there are no pending changes (which 3082 * may free up space for us). 
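 * (Below, ERESTART tells the caller to retry in a later txg, once pending
 * frees have been processed, while EDQUOT is the hard over-quota failure.)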
3083 */ 3084 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 3085 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 3086 error = ERESTART; 3087 else 3088 error = EDQUOT; 3089 } 3090 mutex_exit(&ds->ds_lock); 3091 3092 return (error); 3093 } 3094 3095 /* ARGSUSED */ 3096 static int 3097 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 3098 { 3099 dsl_dataset_t *ds = arg1; 3100 uint64_t *quotap = arg2; 3101 uint64_t new_quota = *quotap; 3102 3103 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 3104 return (ENOTSUP); 3105 3106 if (new_quota == 0) 3107 return (0); 3108 3109 if (new_quota < ds->ds_phys->ds_used_bytes || 3110 new_quota < ds->ds_reserved) 3111 return (ENOSPC); 3112 3113 return (0); 3114 } 3115 3116 /* ARGSUSED */ 3117 void 3118 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3119 { 3120 dsl_dataset_t *ds = arg1; 3121 uint64_t *quotap = arg2; 3122 uint64_t new_quota = *quotap; 3123 3124 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3125 3126 ds->ds_quota = new_quota; 3127 3128 dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 3129 3130 spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 3131 tx, cr, "%lld dataset = %llu ", 3132 (longlong_t)new_quota, ds->ds_object); 3133 } 3134 3135 int 3136 dsl_dataset_set_quota(const char *dsname, uint64_t quota) 3137 { 3138 dsl_dataset_t *ds; 3139 int err; 3140 3141 err = dsl_dataset_hold(dsname, FTAG, &ds); 3142 if (err) 3143 return (err); 3144 3145 if (quota != ds->ds_quota) { 3146 /* 3147 * If someone removes a file, then tries to set the quota, we 3148 * want to make sure the file freeing takes effect. 3149 */ 3150 txg_wait_open(ds->ds_dir->dd_pool, 0); 3151 3152 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3153 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 3154 ds, &quota, 0); 3155 } 3156 dsl_dataset_rele(ds, FTAG); 3157 return (err); 3158 } 3159 3160 static int 3161 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 3162 { 3163 dsl_dataset_t *ds = arg1; 3164 uint64_t *reservationp = arg2; 3165 uint64_t new_reservation = *reservationp; 3166 uint64_t unique; 3167 3168 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 3169 SPA_VERSION_REFRESERVATION) 3170 return (ENOTSUP); 3171 3172 if (dsl_dataset_is_snapshot(ds)) 3173 return (EINVAL); 3174 3175 /* 3176 * If we are doing the preliminary check in open context, the 3177 * space estimates may be inaccurate.
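 * In that case just say the check passed; it will be repeated with
 * accurate estimates when this task runs again in syncing context.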
3178 */ 3179 if (!dmu_tx_is_syncing(tx)) 3180 return (0); 3181 3182 mutex_enter(&ds->ds_lock); 3183 unique = dsl_dataset_unique(ds); 3184 mutex_exit(&ds->ds_lock); 3185 3186 if (MAX(unique, new_reservation) > MAX(unique, ds->ds_reserved)) { 3187 uint64_t delta = MAX(unique, new_reservation) - 3188 MAX(unique, ds->ds_reserved); 3189 3190 if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 3191 return (ENOSPC); 3192 if (ds->ds_quota > 0 && 3193 new_reservation > ds->ds_quota) 3194 return (ENOSPC); 3195 } 3196 3197 return (0); 3198 } 3199 3200 /* ARGSUSED */ 3201 static void 3202 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 3203 dmu_tx_t *tx) 3204 { 3205 dsl_dataset_t *ds = arg1; 3206 uint64_t *reservationp = arg2; 3207 uint64_t new_reservation = *reservationp; 3208 uint64_t unique; 3209 int64_t delta; 3210 3211 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3212 3213 mutex_enter(&ds->ds_dir->dd_lock); 3214 mutex_enter(&ds->ds_lock); 3215 unique = dsl_dataset_unique(ds); 3216 delta = MAX(0, (int64_t)(new_reservation - unique)) - 3217 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3218 ds->ds_reserved = new_reservation; 3219 mutex_exit(&ds->ds_lock); 3220 3221 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3222 mutex_exit(&ds->ds_dir->dd_lock); 3223 dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refreservation", 3224 new_reservation, cr, tx); 3225 3226 spa_history_internal_log(LOG_DS_REFRESERV, 3227 ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 3228 (longlong_t)new_reservation, ds->ds_object); 3229 } 3230 3231 int 3232 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 3233 { 3234 dsl_dataset_t *ds; 3235 int err; 3236 3237 err = dsl_dataset_hold(dsname, FTAG, &ds); 3238 if (err) 3239 return (err); 3240 3241 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3242 dsl_dataset_set_reservation_check, 3243 dsl_dataset_set_reservation_sync, ds, &reservation, 0); 3244 dsl_dataset_rele(ds, FTAG); 3245 return (err); 3246 } 3247 3248 static int 3249 dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) 3250 { 3251 dsl_dataset_t *ds = arg1; 3252 char *htag = arg2; 3253 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3254 uint64_t tmp; int error = 0; 3255 3256 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3257 return (ENOTSUP); 3258 3259 if (!dsl_dataset_is_snapshot(ds)) 3260 return (EINVAL); 3261 3262 if (strlen(htag) >= ZAP_MAXNAMELEN) 3263 return (ENAMETOOLONG); 3264 3265 /* tags must be unique */ 3266 mutex_enter(&ds->ds_lock); 3267 if (ds->ds_phys->ds_userrefs_obj) { 3268 error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, 3269 8, 1, &tmp); 3270 if (error == 0) 3271 error = EEXIST; 3272 else if (error == ENOENT) 3273 error = 0; 3274 } 3275 mutex_exit(&ds->ds_lock); 3276 3277 return (error); 3278 } 3279 3280 static void 3281 dsl_dataset_user_hold_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3282 { 3283 dsl_dataset_t *ds = arg1; 3284 char *htag = arg2; 3285 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3286 time_t now = gethrestime_sec(); 3287 uint64_t zapobj; 3288 3289 mutex_enter(&ds->ds_lock); 3290 if (ds->ds_phys->ds_userrefs_obj == 0) { 3291 /* 3292 * This is the first user hold for this dataset. Create 3293 * the userrefs zap object.
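 * Holds live in that zap as (tag -> creation timestamp) entries; see
 * the zap_add() of 'now' below.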
3294 */ 3295 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3296 zapobj = ds->ds_phys->ds_userrefs_obj = 3297 zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); 3298 } else { 3299 zapobj = ds->ds_phys->ds_userrefs_obj; 3300 } 3301 ds->ds_userrefs++; 3302 mutex_exit(&ds->ds_lock); 3303 3304 VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); 3305 3306 spa_history_internal_log(LOG_DS_USER_HOLD, 3307 ds->ds_dir->dd_pool->dp_spa, tx, cr, "<%s> dataset = %llu", 3308 htag, ds->ds_object); 3309 } 3310 3311 struct dsl_ds_holdarg { 3312 dsl_sync_task_group_t *dstg; 3313 char *htag; 3314 char *snapname; 3315 boolean_t recursive; 3316 boolean_t gotone; 3317 char failed[MAXPATHLEN]; 3318 }; 3319 3320 static int 3321 dsl_dataset_user_hold_one(char *dsname, void *arg) 3322 { 3323 struct dsl_ds_holdarg *ha = arg; 3324 dsl_dataset_t *ds; 3325 int error; 3326 char *name; 3327 3328 /* alloc a buffer to hold dsname@snapname plus terminating NULL */ 3329 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3330 error = dsl_dataset_hold(name, ha->dstg, &ds); 3331 strfree(name); 3332 if (error == 0) { 3333 ha->gotone = B_TRUE; 3334 dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, 3335 dsl_dataset_user_hold_sync, ds, ha->htag, 0); 3336 } else if (error == ENOENT && ha->recursive) { 3337 error = 0; 3338 } else { 3339 (void) strcpy(ha->failed, dsname); 3340 } 3341 return (error); 3342 } 3343 3344 int 3345 dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, 3346 boolean_t recursive) 3347 { 3348 struct dsl_ds_holdarg *ha; 3349 dsl_sync_task_t *dst; 3350 spa_t *spa; 3351 int error; 3352 3353 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3354 3355 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3356 3357 error = spa_open(dsname, &spa, FTAG); 3358 if (error) { 3359 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3360 return (error); 3361 } 3362 3363 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3364 ha->htag = htag; 3365 ha->snapname = snapname; 3366 ha->recursive = recursive; 3367 if (recursive) { 3368 error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, 3369 ha, DS_FIND_CHILDREN); 3370 } else { 3371 error = dsl_dataset_user_hold_one(dsname, ha); 3372 } 3373 if (error == 0) 3374 error = dsl_sync_task_group_wait(ha->dstg); 3375 3376 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 3377 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 3378 dsl_dataset_t *ds = dst->dst_arg1; 3379 3380 if (dst->dst_err) { 3381 dsl_dataset_name(ds, ha->failed); 3382 *strchr(ha->failed, '@') = '\0'; 3383 } 3384 dsl_dataset_rele(ds, ha->dstg); 3385 } 3386 3387 if (error == 0 && recursive && !ha->gotone) 3388 error = ENOENT; 3389 3390 if (error) 3391 (void) strcpy(dsname, ha->failed); 3392 3393 dsl_sync_task_group_destroy(ha->dstg); 3394 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3395 spa_close(spa, FTAG); 3396 return (error); 3397 } 3398 3399 struct dsl_ds_releasearg { 3400 dsl_dataset_t *ds; 3401 const char *htag; 3402 boolean_t own; /* do we own or just hold ds? 
*/ 3403 }; 3404 3405 static int 3406 dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, 3407 boolean_t *might_destroy) 3408 { 3409 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3410 uint64_t zapobj; 3411 uint64_t tmp; 3412 int error; 3413 3414 *might_destroy = B_FALSE; 3415 3416 mutex_enter(&ds->ds_lock); 3417 zapobj = ds->ds_phys->ds_userrefs_obj; 3418 if (zapobj == 0) { 3419 /* The tag can't possibly exist */ 3420 mutex_exit(&ds->ds_lock); 3421 return (ESRCH); 3422 } 3423 3424 /* Make sure the tag exists */ 3425 error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); 3426 if (error) { 3427 mutex_exit(&ds->ds_lock); 3428 if (error == ENOENT) 3429 error = ESRCH; 3430 return (error); 3431 } 3432 3433 if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && 3434 DS_IS_DEFER_DESTROY(ds)) 3435 *might_destroy = B_TRUE; 3436 3437 mutex_exit(&ds->ds_lock); 3438 return (0); 3439 } 3440 3441 static int 3442 dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) 3443 { 3444 struct dsl_ds_releasearg *ra = arg1; 3445 dsl_dataset_t *ds = ra->ds; 3446 boolean_t might_destroy; 3447 int error; 3448 3449 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3450 return (ENOTSUP); 3451 3452 error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); 3453 if (error) 3454 return (error); 3455 3456 if (might_destroy) { 3457 struct dsl_ds_destroyarg dsda = {0}; 3458 3459 if (dmu_tx_is_syncing(tx)) { 3460 /* 3461 * If we're not prepared to remove the snapshot, 3462 * we can't allow the release to happen right now. 3463 */ 3464 if (!ra->own) 3465 return (EBUSY); 3466 if (ds->ds_user_ptr) { 3467 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 3468 ds->ds_user_ptr = NULL; 3469 } 3470 } 3471 dsda.ds = ds; 3472 dsda.releasing = B_TRUE; 3473 return (dsl_dataset_destroy_check(&dsda, tag, tx)); 3474 } 3475 3476 return (0); 3477 } 3478 3479 static void 3480 dsl_dataset_user_release_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 3481 { 3482 struct dsl_ds_releasearg *ra = arg1; 3483 dsl_dataset_t *ds = ra->ds; 3484 spa_t *spa = ds->ds_dir->dd_pool->dp_spa; 3485 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3486 uint64_t zapobj; 3487 uint64_t dsobj = ds->ds_object; 3488 uint64_t refs; 3489 3490 mutex_enter(&ds->ds_lock); 3491 ds->ds_userrefs--; 3492 refs = ds->ds_userrefs; 3493 mutex_exit(&ds->ds_lock); 3494 zapobj = ds->ds_phys->ds_userrefs_obj; 3495 VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); 3496 if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && 3497 DS_IS_DEFER_DESTROY(ds)) { 3498 struct dsl_ds_destroyarg dsda = {0}; 3499 3500 ASSERT(ra->own); 3501 dsda.ds = ds; 3502 dsda.releasing = B_TRUE; 3503 /* We already did the destroy_check */ 3504 dsl_dataset_destroy_sync(&dsda, tag, cr, tx); 3505 } 3506 3507 spa_history_internal_log(LOG_DS_USER_RELEASE, 3508 spa, tx, cr, "<%s> %lld dataset = %llu", 3509 ra->htag, (longlong_t)refs, dsobj); 3510 } 3511 3512 static int 3513 dsl_dataset_user_release_one(char *dsname, void *arg) 3514 { 3515 struct dsl_ds_holdarg *ha = arg; 3516 struct dsl_ds_releasearg *ra; 3517 dsl_dataset_t *ds; 3518 int error; 3519 void *dtag = ha->dstg; 3520 char *name; 3521 boolean_t own = B_FALSE; 3522 boolean_t might_destroy; 3523 3524 if (strlen(ha->htag) >= ZAP_MAXNAMELEN) 3525 return (ENAMETOOLONG); 3526 3527 /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ 3528 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3529 error = dsl_dataset_hold(name, dtag, &ds); 3530 
strfree(name); 3531 if (error == ENOENT && ha->recursive) 3532 return (0); 3533 (void) strcpy(ha->failed, dsname); 3534 if (error) 3535 return (error); 3536 3537 ha->gotone = B_TRUE; 3538 3539 ASSERT(dsl_dataset_is_snapshot(ds)); 3540 3541 error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); 3542 if (error) { 3543 dsl_dataset_rele(ds, dtag); 3544 return (error); 3545 } 3546 3547 if (might_destroy) { 3548 #ifdef _KERNEL 3549 /* name was freed above; re-create it for the unmount */ name = kmem_asprintf("%s@%s", dsname, ha->snapname); error = zfs_unmount_snap(name, NULL); 3550 if (error) { strfree(name); 3551 dsl_dataset_rele(ds, dtag); 3552 return (error); 3553 } 3554 error = dsl_dataset_zvol_cleanup(ds, name); strfree(name); 3555 if (error) { 3556 dsl_dataset_rele(ds, dtag); 3557 return (error); 3558 } 3559 #endif 3560 if (!dsl_dataset_tryown(ds, 3561 DS_MODE_READONLY | DS_MODE_INCONSISTENT, dtag)) { 3562 dsl_dataset_rele(ds, dtag); 3563 return (EBUSY); 3564 } else { 3565 own = B_TRUE; 3566 dsl_dataset_make_exclusive(ds, dtag); 3567 } 3568 } 3569 3570 ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); 3571 ra->ds = ds; 3572 ra->htag = ha->htag; 3573 ra->own = own; 3574 dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, 3575 dsl_dataset_user_release_sync, ra, dtag, 0); 3576 3577 return (0); 3578 } 3579 3580 int 3581 dsl_dataset_user_release(char *dsname, char *snapname, char *htag, 3582 boolean_t recursive) 3583 { 3584 struct dsl_ds_holdarg *ha; 3585 dsl_sync_task_t *dst; 3586 spa_t *spa; 3587 int error; 3588 3589 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3590 3591 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3592 3593 error = spa_open(dsname, &spa, FTAG); 3594 if (error) { 3595 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3596 return (error); 3597 } 3598 3599 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3600 ha->htag = htag; 3601 ha->snapname = snapname; 3602 ha->recursive = recursive; 3603 if (recursive) { 3604 error = dmu_objset_find(dsname, dsl_dataset_user_release_one, 3605 ha, DS_FIND_CHILDREN); 3606 } else { 3607 error = dsl_dataset_user_release_one(dsname, ha); 3608 } 3609 if (error == 0) 3610 error = dsl_sync_task_group_wait(ha->dstg); 3611 3612 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 3613 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 3614 struct dsl_ds_releasearg *ra = dst->dst_arg1; 3615 dsl_dataset_t *ds = ra->ds; 3616 3617 if (dst->dst_err) 3618 dsl_dataset_name(ds, ha->failed); 3619 3620 if (ra->own) 3621 dsl_dataset_disown(ds, ha->dstg); 3622 else 3623 dsl_dataset_rele(ds, ha->dstg); 3624 3625 kmem_free(ra, sizeof (struct dsl_ds_releasearg)); 3626 } 3627 3628 if (error == 0 && recursive && !ha->gotone) 3629 error = ENOENT; 3630 3631 if (error) 3632 (void) strcpy(dsname, ha->failed); 3633 3634 dsl_sync_task_group_destroy(ha->dstg); 3635 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3636 spa_close(spa, FTAG); 3637 return (error); 3638 } 3639 3640 int 3641 dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) 3642 { 3643 dsl_dataset_t *ds; 3644 int err; 3645 3646 err = dsl_dataset_hold(dsname, FTAG, &ds); 3647 if (err) 3648 return (err); 3649 3650 VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); 3651 if (ds->ds_phys->ds_userrefs_obj != 0) { 3652 zap_attribute_t *za; 3653 zap_cursor_t zc; 3654 3655 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 3656 for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, 3657 ds->ds_phys->ds_userrefs_obj); 3658 zap_cursor_retrieve(&zc, za) == 0; 3659 zap_cursor_advance(&zc)) { 3660 VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, 3661 za->za_first_integer));
3662 } 3663 zap_cursor_fini(&zc); 3664 kmem_free(za, sizeof (zap_attribute_t)); 3665 } 3666 dsl_dataset_rele(ds, FTAG); 3667 return (0); 3668 } 3669