/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
static dsl_syncfunc_t dsl_dataset_rollback_sync;
static dsl_checkfunc_t dsl_dataset_destroy_check;
static dsl_syncfunc_t dsl_dataset_destroy_sync;

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * We use weighted reference counts to express the various forms of exclusion
 * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
 * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
 * This makes the exclusion logic simple: the total refcnt for all opens cannot
 * exceed DS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
 * weight (DS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
 * just over half of the refcnt space, so there can't be more than one, but it
 * can peacefully coexist with any number of STANDARD opens.
 */
static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
	0,			/* DS_MODE_NONE - invalid */
	1,			/* DS_MODE_STANDARD - unlimited number */
	(DS_REF_MAX >> 1) + 1,	/* DS_MODE_PRIMARY - only one of these */
	DS_REF_MAX		/* DS_MODE_EXCLUSIVE - no other opens */
};


void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
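		 * (There is no dsl_dataset_t for the MOS itself, so
		 * dp_mos_dir stands in for it in the space accounting
		 * below.)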
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_lock);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir,
	    used, compressed, uncompressed, tx);
}

void
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
    dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(dmu_tx_is_syncing(tx));
	/* No block pointer => nothing to free */
	if (BP_IS_HOLE(bp))
		return;

	ASSERT(used > 0);
	if (ds == NULL) {
		int err;
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		err = arc_free(pio, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
		ASSERT(err == 0);

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int err;

		dprintf_bp(bp, "freeing: %s", "");
		err = arc_free(pio, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
		ASSERT(err == 0);

		mutex_enter(&ds->ds_lock);
		/* XXX unique_bytes is not accurate for head datasets */
		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_phys->ds_prev_snap_obj != 0) {
			ASSERT3U(ds->ds_prev->ds_object, ==,
			    ds->ds_phys->ds_prev_snap_obj);
			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
			    ds->ds_object && bp->blk_birth >
			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
				mutex_enter(&ds->ds_prev->ds_lock);
				ds->ds_prev->ds_phys->ds_unique_bytes +=
				    used;
				mutex_exit(&ds->ds_prev->ds_lock);
			}
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

int
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}

/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* open_refcount == DS_REF_MAX when deleting */
	ASSERT(ds->ds_open_refcount == 0 ||
	    ds->ds_open_refcount == DS_REF_MAX);

	dprintf_ds(ds, "evicting %s\n", "");

	unique_remove(ds->ds_phys->ds_fsid_guid);

	if (ds->ds_user_ptr != NULL)
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);

	if (ds->ds_prev) {
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	dsl_dir_close(ds->ds_dir, ds);

	if (list_link_active(&ds->ds_synced_link))
		list_remove(&dp->dp_synced_objsets, ds);

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_deadlist.bpl_lock);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

int
dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
    int mode, void *tag, dsl_dataset_t **dsp)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
		    NULL);

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the blist if we
			 * just opened it.
			 */
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds, &ds->ds_prev);
			}
		} else {
			if (snapname) {
#ifdef ZFS_DEBUG
				dsl_dataset_phys_t *headphys;
				dmu_buf_t *headdbuf;
				err = dmu_bonus_hold(mos,
				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
				    FTAG, &headdbuf);
				if (err == 0) {
					headphys = headdbuf->db_data;
					uint64_t foundobj;
					err = zap_lookup(dp->dp_meta_objset,
					    headphys->ds_snapnames_zapobj,
					    snapname, sizeof (foundobj), 1,
					    &foundobj);
					ASSERT3U(foundobj, ==, dsobj);
					dmu_buf_rele(headdbuf, FTAG);
				}
#endif
				(void) strcat(ds->ds_snapname, snapname);
			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
				err = dsl_dataset_get_snapname(ds);
			}
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev) {
				dsl_dataset_close(ds->ds_prev,
				    DS_MODE_NONE, ds);
			}
			dsl_dir_close(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			uint64_t new =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
			if (new != ds->ds_phys->ds_fsid_guid) {
				/* XXX it won't necessarily be synced... */
				ds->ds_phys->ds_fsid_guid = new;
			}
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);

	mutex_enter(&ds->ds_lock);
	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
	    !DS_MODE_IS_INCONSISTENT(mode)) ||
	    (ds->ds_open_refcount + weight > DS_REF_MAX)) {
		mutex_exit(&ds->ds_lock);
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		return (EBUSY);
	}
	ds->ds_open_refcount += weight;
	mutex_exit(&ds->ds_lock);

	*dsp = ds;
	return (0);
}

int
dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
    void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *tail;
	uint64_t obj;
	dsl_dataset_t *ds = NULL;
	int err = 0;

	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj == 0) {
		/* A dataset with no associated objset */
		err = ENOENT;
		goto out;
	}

	if (tail != NULL) {
		objset_t *mos = dp->dp_meta_objset;

		err = dsl_dataset_open_obj(dp, obj, NULL,
		    DS_MODE_NONE, tag, &ds);
		if (err)
			goto out;
		obj = ds->ds_phys->ds_snapnames_zapobj;
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		ds = NULL;

		if (tail[0] != '@') {
			err = ENOENT;
			goto out;
		}
		tail++;

		/* Look for a snapshot */
		if (!DS_MODE_IS_READONLY(mode)) {
			err = EROFS;
			goto out;
		}
		dprintf("looking for snapshot '%s'\n", tail);
		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
		if (err)
			goto out;
	}
	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag,
	    &ds);

out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);

	ASSERT3U((err == 0), ==, (ds != NULL));
	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */

	*dsp = ds;
	return (err);
}

int
dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
}

void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			if (!MUTEX_HELD(&ds->ds_lock)) {
				/*
				 * We use a "recursive" mutex so that we
				 * can call dprintf_ds() with ds_lock held.
				 */
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

static int
dsl_dataset_namelen(dsl_dataset_t *ds)
{
	int result;

	if (ds == NULL) {
		result = 3;	/* "mos" */
	} else {
		result = dsl_dir_namelen(ds->ds_dir);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			++result;	/* adding one for the @-sign */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				/* see dsl_dataset_name */
				mutex_enter(&ds->ds_lock);
				result += strlen(ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				result += strlen(ds->ds_snapname);
			}
		}
	}

	return (result);
}

void
dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_open_refcount, >=, weight);
	ds->ds_open_refcount -= weight;
	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
	    mode, ds->ds_open_refcount);
	mutex_exit(&ds->ds_lock);

	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	dsl_dataset_t *ds;
	uint64_t dsobj;
	dsl_dir_t *dd;

	dsl_dir_create_root(mos, ddobjp, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;
	dsl_dir_close(dd, FTAG);

	VERIFY(0 ==
	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
	(void) dmu_objset_create_impl(dp->dp_spa, ds,
	    &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, 578 const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) 579 { 580 dsl_pool_t *dp = pdd->dd_pool; 581 dmu_buf_t *dbuf; 582 dsl_dataset_phys_t *dsphys; 583 uint64_t dsobj, ddobj; 584 objset_t *mos = dp->dp_meta_objset; 585 dsl_dir_t *dd; 586 587 ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp); 588 ASSERT(clone_parent == NULL || 589 clone_parent->ds_phys->ds_num_children > 0); 590 ASSERT(lastname[0] != '@'); 591 ASSERT(dmu_tx_is_syncing(tx)); 592 593 ddobj = dsl_dir_create_sync(pdd, lastname, tx); 594 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 595 596 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 597 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 598 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 599 dmu_buf_will_dirty(dbuf, tx); 600 dsphys = dbuf->db_data; 601 dsphys->ds_dir_obj = dd->dd_object; 602 dsphys->ds_fsid_guid = unique_create(); 603 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 604 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 605 sizeof (dsphys->ds_guid)); 606 dsphys->ds_snapnames_zapobj = 607 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 608 dsphys->ds_creation_time = gethrestime_sec(); 609 dsphys->ds_creation_txg = tx->tx_txg; 610 dsphys->ds_deadlist_obj = 611 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 612 if (clone_parent) { 613 dsphys->ds_prev_snap_obj = clone_parent->ds_object; 614 dsphys->ds_prev_snap_txg = 615 clone_parent->ds_phys->ds_creation_txg; 616 dsphys->ds_used_bytes = 617 clone_parent->ds_phys->ds_used_bytes; 618 dsphys->ds_compressed_bytes = 619 clone_parent->ds_phys->ds_compressed_bytes; 620 dsphys->ds_uncompressed_bytes = 621 clone_parent->ds_phys->ds_uncompressed_bytes; 622 dsphys->ds_bp = clone_parent->ds_phys->ds_bp; 623 624 dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); 625 clone_parent->ds_phys->ds_num_children++; 626 627 dmu_buf_will_dirty(dd->dd_dbuf, tx); 628 dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; 629 } 630 dmu_buf_rele(dbuf, FTAG); 631 632 dmu_buf_will_dirty(dd->dd_dbuf, tx); 633 dd->dd_phys->dd_head_dataset_obj = dsobj; 634 dsl_dir_close(dd, FTAG); 635 636 return (dsobj); 637 } 638 639 struct destroyarg { 640 dsl_sync_task_group_t *dstg; 641 char *snapname; 642 void *tag; 643 char *failed; 644 }; 645 646 static int 647 dsl_snapshot_destroy_one(char *name, void *arg) 648 { 649 struct destroyarg *da = arg; 650 dsl_dataset_t *ds; 651 char *cp; 652 int err; 653 654 (void) strcat(name, "@"); 655 (void) strcat(name, da->snapname); 656 err = dsl_dataset_open(name, 657 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 658 da->tag, &ds); 659 cp = strchr(name, '@'); 660 *cp = '\0'; 661 if (err == ENOENT) 662 return (0); 663 if (err) { 664 (void) strcpy(da->failed, name); 665 return (err); 666 } 667 668 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 669 dsl_dataset_destroy_sync, ds, da->tag, 0); 670 return (0); 671 } 672 673 /* 674 * Destroy 'snapname' in all descendants of 'fsname'. 
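 * For example (names are hypothetical), dsl_snapshots_destroy("tank/home",
 * "nightly") would queue one destroy sync task for each existing
 * <descendant>@nightly snapshot and run them as a single sync task group.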
675 */ 676 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 677 int 678 dsl_snapshots_destroy(char *fsname, char *snapname) 679 { 680 int err; 681 struct destroyarg da; 682 dsl_sync_task_t *dst; 683 spa_t *spa; 684 char *cp; 685 686 cp = strchr(fsname, '/'); 687 if (cp) { 688 *cp = '\0'; 689 err = spa_open(fsname, &spa, FTAG); 690 *cp = '/'; 691 } else { 692 err = spa_open(fsname, &spa, FTAG); 693 } 694 if (err) 695 return (err); 696 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 697 da.snapname = snapname; 698 da.tag = FTAG; 699 da.failed = fsname; 700 701 err = dmu_objset_find(fsname, 702 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 703 704 if (err == 0) 705 err = dsl_sync_task_group_wait(da.dstg); 706 707 for (dst = list_head(&da.dstg->dstg_tasks); dst; 708 dst = list_next(&da.dstg->dstg_tasks, dst)) { 709 dsl_dataset_t *ds = dst->dst_arg1; 710 if (dst->dst_err) { 711 dsl_dataset_name(ds, fsname); 712 cp = strchr(fsname, '@'); 713 *cp = '\0'; 714 } 715 /* 716 * If it was successful, destroy_sync would have 717 * closed the ds 718 */ 719 if (err) 720 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 721 } 722 723 dsl_sync_task_group_destroy(da.dstg); 724 spa_close(spa, FTAG); 725 return (err); 726 } 727 728 int 729 dsl_dataset_destroy(const char *name) 730 { 731 int err; 732 dsl_sync_task_group_t *dstg; 733 objset_t *os; 734 dsl_dataset_t *ds; 735 dsl_dir_t *dd; 736 uint64_t obj; 737 738 if (strchr(name, '@')) { 739 /* Destroying a snapshot is simpler */ 740 err = dsl_dataset_open(name, 741 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 742 FTAG, &ds); 743 if (err) 744 return (err); 745 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 746 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 747 ds, FTAG, 0); 748 if (err) 749 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 750 return (err); 751 } 752 753 err = dmu_objset_open(name, DMU_OST_ANY, 754 DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); 755 if (err) 756 return (err); 757 ds = os->os->os_dsl_dataset; 758 dd = ds->ds_dir; 759 760 /* 761 * Check for errors and mark this ds as inconsistent, in 762 * case we crash while freeing the objects. 763 */ 764 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 765 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 766 if (err) { 767 dmu_objset_close(os); 768 return (err); 769 } 770 771 /* 772 * remove the objects in open context, so that we won't 773 * have too much to do in syncing context. 774 */ 775 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 776 ds->ds_phys->ds_prev_snap_txg)) { 777 dmu_tx_t *tx = dmu_tx_create(os); 778 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); 779 dmu_tx_hold_bonus(tx, obj); 780 err = dmu_tx_assign(tx, TXG_WAIT); 781 if (err) { 782 /* 783 * Perhaps there is not enough disk 784 * space. Just deal with it from 785 * dsl_dataset_destroy_sync(). 786 */ 787 dmu_tx_abort(tx); 788 continue; 789 } 790 VERIFY(0 == dmu_object_free(os, obj, tx)); 791 dmu_tx_commit(tx); 792 } 793 /* Make sure it's not dirty before we finish destroying it. */ 794 txg_wait_synced(dd->dd_pool, 0); 795 796 dmu_objset_close(os); 797 if (err != ESRCH) 798 return (err); 799 800 err = dsl_dataset_open(name, 801 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 802 FTAG, &ds); 803 if (err) 804 return (err); 805 806 err = dsl_dir_open(name, FTAG, &dd, NULL); 807 if (err) { 808 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 809 return (err); 810 } 811 812 /* 813 * Blow away the dsl_dir + head dataset. 
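	 * Both destroys are submitted to one sync task group, so the head
	 * dataset and its dsl_dir are removed in the same txg.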
814 */ 815 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 816 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 817 dsl_dataset_destroy_sync, ds, FTAG, 0); 818 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 819 dsl_dir_destroy_sync, dd, FTAG, 0); 820 err = dsl_sync_task_group_wait(dstg); 821 dsl_sync_task_group_destroy(dstg); 822 /* if it is successful, *destroy_sync will close the ds+dd */ 823 if (err) { 824 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 825 dsl_dir_close(dd, FTAG); 826 } 827 return (err); 828 } 829 830 int 831 dsl_dataset_rollback(dsl_dataset_t *ds) 832 { 833 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 834 return (dsl_sync_task_do(ds->ds_dir->dd_pool, 835 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, 836 ds, NULL, 0)); 837 } 838 839 void * 840 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 841 void *p, dsl_dataset_evict_func_t func) 842 { 843 void *old; 844 845 mutex_enter(&ds->ds_lock); 846 old = ds->ds_user_ptr; 847 if (old == NULL) { 848 ds->ds_user_ptr = p; 849 ds->ds_user_evict_func = func; 850 } 851 mutex_exit(&ds->ds_lock); 852 return (old); 853 } 854 855 void * 856 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 857 { 858 return (ds->ds_user_ptr); 859 } 860 861 862 blkptr_t * 863 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 864 { 865 return (&ds->ds_phys->ds_bp); 866 } 867 868 void 869 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 870 { 871 ASSERT(dmu_tx_is_syncing(tx)); 872 /* If it's the meta-objset, set dp_meta_rootbp */ 873 if (ds == NULL) { 874 tx->tx_pool->dp_meta_rootbp = *bp; 875 } else { 876 dmu_buf_will_dirty(ds->ds_dbuf, tx); 877 ds->ds_phys->ds_bp = *bp; 878 } 879 } 880 881 spa_t * 882 dsl_dataset_get_spa(dsl_dataset_t *ds) 883 { 884 return (ds->ds_dir->dd_pool->dp_spa); 885 } 886 887 void 888 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 889 { 890 dsl_pool_t *dp; 891 892 if (ds == NULL) /* this is the meta-objset */ 893 return; 894 895 ASSERT(ds->ds_user_ptr != NULL); 896 897 if (ds->ds_phys->ds_next_snap_obj != 0) 898 panic("dirtying snapshot!"); 899 900 dp = ds->ds_dir->dd_pool; 901 902 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 903 /* up the hold count until we can be written out */ 904 dmu_buf_add_ref(ds->ds_dbuf, ds); 905 } 906 } 907 908 struct killarg { 909 uint64_t *usedp; 910 uint64_t *compressedp; 911 uint64_t *uncompressedp; 912 zio_t *zio; 913 dmu_tx_t *tx; 914 }; 915 916 static int 917 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 918 { 919 struct killarg *ka = arg; 920 blkptr_t *bp = &bc->bc_blkptr; 921 922 ASSERT3U(bc->bc_errno, ==, 0); 923 924 /* 925 * Since this callback is not called concurrently, no lock is 926 * needed on the accounting values. 927 */ 928 *ka->usedp += bp_get_dasize(spa, bp); 929 *ka->compressedp += BP_GET_PSIZE(bp); 930 *ka->uncompressedp += BP_GET_UCSIZE(bp); 931 /* XXX check for EIO? */ 932 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, 933 ARC_NOWAIT); 934 return (0); 935 } 936 937 /* ARGSUSED */ 938 static int 939 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) 940 { 941 dsl_dataset_t *ds = arg1; 942 943 /* 944 * There must be a previous snapshot. I suppose we could roll 945 * it back to being empty (and re-initialize the upper (ZPL) 946 * layer). But for now there's no way to do this via the user 947 * interface. 948 */ 949 if (ds->ds_phys->ds_prev_snap_txg == 0) 950 return (EINVAL); 951 952 /* 953 * This must not be a snapshot. 
954 */ 955 if (ds->ds_phys->ds_next_snap_obj != 0) 956 return (EINVAL); 957 958 /* 959 * If we made changes this txg, traverse_dsl_dataset won't find 960 * them. Try again. 961 */ 962 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 963 return (EAGAIN); 964 965 return (0); 966 } 967 968 /* ARGSUSED */ 969 static void 970 dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx) 971 { 972 dsl_dataset_t *ds = arg1; 973 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 974 975 dmu_buf_will_dirty(ds->ds_dbuf, tx); 976 977 /* Zero out the deadlist. */ 978 bplist_close(&ds->ds_deadlist); 979 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 980 ds->ds_phys->ds_deadlist_obj = 981 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 982 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 983 ds->ds_phys->ds_deadlist_obj)); 984 985 { 986 /* Free blkptrs that we gave birth to */ 987 zio_t *zio; 988 uint64_t used = 0, compressed = 0, uncompressed = 0; 989 struct killarg ka; 990 991 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 992 ZIO_FLAG_MUSTSUCCEED); 993 ka.usedp = &used; 994 ka.compressedp = &compressed; 995 ka.uncompressedp = &uncompressed; 996 ka.zio = zio; 997 ka.tx = tx; 998 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 999 ADVANCE_POST, kill_blkptr, &ka); 1000 (void) zio_wait(zio); 1001 1002 dsl_dir_diduse_space(ds->ds_dir, 1003 -used, -compressed, -uncompressed, tx); 1004 } 1005 1006 /* Change our contents to that of the prev snapshot */ 1007 ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); 1008 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 1009 ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; 1010 ds->ds_phys->ds_compressed_bytes = 1011 ds->ds_prev->ds_phys->ds_compressed_bytes; 1012 ds->ds_phys->ds_uncompressed_bytes = 1013 ds->ds_prev->ds_phys->ds_uncompressed_bytes; 1014 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; 1015 ds->ds_phys->ds_unique_bytes = 0; 1016 1017 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1018 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1019 ds->ds_prev->ds_phys->ds_unique_bytes = 0; 1020 } 1021 } 1022 1023 /* ARGSUSED */ 1024 static int 1025 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1026 { 1027 dsl_dataset_t *ds = arg1; 1028 1029 /* 1030 * Can't delete a head dataset if there are snapshots of it. 1031 * (Except if the only snapshots are from the branch we cloned 1032 * from.) 1033 */ 1034 if (ds->ds_prev != NULL && 1035 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1036 return (EINVAL); 1037 1038 return (0); 1039 } 1040 1041 /* ARGSUSED */ 1042 static void 1043 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1044 { 1045 dsl_dataset_t *ds = arg1; 1046 1047 /* Mark it as inconsistent on-disk, in case we crash */ 1048 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1049 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1050 } 1051 1052 /* ARGSUSED */ 1053 static int 1054 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1055 { 1056 dsl_dataset_t *ds = arg1; 1057 1058 /* Can't delete a branch point. */ 1059 if (ds->ds_phys->ds_num_children > 1) 1060 return (EEXIST); 1061 1062 /* 1063 * Can't delete a head dataset if there are snapshots of it. 1064 * (Except if the only snapshots are from the branch we cloned 1065 * from.) 
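	 * (If ds_prev is only our clone origin, its ds_next_snap_obj points
	 * into its own snapshot chain rather than back at us, so the check
	 * below lets that case through.)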
1066 */ 1067 if (ds->ds_prev != NULL && 1068 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1069 return (EINVAL); 1070 1071 /* 1072 * If we made changes this txg, traverse_dsl_dataset won't find 1073 * them. Try again. 1074 */ 1075 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1076 return (EAGAIN); 1077 1078 /* XXX we should do some i/o error checking... */ 1079 return (0); 1080 } 1081 1082 static void 1083 dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) 1084 { 1085 dsl_dataset_t *ds = arg1; 1086 uint64_t used = 0, compressed = 0, uncompressed = 0; 1087 zio_t *zio; 1088 int err; 1089 int after_branch_point = FALSE; 1090 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1091 objset_t *mos = dp->dp_meta_objset; 1092 dsl_dataset_t *ds_prev = NULL; 1093 uint64_t obj; 1094 1095 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 1096 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 1097 ASSERT(ds->ds_prev == NULL || 1098 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1099 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1100 1101 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1102 1103 obj = ds->ds_object; 1104 1105 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1106 if (ds->ds_prev) { 1107 ds_prev = ds->ds_prev; 1108 } else { 1109 VERIFY(0 == dsl_dataset_open_obj(dp, 1110 ds->ds_phys->ds_prev_snap_obj, NULL, 1111 DS_MODE_NONE, FTAG, &ds_prev)); 1112 } 1113 after_branch_point = 1114 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1115 1116 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1117 if (after_branch_point && 1118 ds->ds_phys->ds_next_snap_obj == 0) { 1119 /* This clone is toast. */ 1120 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1121 ds_prev->ds_phys->ds_num_children--; 1122 } else if (!after_branch_point) { 1123 ds_prev->ds_phys->ds_next_snap_obj = 1124 ds->ds_phys->ds_next_snap_obj; 1125 } 1126 } 1127 1128 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1129 1130 if (ds->ds_phys->ds_next_snap_obj != 0) { 1131 blkptr_t bp; 1132 dsl_dataset_t *ds_next; 1133 uint64_t itor = 0; 1134 1135 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1136 1137 VERIFY(0 == dsl_dataset_open_obj(dp, 1138 ds->ds_phys->ds_next_snap_obj, NULL, 1139 DS_MODE_NONE, FTAG, &ds_next)); 1140 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1141 1142 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1143 ds_next->ds_phys->ds_prev_snap_obj = 1144 ds->ds_phys->ds_prev_snap_obj; 1145 ds_next->ds_phys->ds_prev_snap_txg = 1146 ds->ds_phys->ds_prev_snap_txg; 1147 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1148 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1149 1150 /* 1151 * Transfer to our deadlist (which will become next's 1152 * new deadlist) any entries from next's current 1153 * deadlist which were born before prev, and free the 1154 * other entries. 1155 * 1156 * XXX we're doing this long task with the config lock held 1157 */ 1158 while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1159 &bp) == 0) { 1160 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1161 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1162 &bp, tx)); 1163 if (ds_prev && !after_branch_point && 1164 bp.blk_birth > 1165 ds_prev->ds_phys->ds_prev_snap_txg) { 1166 ds_prev->ds_phys->ds_unique_bytes += 1167 bp_get_dasize(dp->dp_spa, &bp); 1168 } 1169 } else { 1170 used += bp_get_dasize(dp->dp_spa, &bp); 1171 compressed += BP_GET_PSIZE(&bp); 1172 uncompressed += BP_GET_UCSIZE(&bp); 1173 /* XXX check return value? 
*/ 1174 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1175 &bp, NULL, NULL, ARC_NOWAIT); 1176 } 1177 } 1178 1179 /* free next's deadlist */ 1180 bplist_close(&ds_next->ds_deadlist); 1181 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1182 1183 /* set next's deadlist to our deadlist */ 1184 ds_next->ds_phys->ds_deadlist_obj = 1185 ds->ds_phys->ds_deadlist_obj; 1186 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1187 ds_next->ds_phys->ds_deadlist_obj)); 1188 ds->ds_phys->ds_deadlist_obj = 0; 1189 1190 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1191 /* 1192 * Update next's unique to include blocks which 1193 * were previously shared by only this snapshot 1194 * and it. Those blocks will be born after the 1195 * prev snap and before this snap, and will have 1196 * died after the next snap and before the one 1197 * after that (ie. be on the snap after next's 1198 * deadlist). 1199 * 1200 * XXX we're doing this long task with the 1201 * config lock held 1202 */ 1203 dsl_dataset_t *ds_after_next; 1204 1205 VERIFY(0 == dsl_dataset_open_obj(dp, 1206 ds_next->ds_phys->ds_next_snap_obj, NULL, 1207 DS_MODE_NONE, FTAG, &ds_after_next)); 1208 itor = 0; 1209 while (bplist_iterate(&ds_after_next->ds_deadlist, 1210 &itor, &bp) == 0) { 1211 if (bp.blk_birth > 1212 ds->ds_phys->ds_prev_snap_txg && 1213 bp.blk_birth <= 1214 ds->ds_phys->ds_creation_txg) { 1215 ds_next->ds_phys->ds_unique_bytes += 1216 bp_get_dasize(dp->dp_spa, &bp); 1217 } 1218 } 1219 1220 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1221 ASSERT3P(ds_next->ds_prev, ==, NULL); 1222 } else { 1223 /* 1224 * It would be nice to update the head dataset's 1225 * unique. To do so we would have to traverse 1226 * it for blocks born after ds_prev, which is 1227 * pretty expensive just to maintain something 1228 * for debugging purposes. 1229 */ 1230 ASSERT3P(ds_next->ds_prev, ==, ds); 1231 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1232 ds_next); 1233 if (ds_prev) { 1234 VERIFY(0 == dsl_dataset_open_obj(dp, 1235 ds->ds_phys->ds_prev_snap_obj, NULL, 1236 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); 1237 } else { 1238 ds_next->ds_prev = NULL; 1239 } 1240 } 1241 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1242 1243 /* 1244 * NB: unique_bytes is not accurate for head objsets 1245 * because we don't update it when we delete the most 1246 * recent snapshot -- see above comment. 1247 */ 1248 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1249 } else { 1250 /* 1251 * There's no next snapshot, so this is a head dataset. 1252 * Destroy the deadlist. Unless it's a clone, the 1253 * deadlist should be empty. (If it's a clone, it's 1254 * safe to ignore the deadlist contents.) 
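		 * A clone's deadlist only holds blocks that are still
		 * referenced by the origin snapshot, so there is nothing
		 * here that needs to be freed.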
1255 */ 1256 struct killarg ka; 1257 1258 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1259 bplist_close(&ds->ds_deadlist); 1260 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1261 ds->ds_phys->ds_deadlist_obj = 0; 1262 1263 /* 1264 * Free everything that we point to (that's born after 1265 * the previous snapshot, if we are a clone) 1266 * 1267 * XXX we're doing this long task with the config lock held 1268 */ 1269 ka.usedp = &used; 1270 ka.compressedp = &compressed; 1271 ka.uncompressedp = &uncompressed; 1272 ka.zio = zio; 1273 ka.tx = tx; 1274 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1275 ADVANCE_POST, kill_blkptr, &ka); 1276 ASSERT3U(err, ==, 0); 1277 } 1278 1279 err = zio_wait(zio); 1280 ASSERT3U(err, ==, 0); 1281 1282 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); 1283 1284 if (ds->ds_phys->ds_snapnames_zapobj) { 1285 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1286 ASSERT(err == 0); 1287 } 1288 1289 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1290 /* Erase the link in the dataset */ 1291 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1292 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1293 /* 1294 * dsl_dir_sync_destroy() called us, they'll destroy 1295 * the dataset. 1296 */ 1297 } else { 1298 /* remove from snapshot namespace */ 1299 dsl_dataset_t *ds_head; 1300 VERIFY(0 == dsl_dataset_open_obj(dp, 1301 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, 1302 DS_MODE_NONE, FTAG, &ds_head)); 1303 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1304 #ifdef ZFS_DEBUG 1305 { 1306 uint64_t val; 1307 err = zap_lookup(mos, 1308 ds_head->ds_phys->ds_snapnames_zapobj, 1309 ds->ds_snapname, 8, 1, &val); 1310 ASSERT3U(err, ==, 0); 1311 ASSERT3U(val, ==, obj); 1312 } 1313 #endif 1314 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1315 ds->ds_snapname, tx); 1316 ASSERT(err == 0); 1317 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1318 } 1319 1320 if (ds_prev && ds->ds_prev != ds_prev) 1321 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1322 1323 spa_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1324 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 1325 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1326 1327 } 1328 1329 /* ARGSUSED */ 1330 int 1331 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1332 { 1333 objset_t *os = arg1; 1334 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1335 const char *snapname = arg2; 1336 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1337 int err; 1338 uint64_t value; 1339 1340 /* 1341 * We don't allow multiple snapshots of the same txg. If there 1342 * is already one, try again. 1343 */ 1344 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1345 return (EAGAIN); 1346 1347 /* 1348 * Check for conflicting name snapshot name. 1349 */ 1350 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1351 snapname, 8, 1, &value); 1352 if (err == 0) 1353 return (EEXIST); 1354 if (err != ENOENT) 1355 return (err); 1356 1357 /* 1358 * Check that the dataset's name is not too long. 
Name consists 1359 * of the dataset's length + 1 for the @-sign + snapshot name's length 1360 */ 1361 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 1362 return (ENAMETOOLONG); 1363 1364 ds->ds_trysnap_txg = tx->tx_txg; 1365 return (0); 1366 } 1367 1368 void 1369 dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1370 { 1371 objset_t *os = arg1; 1372 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1373 const char *snapname = arg2; 1374 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1375 dmu_buf_t *dbuf; 1376 dsl_dataset_phys_t *dsphys; 1377 uint64_t dsobj; 1378 objset_t *mos = dp->dp_meta_objset; 1379 int err; 1380 1381 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1382 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1383 1384 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1385 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1386 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1387 dmu_buf_will_dirty(dbuf, tx); 1388 dsphys = dbuf->db_data; 1389 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1390 dsphys->ds_fsid_guid = unique_create(); 1391 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 1392 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1393 sizeof (dsphys->ds_guid)); 1394 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1395 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1396 dsphys->ds_next_snap_obj = ds->ds_object; 1397 dsphys->ds_num_children = 1; 1398 dsphys->ds_creation_time = gethrestime_sec(); 1399 dsphys->ds_creation_txg = tx->tx_txg; 1400 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1401 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1402 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1403 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1404 dsphys->ds_flags = ds->ds_phys->ds_flags; 1405 dsphys->ds_bp = ds->ds_phys->ds_bp; 1406 dmu_buf_rele(dbuf, FTAG); 1407 1408 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1409 if (ds->ds_prev) { 1410 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1411 ds->ds_object || 1412 ds->ds_prev->ds_phys->ds_num_children > 1); 1413 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1414 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1415 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1416 ds->ds_prev->ds_phys->ds_creation_txg); 1417 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1418 } 1419 } 1420 1421 bplist_close(&ds->ds_deadlist); 1422 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1423 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); 1424 ds->ds_phys->ds_prev_snap_obj = dsobj; 1425 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; 1426 ds->ds_phys->ds_unique_bytes = 0; 1427 ds->ds_phys->ds_deadlist_obj = 1428 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1429 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1430 ds->ds_phys->ds_deadlist_obj)); 1431 1432 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1433 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1434 snapname, 8, 1, &dsobj, tx); 1435 ASSERT(err == 0); 1436 1437 if (ds->ds_prev) 1438 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1439 VERIFY(0 == dsl_dataset_open_obj(dp, 1440 ds->ds_phys->ds_prev_snap_obj, snapname, 1441 DS_MODE_NONE, ds, &ds->ds_prev)); 1442 } 1443 1444 void 1445 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1446 { 1447 ASSERT(dmu_tx_is_syncing(tx)); 1448 ASSERT(ds->ds_user_ptr != NULL); 1449 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1450 1451 dsl_dir_dirty(ds->ds_dir, tx); 
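	/*
	 * ds_user_ptr is the objset hung on this dataset (see
	 * dsl_dataset_set_user_ptr()); write out its dirty data for this txg.
	 */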
1452 dmu_objset_sync(ds->ds_user_ptr, zio, tx); 1453 /* Unneeded? bplist_close(&ds->ds_deadlist); */ 1454 } 1455 1456 void 1457 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1458 { 1459 dsl_dir_stats(ds->ds_dir, nv); 1460 1461 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1462 ds->ds_phys->ds_creation_time); 1463 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1464 ds->ds_phys->ds_creation_txg); 1465 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, 1466 ds->ds_phys->ds_used_bytes); 1467 1468 if (ds->ds_phys->ds_next_snap_obj) { 1469 /* 1470 * This is a snapshot; override the dd's space used with 1471 * our unique space and compression ratio. 1472 */ 1473 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1474 ds->ds_phys->ds_unique_bytes); 1475 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 1476 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1477 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1478 ds->ds_phys->ds_compressed_bytes)); 1479 } 1480 } 1481 1482 void 1483 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1484 { 1485 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1486 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1487 if (ds->ds_phys->ds_next_snap_obj) { 1488 stat->dds_is_snapshot = B_TRUE; 1489 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1490 } 1491 1492 /* clone origin is really a dsl_dir thing... */ 1493 if (ds->ds_dir->dd_phys->dd_clone_parent_obj) { 1494 dsl_dataset_t *ods; 1495 1496 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 1497 VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, 1498 ds->ds_dir->dd_phys->dd_clone_parent_obj, 1499 NULL, DS_MODE_NONE, FTAG, &ods)); 1500 dsl_dataset_name(ods, stat->dds_clone_of); 1501 dsl_dataset_close(ods, DS_MODE_NONE, FTAG); 1502 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 1503 } 1504 } 1505 1506 uint64_t 1507 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1508 { 1509 return (ds->ds_phys->ds_fsid_guid); 1510 } 1511 1512 void 1513 dsl_dataset_space(dsl_dataset_t *ds, 1514 uint64_t *refdbytesp, uint64_t *availbytesp, 1515 uint64_t *usedobjsp, uint64_t *availobjsp) 1516 { 1517 *refdbytesp = ds->ds_phys->ds_used_bytes; 1518 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1519 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1520 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1521 } 1522 1523 /* ARGSUSED */ 1524 static int 1525 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 1526 { 1527 dsl_dataset_t *ds = arg1; 1528 char *newsnapname = arg2; 1529 dsl_dir_t *dd = ds->ds_dir; 1530 objset_t *mos = dd->dd_pool->dp_meta_objset; 1531 dsl_dataset_t *hds; 1532 uint64_t val; 1533 int err; 1534 1535 err = dsl_dataset_open_obj(dd->dd_pool, 1536 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); 1537 if (err) 1538 return (err); 1539 1540 /* new name better not be in use */ 1541 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, 1542 newsnapname, 8, 1, &val); 1543 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1544 1545 if (err == 0) 1546 err = EEXIST; 1547 else if (err == ENOENT) 1548 err = 0; 1549 return (err); 1550 } 1551 1552 static void 1553 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1554 { 1555 dsl_dataset_t *ds = arg1; 1556 char *newsnapname = arg2; 1557 dsl_dir_t *dd = ds->ds_dir; 1558 objset_t *mos = dd->dd_pool->dp_meta_objset; 1559 dsl_dataset_t *hds; 1560 int err; 1561 1562 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 1563 1564 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 
1565 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); 1566 1567 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1568 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, 1569 ds->ds_snapname, tx); 1570 ASSERT3U(err, ==, 0); 1571 mutex_enter(&ds->ds_lock); 1572 (void) strcpy(ds->ds_snapname, newsnapname); 1573 mutex_exit(&ds->ds_lock); 1574 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 1575 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 1576 ASSERT3U(err, ==, 0); 1577 1578 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1579 } 1580 1581 #pragma weak dmu_objset_rename = dsl_dataset_rename 1582 int 1583 dsl_dataset_rename(const char *oldname, const char *newname) 1584 { 1585 dsl_dir_t *dd; 1586 dsl_dataset_t *ds; 1587 const char *tail; 1588 int err; 1589 1590 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 1591 if (err) 1592 return (err); 1593 if (tail == NULL) { 1594 err = dsl_dir_rename(dd, newname); 1595 dsl_dir_close(dd, FTAG); 1596 return (err); 1597 } 1598 if (tail[0] != '@') { 1599 /* the name ended in a nonexistant component */ 1600 dsl_dir_close(dd, FTAG); 1601 return (ENOENT); 1602 } 1603 1604 dsl_dir_close(dd, FTAG); 1605 1606 /* new name must be snapshot in same filesystem */ 1607 tail = strchr(newname, '@'); 1608 if (tail == NULL) 1609 return (EINVAL); 1610 tail++; 1611 if (strncmp(oldname, newname, tail - newname) != 0) 1612 return (EXDEV); 1613 1614 err = dsl_dataset_open(oldname, 1615 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); 1616 if (err) 1617 return (err); 1618 1619 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1620 dsl_dataset_snapshot_rename_check, 1621 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 1622 1623 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 1624 1625 return (err); 1626 } 1627 1628 struct promotearg { 1629 uint64_t used, comp, uncomp, unique; 1630 uint64_t newnext_obj, snapnames_obj; 1631 }; 1632 1633 static int 1634 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 1635 { 1636 dsl_dataset_t *hds = arg1; 1637 struct promotearg *pa = arg2; 1638 dsl_dir_t *dd = hds->ds_dir; 1639 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1640 dsl_dir_t *pdd = NULL; 1641 dsl_dataset_t *ds = NULL; 1642 dsl_dataset_t *pivot_ds = NULL; 1643 dsl_dataset_t *newnext_ds = NULL; 1644 int err; 1645 char *name = NULL; 1646 uint64_t itor = 0; 1647 blkptr_t bp; 1648 1649 bzero(pa, sizeof (*pa)); 1650 1651 /* Check that it is a clone */ 1652 if (dd->dd_phys->dd_clone_parent_obj == 0) 1653 return (EINVAL); 1654 1655 /* Since this is so expensive, don't do the preliminary check */ 1656 if (!dmu_tx_is_syncing(tx)) 1657 return (0); 1658 1659 if (err = dsl_dataset_open_obj(dp, 1660 dd->dd_phys->dd_clone_parent_obj, 1661 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) 1662 goto out; 1663 pdd = pivot_ds->ds_dir; 1664 1665 { 1666 dsl_dataset_t *phds; 1667 if (err = dsl_dataset_open_obj(dd->dd_pool, 1668 pdd->dd_phys->dd_head_dataset_obj, 1669 NULL, DS_MODE_NONE, FTAG, &phds)) 1670 goto out; 1671 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; 1672 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); 1673 } 1674 1675 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 1676 err = EXDEV; 1677 goto out; 1678 } 1679 1680 /* find pivot point's new next ds */ 1681 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, 1682 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); 1683 while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { 1684 dsl_dataset_t *prev; 1685 1686 if (err = dsl_dataset_open_obj(dd->dd_pool, 1687 newnext_ds->ds_phys->ds_prev_snap_obj, 1688 
NULL, DS_MODE_NONE, FTAG, &prev)) 1689 goto out; 1690 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1691 newnext_ds = prev; 1692 } 1693 pa->newnext_obj = newnext_ds->ds_object; 1694 1695 /* compute pivot point's new unique space */ 1696 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, 1697 &itor, &bp)) == 0) { 1698 if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) 1699 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); 1700 } 1701 if (err != ENOENT) 1702 goto out; 1703 1704 /* Walk the snapshots that we are moving */ 1705 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1706 ds = pivot_ds; 1707 /* CONSTCOND */ 1708 while (TRUE) { 1709 uint64_t val, dlused, dlcomp, dluncomp; 1710 dsl_dataset_t *prev; 1711 1712 /* Check that the snapshot name does not conflict */ 1713 dsl_dataset_name(ds, name); 1714 err = zap_lookup(dd->dd_pool->dp_meta_objset, 1715 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1716 8, 1, &val); 1717 if (err != ENOENT) { 1718 if (err == 0) 1719 err = EEXIST; 1720 goto out; 1721 } 1722 1723 /* 1724 * compute space to transfer. Each snapshot gave birth to: 1725 * (my used) - (prev's used) + (deadlist's used) 1726 */ 1727 pa->used += ds->ds_phys->ds_used_bytes; 1728 pa->comp += ds->ds_phys->ds_compressed_bytes; 1729 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; 1730 1731 /* If we reach the first snapshot, we're done. */ 1732 if (ds->ds_phys->ds_prev_snap_obj == 0) 1733 break; 1734 1735 if (err = bplist_space(&ds->ds_deadlist, 1736 &dlused, &dlcomp, &dluncomp)) 1737 goto out; 1738 if (err = dsl_dataset_open_obj(dd->dd_pool, 1739 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1740 FTAG, &prev)) 1741 goto out; 1742 pa->used += dlused - prev->ds_phys->ds_used_bytes; 1743 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; 1744 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; 1745 1746 /* 1747 * We could be a clone of a clone. If we reach our 1748 * parent's branch point, we're done. 1749 */ 1750 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1751 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1752 break; 1753 } 1754 if (ds != pivot_ds) 1755 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1756 ds = prev; 1757 } 1758 1759 /* Check that there is enough space here */ 1760 err = dsl_dir_transfer_possible(pdd, dd, pa->used); 1761 1762 out: 1763 if (ds && ds != pivot_ds) 1764 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1765 if (pivot_ds) 1766 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 1767 if (newnext_ds) 1768 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1769 if (name) 1770 kmem_free(name, MAXPATHLEN); 1771 return (err); 1772 } 1773 1774 static void 1775 dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1776 { 1777 dsl_dataset_t *hds = arg1; 1778 struct promotearg *pa = arg2; 1779 dsl_dir_t *dd = hds->ds_dir; 1780 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1781 dsl_dir_t *pdd = NULL; 1782 dsl_dataset_t *ds, *pivot_ds; 1783 char *name; 1784 1785 ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); 1786 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 1787 1788 VERIFY(0 == dsl_dataset_open_obj(dp, 1789 dd->dd_phys->dd_clone_parent_obj, 1790 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); 1791 /* 1792 * We need to explicitly open pdd, since pivot_ds's pdd will be 1793 * changing. 
1794 */ 1795 VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object, 1796 NULL, FTAG, &pdd)); 1797 1798 /* move snapshots to this dir */ 1799 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1800 ds = pivot_ds; 1801 /* CONSTCOND */ 1802 while (TRUE) { 1803 dsl_dataset_t *prev; 1804 1805 /* move snap name entry */ 1806 dsl_dataset_name(ds, name); 1807 VERIFY(0 == zap_remove(dp->dp_meta_objset, 1808 pa->snapnames_obj, ds->ds_snapname, tx)); 1809 VERIFY(0 == zap_add(dp->dp_meta_objset, 1810 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1811 8, 1, &ds->ds_object, tx)); 1812 1813 /* change containing dsl_dir */ 1814 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1815 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); 1816 ds->ds_phys->ds_dir_obj = dd->dd_object; 1817 ASSERT3P(ds->ds_dir, ==, pdd); 1818 dsl_dir_close(ds->ds_dir, ds); 1819 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 1820 NULL, ds, &ds->ds_dir)); 1821 1822 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 1823 1824 if (ds->ds_phys->ds_prev_snap_obj == 0) 1825 break; 1826 1827 VERIFY(0 == dsl_dataset_open_obj(dp, 1828 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1829 FTAG, &prev)); 1830 1831 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1832 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1833 break; 1834 } 1835 if (ds != pivot_ds) 1836 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1837 ds = prev; 1838 } 1839 if (ds != pivot_ds) 1840 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1841 1842 /* change pivot point's next snap */ 1843 dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); 1844 pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; 1845 1846 /* change clone_parent-age */ 1847 dmu_buf_will_dirty(dd->dd_dbuf, tx); 1848 ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); 1849 dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; 1850 dmu_buf_will_dirty(pdd->dd_dbuf, tx); 1851 pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; 1852 1853 /* change space accounting */ 1854 dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); 1855 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); 1856 pivot_ds->ds_phys->ds_unique_bytes = pa->unique; 1857 1858 dsl_dir_close(pdd, FTAG); 1859 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 1860 kmem_free(name, MAXPATHLEN); 1861 } 1862 1863 int 1864 dsl_dataset_promote(const char *name) 1865 { 1866 dsl_dataset_t *ds; 1867 int err; 1868 dmu_object_info_t doi; 1869 struct promotearg pa; 1870 1871 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 1872 if (err) 1873 return (err); 1874 1875 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 1876 ds->ds_phys->ds_snapnames_zapobj, &doi); 1877 if (err) { 1878 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1879 return (err); 1880 } 1881 1882 /* 1883 * Add in 128x the snapnames zapobj size, since we will be moving 1884 * a bunch of snapnames to the promoted ds, and dirtying their 1885 * bonus buffers. 1886 */ 1887 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1888 dsl_dataset_promote_check, 1889 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); 1890 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1891 return (err); 1892 } 1893 1894 /* 1895 * Given a pool name and a dataset object number in that pool, 1896 * return the name of that dataset. 
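 * The caller supplies buf, which is assumed to be large enough to hold a
 * full dataset name (names are bounded by MAXNAMELEN elsewhere in this file).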
 */
int
dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
{
	spa_t *spa;
	dsl_pool_t *dp;
	dsl_dataset_t *ds = NULL;
	int error;

	if ((error = spa_open(pname, &spa, FTAG)) != 0)
		return (error);
	dp = spa_get_dsl(spa);
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if ((error = dsl_dataset_open_obj(dp, obj,
	    NULL, DS_MODE_NONE, FTAG, &ds)) != 0) {
		rw_exit(&dp->dp_config_rwlock);
		spa_close(spa, FTAG);
		return (error);
	}
	dsl_dataset_name(ds, buf);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
	rw_exit(&dp->dp_config_rwlock);
	spa_close(spa, FTAG);

	return (0);
}