/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>

/* check/sync halves of the destroy and rollback sync tasks defined below */
static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
static dsl_syncfunc_t dsl_dataset_rollback_sync;
static dsl_checkfunc_t dsl_dataset_destroy_check;
static dsl_syncfunc_t dsl_dataset_destroy_sync;

/* ceiling on the weighted open refcount of a dataset (see table below) */
#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * We use weighted reference counts to express the various forms of exclusion
 * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
 * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
 * This makes the exclusion logic simple: the total refcnt for all opens cannot
 * exceed DS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
 * weight (DS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
 * just over half of the refcnt space, so there can't be more than one, but it
 * can peacefully coexist with any number of STANDARD opens.
 */
static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
	0,			/* DS_MODE_NONE - invalid */
	1,			/* DS_MODE_STANDARD - unlimited number */
	(DS_REF_MAX >> 1) + 1,	/* DS_MODE_PRIMARY - only one of these */
	DS_REF_MAX		/* DS_MODE_EXCLUSIVE - no other opens */
};


/*
 * Account for the birth of block 'bp' in dataset 'ds' (syncing context
 * only): bump the dataset's used/compressed/uncompressed/unique byte
 * counters and charge the space to its dsl_dir.  'ds' may be NULL to
 * indicate the meta-objset, whose space is accounted in the pool's
 * dp_mos_dir placeholder.
 */
void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	/* ds_lock protects the per-dataset byte counters */
	mutex_enter(&ds->ds_lock);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	/* a newly born block is referenced only by this dataset */
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir,
	    used, compressed, uncompressed, tx);
}

/*
 * Account for the death of block 'bp' in dataset 'ds' (syncing context
 * only).  If the block was born after the most recent snapshot it is
 * freed immediately via arc_free(); otherwise a snapshot still
 * references it and it is moved onto the dataset's deadlist.  In both
 * cases the used/compressed/uncompressed byte counters are adjusted.
 * 'ds' may be NULL to indicate the meta-objset.  If 'pio' is non-NULL
 * the free is issued asynchronously under it; otherwise we wait.
 */
void
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
    dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(dmu_tx_is_syncing(tx));
	/* No block pointer => nothing to free */
	if (BP_IS_HOLE(bp))
		return;

	ASSERT(used > 0);
	if (ds == NULL) {
		int err;
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		err = arc_free(pio, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
		ASSERT(err == 0);

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int err;

		/* born since the last snapshot: free it right away */
		dprintf_bp(bp, "freeing: %s", "");
		err = arc_free(pio, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
		ASSERT(err == 0);

		mutex_enter(&ds->ds_lock);
		/* XXX unique_bytes is not accurate for head datasets */
		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	} else {
		/* still visible in a snapshot: defer the free */
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_phys->ds_prev_snap_obj != 0) {
			ASSERT3U(ds->ds_prev->ds_object, ==,
			    ds->ds_phys->ds_prev_snap_obj);
			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
			    ds->ds_object && bp->blk_birth >
			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
				mutex_enter(&ds->ds_prev->ds_lock);
				ds->ds_prev->ds_phys->ds_unique_bytes +=
				    used;
				mutex_exit(&ds->ds_prev->ds_lock);
			}
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);
}

/*
 * Return an estimate of the txg of the most recent (possibly still
 * pending) snapshot of 'ds', or 0 for the meta-objset.
 */
uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.
So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

/*
 * A block born after the newest (possibly in-flight) snapshot is not
 * referenced by any snapshot and may therefore be freed immediately.
 */
int
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}

/*
 * Eviction callback for the dataset's bonus dbuf (registered in
 * dsl_dataset_open_obj()): tear down and free the in-core
 * dsl_dataset_t.  Runs the user's evict func (if any), drops our hold
 * on ds_prev, closes the deadlist and dsl_dir, and unlinks us from
 * the pool's synced-objsets list.
 */
/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* open_refcount == DS_REF_MAX when deleting */
	ASSERT(ds->ds_open_refcount == 0 ||
	    ds->ds_open_refcount == DS_REF_MAX);

	dprintf_ds(ds, "evicting %s\n", "");

	/* retire this fsid_guid from the unique-id space */
	unique_remove(ds->ds_phys->ds_fsid_guid);

	if (ds->ds_user_ptr != NULL)
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);

	if (ds->ds_prev) {
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	dsl_dir_close(ds->ds_dir, ds);

	if (list_link_active(&ds->ds_synced_link))
		list_remove(&dp->dp_synced_objsets, ds);

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_deadlist.bpl_lock);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

/*
 * Fill in ds->ds_snapname (if not already cached) by searching the
 * head dataset's snapshot-names zap for our object number.  A no-op
 * for non-snapshot datasets (ds_next_snap_obj == 0).
 */
static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

int
dsl_dataset_open_obj(dsl_pool_t
*dp, uint64_t dsobj, const char *snapname,
    int mode, void *tag, dsl_dataset_t **dsp)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	/* hold the bonus buffer; the dsl_dataset_t hangs off it as user data */
	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		/* first open of this dataset: build the in-core structure */
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
		    NULL);

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the blist if we
			 * just opened it.
			 */
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
			/* head dataset: also open its previous snapshot */
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds, &ds->ds_prev);
			}
		} else {
			if (snapname) {
#ifdef ZFS_DEBUG
				/* verify the caller-supplied snapname */
				dsl_dataset_phys_t *headphys;
				dmu_buf_t *headdbuf;
				err = dmu_bonus_hold(mos,
				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
				    FTAG, &headdbuf);
				if (err == 0) {
					headphys = headdbuf->db_data;
					uint64_t foundobj;
					err = zap_lookup(dp->dp_meta_objset,
					    headphys->ds_snapnames_zapobj,
					    snapname, sizeof (foundobj), 1,
					    &foundobj);
					ASSERT3U(foundobj, ==, dsobj);
					dmu_buf_rele(headdbuf, FTAG);
				}
#endif
				(void) strcat(ds->ds_snapname, snapname);
			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
				err = dsl_dataset_get_snapname(ds);
			}
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			/*
			 * Either setup failed or we lost the race with a
			 * concurrent open that installed its dsl_dataset_t
			 * first ('winner'); discard ours.
			 */
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev) {
				dsl_dataset_close(ds->ds_prev,
				    DS_MODE_NONE, ds);
			}
			dsl_dir_close(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			uint64_t new =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
			if (new != ds->ds_phys->ds_fsid_guid) {
				/* XXX it won't necessarily be synced... */
				ds->ds_phys->ds_fsid_guid = new;
			}
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);

	/*
	 * Apply the weighted-refcount open protocol (see
	 * ds_refcnt_weight): refuse PRIMARY opens of inconsistent
	 * datasets, and any open whose weight would push the total
	 * past DS_REF_MAX.
	 */
	mutex_enter(&ds->ds_lock);
	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
	    !DS_MODE_IS_INCONSISTENT(mode)) ||
	    (ds->ds_open_refcount + weight > DS_REF_MAX)) {
		mutex_exit(&ds->ds_lock);
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		return (EBUSY);
	}
	ds->ds_open_refcount += weight;
	mutex_exit(&ds->ds_lock);

	*dsp = ds;
	return (0);
}

/*
 * Open the dataset named by 'name' ("fs/path[@snap]") in the given
 * pool/spa.  Snapshots may only be opened read-only (EROFS otherwise).
 */
int
dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
    void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *tail;
	uint64_t obj;
	dsl_dataset_t *ds = NULL;
	int err = 0;

	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj == 0) {
		/* A dataset with no associated objset */
		err = ENOENT;
		goto out;
	}

	if (tail != NULL) {
		/* the name has a component past the dir: must be @snapshot */
		objset_t *mos = dp->dp_meta_objset;

		err = dsl_dataset_open_obj(dp, obj, NULL,
		    DS_MODE_NONE, tag, &ds);
		if (err)
			goto out;
		obj = ds->ds_phys->ds_snapnames_zapobj;
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		ds = NULL;

		if (tail[0] != '@') {
			err = ENOENT;
			goto out;
		}
		tail++;

		/* Look for a snapshot */
		if (!DS_MODE_IS_READONLY(mode)) {
			err = EROFS;
			goto out;
		}
		dprintf("looking for snapshot '%s'\n", tail);
		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
		if (err)
			goto out;
	}
	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);

out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);

	ASSERT3U((err == 0), ==, (ds != NULL));
	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
*dsp = ds;
	return (err);
}

/*
 * Convenience wrapper for dsl_dataset_open_spa() with no explicit spa.
 */
int
dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
}

/*
 * Write the full name of 'ds' ("dir/path[@snap]", or "mos" for the
 * meta-objset) into 'name'.  NOTE(review): no buffer length is passed;
 * the caller must supply at least dsl_dataset_namelen(ds) + 1 bytes.
 */
void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			if (!MUTEX_HELD(&ds->ds_lock)) {
				/*
				 * We use a "recursive" mutex so that we
				 * can call dprintf_ds() with ds_lock held.
				 */
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

/*
 * Number of characters dsl_dataset_name() would produce, not counting
 * the terminating NUL.
 */
static int
dsl_dataset_namelen(dsl_dataset_t *ds)
{
	int result;

	if (ds == NULL) {
		result = 3;	/* "mos" */
	} else {
		result = dsl_dir_namelen(ds->ds_dir);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			++result;	/* adding one for the @-sign */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				/* see dsl_dataset_name */
				mutex_enter(&ds->ds_lock);
				result += strlen(ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				result += strlen(ds->ds_snapname);
			}
		}
	}

	return (result);
}

/*
 * Drop an open hold of the given 'mode' (undoing the weighted
 * refcount taken in dsl_dataset_open_obj()) and release the hold on
 * the dataset's bonus dbuf, which may trigger dsl_dataset_evict().
 */
void
dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_open_refcount, >=, weight);
	ds->ds_open_refcount -= weight;
	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
	    mode, ds->ds_open_refcount);
	mutex_exit(&ds->ds_lock);

	dmu_buf_rele(ds->ds_dbuf, tag);
}

/*
 * Create the root dsl_dir and its head dataset (with a ZFS objset)
 * at pool creation; returns the root dir object in *ddobjp.
 */
void
dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
dsl_dataset_phys_t *dsphys;
	dsl_dataset_t *ds;
	uint64_t dsobj;
	dsl_dir_t *dd;

	dsl_dir_create_root(mos, ddobjp, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));

	/* allocate and initialize the root head dataset object */
	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;
	dsl_dir_close(dd, FTAG);

	/* give the new dataset an objset of type DMU_OST_ZFS */
	VERIFY(0 ==
	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
	(void) dmu_objset_create_impl(dp->dp_spa, ds,
	    &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
}

/*
 * Create a new dataset named 'lastname' under parent dir 'pdd',
 * optionally as a clone of 'clone_parent'; returns the new dataset's
 * object number.  Syncing context only.
 */
uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd,
    const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, ddobj;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dir_t *dd;

	ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp);
	ASSERT(clone_parent == NULL ||
	    clone_parent->ds_phys->ds_num_children > 0);
	ASSERT(lastname[0] != '@');
	ASSERT(dmu_tx_is_syncing(tx));

	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	if (clone_parent) {
		/*
		 * A clone starts out sharing the origin snapshot's root
		 * bp and space accounting, and bumps its child count.
		 */
		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
		dsphys->ds_prev_snap_txg =
		    clone_parent->ds_phys->ds_creation_txg;
		dsphys->ds_used_bytes =
		    clone_parent->ds_phys->ds_used_bytes;
		dsphys->ds_compressed_bytes =
		    clone_parent->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    clone_parent->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;

		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
		clone_parent->ds_phys->ds_num_children++;

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
	}
	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;
	dsl_dir_close(dd, FTAG);

	return (dsobj);
}

/* State shared by dsl_snapshots_destroy() and its per-fs callback. */
struct destroyarg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	char *failed;
};

/*
 * dmu_objset_find() callback: open 'name@snapname' and add a destroy
 * sync task for it to the group.  A missing snapshot (ENOENT) is not
 * an error; any other failure records the fs name in da->failed.
 */
static int
dsl_snapshot_destroy_one(char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	char *cp;
	int err;

	(void) strcat(name, "@");
	(void) strcat(name, da->snapname);
	err = dsl_dataset_open(name,
	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    da->dstg, &ds);
	cp = strchr(name, '@');
	*cp = '\0';
	if (err == ENOENT)
		return (0);
	if (err) {
		(void) strcpy(da->failed, name);
		return (err);
	}

	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, da->dstg, 0);
	return (0);
}

/*
 * Destroy 'snapname' in all descendants of 'fsname'.
 */
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
dsl_snapshots_destroy(char *fsname, char *snapname)
{
	int err;
	struct destroyarg da;
	dsl_sync_task_t *dst;
	spa_t *spa;
	char *cp;

	/* open the spa by the pool component of fsname */
	cp = strchr(fsname, '/');
	if (cp) {
		*cp = '\0';
		err = spa_open(fsname, &spa, FTAG);
		*cp = '/';
	} else {
		err = spa_open(fsname, &spa, FTAG);
	}
	if (err)
		return (err);
	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	da.snapname = snapname;
	da.failed = fsname;

	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);

	if (err == 0)
		err = dsl_sync_task_group_wait(da.dstg);

	for (dst = list_head(&da.dstg->dstg_tasks); dst;
	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		/* report the fs whose snapshot failed via 'fsname' */
		if (dst->dst_err) {
			dsl_dataset_name(ds, fsname);
			cp = strchr(fsname, '@');
			*cp = '\0';
		}
		/*
		 * If it was successful, destroy_sync would have
		 * closed the ds
		 */
		if (err)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg);
	}

	dsl_sync_task_group_destroy(da.dstg);
	spa_close(spa, FTAG);
	return (err);
}

/*
 * Destroy the dataset or snapshot named 'name'.  For a head dataset
 * the objects are first freed in open context (after marking the
 * dataset inconsistent in case of a crash), then the dsl_dir and
 * dataset are blown away in a sync task group.
 */
int
dsl_dataset_destroy(const char *name)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	uint64_t obj;

	if (strchr(name, '@')) {
		/* Destroying a snapshot is simpler */
		err = dsl_dataset_open(name,
		    DS_MODE_EXCLUSIVE | DS_MODE_READONLY |
DS_MODE_INCONSISTENT,
		    FTAG, &ds);
		if (err)
			return (err);
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    ds, FTAG, 0);
		/* on success the sync task closed the ds for us */
		if (err)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		return (err);
	}

	err = dmu_objset_open(name, DMU_OST_ANY,
	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
	if (err)
		return (err);
	ds = os->os->os_dsl_dataset;
	dd = ds->ds_dir;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err) {
		dmu_objset_close(os);
		return (err);
	}

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
	    ds->ds_phys->ds_prev_snap_txg)) {
		dmu_tx_t *tx = dmu_tx_create(os);
		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
		dmu_tx_hold_bonus(tx, obj);
		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err) {
			/*
			 * Perhaps there is not enough disk
			 * space.  Just deal with it from
			 * dsl_dataset_destroy_sync().
			 */
			dmu_tx_abort(tx);
			continue;
		}
		VERIFY(0 == dmu_object_free(os, obj, tx));
		dmu_tx_commit(tx);
	}
	/* Make sure it's not dirty before we finish destroying it. */
	txg_wait_synced(dd->dd_pool, 0);

	dmu_objset_close(os);
	/* dmu_object_next() ends the scan with ESRCH; anything else failed */
	if (err != ESRCH)
		return (err);

	err = dsl_dataset_open(name,
	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    FTAG, &ds);
	if (err)
		return (err);

	err = dsl_dir_open(name, FTAG, &dd, NULL);
	if (err) {
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		return (err);
	}

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, FTAG, 0);
	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
	    dsl_dir_destroy_sync, dd, FTAG, 0);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	/* if it is successful, *destroy_sync will close the ds+dd */
	if (err) {
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		dsl_dir_close(dd, FTAG);
	}
	return (err);
}

/*
 * Roll 'ds' back to its most recent snapshot.  The caller must hold
 * the dataset exclusively (open_refcount == DS_REF_MAX).
 */
int
dsl_dataset_rollback(dsl_dataset_t *ds)
{
	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
	    ds, NULL, 0));
}

/*
 * Install 'p' as the dataset's user pointer (with eviction callback
 * 'func') iff none is set.  Returns the previous user pointer, i.e.
 * NULL on success.
 */
void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

/* Return the current user pointer (unlocked read). */
void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}


/* Return a pointer to the dataset's root block pointer. */
blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

/*
 * Set the dataset's root block pointer; for the meta-objset
 * (ds == NULL), update dp_meta_rootbp instead.  Syncing context only.
 */
void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

/* Return the spa that this dataset's pool belongs to. */
spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

/*
 * Note that 'ds' was modified in this txg: add it to the pool's
 * dirty-dataset list, taking an extra dbuf hold until it is synced
 * out.  A NULL ds (the meta-objset) is ignored.
 */
void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_user_ptr != NULL);

if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");	/* snapshots are immutable */

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

/*
 * Context for kill_blkptr(): accumulators for the space being freed
 * plus the zio/tx under which the frees are issued.
 */
struct killarg {
	uint64_t *usedp;
	uint64_t *compressedp;
	uint64_t *uncompressedp;
	zio_t *zio;
	dmu_tx_t *tx;
};

/*
 * Traverse callback used by dsl_dataset_rollback_sync(): tally each
 * block's space into the killarg accumulators and arc_free it.
 */
static int
kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
{
	struct killarg *ka = arg;
	blkptr_t *bp = &bc->bc_blkptr;

	ASSERT3U(bc->bc_errno, ==, 0);

	/*
	 * Since this callback is not called concurrently, no lock is
	 * needed on the accounting values.
	 */
	*ka->usedp += bp_get_dasize(spa, bp);
	*ka->compressedp += BP_GET_PSIZE(bp);
	*ka->uncompressedp += BP_GET_UCSIZE(bp);
	/* XXX check for EIO? */
	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
	    ARC_NOWAIT);
	return (0);
}

/*
 * Sync-task check func for rollback: require a previous snapshot to
 * roll back to, require that this is not itself a snapshot, and retry
 * (EAGAIN) if the dataset was modified in the current txg.
 */
/* ARGSUSED */
static int
dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/*
	 * There must be a previous snapshot.  I suppose we could roll
	 * it back to being empty (and re-initialize the upper (ZPL)
	 * layer).  But for now there's no way to do this via the user
	 * interface.
	 */
	if (ds->ds_phys->ds_prev_snap_txg == 0)
		return (EINVAL);

	/*
	 * This must not be a snapshot.
	 */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	return (0);
}

/*
 * Sync-task func for rollback: discard everything done since the
 * previous snapshot — reset the deadlist, free all blocks born after
 * the snapshot, and copy the snapshot's bp, space accounting, and
 * flags back into this dataset.
 */
/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/* Free blkptrs that we gave birth to */
		zio_t *zio;
		uint64_t used = 0, compressed = 0, uncompressed = 0;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);

		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	}

	/* Change our contents to that of the prev snapshot */
	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
	ds->ds_phys->ds_compressed_bytes =
	    ds->ds_prev->ds_phys->ds_compressed_bytes;
	ds->ds_phys->ds_uncompressed_bytes =
	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
	ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
	ds->ds_phys->ds_unique_bytes = 0;

	/* the snapshot's blocks are now all shared with us again */
	if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
		ds->ds_prev->ds_phys->ds_unique_bytes = 0;
	}
}

/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	/*
	 * Check func for the "destroy begin" sync task (see
	 * dsl_dataset_destroy()): refuse to start destroying a head
	 * dataset that still has snapshots of its own.
	 */
	dsl_dataset_t *ds = arg1;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	return (0);
}

/*
 * Sync func for the "destroy begin" task: persist DS_FLAG_INCONSISTENT
 * before the objects are freed in open context, so a crash mid-destroy
 * leaves the dataset visibly inconsistent.
 */
/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
}

/*
 * Check func for the final destroy sync task (paired with
 * dsl_dataset_destroy_sync()).
 */
/* ARGSUSED */
static int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* Can't delete a branch point. */
	if (ds->ds_phys->ds_num_children > 1)
		return (EEXIST);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	/* XXX we should do some i/o error checking...
*/ 1078 return (0); 1079 } 1080 1081 static void 1082 dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) 1083 { 1084 dsl_dataset_t *ds = arg1; 1085 uint64_t used = 0, compressed = 0, uncompressed = 0; 1086 zio_t *zio; 1087 int err; 1088 int after_branch_point = FALSE; 1089 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1090 objset_t *mos = dp->dp_meta_objset; 1091 dsl_dataset_t *ds_prev = NULL; 1092 uint64_t obj; 1093 1094 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 1095 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 1096 ASSERT(ds->ds_prev == NULL || 1097 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1098 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1099 1100 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1101 1102 obj = ds->ds_object; 1103 1104 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1105 if (ds->ds_prev) { 1106 ds_prev = ds->ds_prev; 1107 } else { 1108 VERIFY(0 == dsl_dataset_open_obj(dp, 1109 ds->ds_phys->ds_prev_snap_obj, NULL, 1110 DS_MODE_NONE, FTAG, &ds_prev)); 1111 } 1112 after_branch_point = 1113 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1114 1115 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1116 if (after_branch_point && 1117 ds->ds_phys->ds_next_snap_obj == 0) { 1118 /* This clone is toast. 
*/ 1119 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1120 ds_prev->ds_phys->ds_num_children--; 1121 } else if (!after_branch_point) { 1122 ds_prev->ds_phys->ds_next_snap_obj = 1123 ds->ds_phys->ds_next_snap_obj; 1124 } 1125 } 1126 1127 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1128 1129 if (ds->ds_phys->ds_next_snap_obj != 0) { 1130 blkptr_t bp; 1131 dsl_dataset_t *ds_next; 1132 uint64_t itor = 0; 1133 1134 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1135 1136 VERIFY(0 == dsl_dataset_open_obj(dp, 1137 ds->ds_phys->ds_next_snap_obj, NULL, 1138 DS_MODE_NONE, FTAG, &ds_next)); 1139 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1140 1141 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1142 ds_next->ds_phys->ds_prev_snap_obj = 1143 ds->ds_phys->ds_prev_snap_obj; 1144 ds_next->ds_phys->ds_prev_snap_txg = 1145 ds->ds_phys->ds_prev_snap_txg; 1146 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1147 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1148 1149 /* 1150 * Transfer to our deadlist (which will become next's 1151 * new deadlist) any entries from next's current 1152 * deadlist which were born before prev, and free the 1153 * other entries. 1154 * 1155 * XXX we're doing this long task with the config lock held 1156 */ 1157 while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1158 &bp) == 0) { 1159 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1160 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1161 &bp, tx)); 1162 if (ds_prev && !after_branch_point && 1163 bp.blk_birth > 1164 ds_prev->ds_phys->ds_prev_snap_txg) { 1165 ds_prev->ds_phys->ds_unique_bytes += 1166 bp_get_dasize(dp->dp_spa, &bp); 1167 } 1168 } else { 1169 used += bp_get_dasize(dp->dp_spa, &bp); 1170 compressed += BP_GET_PSIZE(&bp); 1171 uncompressed += BP_GET_UCSIZE(&bp); 1172 /* XXX check return value? 
*/ 1173 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1174 &bp, NULL, NULL, ARC_NOWAIT); 1175 } 1176 } 1177 1178 /* free next's deadlist */ 1179 bplist_close(&ds_next->ds_deadlist); 1180 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1181 1182 /* set next's deadlist to our deadlist */ 1183 ds_next->ds_phys->ds_deadlist_obj = 1184 ds->ds_phys->ds_deadlist_obj; 1185 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1186 ds_next->ds_phys->ds_deadlist_obj)); 1187 ds->ds_phys->ds_deadlist_obj = 0; 1188 1189 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1190 /* 1191 * Update next's unique to include blocks which 1192 * were previously shared by only this snapshot 1193 * and it. Those blocks will be born after the 1194 * prev snap and before this snap, and will have 1195 * died after the next snap and before the one 1196 * after that (ie. be on the snap after next's 1197 * deadlist). 1198 * 1199 * XXX we're doing this long task with the 1200 * config lock held 1201 */ 1202 dsl_dataset_t *ds_after_next; 1203 1204 VERIFY(0 == dsl_dataset_open_obj(dp, 1205 ds_next->ds_phys->ds_next_snap_obj, NULL, 1206 DS_MODE_NONE, FTAG, &ds_after_next)); 1207 itor = 0; 1208 while (bplist_iterate(&ds_after_next->ds_deadlist, 1209 &itor, &bp) == 0) { 1210 if (bp.blk_birth > 1211 ds->ds_phys->ds_prev_snap_txg && 1212 bp.blk_birth <= 1213 ds->ds_phys->ds_creation_txg) { 1214 ds_next->ds_phys->ds_unique_bytes += 1215 bp_get_dasize(dp->dp_spa, &bp); 1216 } 1217 } 1218 1219 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1220 ASSERT3P(ds_next->ds_prev, ==, NULL); 1221 } else { 1222 /* 1223 * It would be nice to update the head dataset's 1224 * unique. To do so we would have to traverse 1225 * it for blocks born after ds_prev, which is 1226 * pretty expensive just to maintain something 1227 * for debugging purposes. 
1228 */ 1229 ASSERT3P(ds_next->ds_prev, ==, ds); 1230 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1231 ds_next); 1232 if (ds_prev) { 1233 VERIFY(0 == dsl_dataset_open_obj(dp, 1234 ds->ds_phys->ds_prev_snap_obj, NULL, 1235 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); 1236 } else { 1237 ds_next->ds_prev = NULL; 1238 } 1239 } 1240 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1241 1242 /* 1243 * NB: unique_bytes is not accurate for head objsets 1244 * because we don't update it when we delete the most 1245 * recent snapshot -- see above comment. 1246 */ 1247 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1248 } else { 1249 /* 1250 * There's no next snapshot, so this is a head dataset. 1251 * Destroy the deadlist. Unless it's a clone, the 1252 * deadlist should be empty. (If it's a clone, it's 1253 * safe to ignore the deadlist contents.) 1254 */ 1255 struct killarg ka; 1256 1257 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1258 bplist_close(&ds->ds_deadlist); 1259 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1260 ds->ds_phys->ds_deadlist_obj = 0; 1261 1262 /* 1263 * Free everything that we point to (that's born after 1264 * the previous snapshot, if we are a clone) 1265 * 1266 * XXX we're doing this long task with the config lock held 1267 */ 1268 ka.usedp = &used; 1269 ka.compressedp = &compressed; 1270 ka.uncompressedp = &uncompressed; 1271 ka.zio = zio; 1272 ka.tx = tx; 1273 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1274 ADVANCE_POST, kill_blkptr, &ka); 1275 ASSERT3U(err, ==, 0); 1276 } 1277 1278 err = zio_wait(zio); 1279 ASSERT3U(err, ==, 0); 1280 1281 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); 1282 1283 if (ds->ds_phys->ds_snapnames_zapobj) { 1284 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1285 ASSERT(err == 0); 1286 } 1287 1288 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1289 /* Erase the link in the dataset */ 1290 
dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1291 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1292 /* 1293 * dsl_dir_sync_destroy() called us, they'll destroy 1294 * the dataset. 1295 */ 1296 } else { 1297 /* remove from snapshot namespace */ 1298 dsl_dataset_t *ds_head; 1299 VERIFY(0 == dsl_dataset_open_obj(dp, 1300 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, 1301 DS_MODE_NONE, FTAG, &ds_head)); 1302 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1303 #ifdef ZFS_DEBUG 1304 { 1305 uint64_t val; 1306 err = zap_lookup(mos, 1307 ds_head->ds_phys->ds_snapnames_zapobj, 1308 ds->ds_snapname, 8, 1, &val); 1309 ASSERT3U(err, ==, 0); 1310 ASSERT3U(val, ==, obj); 1311 } 1312 #endif 1313 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1314 ds->ds_snapname, tx); 1315 ASSERT(err == 0); 1316 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1317 } 1318 1319 if (ds_prev && ds->ds_prev != ds_prev) 1320 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1321 1322 spa_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1323 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 1324 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1325 1326 } 1327 1328 /* ARGSUSED */ 1329 int 1330 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1331 { 1332 objset_t *os = arg1; 1333 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1334 const char *snapname = arg2; 1335 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1336 int err; 1337 uint64_t value; 1338 1339 /* 1340 * We don't allow multiple snapshots of the same txg. If there 1341 * is already one, try again. 1342 */ 1343 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1344 return (EAGAIN); 1345 1346 /* 1347 * Check for conflicting name snapshot name. 1348 */ 1349 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1350 snapname, 8, 1, &value); 1351 if (err == 0) 1352 return (EEXIST); 1353 if (err != ENOENT) 1354 return (err); 1355 1356 /* 1357 * Check that the dataset's name is not too long. 
Name consists 1358 * of the dataset's length + 1 for the @-sign + snapshot name's length 1359 */ 1360 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 1361 return (ENAMETOOLONG); 1362 1363 ds->ds_trysnap_txg = tx->tx_txg; 1364 return (0); 1365 } 1366 1367 void 1368 dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1369 { 1370 objset_t *os = arg1; 1371 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1372 const char *snapname = arg2; 1373 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1374 dmu_buf_t *dbuf; 1375 dsl_dataset_phys_t *dsphys; 1376 uint64_t dsobj; 1377 objset_t *mos = dp->dp_meta_objset; 1378 int err; 1379 1380 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1381 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1382 1383 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1384 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1385 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1386 dmu_buf_will_dirty(dbuf, tx); 1387 dsphys = dbuf->db_data; 1388 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1389 dsphys->ds_fsid_guid = unique_create(); 1390 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 1391 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1392 sizeof (dsphys->ds_guid)); 1393 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1394 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1395 dsphys->ds_next_snap_obj = ds->ds_object; 1396 dsphys->ds_num_children = 1; 1397 dsphys->ds_creation_time = gethrestime_sec(); 1398 dsphys->ds_creation_txg = tx->tx_txg; 1399 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1400 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1401 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1402 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1403 dsphys->ds_flags = ds->ds_phys->ds_flags; 1404 dsphys->ds_bp = ds->ds_phys->ds_bp; 1405 dmu_buf_rele(dbuf, FTAG); 1406 1407 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 
1408 if (ds->ds_prev) { 1409 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1410 ds->ds_object || 1411 ds->ds_prev->ds_phys->ds_num_children > 1); 1412 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1413 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1414 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1415 ds->ds_prev->ds_phys->ds_creation_txg); 1416 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1417 } 1418 } 1419 1420 bplist_close(&ds->ds_deadlist); 1421 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1422 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); 1423 ds->ds_phys->ds_prev_snap_obj = dsobj; 1424 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; 1425 ds->ds_phys->ds_unique_bytes = 0; 1426 ds->ds_phys->ds_deadlist_obj = 1427 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1428 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1429 ds->ds_phys->ds_deadlist_obj)); 1430 1431 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1432 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1433 snapname, 8, 1, &dsobj, tx); 1434 ASSERT(err == 0); 1435 1436 if (ds->ds_prev) 1437 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1438 VERIFY(0 == dsl_dataset_open_obj(dp, 1439 ds->ds_phys->ds_prev_snap_obj, snapname, 1440 DS_MODE_NONE, ds, &ds->ds_prev)); 1441 } 1442 1443 void 1444 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1445 { 1446 ASSERT(dmu_tx_is_syncing(tx)); 1447 ASSERT(ds->ds_user_ptr != NULL); 1448 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1449 1450 dsl_dir_dirty(ds->ds_dir, tx); 1451 dmu_objset_sync(ds->ds_user_ptr, zio, tx); 1452 /* Unneeded? 
bplist_close(&ds->ds_deadlist); */ 1453 } 1454 1455 void 1456 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1457 { 1458 dsl_dir_stats(ds->ds_dir, nv); 1459 1460 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1461 ds->ds_phys->ds_creation_time); 1462 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1463 ds->ds_phys->ds_creation_txg); 1464 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, 1465 ds->ds_phys->ds_used_bytes); 1466 1467 if (ds->ds_phys->ds_next_snap_obj) { 1468 /* 1469 * This is a snapshot; override the dd's space used with 1470 * our unique space and compression ratio. 1471 */ 1472 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1473 ds->ds_phys->ds_unique_bytes); 1474 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 1475 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1476 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1477 ds->ds_phys->ds_compressed_bytes)); 1478 } 1479 } 1480 1481 void 1482 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1483 { 1484 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1485 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1486 if (ds->ds_phys->ds_next_snap_obj) { 1487 stat->dds_is_snapshot = B_TRUE; 1488 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1489 } 1490 1491 /* clone origin is really a dsl_dir thing... 
*/ 1492 if (ds->ds_dir->dd_phys->dd_clone_parent_obj) { 1493 dsl_dataset_t *ods; 1494 1495 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 1496 VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, 1497 ds->ds_dir->dd_phys->dd_clone_parent_obj, 1498 NULL, DS_MODE_NONE, FTAG, &ods)); 1499 dsl_dataset_name(ods, stat->dds_clone_of); 1500 dsl_dataset_close(ods, DS_MODE_NONE, FTAG); 1501 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 1502 } 1503 } 1504 1505 uint64_t 1506 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1507 { 1508 return (ds->ds_phys->ds_fsid_guid); 1509 } 1510 1511 void 1512 dsl_dataset_space(dsl_dataset_t *ds, 1513 uint64_t *refdbytesp, uint64_t *availbytesp, 1514 uint64_t *usedobjsp, uint64_t *availobjsp) 1515 { 1516 *refdbytesp = ds->ds_phys->ds_used_bytes; 1517 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1518 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1519 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1520 } 1521 1522 /* ARGSUSED */ 1523 static int 1524 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 1525 { 1526 dsl_dataset_t *ds = arg1; 1527 char *newsnapname = arg2; 1528 dsl_dir_t *dd = ds->ds_dir; 1529 objset_t *mos = dd->dd_pool->dp_meta_objset; 1530 dsl_dataset_t *hds; 1531 uint64_t val; 1532 int err; 1533 1534 err = dsl_dataset_open_obj(dd->dd_pool, 1535 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); 1536 if (err) 1537 return (err); 1538 1539 /* new name better not be in use */ 1540 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, 1541 newsnapname, 8, 1, &val); 1542 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1543 1544 if (err == 0) 1545 err = EEXIST; 1546 else if (err == ENOENT) 1547 err = 0; 1548 1549 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1550 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 1551 err = ENAMETOOLONG; 1552 1553 return (err); 1554 } 1555 1556 static void 1557 dsl_dataset_snapshot_rename_sync(void 
*arg1, void *arg2, dmu_tx_t *tx) 1558 { 1559 dsl_dataset_t *ds = arg1; 1560 char *newsnapname = arg2; 1561 dsl_dir_t *dd = ds->ds_dir; 1562 objset_t *mos = dd->dd_pool->dp_meta_objset; 1563 dsl_dataset_t *hds; 1564 int err; 1565 1566 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 1567 1568 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1569 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); 1570 1571 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1572 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, 1573 ds->ds_snapname, tx); 1574 ASSERT3U(err, ==, 0); 1575 mutex_enter(&ds->ds_lock); 1576 (void) strcpy(ds->ds_snapname, newsnapname); 1577 mutex_exit(&ds->ds_lock); 1578 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 1579 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 1580 ASSERT3U(err, ==, 0); 1581 1582 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1583 } 1584 1585 struct renamearg { 1586 dsl_sync_task_group_t *dstg; 1587 char failed[MAXPATHLEN]; 1588 char *oldsnap; 1589 char *newsnap; 1590 }; 1591 1592 static int 1593 dsl_snapshot_rename_one(char *name, void *arg) 1594 { 1595 struct renamearg *ra = arg; 1596 dsl_dataset_t *ds = NULL; 1597 char *cp; 1598 int err; 1599 1600 cp = name + strlen(name); 1601 *cp = '@'; 1602 (void) strcpy(cp + 1, ra->oldsnap); 1603 err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD, 1604 ra->dstg, &ds); 1605 if (err == ENOENT) { 1606 *cp = '\0'; 1607 return (0); 1608 } 1609 if (err) { 1610 (void) strcpy(ra->failed, name); 1611 *cp = '\0'; 1612 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1613 return (err); 1614 } 1615 1616 #ifdef _KERNEL 1617 /* for all filesystems undergoing rename, we'll need to unmount it */ 1618 (void) zfs_unmount_snap(name, NULL); 1619 #endif 1620 1621 *cp = '\0'; 1622 1623 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 1624 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 1625 1626 return (0); 1627 } 1628 1629 static int 1630 dsl_recursive_rename(char 
*oldname, const char *newname) 1631 { 1632 int err; 1633 struct renamearg *ra; 1634 dsl_sync_task_t *dst; 1635 spa_t *spa; 1636 char *cp, *fsname = spa_strdup(oldname); 1637 int len = strlen(oldname); 1638 1639 /* truncate the snapshot name to get the fsname */ 1640 cp = strchr(fsname, '@'); 1641 *cp = '\0'; 1642 1643 cp = strchr(fsname, '/'); 1644 if (cp) { 1645 *cp = '\0'; 1646 err = spa_open(fsname, &spa, FTAG); 1647 *cp = '/'; 1648 } else { 1649 err = spa_open(fsname, &spa, FTAG); 1650 } 1651 if (err) { 1652 kmem_free(fsname, len + 1); 1653 return (err); 1654 } 1655 ra = kmem_alloc(sizeof (struct renamearg), KM_SLEEP); 1656 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 1657 1658 ra->oldsnap = strchr(oldname, '@') + 1; 1659 ra->newsnap = strchr(newname, '@') + 1; 1660 *ra->failed = '\0'; 1661 1662 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 1663 DS_FIND_CHILDREN); 1664 kmem_free(fsname, len + 1); 1665 1666 if (err == 0) { 1667 err = dsl_sync_task_group_wait(ra->dstg); 1668 } 1669 1670 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 1671 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 1672 dsl_dataset_t *ds = dst->dst_arg1; 1673 if (dst->dst_err) { 1674 dsl_dir_name(ds->ds_dir, ra->failed); 1675 (void) strcat(ra->failed, "@"); 1676 (void) strcat(ra->failed, ra->newsnap); 1677 } 1678 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1679 } 1680 1681 (void) strcpy(oldname, ra->failed); 1682 1683 dsl_sync_task_group_destroy(ra->dstg); 1684 kmem_free(ra, sizeof (struct renamearg)); 1685 spa_close(spa, FTAG); 1686 return (err); 1687 } 1688 1689 #pragma weak dmu_objset_rename = dsl_dataset_rename 1690 int 1691 dsl_dataset_rename(char *oldname, const char *newname, 1692 boolean_t recursive) 1693 { 1694 dsl_dir_t *dd; 1695 dsl_dataset_t *ds; 1696 const char *tail; 1697 int err; 1698 1699 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 1700 if (err) 1701 return (err); 1702 if (tail == NULL) { 1703 err = dsl_dir_rename(dd, newname); 1704 
dsl_dir_close(dd, FTAG); 1705 return (err); 1706 } 1707 if (tail[0] != '@') { 1708 /* the name ended in a nonexistant component */ 1709 dsl_dir_close(dd, FTAG); 1710 return (ENOENT); 1711 } 1712 1713 dsl_dir_close(dd, FTAG); 1714 1715 /* new name must be snapshot in same filesystem */ 1716 tail = strchr(newname, '@'); 1717 if (tail == NULL) 1718 return (EINVAL); 1719 tail++; 1720 if (strncmp(oldname, newname, tail - newname) != 0) 1721 return (EXDEV); 1722 1723 if (recursive) { 1724 err = dsl_recursive_rename(oldname, newname); 1725 } else { 1726 err = dsl_dataset_open(oldname, 1727 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); 1728 if (err) 1729 return (err); 1730 1731 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1732 dsl_dataset_snapshot_rename_check, 1733 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 1734 1735 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 1736 } 1737 1738 return (err); 1739 } 1740 1741 struct promotearg { 1742 uint64_t used, comp, uncomp, unique; 1743 uint64_t newnext_obj, snapnames_obj; 1744 }; 1745 1746 static int 1747 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 1748 { 1749 dsl_dataset_t *hds = arg1; 1750 struct promotearg *pa = arg2; 1751 dsl_dir_t *dd = hds->ds_dir; 1752 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1753 dsl_dir_t *pdd = NULL; 1754 dsl_dataset_t *ds = NULL; 1755 dsl_dataset_t *pivot_ds = NULL; 1756 dsl_dataset_t *newnext_ds = NULL; 1757 int err; 1758 char *name = NULL; 1759 uint64_t itor = 0; 1760 blkptr_t bp; 1761 1762 bzero(pa, sizeof (*pa)); 1763 1764 /* Check that it is a clone */ 1765 if (dd->dd_phys->dd_clone_parent_obj == 0) 1766 return (EINVAL); 1767 1768 /* Since this is so expensive, don't do the preliminary check */ 1769 if (!dmu_tx_is_syncing(tx)) 1770 return (0); 1771 1772 if (err = dsl_dataset_open_obj(dp, 1773 dd->dd_phys->dd_clone_parent_obj, 1774 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) 1775 goto out; 1776 pdd = pivot_ds->ds_dir; 1777 1778 { 1779 dsl_dataset_t *phds; 1780 if (err 
= dsl_dataset_open_obj(dd->dd_pool, 1781 pdd->dd_phys->dd_head_dataset_obj, 1782 NULL, DS_MODE_NONE, FTAG, &phds)) 1783 goto out; 1784 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; 1785 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); 1786 } 1787 1788 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 1789 err = EXDEV; 1790 goto out; 1791 } 1792 1793 /* find pivot point's new next ds */ 1794 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, 1795 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); 1796 while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { 1797 dsl_dataset_t *prev; 1798 1799 if (err = dsl_dataset_open_obj(dd->dd_pool, 1800 newnext_ds->ds_phys->ds_prev_snap_obj, 1801 NULL, DS_MODE_NONE, FTAG, &prev)) 1802 goto out; 1803 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1804 newnext_ds = prev; 1805 } 1806 pa->newnext_obj = newnext_ds->ds_object; 1807 1808 /* compute pivot point's new unique space */ 1809 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, 1810 &itor, &bp)) == 0) { 1811 if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) 1812 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); 1813 } 1814 if (err != ENOENT) 1815 goto out; 1816 1817 /* Walk the snapshots that we are moving */ 1818 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1819 ds = pivot_ds; 1820 /* CONSTCOND */ 1821 while (TRUE) { 1822 uint64_t val, dlused, dlcomp, dluncomp; 1823 dsl_dataset_t *prev; 1824 1825 /* Check that the snapshot name does not conflict */ 1826 dsl_dataset_name(ds, name); 1827 err = zap_lookup(dd->dd_pool->dp_meta_objset, 1828 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1829 8, 1, &val); 1830 if (err != ENOENT) { 1831 if (err == 0) 1832 err = EEXIST; 1833 goto out; 1834 } 1835 1836 /* 1837 * compute space to transfer. 
Each snapshot gave birth to: 1838 * (my used) - (prev's used) + (deadlist's used) 1839 */ 1840 pa->used += ds->ds_phys->ds_used_bytes; 1841 pa->comp += ds->ds_phys->ds_compressed_bytes; 1842 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; 1843 1844 /* If we reach the first snapshot, we're done. */ 1845 if (ds->ds_phys->ds_prev_snap_obj == 0) 1846 break; 1847 1848 if (err = bplist_space(&ds->ds_deadlist, 1849 &dlused, &dlcomp, &dluncomp)) 1850 goto out; 1851 if (err = dsl_dataset_open_obj(dd->dd_pool, 1852 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1853 FTAG, &prev)) 1854 goto out; 1855 pa->used += dlused - prev->ds_phys->ds_used_bytes; 1856 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; 1857 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; 1858 1859 /* 1860 * We could be a clone of a clone. If we reach our 1861 * parent's branch point, we're done. 1862 */ 1863 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1864 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1865 break; 1866 } 1867 if (ds != pivot_ds) 1868 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1869 ds = prev; 1870 } 1871 1872 /* Check that there is enough space here */ 1873 err = dsl_dir_transfer_possible(pdd, dd, pa->used); 1874 1875 out: 1876 if (ds && ds != pivot_ds) 1877 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1878 if (pivot_ds) 1879 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 1880 if (newnext_ds) 1881 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1882 if (name) 1883 kmem_free(name, MAXPATHLEN); 1884 return (err); 1885 } 1886 1887 static void 1888 dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1889 { 1890 dsl_dataset_t *hds = arg1; 1891 struct promotearg *pa = arg2; 1892 dsl_dir_t *dd = hds->ds_dir; 1893 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1894 dsl_dir_t *pdd = NULL; 1895 dsl_dataset_t *ds, *pivot_ds; 1896 char *name; 1897 1898 ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); 1899 ASSERT(0 == 
(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 1900 1901 VERIFY(0 == dsl_dataset_open_obj(dp, 1902 dd->dd_phys->dd_clone_parent_obj, 1903 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); 1904 /* 1905 * We need to explicitly open pdd, since pivot_ds's pdd will be 1906 * changing. 1907 */ 1908 VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object, 1909 NULL, FTAG, &pdd)); 1910 1911 /* move snapshots to this dir */ 1912 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1913 ds = pivot_ds; 1914 /* CONSTCOND */ 1915 while (TRUE) { 1916 dsl_dataset_t *prev; 1917 1918 /* move snap name entry */ 1919 dsl_dataset_name(ds, name); 1920 VERIFY(0 == zap_remove(dp->dp_meta_objset, 1921 pa->snapnames_obj, ds->ds_snapname, tx)); 1922 VERIFY(0 == zap_add(dp->dp_meta_objset, 1923 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1924 8, 1, &ds->ds_object, tx)); 1925 1926 /* change containing dsl_dir */ 1927 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1928 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); 1929 ds->ds_phys->ds_dir_obj = dd->dd_object; 1930 ASSERT3P(ds->ds_dir, ==, pdd); 1931 dsl_dir_close(ds->ds_dir, ds); 1932 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 1933 NULL, ds, &ds->ds_dir)); 1934 1935 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 1936 1937 if (ds->ds_phys->ds_prev_snap_obj == 0) 1938 break; 1939 1940 VERIFY(0 == dsl_dataset_open_obj(dp, 1941 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1942 FTAG, &prev)); 1943 1944 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1945 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1946 break; 1947 } 1948 if (ds != pivot_ds) 1949 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1950 ds = prev; 1951 } 1952 if (ds != pivot_ds) 1953 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1954 1955 /* change pivot point's next snap */ 1956 dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); 1957 pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; 1958 1959 /* change clone_parent-age */ 1960 dmu_buf_will_dirty(dd->dd_dbuf, tx); 1961 
ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); 1962 dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; 1963 dmu_buf_will_dirty(pdd->dd_dbuf, tx); 1964 pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; 1965 1966 /* change space accounting */ 1967 dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); 1968 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); 1969 pivot_ds->ds_phys->ds_unique_bytes = pa->unique; 1970 1971 dsl_dir_close(pdd, FTAG); 1972 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 1973 kmem_free(name, MAXPATHLEN); 1974 } 1975 1976 int 1977 dsl_dataset_promote(const char *name) 1978 { 1979 dsl_dataset_t *ds; 1980 int err; 1981 dmu_object_info_t doi; 1982 struct promotearg pa; 1983 1984 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 1985 if (err) 1986 return (err); 1987 1988 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 1989 ds->ds_phys->ds_snapnames_zapobj, &doi); 1990 if (err) { 1991 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1992 return (err); 1993 } 1994 1995 /* 1996 * Add in 128x the snapnames zapobj size, since we will be moving 1997 * a bunch of snapnames to the promoted ds, and dirtying their 1998 * bonus buffers. 1999 */ 2000 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2001 dsl_dataset_promote_check, 2002 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); 2003 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2004 return (err); 2005 } 2006 2007 /* 2008 * Given a pool name and a dataset object number in that pool, 2009 * return the name of that dataset. 
2010 */ 2011 int 2012 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2013 { 2014 spa_t *spa; 2015 dsl_pool_t *dp; 2016 dsl_dataset_t *ds = NULL; 2017 int error; 2018 2019 if ((error = spa_open(pname, &spa, FTAG)) != 0) 2020 return (error); 2021 dp = spa_get_dsl(spa); 2022 rw_enter(&dp->dp_config_rwlock, RW_READER); 2023 if ((error = dsl_dataset_open_obj(dp, obj, 2024 NULL, DS_MODE_NONE, FTAG, &ds)) != 0) { 2025 rw_exit(&dp->dp_config_rwlock); 2026 spa_close(spa, FTAG); 2027 return (error); 2028 } 2029 dsl_dataset_name(ds, buf); 2030 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2031 rw_exit(&dp->dp_config_rwlock); 2032 spa_close(spa, FTAG); 2033 2034 return (0); 2035 } 2036