1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/dmu_objset.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_prop.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dmu_traverse.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zio.h> 37 #include <sys/zap.h> 38 #include <sys/unique.h> 39 #include <sys/zfs_context.h> 40 #include <sys/zfs_ioctl.h> 41 #include <sys/spa.h> 42 #include <sys/sunddi.h> 43 44 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 45 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 46 static dsl_checkfunc_t dsl_dataset_rollback_check; 47 static dsl_syncfunc_t dsl_dataset_rollback_sync; 48 static dsl_checkfunc_t dsl_dataset_destroy_check; 49 static dsl_syncfunc_t dsl_dataset_destroy_sync; 50 51 #define DS_REF_MAX (1ULL << 62) 52 53 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 54 55 /* 56 * We use weighted reference counts to express the various forms of exclusion 57 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open 58 * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. 59 * This makes the exclusion logic simple: the total refcnt for all opens cannot 60 * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their 61 * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume 62 * just over half of the refcnt space, so there can't be more than one, but it 63 * can peacefully coexist with any number of STANDARD opens. 64 */ 65 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { 66 0, /* DS_MODE_NONE - invalid */ 67 1, /* DS_MODE_STANDARD - unlimited number */ 68 (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */ 69 DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */ 70 }; 71 72 73 void 74 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 75 { 76 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 77 int compressed = BP_GET_PSIZE(bp); 78 int uncompressed = BP_GET_UCSIZE(bp); 79 80 dprintf_bp(bp, "born, ds=%p\n", ds); 81 82 ASSERT(dmu_tx_is_syncing(tx)); 83 /* It could have been compressed away to nothing */ 84 if (BP_IS_HOLE(bp)) 85 return; 86 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 87 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 88 if (ds == NULL) { 89 /* 90 * Account for the meta-objset space in its placeholder 91 * dsl_dir. 92 */ 93 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 94 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 95 used, compressed, uncompressed, tx); 96 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 97 return; 98 } 99 dmu_buf_will_dirty(ds->ds_dbuf, tx); 100 mutex_enter(&ds->ds_lock); 101 ds->ds_phys->ds_used_bytes += used; 102 ds->ds_phys->ds_compressed_bytes += compressed; 103 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 104 ds->ds_phys->ds_unique_bytes += used; 105 mutex_exit(&ds->ds_lock); 106 dsl_dir_diduse_space(ds->ds_dir, 107 used, compressed, uncompressed, tx); 108 } 109 110 void 111 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 112 dmu_tx_t *tx) 113 { 114 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 115 int compressed = BP_GET_PSIZE(bp); 116 int uncompressed = BP_GET_UCSIZE(bp); 117 118 ASSERT(dmu_tx_is_syncing(tx)); 119 /* No block pointer => nothing to free */ 120 if (BP_IS_HOLE(bp)) 121 return; 122 123 ASSERT(used > 0); 124 if (ds == NULL) { 125 int err; 126 /* 127 * Account for the meta-objset space in its placeholder 128 * dataset. 129 */ 130 err = arc_free(pio, tx->tx_pool->dp_spa, 131 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 132 ASSERT(err == 0); 133 134 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 135 -used, -compressed, -uncompressed, tx); 136 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 137 return; 138 } 139 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 140 141 dmu_buf_will_dirty(ds->ds_dbuf, tx); 142 143 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 144 int err; 145 146 dprintf_bp(bp, "freeing: %s", ""); 147 err = arc_free(pio, tx->tx_pool->dp_spa, 148 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 149 ASSERT(err == 0); 150 151 mutex_enter(&ds->ds_lock); 152 /* XXX unique_bytes is not accurate for head datasets */ 153 /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */ 154 ds->ds_phys->ds_unique_bytes -= used; 155 mutex_exit(&ds->ds_lock); 156 dsl_dir_diduse_space(ds->ds_dir, 157 -used, -compressed, -uncompressed, tx); 158 } else { 159 dprintf_bp(bp, "putting on dead list: %s", ""); 160 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 161 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 162 if (ds->ds_phys->ds_prev_snap_obj != 0) { 163 ASSERT3U(ds->ds_prev->ds_object, ==, 164 ds->ds_phys->ds_prev_snap_obj); 165 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 166 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 167 ds->ds_object && bp->blk_birth > 168 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 169 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 170 mutex_enter(&ds->ds_prev->ds_lock); 171 ds->ds_prev->ds_phys->ds_unique_bytes += 172 used; 173 mutex_exit(&ds->ds_prev->ds_lock); 174 } 175 } 176 } 177 mutex_enter(&ds->ds_lock); 178 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 179 ds->ds_phys->ds_used_bytes -= used; 180 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 181 ds->ds_phys->ds_compressed_bytes -= compressed; 182 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 183 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 184 mutex_exit(&ds->ds_lock); 185 } 186 187 uint64_t 188 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 189 { 190 uint64_t trysnap = 0; 191 192 if (ds == NULL) 193 return (0); 194 /* 195 * The snapshot creation could fail, but that would cause an 196 * incorrect FALSE return, which would only result in an 197 * overestimation of the amount of space that an operation would 198 * consume, which is OK. 199 * 200 * There's also a small window where we could miss a pending 201 * snapshot, because we could set the sync task in the quiescing 202 * phase. So this should only be used as a guess. 203 */ 204 if (ds->ds_trysnap_txg > 205 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 206 trysnap = ds->ds_trysnap_txg; 207 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 208 } 209 210 int 211 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 212 { 213 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 214 } 215 216 /* ARGSUSED */ 217 static void 218 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 219 { 220 dsl_dataset_t *ds = dsv; 221 222 /* open_refcount == DS_REF_MAX when deleting */ 223 ASSERT(ds->ds_open_refcount == 0 || 224 ds->ds_open_refcount == DS_REF_MAX); 225 226 dprintf_ds(ds, "evicting %s\n", ""); 227 228 unique_remove(ds->ds_fsid_guid); 229 230 if (ds->ds_user_ptr != NULL) 231 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 232 233 if (ds->ds_prev) { 234 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 235 ds->ds_prev = NULL; 236 } 237 238 bplist_close(&ds->ds_deadlist); 239 dsl_dir_close(ds->ds_dir, ds); 240 241 ASSERT(!list_link_active(&ds->ds_synced_link)); 242 243 mutex_destroy(&ds->ds_lock); 244 mutex_destroy(&ds->ds_opening_lock); 245 mutex_destroy(&ds->ds_deadlist.bpl_lock); 246 247 kmem_free(ds, sizeof (dsl_dataset_t)); 248 } 249 250 static int 251 dsl_dataset_get_snapname(dsl_dataset_t *ds) 252 { 253 dsl_dataset_phys_t *headphys; 254 int err; 255 dmu_buf_t *headdbuf; 256 dsl_pool_t *dp = ds->ds_dir->dd_pool; 257 objset_t *mos = dp->dp_meta_objset; 258 259 if (ds->ds_snapname[0]) 260 return (0); 261 if (ds->ds_phys->ds_next_snap_obj == 0) 262 return (0); 263 264 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 265 FTAG, &headdbuf); 266 if (err) 267 return (err); 268 headphys = headdbuf->db_data; 269 err = zap_value_search(dp->dp_meta_objset, 270 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 271 dmu_buf_rele(headdbuf, FTAG); 272 return (err); 273 } 274 275 int 276 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, 277 int mode, void *tag, dsl_dataset_t **dsp) 278 { 279 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 280 objset_t *mos = dp->dp_meta_objset; 281 dmu_buf_t *dbuf; 282 dsl_dataset_t *ds; 283 int err; 284 285 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 286 dsl_pool_sync_context(dp)); 287 288 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 289 if (err) 290 return (err); 291 ds = dmu_buf_get_user(dbuf); 292 if (ds == NULL) { 293 dsl_dataset_t *winner; 294 295 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 296 ds->ds_dbuf = dbuf; 297 ds->ds_object = dsobj; 298 ds->ds_phys = dbuf->db_data; 299 300 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 301 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 302 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, 303 NULL); 304 305 err = bplist_open(&ds->ds_deadlist, 306 mos, ds->ds_phys->ds_deadlist_obj); 307 if (err == 0) { 308 err = dsl_dir_open_obj(dp, 309 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 310 } 311 if (err) { 312 /* 313 * we don't really need to close the blist if we 314 * just opened it. 315 */ 316 mutex_destroy(&ds->ds_lock); 317 mutex_destroy(&ds->ds_opening_lock); 318 mutex_destroy(&ds->ds_deadlist.bpl_lock); 319 kmem_free(ds, sizeof (dsl_dataset_t)); 320 dmu_buf_rele(dbuf, tag); 321 return (err); 322 } 323 324 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { 325 ds->ds_snapname[0] = '\0'; 326 if (ds->ds_phys->ds_prev_snap_obj) { 327 err = dsl_dataset_open_obj(dp, 328 ds->ds_phys->ds_prev_snap_obj, NULL, 329 DS_MODE_NONE, ds, &ds->ds_prev); 330 } 331 } else { 332 if (snapname) { 333 #ifdef ZFS_DEBUG 334 dsl_dataset_phys_t *headphys; 335 dmu_buf_t *headdbuf; 336 err = dmu_bonus_hold(mos, 337 ds->ds_dir->dd_phys->dd_head_dataset_obj, 338 FTAG, &headdbuf); 339 if (err == 0) { 340 headphys = headdbuf->db_data; 341 uint64_t foundobj; 342 err = zap_lookup(dp->dp_meta_objset, 343 headphys->ds_snapnames_zapobj, 344 snapname, sizeof (foundobj), 1, 345 &foundobj); 346 ASSERT3U(foundobj, ==, dsobj); 347 dmu_buf_rele(headdbuf, FTAG); 348 } 349 #endif 350 (void) strcat(ds->ds_snapname, snapname); 351 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 352 err = dsl_dataset_get_snapname(ds); 353 } 354 } 355 356 if (err == 0) { 357 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 358 dsl_dataset_evict); 359 } 360 if (err || winner) { 361 bplist_close(&ds->ds_deadlist); 362 if (ds->ds_prev) { 363 dsl_dataset_close(ds->ds_prev, 364 DS_MODE_NONE, ds); 365 } 366 dsl_dir_close(ds->ds_dir, ds); 367 mutex_destroy(&ds->ds_lock); 368 mutex_destroy(&ds->ds_opening_lock); 369 mutex_destroy(&ds->ds_deadlist.bpl_lock); 370 kmem_free(ds, sizeof (dsl_dataset_t)); 371 if (err) { 372 dmu_buf_rele(dbuf, tag); 373 return (err); 374 } 375 ds = winner; 376 } else { 377 ds->ds_fsid_guid = 378 unique_insert(ds->ds_phys->ds_fsid_guid); 379 } 380 } 381 ASSERT3P(ds->ds_dbuf, ==, dbuf); 382 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 383 384 mutex_enter(&ds->ds_lock); 385 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && 386 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && 387 !DS_MODE_IS_INCONSISTENT(mode)) || 388 (ds->ds_open_refcount + weight > DS_REF_MAX)) { 389 mutex_exit(&ds->ds_lock); 390 dsl_dataset_close(ds, DS_MODE_NONE, tag); 391 return (EBUSY); 392 } 393 ds->ds_open_refcount += weight; 394 mutex_exit(&ds->ds_lock); 395 396 *dsp = ds; 397 return (0); 398 } 399 400 int 401 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, 402 void *tag, dsl_dataset_t **dsp) 403 { 404 dsl_dir_t *dd; 405 dsl_pool_t *dp; 406 const char *tail; 407 uint64_t obj; 408 dsl_dataset_t *ds = NULL; 409 int err = 0; 410 411 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); 412 if (err) 413 return (err); 414 415 dp = dd->dd_pool; 416 obj = dd->dd_phys->dd_head_dataset_obj; 417 rw_enter(&dp->dp_config_rwlock, RW_READER); 418 if (obj == 0) { 419 /* A dataset with no associated objset */ 420 err = ENOENT; 421 goto out; 422 } 423 424 if (tail != NULL) { 425 objset_t *mos = dp->dp_meta_objset; 426 427 err = dsl_dataset_open_obj(dp, obj, NULL, 428 DS_MODE_NONE, tag, &ds); 429 if (err) 430 goto out; 431 obj = ds->ds_phys->ds_snapnames_zapobj; 432 dsl_dataset_close(ds, DS_MODE_NONE, tag); 433 ds = NULL; 434 435 if (tail[0] != '@') { 436 err = ENOENT; 437 goto out; 438 } 439 tail++; 440 441 /* Look for a snapshot */ 442 if (!DS_MODE_IS_READONLY(mode)) { 443 err = EROFS; 444 goto out; 445 } 446 dprintf("looking for snapshot '%s'\n", tail); 447 err = zap_lookup(mos, obj, tail, 8, 1, &obj); 448 if (err) 449 goto out; 450 } 451 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); 452 453 out: 454 rw_exit(&dp->dp_config_rwlock); 455 dsl_dir_close(dd, FTAG); 456 457 ASSERT3U((err == 0), ==, (ds != NULL)); 458 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ 459 460 *dsp = ds; 461 return (err); 462 } 463 464 int 465 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) 466 { 467 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); 468 } 469 470 void 471 dsl_dataset_name(dsl_dataset_t *ds, char *name) 472 { 473 if (ds == NULL) { 474 (void) strcpy(name, "mos"); 475 } else { 476 dsl_dir_name(ds->ds_dir, name); 477 VERIFY(0 == dsl_dataset_get_snapname(ds)); 478 if (ds->ds_snapname[0]) { 479 (void) strcat(name, "@"); 480 if (!MUTEX_HELD(&ds->ds_lock)) { 481 /* 482 * We use a "recursive" mutex so that we 483 * can call dprintf_ds() with ds_lock held. 484 */ 485 mutex_enter(&ds->ds_lock); 486 (void) strcat(name, ds->ds_snapname); 487 mutex_exit(&ds->ds_lock); 488 } else { 489 (void) strcat(name, ds->ds_snapname); 490 } 491 } 492 } 493 } 494 495 static int 496 dsl_dataset_namelen(dsl_dataset_t *ds) 497 { 498 int result; 499 500 if (ds == NULL) { 501 result = 3; /* "mos" */ 502 } else { 503 result = dsl_dir_namelen(ds->ds_dir); 504 VERIFY(0 == dsl_dataset_get_snapname(ds)); 505 if (ds->ds_snapname[0]) { 506 ++result; /* adding one for the @-sign */ 507 if (!MUTEX_HELD(&ds->ds_lock)) { 508 /* see dsl_datset_name */ 509 mutex_enter(&ds->ds_lock); 510 result += strlen(ds->ds_snapname); 511 mutex_exit(&ds->ds_lock); 512 } else { 513 result += strlen(ds->ds_snapname); 514 } 515 } 516 } 517 518 return (result); 519 } 520 521 void 522 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) 523 { 524 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 525 mutex_enter(&ds->ds_lock); 526 ASSERT3U(ds->ds_open_refcount, >=, weight); 527 ds->ds_open_refcount -= weight; 528 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", 529 mode, ds->ds_open_refcount); 530 mutex_exit(&ds->ds_lock); 531 532 dmu_buf_rele(ds->ds_dbuf, tag); 533 } 534 535 void 536 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) 537 { 538 objset_t *mos = dp->dp_meta_objset; 539 dmu_buf_t *dbuf; 540 dsl_dataset_phys_t *dsphys; 541 dsl_dataset_t *ds; 542 uint64_t dsobj; 543 dsl_dir_t *dd; 544 545 dsl_dir_create_root(mos, ddobjp, tx); 546 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); 547 548 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 549 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 550 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 551 dmu_buf_will_dirty(dbuf, tx); 552 dsphys = dbuf->db_data; 553 dsphys->ds_dir_obj = dd->dd_object; 554 dsphys->ds_fsid_guid = unique_create(); 555 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 556 sizeof (dsphys->ds_guid)); 557 dsphys->ds_snapnames_zapobj = 558 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 559 dsphys->ds_creation_time = gethrestime_sec(); 560 dsphys->ds_creation_txg = tx->tx_txg; 561 dsphys->ds_deadlist_obj = 562 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 563 dmu_buf_rele(dbuf, FTAG); 564 565 dmu_buf_will_dirty(dd->dd_dbuf, tx); 566 dd->dd_phys->dd_head_dataset_obj = dsobj; 567 dsl_dir_close(dd, FTAG); 568 569 VERIFY(0 == 570 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); 571 (void) dmu_objset_create_impl(dp->dp_spa, ds, 572 &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx); 573 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 574 } 575 576 uint64_t 577 dsl_dataset_create_sync(dsl_dir_t *pdd, 578 const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) 579 { 580 dsl_pool_t *dp = pdd->dd_pool; 581 dmu_buf_t *dbuf; 582 dsl_dataset_phys_t *dsphys; 583 uint64_t dsobj, ddobj; 584 objset_t *mos = dp->dp_meta_objset; 585 dsl_dir_t *dd; 586 587 ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp); 588 ASSERT(clone_parent == NULL || 589 clone_parent->ds_phys->ds_num_children > 0); 590 ASSERT(lastname[0] != '@'); 591 ASSERT(dmu_tx_is_syncing(tx)); 592 593 ddobj = dsl_dir_create_sync(pdd, lastname, tx); 594 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 595 596 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 597 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 598 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 599 dmu_buf_will_dirty(dbuf, tx); 600 dsphys = dbuf->db_data; 601 dsphys->ds_dir_obj = dd->dd_object; 602 dsphys->ds_fsid_guid = unique_create(); 603 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 604 sizeof (dsphys->ds_guid)); 605 dsphys->ds_snapnames_zapobj = 606 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 607 dsphys->ds_creation_time = gethrestime_sec(); 608 dsphys->ds_creation_txg = tx->tx_txg; 609 dsphys->ds_deadlist_obj = 610 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 611 if (clone_parent) { 612 dsphys->ds_prev_snap_obj = clone_parent->ds_object; 613 dsphys->ds_prev_snap_txg = 614 clone_parent->ds_phys->ds_creation_txg; 615 dsphys->ds_used_bytes = 616 clone_parent->ds_phys->ds_used_bytes; 617 dsphys->ds_compressed_bytes = 618 clone_parent->ds_phys->ds_compressed_bytes; 619 dsphys->ds_uncompressed_bytes = 620 clone_parent->ds_phys->ds_uncompressed_bytes; 621 dsphys->ds_bp = clone_parent->ds_phys->ds_bp; 622 623 dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); 624 clone_parent->ds_phys->ds_num_children++; 625 626 dmu_buf_will_dirty(dd->dd_dbuf, tx); 627 dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; 628 } 629 dmu_buf_rele(dbuf, FTAG); 630 631 dmu_buf_will_dirty(dd->dd_dbuf, tx); 632 dd->dd_phys->dd_head_dataset_obj = dsobj; 633 dsl_dir_close(dd, FTAG); 634 635 return (dsobj); 636 } 637 638 struct destroyarg { 639 dsl_sync_task_group_t *dstg; 640 char *snapname; 641 char *failed; 642 }; 643 644 static int 645 dsl_snapshot_destroy_one(char *name, void *arg) 646 { 647 struct destroyarg *da = arg; 648 dsl_dataset_t *ds; 649 char *cp; 650 int err; 651 652 (void) strcat(name, "@"); 653 (void) strcat(name, da->snapname); 654 err = dsl_dataset_open(name, 655 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 656 da->dstg, &ds); 657 cp = strchr(name, '@'); 658 *cp = '\0'; 659 if (err == ENOENT) 660 return (0); 661 if (err) { 662 (void) strcpy(da->failed, name); 663 return (err); 664 } 665 666 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 667 dsl_dataset_destroy_sync, ds, da->dstg, 0); 668 return (0); 669 } 670 671 /* 672 * Destroy 'snapname' in all descendants of 'fsname'. 673 */ 674 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 675 int 676 dsl_snapshots_destroy(char *fsname, char *snapname) 677 { 678 int err; 679 struct destroyarg da; 680 dsl_sync_task_t *dst; 681 spa_t *spa; 682 683 err = spa_open(fsname, &spa, FTAG); 684 if (err) 685 return (err); 686 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 687 da.snapname = snapname; 688 da.failed = fsname; 689 690 err = dmu_objset_find(fsname, 691 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 692 693 if (err == 0) 694 err = dsl_sync_task_group_wait(da.dstg); 695 696 for (dst = list_head(&da.dstg->dstg_tasks); dst; 697 dst = list_next(&da.dstg->dstg_tasks, dst)) { 698 dsl_dataset_t *ds = dst->dst_arg1; 699 if (dst->dst_err) { 700 dsl_dataset_name(ds, fsname); 701 *strchr(fsname, '@') = '\0'; 702 } 703 /* 704 * If it was successful, destroy_sync would have 705 * closed the ds 706 */ 707 if (err) 708 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg); 709 } 710 711 dsl_sync_task_group_destroy(da.dstg); 712 spa_close(spa, FTAG); 713 return (err); 714 } 715 716 int 717 dsl_dataset_destroy(const char *name) 718 { 719 int err; 720 dsl_sync_task_group_t *dstg; 721 objset_t *os; 722 dsl_dataset_t *ds; 723 dsl_dir_t *dd; 724 uint64_t obj; 725 726 if (strchr(name, '@')) { 727 /* Destroying a snapshot is simpler */ 728 err = dsl_dataset_open(name, 729 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 730 FTAG, &ds); 731 if (err) 732 return (err); 733 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 734 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 735 ds, FTAG, 0); 736 if (err) 737 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 738 return (err); 739 } 740 741 err = dmu_objset_open(name, DMU_OST_ANY, 742 DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); 743 if (err) 744 return (err); 745 ds = os->os->os_dsl_dataset; 746 dd = ds->ds_dir; 747 748 /* 749 * Check for errors and mark this ds as inconsistent, in 750 * case we crash while freeing the objects. 751 */ 752 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 753 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 754 if (err) { 755 dmu_objset_close(os); 756 return (err); 757 } 758 759 /* 760 * remove the objects in open context, so that we won't 761 * have too much to do in syncing context. 762 */ 763 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 764 ds->ds_phys->ds_prev_snap_txg)) { 765 dmu_tx_t *tx = dmu_tx_create(os); 766 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); 767 dmu_tx_hold_bonus(tx, obj); 768 err = dmu_tx_assign(tx, TXG_WAIT); 769 if (err) { 770 /* 771 * Perhaps there is not enough disk 772 * space. Just deal with it from 773 * dsl_dataset_destroy_sync(). 774 */ 775 dmu_tx_abort(tx); 776 continue; 777 } 778 VERIFY(0 == dmu_object_free(os, obj, tx)); 779 dmu_tx_commit(tx); 780 } 781 /* Make sure it's not dirty before we finish destroying it. */ 782 txg_wait_synced(dd->dd_pool, 0); 783 784 dmu_objset_close(os); 785 if (err != ESRCH) 786 return (err); 787 788 err = dsl_dataset_open(name, 789 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 790 FTAG, &ds); 791 if (err) 792 return (err); 793 794 err = dsl_dir_open(name, FTAG, &dd, NULL); 795 if (err) { 796 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 797 return (err); 798 } 799 800 /* 801 * Blow away the dsl_dir + head dataset. 802 */ 803 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 804 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 805 dsl_dataset_destroy_sync, ds, FTAG, 0); 806 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 807 dsl_dir_destroy_sync, dd, FTAG, 0); 808 err = dsl_sync_task_group_wait(dstg); 809 dsl_sync_task_group_destroy(dstg); 810 /* if it is successful, *destroy_sync will close the ds+dd */ 811 if (err) { 812 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 813 dsl_dir_close(dd, FTAG); 814 } 815 return (err); 816 } 817 818 int 819 dsl_dataset_rollback(dsl_dataset_t *ds) 820 { 821 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 822 return (dsl_sync_task_do(ds->ds_dir->dd_pool, 823 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, 824 ds, NULL, 0)); 825 } 826 827 void * 828 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 829 void *p, dsl_dataset_evict_func_t func) 830 { 831 void *old; 832 833 mutex_enter(&ds->ds_lock); 834 old = ds->ds_user_ptr; 835 if (old == NULL) { 836 ds->ds_user_ptr = p; 837 ds->ds_user_evict_func = func; 838 } 839 mutex_exit(&ds->ds_lock); 840 return (old); 841 } 842 843 void * 844 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 845 { 846 return (ds->ds_user_ptr); 847 } 848 849 850 blkptr_t * 851 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 852 { 853 return (&ds->ds_phys->ds_bp); 854 } 855 856 void 857 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 858 { 859 ASSERT(dmu_tx_is_syncing(tx)); 860 /* If it's the meta-objset, set dp_meta_rootbp */ 861 if (ds == NULL) { 862 tx->tx_pool->dp_meta_rootbp = *bp; 863 } else { 864 dmu_buf_will_dirty(ds->ds_dbuf, tx); 865 ds->ds_phys->ds_bp = *bp; 866 } 867 } 868 869 spa_t * 870 dsl_dataset_get_spa(dsl_dataset_t *ds) 871 { 872 return (ds->ds_dir->dd_pool->dp_spa); 873 } 874 875 void 876 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 877 { 878 dsl_pool_t *dp; 879 880 if (ds == NULL) /* this is the meta-objset */ 881 return; 882 883 ASSERT(ds->ds_user_ptr != NULL); 884 885 if (ds->ds_phys->ds_next_snap_obj != 0) 886 panic("dirtying snapshot!"); 887 888 dp = ds->ds_dir->dd_pool; 889 890 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 891 /* up the hold count until we can be written out */ 892 dmu_buf_add_ref(ds->ds_dbuf, ds); 893 } 894 } 895 896 struct killarg { 897 uint64_t *usedp; 898 uint64_t *compressedp; 899 uint64_t *uncompressedp; 900 zio_t *zio; 901 dmu_tx_t *tx; 902 }; 903 904 static int 905 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 906 { 907 struct killarg *ka = arg; 908 blkptr_t *bp = &bc->bc_blkptr; 909 910 ASSERT3U(bc->bc_errno, ==, 0); 911 912 /* 913 * Since this callback is not called concurrently, no lock is 914 * needed on the accounting values. 915 */ 916 *ka->usedp += bp_get_dasize(spa, bp); 917 *ka->compressedp += BP_GET_PSIZE(bp); 918 *ka->uncompressedp += BP_GET_UCSIZE(bp); 919 /* XXX check for EIO? */ 920 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, 921 ARC_NOWAIT); 922 return (0); 923 } 924 925 /* ARGSUSED */ 926 static int 927 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) 928 { 929 dsl_dataset_t *ds = arg1; 930 931 /* 932 * There must be a previous snapshot. I suppose we could roll 933 * it back to being empty (and re-initialize the upper (ZPL) 934 * layer). But for now there's no way to do this via the user 935 * interface. 936 */ 937 if (ds->ds_phys->ds_prev_snap_txg == 0) 938 return (EINVAL); 939 940 /* 941 * This must not be a snapshot. 942 */ 943 if (ds->ds_phys->ds_next_snap_obj != 0) 944 return (EINVAL); 945 946 /* 947 * If we made changes this txg, traverse_dsl_dataset won't find 948 * them. Try again. 949 */ 950 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 951 return (EAGAIN); 952 953 return (0); 954 } 955 956 /* ARGSUSED */ 957 static void 958 dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 959 { 960 dsl_dataset_t *ds = arg1; 961 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 962 963 dmu_buf_will_dirty(ds->ds_dbuf, tx); 964 965 /* 966 * Before the roll back destroy the zil. 967 * Note, ds_user_ptr can be null if we are doing a "zfs receive -F" 968 */ 969 if (ds->ds_user_ptr != NULL) { 970 zil_rollback_destroy( 971 ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx); 972 } 973 974 /* Zero out the deadlist. */ 975 bplist_close(&ds->ds_deadlist); 976 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 977 ds->ds_phys->ds_deadlist_obj = 978 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 979 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 980 ds->ds_phys->ds_deadlist_obj)); 981 982 { 983 /* Free blkptrs that we gave birth to */ 984 zio_t *zio; 985 uint64_t used = 0, compressed = 0, uncompressed = 0; 986 struct killarg ka; 987 988 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 989 ZIO_FLAG_MUSTSUCCEED); 990 ka.usedp = &used; 991 ka.compressedp = &compressed; 992 ka.uncompressedp = &uncompressed; 993 ka.zio = zio; 994 ka.tx = tx; 995 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 996 ADVANCE_POST, kill_blkptr, &ka); 997 (void) zio_wait(zio); 998 999 dsl_dir_diduse_space(ds->ds_dir, 1000 -used, -compressed, -uncompressed, tx); 1001 } 1002 1003 /* Change our contents to that of the prev snapshot */ 1004 ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); 1005 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 1006 ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; 1007 ds->ds_phys->ds_compressed_bytes = 1008 ds->ds_prev->ds_phys->ds_compressed_bytes; 1009 ds->ds_phys->ds_uncompressed_bytes = 1010 ds->ds_prev->ds_phys->ds_uncompressed_bytes; 1011 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; 1012 ds->ds_phys->ds_unique_bytes = 0; 1013 1014 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1015 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1016 ds->ds_prev->ds_phys->ds_unique_bytes = 0; 1017 } 1018 1019 spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa, 1020 tx, cr, "dataset = %llu", ds->ds_object); 1021 } 1022 1023 /* ARGSUSED */ 1024 static int 1025 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1026 { 1027 dsl_dataset_t *ds = arg1; 1028 1029 /* 1030 * Can't delete a head dataset if there are snapshots of it. 1031 * (Except if the only snapshots are from the branch we cloned 1032 * from.) 1033 */ 1034 if (ds->ds_prev != NULL && 1035 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1036 return (EINVAL); 1037 1038 return (0); 1039 } 1040 1041 /* ARGSUSED */ 1042 static void 1043 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1044 { 1045 dsl_dataset_t *ds = arg1; 1046 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1047 1048 /* Mark it as inconsistent on-disk, in case we crash */ 1049 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1050 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1051 1052 spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1053 cr, "dataset = %llu", ds->ds_object); 1054 } 1055 1056 /* ARGSUSED */ 1057 static int 1058 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1059 { 1060 dsl_dataset_t *ds = arg1; 1061 1062 /* Can't delete a branch point. */ 1063 if (ds->ds_phys->ds_num_children > 1) 1064 return (EEXIST); 1065 1066 /* 1067 * Can't delete a head dataset if there are snapshots of it. 1068 * (Except if the only snapshots are from the branch we cloned 1069 * from.) 1070 */ 1071 if (ds->ds_prev != NULL && 1072 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1073 return (EINVAL); 1074 1075 /* 1076 * If we made changes this txg, traverse_dsl_dataset won't find 1077 * them. Try again. 1078 */ 1079 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1080 return (EAGAIN); 1081 1082 /* XXX we should do some i/o error checking... */ 1083 return (0); 1084 } 1085 1086 static void 1087 dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 1088 { 1089 dsl_dataset_t *ds = arg1; 1090 uint64_t used = 0, compressed = 0, uncompressed = 0; 1091 zio_t *zio; 1092 int err; 1093 int after_branch_point = FALSE; 1094 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1095 objset_t *mos = dp->dp_meta_objset; 1096 dsl_dataset_t *ds_prev = NULL; 1097 uint64_t obj; 1098 1099 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 1100 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 1101 ASSERT(ds->ds_prev == NULL || 1102 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1103 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1104 1105 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1106 1107 obj = ds->ds_object; 1108 1109 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1110 if (ds->ds_prev) { 1111 ds_prev = ds->ds_prev; 1112 } else { 1113 VERIFY(0 == dsl_dataset_open_obj(dp, 1114 ds->ds_phys->ds_prev_snap_obj, NULL, 1115 DS_MODE_NONE, FTAG, &ds_prev)); 1116 } 1117 after_branch_point = 1118 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1119 1120 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1121 if (after_branch_point && 1122 ds->ds_phys->ds_next_snap_obj == 0) { 1123 /* This clone is toast. */ 1124 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1125 ds_prev->ds_phys->ds_num_children--; 1126 } else if (!after_branch_point) { 1127 ds_prev->ds_phys->ds_next_snap_obj = 1128 ds->ds_phys->ds_next_snap_obj; 1129 } 1130 } 1131 1132 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1133 1134 if (ds->ds_phys->ds_next_snap_obj != 0) { 1135 blkptr_t bp; 1136 dsl_dataset_t *ds_next; 1137 uint64_t itor = 0; 1138 1139 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1140 1141 VERIFY(0 == dsl_dataset_open_obj(dp, 1142 ds->ds_phys->ds_next_snap_obj, NULL, 1143 DS_MODE_NONE, FTAG, &ds_next)); 1144 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1145 1146 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1147 ds_next->ds_phys->ds_prev_snap_obj = 1148 ds->ds_phys->ds_prev_snap_obj; 1149 ds_next->ds_phys->ds_prev_snap_txg = 1150 ds->ds_phys->ds_prev_snap_txg; 1151 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1152 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1153 1154 /* 1155 * Transfer to our deadlist (which will become next's 1156 * new deadlist) any entries from next's current 1157 * deadlist which were born before prev, and free the 1158 * other entries. 1159 * 1160 * XXX we're doing this long task with the config lock held 1161 */ 1162 while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1163 &bp) == 0) { 1164 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1165 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1166 &bp, tx)); 1167 if (ds_prev && !after_branch_point && 1168 bp.blk_birth > 1169 ds_prev->ds_phys->ds_prev_snap_txg) { 1170 ds_prev->ds_phys->ds_unique_bytes += 1171 bp_get_dasize(dp->dp_spa, &bp); 1172 } 1173 } else { 1174 used += bp_get_dasize(dp->dp_spa, &bp); 1175 compressed += BP_GET_PSIZE(&bp); 1176 uncompressed += BP_GET_UCSIZE(&bp); 1177 /* XXX check return value? */ 1178 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1179 &bp, NULL, NULL, ARC_NOWAIT); 1180 } 1181 } 1182 1183 /* free next's deadlist */ 1184 bplist_close(&ds_next->ds_deadlist); 1185 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1186 1187 /* set next's deadlist to our deadlist */ 1188 ds_next->ds_phys->ds_deadlist_obj = 1189 ds->ds_phys->ds_deadlist_obj; 1190 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1191 ds_next->ds_phys->ds_deadlist_obj)); 1192 ds->ds_phys->ds_deadlist_obj = 0; 1193 1194 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1195 /* 1196 * Update next's unique to include blocks which 1197 * were previously shared by only this snapshot 1198 * and it. Those blocks will be born after the 1199 * prev snap and before this snap, and will have 1200 * died after the next snap and before the one 1201 * after that (ie. be on the snap after next's 1202 * deadlist). 1203 * 1204 * XXX we're doing this long task with the 1205 * config lock held 1206 */ 1207 dsl_dataset_t *ds_after_next; 1208 1209 VERIFY(0 == dsl_dataset_open_obj(dp, 1210 ds_next->ds_phys->ds_next_snap_obj, NULL, 1211 DS_MODE_NONE, FTAG, &ds_after_next)); 1212 itor = 0; 1213 while (bplist_iterate(&ds_after_next->ds_deadlist, 1214 &itor, &bp) == 0) { 1215 if (bp.blk_birth > 1216 ds->ds_phys->ds_prev_snap_txg && 1217 bp.blk_birth <= 1218 ds->ds_phys->ds_creation_txg) { 1219 ds_next->ds_phys->ds_unique_bytes += 1220 bp_get_dasize(dp->dp_spa, &bp); 1221 } 1222 } 1223 1224 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1225 ASSERT3P(ds_next->ds_prev, ==, NULL); 1226 } else { 1227 /* 1228 * It would be nice to update the head dataset's 1229 * unique. To do so we would have to traverse 1230 * it for blocks born after ds_prev, which is 1231 * pretty expensive just to maintain something 1232 * for debugging purposes. 1233 */ 1234 ASSERT3P(ds_next->ds_prev, ==, ds); 1235 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1236 ds_next); 1237 if (ds_prev) { 1238 VERIFY(0 == dsl_dataset_open_obj(dp, 1239 ds->ds_phys->ds_prev_snap_obj, NULL, 1240 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); 1241 } else { 1242 ds_next->ds_prev = NULL; 1243 } 1244 } 1245 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1246 1247 /* 1248 * NB: unique_bytes is not accurate for head objsets 1249 * because we don't update it when we delete the most 1250 * recent snapshot -- see above comment. 1251 */ 1252 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1253 } else { 1254 /* 1255 * There's no next snapshot, so this is a head dataset. 1256 * Destroy the deadlist. Unless it's a clone, the 1257 * deadlist should be empty. (If it's a clone, it's 1258 * safe to ignore the deadlist contents.) 1259 */ 1260 struct killarg ka; 1261 1262 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1263 bplist_close(&ds->ds_deadlist); 1264 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1265 ds->ds_phys->ds_deadlist_obj = 0; 1266 1267 /* 1268 * Free everything that we point to (that's born after 1269 * the previous snapshot, if we are a clone) 1270 * 1271 * XXX we're doing this long task with the config lock held 1272 */ 1273 ka.usedp = &used; 1274 ka.compressedp = &compressed; 1275 ka.uncompressedp = &uncompressed; 1276 ka.zio = zio; 1277 ka.tx = tx; 1278 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1279 ADVANCE_POST, kill_blkptr, &ka); 1280 ASSERT3U(err, ==, 0); 1281 } 1282 1283 err = zio_wait(zio); 1284 ASSERT3U(err, ==, 0); 1285 1286 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); 1287 1288 if (ds->ds_phys->ds_snapnames_zapobj) { 1289 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1290 ASSERT(err == 0); 1291 } 1292 1293 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1294 /* Erase the link in the dataset */ 1295 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1296 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1297 /* 1298 * dsl_dir_sync_destroy() called us, they'll destroy 1299 * the dataset. 1300 */ 1301 } else { 1302 /* remove from snapshot namespace */ 1303 dsl_dataset_t *ds_head; 1304 VERIFY(0 == dsl_dataset_open_obj(dp, 1305 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, 1306 DS_MODE_NONE, FTAG, &ds_head)); 1307 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1308 #ifdef ZFS_DEBUG 1309 { 1310 uint64_t val; 1311 err = zap_lookup(mos, 1312 ds_head->ds_phys->ds_snapnames_zapobj, 1313 ds->ds_snapname, 8, 1, &val); 1314 ASSERT3U(err, ==, 0); 1315 ASSERT3U(val, ==, obj); 1316 } 1317 #endif 1318 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1319 ds->ds_snapname, tx); 1320 ASSERT(err == 0); 1321 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1322 } 1323 1324 if (ds_prev && ds->ds_prev != ds_prev) 1325 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1326 1327 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1328 spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx, 1329 cr, "dataset = %llu", ds->ds_object); 1330 1331 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 1332 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1333 1334 } 1335 1336 /* ARGSUSED */ 1337 int 1338 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1339 { 1340 objset_t *os = arg1; 1341 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1342 const char *snapname = arg2; 1343 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1344 int err; 1345 uint64_t value; 1346 1347 /* 1348 * We don't allow multiple snapshots of the same txg. If there 1349 * is already one, try again. 1350 */ 1351 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1352 return (EAGAIN); 1353 1354 /* 1355 * Check for conflicting name snapshot name. 1356 */ 1357 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1358 snapname, 8, 1, &value); 1359 if (err == 0) 1360 return (EEXIST); 1361 if (err != ENOENT) 1362 return (err); 1363 1364 /* 1365 * Check that the dataset's name is not too long. Name consists 1366 * of the dataset's length + 1 for the @-sign + snapshot name's length 1367 */ 1368 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 1369 return (ENAMETOOLONG); 1370 1371 ds->ds_trysnap_txg = tx->tx_txg; 1372 return (0); 1373 } 1374 1375 void 1376 dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1377 { 1378 objset_t *os = arg1; 1379 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1380 const char *snapname = arg2; 1381 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1382 dmu_buf_t *dbuf; 1383 dsl_dataset_phys_t *dsphys; 1384 uint64_t dsobj; 1385 objset_t *mos = dp->dp_meta_objset; 1386 int err; 1387 1388 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1389 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1390 1391 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1392 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1393 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1394 dmu_buf_will_dirty(dbuf, tx); 1395 dsphys = dbuf->db_data; 1396 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1397 dsphys->ds_fsid_guid = unique_create(); 1398 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1399 sizeof (dsphys->ds_guid)); 1400 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1401 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1402 dsphys->ds_next_snap_obj = ds->ds_object; 1403 dsphys->ds_num_children = 1; 1404 dsphys->ds_creation_time = gethrestime_sec(); 1405 dsphys->ds_creation_txg = tx->tx_txg; 1406 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1407 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1408 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1409 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1410 dsphys->ds_flags = ds->ds_phys->ds_flags; 1411 dsphys->ds_bp = ds->ds_phys->ds_bp; 1412 dmu_buf_rele(dbuf, FTAG); 1413 1414 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1415 if (ds->ds_prev) { 1416 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1417 ds->ds_object || 1418 ds->ds_prev->ds_phys->ds_num_children > 1); 1419 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1420 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1421 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1422 ds->ds_prev->ds_phys->ds_creation_txg); 1423 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1424 } 1425 } 1426 1427 bplist_close(&ds->ds_deadlist); 1428 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1429 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); 1430 ds->ds_phys->ds_prev_snap_obj = dsobj; 1431 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; 1432 ds->ds_phys->ds_unique_bytes = 0; 1433 ds->ds_phys->ds_deadlist_obj = 1434 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1435 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1436 ds->ds_phys->ds_deadlist_obj)); 1437 1438 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1439 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1440 snapname, 8, 1, &dsobj, tx); 1441 ASSERT(err == 0); 1442 1443 if (ds->ds_prev) 1444 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1445 VERIFY(0 == dsl_dataset_open_obj(dp, 1446 ds->ds_phys->ds_prev_snap_obj, snapname, 1447 DS_MODE_NONE, ds, &ds->ds_prev)); 1448 1449 spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, 1450 "dataset = %llu", dsobj); 1451 } 1452 1453 void 1454 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1455 { 1456 ASSERT(dmu_tx_is_syncing(tx)); 1457 ASSERT(ds->ds_user_ptr != NULL); 1458 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1459 1460 /* 1461 * in case we had to change ds_fsid_guid when we opened it, 1462 * sync it out now. 1463 */ 1464 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1465 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1466 1467 dsl_dir_dirty(ds->ds_dir, tx); 1468 dmu_objset_sync(ds->ds_user_ptr, zio, tx); 1469 } 1470 1471 void 1472 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1473 { 1474 dsl_dir_stats(ds->ds_dir, nv); 1475 1476 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1477 ds->ds_phys->ds_creation_time); 1478 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1479 ds->ds_phys->ds_creation_txg); 1480 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, 1481 ds->ds_phys->ds_used_bytes); 1482 1483 if (ds->ds_phys->ds_next_snap_obj) { 1484 /* 1485 * This is a snapshot; override the dd's space used with 1486 * our unique space and compression ratio. 1487 */ 1488 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1489 ds->ds_phys->ds_unique_bytes); 1490 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 1491 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1492 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1493 ds->ds_phys->ds_compressed_bytes)); 1494 } 1495 } 1496 1497 void 1498 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1499 { 1500 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1501 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1502 if (ds->ds_phys->ds_next_snap_obj) { 1503 stat->dds_is_snapshot = B_TRUE; 1504 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1505 } 1506 1507 /* clone origin is really a dsl_dir thing... */ 1508 if (ds->ds_dir->dd_phys->dd_clone_parent_obj) { 1509 dsl_dataset_t *ods; 1510 1511 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 1512 VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, 1513 ds->ds_dir->dd_phys->dd_clone_parent_obj, 1514 NULL, DS_MODE_NONE, FTAG, &ods)); 1515 dsl_dataset_name(ods, stat->dds_clone_of); 1516 dsl_dataset_close(ods, DS_MODE_NONE, FTAG); 1517 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 1518 } 1519 } 1520 1521 uint64_t 1522 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1523 { 1524 return (ds->ds_fsid_guid); 1525 } 1526 1527 void 1528 dsl_dataset_space(dsl_dataset_t *ds, 1529 uint64_t *refdbytesp, uint64_t *availbytesp, 1530 uint64_t *usedobjsp, uint64_t *availobjsp) 1531 { 1532 *refdbytesp = ds->ds_phys->ds_used_bytes; 1533 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1534 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1535 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1536 } 1537 1538 boolean_t 1539 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 1540 { 1541 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1542 1543 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 1544 dsl_pool_sync_context(dp)); 1545 if (ds->ds_prev == NULL) 1546 return (B_FALSE); 1547 if (ds->ds_phys->ds_bp.blk_birth > 1548 ds->ds_prev->ds_phys->ds_creation_txg) 1549 return (B_TRUE); 1550 return (B_FALSE); 1551 } 1552 1553 /* ARGSUSED */ 1554 static int 1555 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 1556 { 1557 dsl_dataset_t *ds = arg1; 1558 char *newsnapname = arg2; 1559 dsl_dir_t *dd = ds->ds_dir; 1560 objset_t *mos = dd->dd_pool->dp_meta_objset; 1561 dsl_dataset_t *hds; 1562 uint64_t val; 1563 int err; 1564 1565 err = dsl_dataset_open_obj(dd->dd_pool, 1566 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); 1567 if (err) 1568 return (err); 1569 1570 /* new name better not be in use */ 1571 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, 1572 newsnapname, 8, 1, &val); 1573 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1574 1575 if (err == 0) 1576 err = EEXIST; 1577 else if (err == ENOENT) 1578 err = 0; 1579 1580 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1581 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 1582 err = ENAMETOOLONG; 1583 1584 return (err); 1585 } 1586 1587 static void 1588 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 1589 cred_t *cr, dmu_tx_t *tx) 1590 { 1591 dsl_dataset_t *ds = arg1; 1592 const char *newsnapname = arg2; 1593 dsl_dir_t *dd = ds->ds_dir; 1594 objset_t *mos = dd->dd_pool->dp_meta_objset; 1595 dsl_dataset_t *hds; 1596 int err; 1597 1598 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 1599 1600 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1601 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); 1602 1603 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1604 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, 1605 ds->ds_snapname, tx); 1606 ASSERT3U(err, ==, 0); 1607 mutex_enter(&ds->ds_lock); 1608 (void) strcpy(ds->ds_snapname, newsnapname); 1609 mutex_exit(&ds->ds_lock); 1610 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 1611 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 1612 ASSERT3U(err, ==, 0); 1613 1614 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 1615 cr, "dataset = %llu", ds->ds_object); 1616 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1617 } 1618 1619 struct renamesnaparg { 1620 dsl_sync_task_group_t *dstg; 1621 char failed[MAXPATHLEN]; 1622 char *oldsnap; 1623 char *newsnap; 1624 }; 1625 1626 static int 1627 dsl_snapshot_rename_one(char *name, void *arg) 1628 { 1629 struct renamesnaparg *ra = arg; 1630 dsl_dataset_t *ds = NULL; 1631 char *cp; 1632 int err; 1633 1634 cp = name + strlen(name); 1635 *cp = '@'; 1636 (void) strcpy(cp + 1, ra->oldsnap); 1637 1638 /* 1639 * For recursive snapshot renames the parent won't be changing 1640 * so we just pass name for both the to/from argument. 1641 */ 1642 if (err = zfs_secpolicy_rename_perms(name, name, CRED())) { 1643 (void) strcpy(ra->failed, name); 1644 return (err); 1645 } 1646 1647 err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD, 1648 ra->dstg, &ds); 1649 if (err == ENOENT) { 1650 *cp = '\0'; 1651 return (0); 1652 } 1653 if (err) { 1654 (void) strcpy(ra->failed, name); 1655 *cp = '\0'; 1656 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1657 return (err); 1658 } 1659 1660 #ifdef _KERNEL 1661 /* for all filesystems undergoing rename, we'll need to unmount it */ 1662 (void) zfs_unmount_snap(name, NULL); 1663 #endif 1664 1665 *cp = '\0'; 1666 1667 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 1668 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 1669 1670 return (0); 1671 } 1672 1673 static int 1674 dsl_recursive_rename(char *oldname, const char *newname) 1675 { 1676 int err; 1677 struct renamesnaparg *ra; 1678 dsl_sync_task_t *dst; 1679 spa_t *spa; 1680 char *cp, *fsname = spa_strdup(oldname); 1681 int len = strlen(oldname); 1682 1683 /* truncate the snapshot name to get the fsname */ 1684 cp = strchr(fsname, '@'); 1685 *cp = '\0'; 1686 1687 err = spa_open(fsname, &spa, FTAG); 1688 if (err) { 1689 kmem_free(fsname, len + 1); 1690 return (err); 1691 } 1692 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 1693 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 1694 1695 ra->oldsnap = strchr(oldname, '@') + 1; 1696 ra->newsnap = strchr(newname, '@') + 1; 1697 *ra->failed = '\0'; 1698 1699 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 1700 DS_FIND_CHILDREN); 1701 kmem_free(fsname, len + 1); 1702 1703 if (err == 0) { 1704 err = dsl_sync_task_group_wait(ra->dstg); 1705 } 1706 1707 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 1708 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 1709 dsl_dataset_t *ds = dst->dst_arg1; 1710 if (dst->dst_err) { 1711 dsl_dir_name(ds->ds_dir, ra->failed); 1712 (void) strcat(ra->failed, "@"); 1713 (void) strcat(ra->failed, ra->newsnap); 1714 } 1715 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1716 } 1717 1718 if (err) 1719 (void) strcpy(oldname, ra->failed); 1720 1721 dsl_sync_task_group_destroy(ra->dstg); 1722 kmem_free(ra, sizeof (struct renamesnaparg)); 1723 spa_close(spa, FTAG); 1724 return (err); 1725 } 1726 1727 static int 1728 dsl_valid_rename(char *oldname, void *arg) 1729 { 1730 int delta = *(int *)arg; 1731 1732 if (strlen(oldname) + delta >= MAXNAMELEN) 1733 return (ENAMETOOLONG); 1734 1735 return (0); 1736 } 1737 1738 #pragma weak dmu_objset_rename = dsl_dataset_rename 1739 int 1740 dsl_dataset_rename(char *oldname, const char *newname, 1741 boolean_t recursive) 1742 { 1743 dsl_dir_t *dd; 1744 dsl_dataset_t *ds; 1745 const char *tail; 1746 int err; 1747 1748 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 1749 if (err) 1750 return (err); 1751 if (tail == NULL) { 1752 int delta = strlen(newname) - strlen(oldname); 1753 1754 /* if we're growing, validate child size lengths */ 1755 if (delta > 0) 1756 err = dmu_objset_find(oldname, dsl_valid_rename, 1757 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 1758 1759 if (!err) 1760 err = dsl_dir_rename(dd, newname); 1761 dsl_dir_close(dd, FTAG); 1762 return (err); 1763 } 1764 if (tail[0] != '@') { 1765 /* the name ended in a nonexistant component */ 1766 dsl_dir_close(dd, FTAG); 1767 return (ENOENT); 1768 } 1769 1770 dsl_dir_close(dd, FTAG); 1771 1772 /* new name must be snapshot in same filesystem */ 1773 tail = strchr(newname, '@'); 1774 if (tail == NULL) 1775 return (EINVAL); 1776 tail++; 1777 if (strncmp(oldname, newname, tail - newname) != 0) 1778 return (EXDEV); 1779 1780 if (recursive) { 1781 err = dsl_recursive_rename(oldname, newname); 1782 } else { 1783 err = dsl_dataset_open(oldname, 1784 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); 1785 if (err) 1786 return (err); 1787 1788 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1789 dsl_dataset_snapshot_rename_check, 1790 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 1791 1792 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 1793 } 1794 1795 return (err); 1796 } 1797 1798 struct promotearg { 1799 uint64_t used, comp, uncomp, unique; 1800 uint64_t newnext_obj, snapnames_obj; 1801 }; 1802 1803 /* ARGSUSED */ 1804 static int 1805 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 1806 { 1807 dsl_dataset_t *hds = arg1; 1808 struct promotearg *pa = arg2; 1809 dsl_dir_t *dd = hds->ds_dir; 1810 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1811 dsl_dir_t *pdd = NULL; 1812 dsl_dataset_t *ds = NULL; 1813 dsl_dataset_t *pivot_ds = NULL; 1814 dsl_dataset_t *newnext_ds = NULL; 1815 int err; 1816 char *name = NULL; 1817 uint64_t itor = 0; 1818 blkptr_t bp; 1819 1820 bzero(pa, sizeof (*pa)); 1821 1822 /* Check that it is a clone */ 1823 if (dd->dd_phys->dd_clone_parent_obj == 0) 1824 return (EINVAL); 1825 1826 /* Since this is so expensive, don't do the preliminary check */ 1827 if (!dmu_tx_is_syncing(tx)) 1828 return (0); 1829 1830 if (err = dsl_dataset_open_obj(dp, 1831 dd->dd_phys->dd_clone_parent_obj, 1832 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) 1833 goto out; 1834 pdd = pivot_ds->ds_dir; 1835 1836 { 1837 dsl_dataset_t *phds; 1838 if (err = dsl_dataset_open_obj(dd->dd_pool, 1839 pdd->dd_phys->dd_head_dataset_obj, 1840 NULL, DS_MODE_NONE, FTAG, &phds)) 1841 goto out; 1842 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; 1843 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); 1844 } 1845 1846 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 1847 err = EXDEV; 1848 goto out; 1849 } 1850 1851 /* find pivot point's new next ds */ 1852 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, 1853 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); 1854 while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { 1855 dsl_dataset_t *prev; 1856 1857 if (err = dsl_dataset_open_obj(dd->dd_pool, 1858 newnext_ds->ds_phys->ds_prev_snap_obj, 1859 NULL, DS_MODE_NONE, FTAG, &prev)) 1860 goto out; 1861 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1862 newnext_ds = prev; 1863 } 1864 pa->newnext_obj = newnext_ds->ds_object; 1865 1866 /* compute pivot point's new unique space */ 1867 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, 1868 &itor, &bp)) == 0) { 1869 if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) 1870 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); 1871 } 1872 if (err != ENOENT) 1873 goto out; 1874 1875 /* Walk the snapshots that we are moving */ 1876 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1877 ds = pivot_ds; 1878 /* CONSTCOND */ 1879 while (TRUE) { 1880 uint64_t val, dlused, dlcomp, dluncomp; 1881 dsl_dataset_t *prev; 1882 1883 /* Check that the snapshot name does not conflict */ 1884 dsl_dataset_name(ds, name); 1885 err = zap_lookup(dd->dd_pool->dp_meta_objset, 1886 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1887 8, 1, &val); 1888 if (err != ENOENT) { 1889 if (err == 0) 1890 err = EEXIST; 1891 goto out; 1892 } 1893 1894 /* 1895 * compute space to transfer. Each snapshot gave birth to: 1896 * (my used) - (prev's used) + (deadlist's used) 1897 */ 1898 pa->used += ds->ds_phys->ds_used_bytes; 1899 pa->comp += ds->ds_phys->ds_compressed_bytes; 1900 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; 1901 1902 /* If we reach the first snapshot, we're done. */ 1903 if (ds->ds_phys->ds_prev_snap_obj == 0) 1904 break; 1905 1906 if (err = bplist_space(&ds->ds_deadlist, 1907 &dlused, &dlcomp, &dluncomp)) 1908 goto out; 1909 if (err = dsl_dataset_open_obj(dd->dd_pool, 1910 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1911 FTAG, &prev)) 1912 goto out; 1913 pa->used += dlused - prev->ds_phys->ds_used_bytes; 1914 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; 1915 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; 1916 1917 /* 1918 * We could be a clone of a clone. If we reach our 1919 * parent's branch point, we're done. 1920 */ 1921 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1922 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1923 break; 1924 } 1925 if (ds != pivot_ds) 1926 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1927 ds = prev; 1928 } 1929 1930 /* Check that there is enough space here */ 1931 err = dsl_dir_transfer_possible(pdd, dd, pa->used); 1932 1933 out: 1934 if (ds && ds != pivot_ds) 1935 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1936 if (pivot_ds) 1937 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 1938 if (newnext_ds) 1939 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1940 if (name) 1941 kmem_free(name, MAXPATHLEN); 1942 return (err); 1943 } 1944 1945 static void 1946 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1947 { 1948 dsl_dataset_t *hds = arg1; 1949 struct promotearg *pa = arg2; 1950 dsl_dir_t *dd = hds->ds_dir; 1951 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1952 dsl_dir_t *pdd = NULL; 1953 dsl_dataset_t *ds, *pivot_ds; 1954 char *name; 1955 1956 ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); 1957 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 1958 1959 VERIFY(0 == dsl_dataset_open_obj(dp, 1960 dd->dd_phys->dd_clone_parent_obj, 1961 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); 1962 /* 1963 * We need to explicitly open pdd, since pivot_ds's pdd will be 1964 * changing. 1965 */ 1966 VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object, 1967 NULL, FTAG, &pdd)); 1968 1969 /* move snapshots to this dir */ 1970 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1971 ds = pivot_ds; 1972 /* CONSTCOND */ 1973 while (TRUE) { 1974 dsl_dataset_t *prev; 1975 1976 /* move snap name entry */ 1977 dsl_dataset_name(ds, name); 1978 VERIFY(0 == zap_remove(dp->dp_meta_objset, 1979 pa->snapnames_obj, ds->ds_snapname, tx)); 1980 VERIFY(0 == zap_add(dp->dp_meta_objset, 1981 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1982 8, 1, &ds->ds_object, tx)); 1983 1984 /* change containing dsl_dir */ 1985 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1986 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); 1987 ds->ds_phys->ds_dir_obj = dd->dd_object; 1988 ASSERT3P(ds->ds_dir, ==, pdd); 1989 dsl_dir_close(ds->ds_dir, ds); 1990 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 1991 NULL, ds, &ds->ds_dir)); 1992 1993 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 1994 1995 if (ds->ds_phys->ds_prev_snap_obj == 0) 1996 break; 1997 1998 VERIFY(0 == dsl_dataset_open_obj(dp, 1999 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 2000 FTAG, &prev)); 2001 2002 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 2003 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 2004 break; 2005 } 2006 if (ds != pivot_ds) 2007 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2008 ds = prev; 2009 } 2010 if (ds != pivot_ds) 2011 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2012 2013 /* change pivot point's next snap */ 2014 dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); 2015 pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; 2016 2017 /* change clone_parent-age */ 2018 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2019 ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); 2020 dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; 2021 dmu_buf_will_dirty(pdd->dd_dbuf, tx); 2022 pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; 2023 2024 /* change space accounting */ 2025 dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); 2026 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); 2027 pivot_ds->ds_phys->ds_unique_bytes = pa->unique; 2028 2029 /* log history record */ 2030 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2031 cr, "dataset = %llu", ds->ds_object); 2032 2033 dsl_dir_close(pdd, FTAG); 2034 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 2035 kmem_free(name, MAXPATHLEN); 2036 } 2037 2038 int 2039 dsl_dataset_promote(const char *name) 2040 { 2041 dsl_dataset_t *ds; 2042 int err; 2043 dmu_object_info_t doi; 2044 struct promotearg pa; 2045 2046 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 2047 if (err) 2048 return (err); 2049 2050 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 2051 ds->ds_phys->ds_snapnames_zapobj, &doi); 2052 if (err) { 2053 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2054 return (err); 2055 } 2056 2057 /* 2058 * Add in 128x the snapnames zapobj size, since we will be moving 2059 * a bunch of snapnames to the promoted ds, and dirtying their 2060 * bonus buffers. 2061 */ 2062 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2063 dsl_dataset_promote_check, 2064 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); 2065 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2066 return (err); 2067 } 2068 2069 #define SWITCH64(x, y) \ 2070 { \ 2071 uint64_t __tmp = (x); \ 2072 (x) = (y); \ 2073 (y) = __tmp; \ 2074 } 2075 2076 /* ARGSUSED */ 2077 static int 2078 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2079 { 2080 dsl_dataset_t *cds = arg1; /* clone to become new head */ 2081 boolean_t *forcep = arg2; 2082 dsl_dir_t *cdd = cds->ds_dir; 2083 dsl_pool_t *dp = cds->ds_dir->dd_pool; 2084 dsl_dataset_t *ods; /* the snapshot cds is cloned off of */ 2085 dsl_dataset_t *ohds = NULL; 2086 dsl_dir_t *odd; 2087 int err; 2088 2089 /* check that it is a clone */ 2090 if (cdd->dd_phys->dd_clone_parent_obj == 0) 2091 return (EINVAL); 2092 2093 /* check that cds is not a snapshot */ 2094 if (dsl_dataset_is_snapshot(cds)) 2095 return (EINVAL); 2096 2097 /* open the origin */ 2098 if (err = dsl_dataset_open_obj(dp, cdd->dd_phys->dd_clone_parent_obj, 2099 NULL, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ods)) 2100 return (err); 2101 odd = ods->ds_dir; 2102 2103 /* make sure the clone is descendant of origin */ 2104 if (cdd->dd_parent != odd) { 2105 err = EINVAL; 2106 goto out; 2107 } 2108 2109 /* check that there are no snapshots after the origin */ 2110 if (cds->ds_phys->ds_prev_snap_obj != ods->ds_object || 2111 ods->ds_phys->ds_next_snap_obj != 2112 odd->dd_phys->dd_head_dataset_obj) { 2113 err = EINVAL; 2114 goto out; 2115 } 2116 2117 /* 2118 * Verify origin head dataset hasn't been modified or 2119 * 'force' has been passed down. 2120 */ 2121 if (!(*forcep) && 2122 (err = dsl_dataset_open_obj(cdd->dd_pool, 2123 odd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_EXCLUSIVE, 2124 FTAG, &ohds)) == 0) { 2125 if (dsl_dataset_modified_since_lastsnap(ohds)) 2126 err = ETXTBSY; 2127 dsl_dataset_close(ohds, DS_MODE_EXCLUSIVE, FTAG); 2128 } 2129 out: 2130 dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG); 2131 return (err); 2132 } 2133 2134 /* ARGSUSED */ 2135 static void 2136 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2137 { 2138 dsl_dataset_t *cds = arg1; /* clone to become new head */ 2139 dsl_dir_t *cdd = cds->ds_dir; 2140 dsl_pool_t *dp = cds->ds_dir->dd_pool; 2141 dsl_dataset_t *ods, *ohds; 2142 dsl_dir_t *odd; 2143 uint64_t itor = 0; 2144 blkptr_t bp; 2145 uint64_t unique = 0; 2146 int err; 2147 2148 ASSERT(cdd->dd_phys->dd_clone_parent_obj != 0); 2149 ASSERT(dsl_dataset_is_snapshot(cds) == 0); 2150 2151 /* open the origin */ 2152 VERIFY(0 == dsl_dataset_open_obj(dp, cdd->dd_phys->dd_clone_parent_obj, 2153 NULL, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ods)); 2154 odd = ods->ds_dir; 2155 ASSERT(cds->ds_phys->ds_prev_snap_obj == ods->ds_object); 2156 ASSERT(ods->ds_phys->ds_next_snap_obj == 2157 odd->dd_phys->dd_head_dataset_obj); 2158 2159 /* open the origin head */ 2160 VERIFY(0 == dsl_dataset_open_obj(cdd->dd_pool, 2161 odd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_EXCLUSIVE, 2162 FTAG, &ohds)); 2163 ASSERT(odd == ohds->ds_dir); 2164 2165 dmu_buf_will_dirty(cds->ds_dbuf, tx); 2166 dmu_buf_will_dirty(ohds->ds_dbuf, tx); 2167 dmu_buf_will_dirty(ods->ds_dbuf, tx); 2168 2169 /* compute unique space */ 2170 while ((err = bplist_iterate(&cds->ds_deadlist, &itor, &bp)) == 0) { 2171 if (bp.blk_birth > ods->ds_phys->ds_prev_snap_txg) 2172 unique += bp_get_dasize(cdd->dd_pool->dp_spa, &bp); 2173 } 2174 VERIFY(err == ENOENT); 2175 2176 /* reset origin's unique bytes */ 2177 ods->ds_phys->ds_unique_bytes = unique; 2178 2179 /* swap blkptrs */ 2180 { 2181 blkptr_t tmp; 2182 tmp = ohds->ds_phys->ds_bp; 2183 ohds->ds_phys->ds_bp = cds->ds_phys->ds_bp; 2184 cds->ds_phys->ds_bp = tmp; 2185 } 2186 2187 /* set dd_*_bytes */ 2188 { 2189 int64_t dused, dcomp, duncomp; 2190 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2191 uint64_t odl_used, odl_comp, odl_uncomp; 2192 2193 VERIFY(0 == bplist_space(&cds->ds_deadlist, &cdl_used, 2194 &cdl_comp, &cdl_uncomp)); 2195 VERIFY(0 == bplist_space(&ohds->ds_deadlist, &odl_used, 2196 &odl_comp, &odl_uncomp)); 2197 dused = cds->ds_phys->ds_used_bytes + cdl_used - 2198 (ohds->ds_phys->ds_used_bytes + odl_used); 2199 dcomp = cds->ds_phys->ds_compressed_bytes + cdl_comp - 2200 (ohds->ds_phys->ds_compressed_bytes + odl_comp); 2201 duncomp = cds->ds_phys->ds_uncompressed_bytes + cdl_uncomp - 2202 (ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2203 2204 dsl_dir_diduse_space(odd, dused, dcomp, duncomp, tx); 2205 dsl_dir_diduse_space(cdd, -dused, -dcomp, -duncomp, tx); 2206 } 2207 2208 /* swap ds_*_bytes */ 2209 SWITCH64(ohds->ds_phys->ds_used_bytes, cds->ds_phys->ds_used_bytes); 2210 SWITCH64(ohds->ds_phys->ds_compressed_bytes, 2211 cds->ds_phys->ds_compressed_bytes); 2212 SWITCH64(ohds->ds_phys->ds_uncompressed_bytes, 2213 cds->ds_phys->ds_uncompressed_bytes); 2214 2215 /* swap deadlists */ 2216 bplist_close(&cds->ds_deadlist); 2217 bplist_close(&ohds->ds_deadlist); 2218 SWITCH64(ohds->ds_phys->ds_deadlist_obj, cds->ds_phys->ds_deadlist_obj); 2219 VERIFY(0 == bplist_open(&cds->ds_deadlist, dp->dp_meta_objset, 2220 cds->ds_phys->ds_deadlist_obj)); 2221 VERIFY(0 == bplist_open(&ohds->ds_deadlist, dp->dp_meta_objset, 2222 ohds->ds_phys->ds_deadlist_obj)); 2223 2224 dsl_dataset_close(ohds, DS_MODE_EXCLUSIVE, FTAG); 2225 dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG); 2226 } 2227 2228 /* 2229 * Swap the clone "cosname" with its origin head file system. 2230 */ 2231 int 2232 dsl_dataset_clone_swap(const char *cosname, boolean_t force) 2233 { 2234 dsl_dataset_t *ds; 2235 int err; 2236 2237 err = dsl_dataset_open(cosname, 2238 DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, FTAG, &ds); 2239 if (err) 2240 return (err); 2241 2242 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2243 dsl_dataset_clone_swap_check, 2244 dsl_dataset_clone_swap_sync, ds, &force, 9); 2245 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2246 return (err); 2247 } 2248 2249 /* 2250 * Given a pool name and a dataset object number in that pool, 2251 * return the name of that dataset. 2252 */ 2253 int 2254 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2255 { 2256 spa_t *spa; 2257 dsl_pool_t *dp; 2258 dsl_dataset_t *ds = NULL; 2259 int error; 2260 2261 if ((error = spa_open(pname, &spa, FTAG)) != 0) 2262 return (error); 2263 dp = spa_get_dsl(spa); 2264 rw_enter(&dp->dp_config_rwlock, RW_READER); 2265 if ((error = dsl_dataset_open_obj(dp, obj, 2266 NULL, DS_MODE_NONE, FTAG, &ds)) != 0) { 2267 rw_exit(&dp->dp_config_rwlock); 2268 spa_close(spa, FTAG); 2269 return (error); 2270 } 2271 dsl_dataset_name(ds, buf); 2272 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2273 rw_exit(&dp->dp_config_rwlock); 2274 spa_close(spa, FTAG); 2275 2276 return (0); 2277 } 2278