1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/dmu_objset.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_prop.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dmu_traverse.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zio.h> 37 #include <sys/zap.h> 38 #include <sys/unique.h> 39 #include <sys/zfs_context.h> 40 #include <sys/zfs_ioctl.h> 41 #include <sys/spa.h> 42 #include <sys/sunddi.h> 43 44 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 45 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 46 static dsl_checkfunc_t dsl_dataset_rollback_check; 47 static dsl_syncfunc_t dsl_dataset_rollback_sync; 48 static dsl_checkfunc_t dsl_dataset_destroy_check; 49 static dsl_syncfunc_t dsl_dataset_destroy_sync; 50 51 #define DS_REF_MAX (1ULL << 62) 52 53 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 54 55 /* 56 * We use weighted reference counts to express the various forms of exclusion 57 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open 58 * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. 59 * This makes the exclusion logic simple: the total refcnt for all opens cannot 60 * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their 61 * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume 62 * just over half of the refcnt space, so there can't be more than one, but it 63 * can peacefully coexist with any number of STANDARD opens. 64 */ 65 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { 66 0, /* DS_MODE_NONE - invalid */ 67 1, /* DS_MODE_STANDARD - unlimited number */ 68 (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */ 69 DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */ 70 }; 71 72 73 void 74 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 75 { 76 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 77 int compressed = BP_GET_PSIZE(bp); 78 int uncompressed = BP_GET_UCSIZE(bp); 79 80 dprintf_bp(bp, "born, ds=%p\n", ds); 81 82 ASSERT(dmu_tx_is_syncing(tx)); 83 /* It could have been compressed away to nothing */ 84 if (BP_IS_HOLE(bp)) 85 return; 86 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 87 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 88 if (ds == NULL) { 89 /* 90 * Account for the meta-objset space in its placeholder 91 * dsl_dir. 92 */ 93 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 94 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 95 used, compressed, uncompressed, tx); 96 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 97 return; 98 } 99 dmu_buf_will_dirty(ds->ds_dbuf, tx); 100 mutex_enter(&ds->ds_lock); 101 ds->ds_phys->ds_used_bytes += used; 102 ds->ds_phys->ds_compressed_bytes += compressed; 103 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 104 ds->ds_phys->ds_unique_bytes += used; 105 mutex_exit(&ds->ds_lock); 106 dsl_dir_diduse_space(ds->ds_dir, 107 used, compressed, uncompressed, tx); 108 } 109 110 void 111 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 112 dmu_tx_t *tx) 113 { 114 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 115 int compressed = BP_GET_PSIZE(bp); 116 int uncompressed = BP_GET_UCSIZE(bp); 117 118 ASSERT(dmu_tx_is_syncing(tx)); 119 /* No block pointer => nothing to free */ 120 if (BP_IS_HOLE(bp)) 121 return; 122 123 ASSERT(used > 0); 124 if (ds == NULL) { 125 int err; 126 /* 127 * Account for the meta-objset space in its placeholder 128 * dataset. 129 */ 130 err = arc_free(pio, tx->tx_pool->dp_spa, 131 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 132 ASSERT(err == 0); 133 134 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 135 -used, -compressed, -uncompressed, tx); 136 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 137 return; 138 } 139 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 140 141 dmu_buf_will_dirty(ds->ds_dbuf, tx); 142 143 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 144 int err; 145 146 dprintf_bp(bp, "freeing: %s", ""); 147 err = arc_free(pio, tx->tx_pool->dp_spa, 148 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 149 ASSERT(err == 0); 150 151 mutex_enter(&ds->ds_lock); 152 /* XXX unique_bytes is not accurate for head datasets */ 153 /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */ 154 ds->ds_phys->ds_unique_bytes -= used; 155 mutex_exit(&ds->ds_lock); 156 dsl_dir_diduse_space(ds->ds_dir, 157 -used, -compressed, -uncompressed, tx); 158 } else { 159 dprintf_bp(bp, "putting on dead list: %s", ""); 160 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 161 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 162 if (ds->ds_phys->ds_prev_snap_obj != 0) { 163 ASSERT3U(ds->ds_prev->ds_object, ==, 164 ds->ds_phys->ds_prev_snap_obj); 165 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 166 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 167 ds->ds_object && bp->blk_birth > 168 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 169 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 170 mutex_enter(&ds->ds_prev->ds_lock); 171 ds->ds_prev->ds_phys->ds_unique_bytes += 172 used; 173 mutex_exit(&ds->ds_prev->ds_lock); 174 } 175 } 176 } 177 mutex_enter(&ds->ds_lock); 178 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 179 ds->ds_phys->ds_used_bytes -= used; 180 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 181 ds->ds_phys->ds_compressed_bytes -= compressed; 182 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 183 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 184 mutex_exit(&ds->ds_lock); 185 } 186 187 uint64_t 188 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 189 { 190 uint64_t trysnap = 0; 191 192 if (ds == NULL) 193 return (0); 194 /* 195 * The snapshot creation could fail, but that would cause an 196 * incorrect FALSE return, which would only result in an 197 * overestimation of the amount of space that an operation would 198 * consume, which is OK. 199 * 200 * There's also a small window where we could miss a pending 201 * snapshot, because we could set the sync task in the quiescing 202 * phase. So this should only be used as a guess. 203 */ 204 if (ds->ds_trysnap_txg > 205 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 206 trysnap = ds->ds_trysnap_txg; 207 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 208 } 209 210 int 211 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 212 { 213 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 214 } 215 216 /* ARGSUSED */ 217 static void 218 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 219 { 220 dsl_dataset_t *ds = dsv; 221 dsl_pool_t *dp = ds->ds_dir->dd_pool; 222 223 /* open_refcount == DS_REF_MAX when deleting */ 224 ASSERT(ds->ds_open_refcount == 0 || 225 ds->ds_open_refcount == DS_REF_MAX); 226 227 dprintf_ds(ds, "evicting %s\n", ""); 228 229 unique_remove(ds->ds_phys->ds_fsid_guid); 230 231 if (ds->ds_user_ptr != NULL) 232 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 233 234 if (ds->ds_prev) { 235 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 236 ds->ds_prev = NULL; 237 } 238 239 bplist_close(&ds->ds_deadlist); 240 dsl_dir_close(ds->ds_dir, ds); 241 242 if (list_link_active(&ds->ds_synced_link)) 243 list_remove(&dp->dp_synced_objsets, ds); 244 245 mutex_destroy(&ds->ds_lock); 246 mutex_destroy(&ds->ds_deadlist.bpl_lock); 247 248 kmem_free(ds, sizeof (dsl_dataset_t)); 249 } 250 251 static int 252 dsl_dataset_get_snapname(dsl_dataset_t *ds) 253 { 254 dsl_dataset_phys_t *headphys; 255 int err; 256 dmu_buf_t *headdbuf; 257 dsl_pool_t *dp = ds->ds_dir->dd_pool; 258 objset_t *mos = dp->dp_meta_objset; 259 260 if (ds->ds_snapname[0]) 261 return (0); 262 if (ds->ds_phys->ds_next_snap_obj == 0) 263 return (0); 264 265 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 266 FTAG, &headdbuf); 267 if (err) 268 return (err); 269 headphys = headdbuf->db_data; 270 err = zap_value_search(dp->dp_meta_objset, 271 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 272 dmu_buf_rele(headdbuf, FTAG); 273 return (err); 274 } 275 276 int 277 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, 278 int mode, void *tag, dsl_dataset_t **dsp) 279 { 280 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 281 objset_t *mos = dp->dp_meta_objset; 282 dmu_buf_t *dbuf; 283 dsl_dataset_t *ds; 284 int err; 285 286 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 287 dsl_pool_sync_context(dp)); 288 289 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 290 if (err) 291 return (err); 292 ds = dmu_buf_get_user(dbuf); 293 if (ds == NULL) { 294 dsl_dataset_t *winner; 295 296 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 297 ds->ds_dbuf = dbuf; 298 ds->ds_object = dsobj; 299 ds->ds_phys = dbuf->db_data; 300 301 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 302 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, 303 NULL); 304 305 err = bplist_open(&ds->ds_deadlist, 306 mos, ds->ds_phys->ds_deadlist_obj); 307 if (err == 0) { 308 err = dsl_dir_open_obj(dp, 309 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 310 } 311 if (err) { 312 /* 313 * we don't really need to close the blist if we 314 * just opened it. 315 */ 316 mutex_destroy(&ds->ds_lock); 317 mutex_destroy(&ds->ds_deadlist.bpl_lock); 318 kmem_free(ds, sizeof (dsl_dataset_t)); 319 dmu_buf_rele(dbuf, tag); 320 return (err); 321 } 322 323 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { 324 ds->ds_snapname[0] = '\0'; 325 if (ds->ds_phys->ds_prev_snap_obj) { 326 err = dsl_dataset_open_obj(dp, 327 ds->ds_phys->ds_prev_snap_obj, NULL, 328 DS_MODE_NONE, ds, &ds->ds_prev); 329 } 330 } else { 331 if (snapname) { 332 #ifdef ZFS_DEBUG 333 dsl_dataset_phys_t *headphys; 334 dmu_buf_t *headdbuf; 335 err = dmu_bonus_hold(mos, 336 ds->ds_dir->dd_phys->dd_head_dataset_obj, 337 FTAG, &headdbuf); 338 if (err == 0) { 339 headphys = headdbuf->db_data; 340 uint64_t foundobj; 341 err = zap_lookup(dp->dp_meta_objset, 342 headphys->ds_snapnames_zapobj, 343 snapname, sizeof (foundobj), 1, 344 &foundobj); 345 ASSERT3U(foundobj, ==, dsobj); 346 dmu_buf_rele(headdbuf, FTAG); 347 } 348 #endif 349 (void) strcat(ds->ds_snapname, snapname); 350 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 351 err = dsl_dataset_get_snapname(ds); 352 } 353 } 354 355 if (err == 0) { 356 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 357 dsl_dataset_evict); 358 } 359 if (err || winner) { 360 bplist_close(&ds->ds_deadlist); 361 if (ds->ds_prev) { 362 dsl_dataset_close(ds->ds_prev, 363 DS_MODE_NONE, ds); 364 } 365 dsl_dir_close(ds->ds_dir, ds); 366 mutex_destroy(&ds->ds_lock); 367 mutex_destroy(&ds->ds_deadlist.bpl_lock); 368 kmem_free(ds, sizeof (dsl_dataset_t)); 369 if (err) { 370 dmu_buf_rele(dbuf, tag); 371 return (err); 372 } 373 ds = winner; 374 } else { 375 uint64_t new = 376 unique_insert(ds->ds_phys->ds_fsid_guid); 377 if (new != ds->ds_phys->ds_fsid_guid) { 378 /* XXX it won't necessarily be synced... */ 379 ds->ds_phys->ds_fsid_guid = new; 380 } 381 } 382 } 383 ASSERT3P(ds->ds_dbuf, ==, dbuf); 384 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 385 386 mutex_enter(&ds->ds_lock); 387 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && 388 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && 389 !DS_MODE_IS_INCONSISTENT(mode)) || 390 (ds->ds_open_refcount + weight > DS_REF_MAX)) { 391 mutex_exit(&ds->ds_lock); 392 dsl_dataset_close(ds, DS_MODE_NONE, tag); 393 return (EBUSY); 394 } 395 ds->ds_open_refcount += weight; 396 mutex_exit(&ds->ds_lock); 397 398 *dsp = ds; 399 return (0); 400 } 401 402 int 403 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, 404 void *tag, dsl_dataset_t **dsp) 405 { 406 dsl_dir_t *dd; 407 dsl_pool_t *dp; 408 const char *tail; 409 uint64_t obj; 410 dsl_dataset_t *ds = NULL; 411 int err = 0; 412 413 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); 414 if (err) 415 return (err); 416 417 dp = dd->dd_pool; 418 obj = dd->dd_phys->dd_head_dataset_obj; 419 rw_enter(&dp->dp_config_rwlock, RW_READER); 420 if (obj == 0) { 421 /* A dataset with no associated objset */ 422 err = ENOENT; 423 goto out; 424 } 425 426 if (tail != NULL) { 427 objset_t *mos = dp->dp_meta_objset; 428 429 err = dsl_dataset_open_obj(dp, obj, NULL, 430 DS_MODE_NONE, tag, &ds); 431 if (err) 432 goto out; 433 obj = ds->ds_phys->ds_snapnames_zapobj; 434 dsl_dataset_close(ds, DS_MODE_NONE, tag); 435 ds = NULL; 436 437 if (tail[0] != '@') { 438 err = ENOENT; 439 goto out; 440 } 441 tail++; 442 443 /* Look for a snapshot */ 444 if (!DS_MODE_IS_READONLY(mode)) { 445 err = EROFS; 446 goto out; 447 } 448 dprintf("looking for snapshot '%s'\n", tail); 449 err = zap_lookup(mos, obj, tail, 8, 1, &obj); 450 if (err) 451 goto out; 452 } 453 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); 454 455 out: 456 rw_exit(&dp->dp_config_rwlock); 457 dsl_dir_close(dd, FTAG); 458 459 ASSERT3U((err == 0), ==, (ds != NULL)); 460 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ 461 462 *dsp = ds; 463 return (err); 464 } 465 466 int 467 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) 468 { 469 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); 470 } 471 472 void 473 dsl_dataset_name(dsl_dataset_t *ds, char *name) 474 { 475 if (ds == NULL) { 476 (void) strcpy(name, "mos"); 477 } else { 478 dsl_dir_name(ds->ds_dir, name); 479 VERIFY(0 == dsl_dataset_get_snapname(ds)); 480 if (ds->ds_snapname[0]) { 481 (void) strcat(name, "@"); 482 if (!MUTEX_HELD(&ds->ds_lock)) { 483 /* 484 * We use a "recursive" mutex so that we 485 * can call dprintf_ds() with ds_lock held. 486 */ 487 mutex_enter(&ds->ds_lock); 488 (void) strcat(name, ds->ds_snapname); 489 mutex_exit(&ds->ds_lock); 490 } else { 491 (void) strcat(name, ds->ds_snapname); 492 } 493 } 494 } 495 } 496 497 static int 498 dsl_dataset_namelen(dsl_dataset_t *ds) 499 { 500 int result; 501 502 if (ds == NULL) { 503 result = 3; /* "mos" */ 504 } else { 505 result = dsl_dir_namelen(ds->ds_dir); 506 VERIFY(0 == dsl_dataset_get_snapname(ds)); 507 if (ds->ds_snapname[0]) { 508 ++result; /* adding one for the @-sign */ 509 if (!MUTEX_HELD(&ds->ds_lock)) { 510 /* see dsl_datset_name */ 511 mutex_enter(&ds->ds_lock); 512 result += strlen(ds->ds_snapname); 513 mutex_exit(&ds->ds_lock); 514 } else { 515 result += strlen(ds->ds_snapname); 516 } 517 } 518 } 519 520 return (result); 521 } 522 523 void 524 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) 525 { 526 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 527 mutex_enter(&ds->ds_lock); 528 ASSERT3U(ds->ds_open_refcount, >=, weight); 529 ds->ds_open_refcount -= weight; 530 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", 531 mode, ds->ds_open_refcount); 532 mutex_exit(&ds->ds_lock); 533 534 dmu_buf_rele(ds->ds_dbuf, tag); 535 } 536 537 void 538 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) 539 { 540 objset_t *mos = dp->dp_meta_objset; 541 dmu_buf_t *dbuf; 542 dsl_dataset_phys_t *dsphys; 543 dsl_dataset_t *ds; 544 uint64_t dsobj; 545 dsl_dir_t *dd; 546 547 dsl_dir_create_root(mos, ddobjp, tx); 548 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); 549 550 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 551 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 552 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 553 dmu_buf_will_dirty(dbuf, tx); 554 dsphys = dbuf->db_data; 555 dsphys->ds_dir_obj = dd->dd_object; 556 dsphys->ds_fsid_guid = unique_create(); 557 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 558 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 559 sizeof (dsphys->ds_guid)); 560 dsphys->ds_snapnames_zapobj = 561 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 562 dsphys->ds_creation_time = gethrestime_sec(); 563 dsphys->ds_creation_txg = tx->tx_txg; 564 dsphys->ds_deadlist_obj = 565 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 566 dmu_buf_rele(dbuf, FTAG); 567 568 dmu_buf_will_dirty(dd->dd_dbuf, tx); 569 dd->dd_phys->dd_head_dataset_obj = dsobj; 570 dsl_dir_close(dd, FTAG); 571 572 VERIFY(0 == 573 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); 574 (void) dmu_objset_create_impl(dp->dp_spa, ds, 575 &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx); 576 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 577 } 578 579 uint64_t 580 dsl_dataset_create_sync(dsl_dir_t *pdd, 581 const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) 582 { 583 dsl_pool_t *dp = pdd->dd_pool; 584 dmu_buf_t *dbuf; 585 dsl_dataset_phys_t *dsphys; 586 uint64_t dsobj, ddobj; 587 objset_t *mos = dp->dp_meta_objset; 588 dsl_dir_t *dd; 589 590 ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp); 591 ASSERT(clone_parent == NULL || 592 clone_parent->ds_phys->ds_num_children > 0); 593 ASSERT(lastname[0] != '@'); 594 ASSERT(dmu_tx_is_syncing(tx)); 595 596 ddobj = dsl_dir_create_sync(pdd, lastname, tx); 597 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 598 599 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 600 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 601 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 602 dmu_buf_will_dirty(dbuf, tx); 603 dsphys = dbuf->db_data; 604 dsphys->ds_dir_obj = dd->dd_object; 605 dsphys->ds_fsid_guid = unique_create(); 606 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 607 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 608 sizeof (dsphys->ds_guid)); 609 dsphys->ds_snapnames_zapobj = 610 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 611 dsphys->ds_creation_time = gethrestime_sec(); 612 dsphys->ds_creation_txg = tx->tx_txg; 613 dsphys->ds_deadlist_obj = 614 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 615 if (clone_parent) { 616 dsphys->ds_prev_snap_obj = clone_parent->ds_object; 617 dsphys->ds_prev_snap_txg = 618 clone_parent->ds_phys->ds_creation_txg; 619 dsphys->ds_used_bytes = 620 clone_parent->ds_phys->ds_used_bytes; 621 dsphys->ds_compressed_bytes = 622 clone_parent->ds_phys->ds_compressed_bytes; 623 dsphys->ds_uncompressed_bytes = 624 clone_parent->ds_phys->ds_uncompressed_bytes; 625 dsphys->ds_bp = clone_parent->ds_phys->ds_bp; 626 627 dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); 628 clone_parent->ds_phys->ds_num_children++; 629 630 dmu_buf_will_dirty(dd->dd_dbuf, tx); 631 dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; 632 } 633 dmu_buf_rele(dbuf, FTAG); 634 635 dmu_buf_will_dirty(dd->dd_dbuf, tx); 636 dd->dd_phys->dd_head_dataset_obj = dsobj; 637 dsl_dir_close(dd, FTAG); 638 639 return (dsobj); 640 } 641 642 struct destroyarg { 643 dsl_sync_task_group_t *dstg; 644 char *snapname; 645 char *failed; 646 }; 647 648 static int 649 dsl_snapshot_destroy_one(char *name, void *arg) 650 { 651 struct destroyarg *da = arg; 652 dsl_dataset_t *ds; 653 char *cp; 654 int err; 655 656 (void) strcat(name, "@"); 657 (void) strcat(name, da->snapname); 658 err = dsl_dataset_open(name, 659 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 660 da->dstg, &ds); 661 cp = strchr(name, '@'); 662 *cp = '\0'; 663 if (err == ENOENT) 664 return (0); 665 if (err) { 666 (void) strcpy(da->failed, name); 667 return (err); 668 } 669 670 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 671 dsl_dataset_destroy_sync, ds, da->dstg, 0); 672 return (0); 673 } 674 675 /* 676 * Destroy 'snapname' in all descendants of 'fsname'. 677 */ 678 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 679 int 680 dsl_snapshots_destroy(char *fsname, char *snapname) 681 { 682 int err; 683 struct destroyarg da; 684 dsl_sync_task_t *dst; 685 spa_t *spa; 686 char *cp; 687 688 cp = strchr(fsname, '/'); 689 if (cp) { 690 *cp = '\0'; 691 err = spa_open(fsname, &spa, FTAG); 692 *cp = '/'; 693 } else { 694 err = spa_open(fsname, &spa, FTAG); 695 } 696 if (err) 697 return (err); 698 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 699 da.snapname = snapname; 700 da.failed = fsname; 701 702 err = dmu_objset_find(fsname, 703 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 704 705 if (err == 0) 706 err = dsl_sync_task_group_wait(da.dstg); 707 708 for (dst = list_head(&da.dstg->dstg_tasks); dst; 709 dst = list_next(&da.dstg->dstg_tasks, dst)) { 710 dsl_dataset_t *ds = dst->dst_arg1; 711 if (dst->dst_err) { 712 dsl_dataset_name(ds, fsname); 713 cp = strchr(fsname, '@'); 714 *cp = '\0'; 715 } 716 /* 717 * If it was successful, destroy_sync would have 718 * closed the ds 719 */ 720 if (err) 721 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg); 722 } 723 724 dsl_sync_task_group_destroy(da.dstg); 725 spa_close(spa, FTAG); 726 return (err); 727 } 728 729 int 730 dsl_dataset_destroy(const char *name) 731 { 732 int err; 733 dsl_sync_task_group_t *dstg; 734 objset_t *os; 735 dsl_dataset_t *ds; 736 dsl_dir_t *dd; 737 uint64_t obj; 738 739 if (strchr(name, '@')) { 740 /* Destroying a snapshot is simpler */ 741 err = dsl_dataset_open(name, 742 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 743 FTAG, &ds); 744 if (err) 745 return (err); 746 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 747 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 748 ds, FTAG, 0); 749 if (err) 750 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 751 return (err); 752 } 753 754 err = dmu_objset_open(name, DMU_OST_ANY, 755 DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); 756 if (err) 757 return (err); 758 ds = os->os->os_dsl_dataset; 759 dd = ds->ds_dir; 760 761 /* 762 * Check for errors and mark this ds as inconsistent, in 763 * case we crash while freeing the objects. 764 */ 765 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 766 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 767 if (err) { 768 dmu_objset_close(os); 769 return (err); 770 } 771 772 /* 773 * remove the objects in open context, so that we won't 774 * have too much to do in syncing context. 775 */ 776 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 777 ds->ds_phys->ds_prev_snap_txg)) { 778 dmu_tx_t *tx = dmu_tx_create(os); 779 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); 780 dmu_tx_hold_bonus(tx, obj); 781 err = dmu_tx_assign(tx, TXG_WAIT); 782 if (err) { 783 /* 784 * Perhaps there is not enough disk 785 * space. Just deal with it from 786 * dsl_dataset_destroy_sync(). 787 */ 788 dmu_tx_abort(tx); 789 continue; 790 } 791 VERIFY(0 == dmu_object_free(os, obj, tx)); 792 dmu_tx_commit(tx); 793 } 794 /* Make sure it's not dirty before we finish destroying it. */ 795 txg_wait_synced(dd->dd_pool, 0); 796 797 dmu_objset_close(os); 798 if (err != ESRCH) 799 return (err); 800 801 err = dsl_dataset_open(name, 802 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 803 FTAG, &ds); 804 if (err) 805 return (err); 806 807 err = dsl_dir_open(name, FTAG, &dd, NULL); 808 if (err) { 809 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 810 return (err); 811 } 812 813 /* 814 * Blow away the dsl_dir + head dataset. 815 */ 816 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 817 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 818 dsl_dataset_destroy_sync, ds, FTAG, 0); 819 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 820 dsl_dir_destroy_sync, dd, FTAG, 0); 821 err = dsl_sync_task_group_wait(dstg); 822 dsl_sync_task_group_destroy(dstg); 823 /* if it is successful, *destroy_sync will close the ds+dd */ 824 if (err) { 825 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 826 dsl_dir_close(dd, FTAG); 827 } 828 return (err); 829 } 830 831 int 832 dsl_dataset_rollback(dsl_dataset_t *ds) 833 { 834 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 835 return (dsl_sync_task_do(ds->ds_dir->dd_pool, 836 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, 837 ds, NULL, 0)); 838 } 839 840 void * 841 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 842 void *p, dsl_dataset_evict_func_t func) 843 { 844 void *old; 845 846 mutex_enter(&ds->ds_lock); 847 old = ds->ds_user_ptr; 848 if (old == NULL) { 849 ds->ds_user_ptr = p; 850 ds->ds_user_evict_func = func; 851 } 852 mutex_exit(&ds->ds_lock); 853 return (old); 854 } 855 856 void * 857 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 858 { 859 return (ds->ds_user_ptr); 860 } 861 862 863 blkptr_t * 864 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 865 { 866 return (&ds->ds_phys->ds_bp); 867 } 868 869 void 870 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 871 { 872 ASSERT(dmu_tx_is_syncing(tx)); 873 /* If it's the meta-objset, set dp_meta_rootbp */ 874 if (ds == NULL) { 875 tx->tx_pool->dp_meta_rootbp = *bp; 876 } else { 877 dmu_buf_will_dirty(ds->ds_dbuf, tx); 878 ds->ds_phys->ds_bp = *bp; 879 } 880 } 881 882 spa_t * 883 dsl_dataset_get_spa(dsl_dataset_t *ds) 884 { 885 return (ds->ds_dir->dd_pool->dp_spa); 886 } 887 888 void 889 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 890 { 891 dsl_pool_t *dp; 892 893 if (ds == NULL) /* this is the meta-objset */ 894 return; 895 896 ASSERT(ds->ds_user_ptr != NULL); 897 898 if (ds->ds_phys->ds_next_snap_obj != 0) 899 panic("dirtying snapshot!"); 900 901 dp = ds->ds_dir->dd_pool; 902 903 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 904 /* up the hold count until we can be written out */ 905 dmu_buf_add_ref(ds->ds_dbuf, ds); 906 } 907 } 908 909 struct killarg { 910 uint64_t *usedp; 911 uint64_t *compressedp; 912 uint64_t *uncompressedp; 913 zio_t *zio; 914 dmu_tx_t *tx; 915 }; 916 917 static int 918 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 919 { 920 struct killarg *ka = arg; 921 blkptr_t *bp = &bc->bc_blkptr; 922 923 ASSERT3U(bc->bc_errno, ==, 0); 924 925 /* 926 * Since this callback is not called concurrently, no lock is 927 * needed on the accounting values. 928 */ 929 *ka->usedp += bp_get_dasize(spa, bp); 930 *ka->compressedp += BP_GET_PSIZE(bp); 931 *ka->uncompressedp += BP_GET_UCSIZE(bp); 932 /* XXX check for EIO? */ 933 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, 934 ARC_NOWAIT); 935 return (0); 936 } 937 938 /* ARGSUSED */ 939 static int 940 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) 941 { 942 dsl_dataset_t *ds = arg1; 943 944 /* 945 * There must be a previous snapshot. I suppose we could roll 946 * it back to being empty (and re-initialize the upper (ZPL) 947 * layer). But for now there's no way to do this via the user 948 * interface. 949 */ 950 if (ds->ds_phys->ds_prev_snap_txg == 0) 951 return (EINVAL); 952 953 /* 954 * This must not be a snapshot. 955 */ 956 if (ds->ds_phys->ds_next_snap_obj != 0) 957 return (EINVAL); 958 959 /* 960 * If we made changes this txg, traverse_dsl_dataset won't find 961 * them. Try again. 962 */ 963 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 964 return (EAGAIN); 965 966 return (0); 967 } 968 969 /* ARGSUSED */ 970 static void 971 dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 972 { 973 dsl_dataset_t *ds = arg1; 974 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 975 976 dmu_buf_will_dirty(ds->ds_dbuf, tx); 977 978 /* Zero out the deadlist. */ 979 bplist_close(&ds->ds_deadlist); 980 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 981 ds->ds_phys->ds_deadlist_obj = 982 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 983 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 984 ds->ds_phys->ds_deadlist_obj)); 985 986 { 987 /* Free blkptrs that we gave birth to */ 988 zio_t *zio; 989 uint64_t used = 0, compressed = 0, uncompressed = 0; 990 struct killarg ka; 991 992 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 993 ZIO_FLAG_MUSTSUCCEED); 994 ka.usedp = &used; 995 ka.compressedp = &compressed; 996 ka.uncompressedp = &uncompressed; 997 ka.zio = zio; 998 ka.tx = tx; 999 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1000 ADVANCE_POST, kill_blkptr, &ka); 1001 (void) zio_wait(zio); 1002 1003 dsl_dir_diduse_space(ds->ds_dir, 1004 -used, -compressed, -uncompressed, tx); 1005 } 1006 1007 /* Change our contents to that of the prev snapshot */ 1008 ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); 1009 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 1010 ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; 1011 ds->ds_phys->ds_compressed_bytes = 1012 ds->ds_prev->ds_phys->ds_compressed_bytes; 1013 ds->ds_phys->ds_uncompressed_bytes = 1014 ds->ds_prev->ds_phys->ds_uncompressed_bytes; 1015 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; 1016 ds->ds_phys->ds_unique_bytes = 0; 1017 1018 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1019 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1020 ds->ds_prev->ds_phys->ds_unique_bytes = 0; 1021 } 1022 1023 spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa, 1024 tx, cr, "dataset = %llu", ds->ds_object); 1025 } 1026 1027 /* ARGSUSED */ 1028 static int 1029 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1030 { 1031 dsl_dataset_t *ds = arg1; 1032 1033 /* 1034 * Can't delete a head dataset if there are snapshots of it. 1035 * (Except if the only snapshots are from the branch we cloned 1036 * from.) 1037 */ 1038 if (ds->ds_prev != NULL && 1039 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1040 return (EINVAL); 1041 1042 return (0); 1043 } 1044 1045 /* ARGSUSED */ 1046 static void 1047 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1048 { 1049 dsl_dataset_t *ds = arg1; 1050 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1051 1052 /* Mark it as inconsistent on-disk, in case we crash */ 1053 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1054 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1055 1056 spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1057 cr, "dataset = %llu", ds->ds_object); 1058 } 1059 1060 /* ARGSUSED */ 1061 static int 1062 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1063 { 1064 dsl_dataset_t *ds = arg1; 1065 1066 /* Can't delete a branch point. */ 1067 if (ds->ds_phys->ds_num_children > 1) 1068 return (EEXIST); 1069 1070 /* 1071 * Can't delete a head dataset if there are snapshots of it. 1072 * (Except if the only snapshots are from the branch we cloned 1073 * from.) 1074 */ 1075 if (ds->ds_prev != NULL && 1076 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1077 return (EINVAL); 1078 1079 /* 1080 * If we made changes this txg, traverse_dsl_dataset won't find 1081 * them. Try again. 1082 */ 1083 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1084 return (EAGAIN); 1085 1086 /* XXX we should do some i/o error checking... */ 1087 return (0); 1088 } 1089 1090 static void 1091 dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 1092 { 1093 dsl_dataset_t *ds = arg1; 1094 uint64_t used = 0, compressed = 0, uncompressed = 0; 1095 zio_t *zio; 1096 int err; 1097 int after_branch_point = FALSE; 1098 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1099 objset_t *mos = dp->dp_meta_objset; 1100 dsl_dataset_t *ds_prev = NULL; 1101 uint64_t obj; 1102 1103 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 1104 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 1105 ASSERT(ds->ds_prev == NULL || 1106 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1107 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1108 1109 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1110 1111 obj = ds->ds_object; 1112 1113 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1114 if (ds->ds_prev) { 1115 ds_prev = ds->ds_prev; 1116 } else { 1117 VERIFY(0 == dsl_dataset_open_obj(dp, 1118 ds->ds_phys->ds_prev_snap_obj, NULL, 1119 DS_MODE_NONE, FTAG, &ds_prev)); 1120 } 1121 after_branch_point = 1122 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1123 1124 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1125 if (after_branch_point && 1126 ds->ds_phys->ds_next_snap_obj == 0) { 1127 /* This clone is toast. */ 1128 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1129 ds_prev->ds_phys->ds_num_children--; 1130 } else if (!after_branch_point) { 1131 ds_prev->ds_phys->ds_next_snap_obj = 1132 ds->ds_phys->ds_next_snap_obj; 1133 } 1134 } 1135 1136 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1137 1138 if (ds->ds_phys->ds_next_snap_obj != 0) { 1139 blkptr_t bp; 1140 dsl_dataset_t *ds_next; 1141 uint64_t itor = 0; 1142 1143 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1144 1145 VERIFY(0 == dsl_dataset_open_obj(dp, 1146 ds->ds_phys->ds_next_snap_obj, NULL, 1147 DS_MODE_NONE, FTAG, &ds_next)); 1148 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1149 1150 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1151 ds_next->ds_phys->ds_prev_snap_obj = 1152 ds->ds_phys->ds_prev_snap_obj; 1153 ds_next->ds_phys->ds_prev_snap_txg = 1154 ds->ds_phys->ds_prev_snap_txg; 1155 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1156 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1157 1158 /* 1159 * Transfer to our deadlist (which will become next's 1160 * new deadlist) any entries from next's current 1161 * deadlist which were born before prev, and free the 1162 * other entries. 1163 * 1164 * XXX we're doing this long task with the config lock held 1165 */ 1166 while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1167 &bp) == 0) { 1168 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1169 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1170 &bp, tx)); 1171 if (ds_prev && !after_branch_point && 1172 bp.blk_birth > 1173 ds_prev->ds_phys->ds_prev_snap_txg) { 1174 ds_prev->ds_phys->ds_unique_bytes += 1175 bp_get_dasize(dp->dp_spa, &bp); 1176 } 1177 } else { 1178 used += bp_get_dasize(dp->dp_spa, &bp); 1179 compressed += BP_GET_PSIZE(&bp); 1180 uncompressed += BP_GET_UCSIZE(&bp); 1181 /* XXX check return value? */ 1182 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1183 &bp, NULL, NULL, ARC_NOWAIT); 1184 } 1185 } 1186 1187 /* free next's deadlist */ 1188 bplist_close(&ds_next->ds_deadlist); 1189 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1190 1191 /* set next's deadlist to our deadlist */ 1192 ds_next->ds_phys->ds_deadlist_obj = 1193 ds->ds_phys->ds_deadlist_obj; 1194 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1195 ds_next->ds_phys->ds_deadlist_obj)); 1196 ds->ds_phys->ds_deadlist_obj = 0; 1197 1198 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1199 /* 1200 * Update next's unique to include blocks which 1201 * were previously shared by only this snapshot 1202 * and it. Those blocks will be born after the 1203 * prev snap and before this snap, and will have 1204 * died after the next snap and before the one 1205 * after that (ie. be on the snap after next's 1206 * deadlist). 1207 * 1208 * XXX we're doing this long task with the 1209 * config lock held 1210 */ 1211 dsl_dataset_t *ds_after_next; 1212 1213 VERIFY(0 == dsl_dataset_open_obj(dp, 1214 ds_next->ds_phys->ds_next_snap_obj, NULL, 1215 DS_MODE_NONE, FTAG, &ds_after_next)); 1216 itor = 0; 1217 while (bplist_iterate(&ds_after_next->ds_deadlist, 1218 &itor, &bp) == 0) { 1219 if (bp.blk_birth > 1220 ds->ds_phys->ds_prev_snap_txg && 1221 bp.blk_birth <= 1222 ds->ds_phys->ds_creation_txg) { 1223 ds_next->ds_phys->ds_unique_bytes += 1224 bp_get_dasize(dp->dp_spa, &bp); 1225 } 1226 } 1227 1228 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1229 ASSERT3P(ds_next->ds_prev, ==, NULL); 1230 } else { 1231 /* 1232 * It would be nice to update the head dataset's 1233 * unique. To do so we would have to traverse 1234 * it for blocks born after ds_prev, which is 1235 * pretty expensive just to maintain something 1236 * for debugging purposes. 1237 */ 1238 ASSERT3P(ds_next->ds_prev, ==, ds); 1239 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1240 ds_next); 1241 if (ds_prev) { 1242 VERIFY(0 == dsl_dataset_open_obj(dp, 1243 ds->ds_phys->ds_prev_snap_obj, NULL, 1244 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); 1245 } else { 1246 ds_next->ds_prev = NULL; 1247 } 1248 } 1249 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1250 1251 /* 1252 * NB: unique_bytes is not accurate for head objsets 1253 * because we don't update it when we delete the most 1254 * recent snapshot -- see above comment. 1255 */ 1256 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1257 } else { 1258 /* 1259 * There's no next snapshot, so this is a head dataset. 1260 * Destroy the deadlist. Unless it's a clone, the 1261 * deadlist should be empty. (If it's a clone, it's 1262 * safe to ignore the deadlist contents.) 1263 */ 1264 struct killarg ka; 1265 1266 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1267 bplist_close(&ds->ds_deadlist); 1268 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1269 ds->ds_phys->ds_deadlist_obj = 0; 1270 1271 /* 1272 * Free everything that we point to (that's born after 1273 * the previous snapshot, if we are a clone) 1274 * 1275 * XXX we're doing this long task with the config lock held 1276 */ 1277 ka.usedp = &used; 1278 ka.compressedp = &compressed; 1279 ka.uncompressedp = &uncompressed; 1280 ka.zio = zio; 1281 ka.tx = tx; 1282 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1283 ADVANCE_POST, kill_blkptr, &ka); 1284 ASSERT3U(err, ==, 0); 1285 } 1286 1287 err = zio_wait(zio); 1288 ASSERT3U(err, ==, 0); 1289 1290 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); 1291 1292 if (ds->ds_phys->ds_snapnames_zapobj) { 1293 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1294 ASSERT(err == 0); 1295 } 1296 1297 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1298 /* Erase the link in the dataset */ 1299 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1300 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1301 /* 1302 * dsl_dir_sync_destroy() called us, they'll destroy 1303 * the dataset. 1304 */ 1305 } else { 1306 /* remove from snapshot namespace */ 1307 dsl_dataset_t *ds_head; 1308 VERIFY(0 == dsl_dataset_open_obj(dp, 1309 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, 1310 DS_MODE_NONE, FTAG, &ds_head)); 1311 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1312 #ifdef ZFS_DEBUG 1313 { 1314 uint64_t val; 1315 err = zap_lookup(mos, 1316 ds_head->ds_phys->ds_snapnames_zapobj, 1317 ds->ds_snapname, 8, 1, &val); 1318 ASSERT3U(err, ==, 0); 1319 ASSERT3U(val, ==, obj); 1320 } 1321 #endif 1322 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1323 ds->ds_snapname, tx); 1324 ASSERT(err == 0); 1325 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1326 } 1327 1328 if (ds_prev && ds->ds_prev != ds_prev) 1329 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1330 1331 spa_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1332 spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx, 1333 cr, "dataset = %llu", ds->ds_object); 1334 1335 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 1336 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1337 1338 } 1339 1340 /* ARGSUSED */ 1341 int 1342 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1343 { 1344 objset_t *os = arg1; 1345 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1346 const char *snapname = arg2; 1347 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1348 int err; 1349 uint64_t value; 1350 1351 /* 1352 * We don't allow multiple snapshots of the same txg. If there 1353 * is already one, try again. 1354 */ 1355 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1356 return (EAGAIN); 1357 1358 /* 1359 * Check for conflicting name snapshot name. 1360 */ 1361 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1362 snapname, 8, 1, &value); 1363 if (err == 0) 1364 return (EEXIST); 1365 if (err != ENOENT) 1366 return (err); 1367 1368 /* 1369 * Check that the dataset's name is not too long. Name consists 1370 * of the dataset's length + 1 for the @-sign + snapshot name's length 1371 */ 1372 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 1373 return (ENAMETOOLONG); 1374 1375 ds->ds_trysnap_txg = tx->tx_txg; 1376 return (0); 1377 } 1378 1379 void 1380 dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1381 { 1382 objset_t *os = arg1; 1383 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1384 const char *snapname = arg2; 1385 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1386 dmu_buf_t *dbuf; 1387 dsl_dataset_phys_t *dsphys; 1388 uint64_t dsobj; 1389 objset_t *mos = dp->dp_meta_objset; 1390 int err; 1391 1392 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1393 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1394 1395 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1396 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1397 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1398 dmu_buf_will_dirty(dbuf, tx); 1399 dsphys = dbuf->db_data; 1400 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1401 dsphys->ds_fsid_guid = unique_create(); 1402 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 1403 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1404 sizeof (dsphys->ds_guid)); 1405 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1406 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1407 dsphys->ds_next_snap_obj = ds->ds_object; 1408 dsphys->ds_num_children = 1; 1409 dsphys->ds_creation_time = gethrestime_sec(); 1410 dsphys->ds_creation_txg = tx->tx_txg; 1411 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1412 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1413 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1414 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1415 dsphys->ds_flags = ds->ds_phys->ds_flags; 1416 dsphys->ds_bp = ds->ds_phys->ds_bp; 1417 dmu_buf_rele(dbuf, FTAG); 1418 1419 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1420 if (ds->ds_prev) { 1421 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1422 ds->ds_object || 1423 ds->ds_prev->ds_phys->ds_num_children > 1); 1424 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1425 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1426 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1427 ds->ds_prev->ds_phys->ds_creation_txg); 1428 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1429 } 1430 } 1431 1432 bplist_close(&ds->ds_deadlist); 1433 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1434 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); 1435 ds->ds_phys->ds_prev_snap_obj = dsobj; 1436 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; 1437 ds->ds_phys->ds_unique_bytes = 0; 1438 ds->ds_phys->ds_deadlist_obj = 1439 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1440 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1441 ds->ds_phys->ds_deadlist_obj)); 1442 1443 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1444 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1445 snapname, 8, 1, &dsobj, tx); 1446 ASSERT(err == 0); 1447 1448 if (ds->ds_prev) 1449 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1450 VERIFY(0 == dsl_dataset_open_obj(dp, 1451 ds->ds_phys->ds_prev_snap_obj, snapname, 1452 DS_MODE_NONE, ds, &ds->ds_prev)); 1453 1454 spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, 1455 "dataset = %llu", ds->ds_object); 1456 } 1457 1458 void 1459 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1460 { 1461 ASSERT(dmu_tx_is_syncing(tx)); 1462 ASSERT(ds->ds_user_ptr != NULL); 1463 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1464 1465 dsl_dir_dirty(ds->ds_dir, tx); 1466 dmu_objset_sync(ds->ds_user_ptr, zio, tx); 1467 /* Unneeded? bplist_close(&ds->ds_deadlist); */ 1468 } 1469 1470 void 1471 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1472 { 1473 dsl_dir_stats(ds->ds_dir, nv); 1474 1475 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1476 ds->ds_phys->ds_creation_time); 1477 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1478 ds->ds_phys->ds_creation_txg); 1479 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, 1480 ds->ds_phys->ds_used_bytes); 1481 1482 if (ds->ds_phys->ds_next_snap_obj) { 1483 /* 1484 * This is a snapshot; override the dd's space used with 1485 * our unique space and compression ratio. 1486 */ 1487 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1488 ds->ds_phys->ds_unique_bytes); 1489 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 1490 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1491 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1492 ds->ds_phys->ds_compressed_bytes)); 1493 } 1494 } 1495 1496 void 1497 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1498 { 1499 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1500 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1501 if (ds->ds_phys->ds_next_snap_obj) { 1502 stat->dds_is_snapshot = B_TRUE; 1503 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1504 } 1505 1506 /* clone origin is really a dsl_dir thing... */ 1507 if (ds->ds_dir->dd_phys->dd_clone_parent_obj) { 1508 dsl_dataset_t *ods; 1509 1510 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 1511 VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, 1512 ds->ds_dir->dd_phys->dd_clone_parent_obj, 1513 NULL, DS_MODE_NONE, FTAG, &ods)); 1514 dsl_dataset_name(ods, stat->dds_clone_of); 1515 dsl_dataset_close(ods, DS_MODE_NONE, FTAG); 1516 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 1517 } 1518 } 1519 1520 uint64_t 1521 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1522 { 1523 return (ds->ds_phys->ds_fsid_guid); 1524 } 1525 1526 void 1527 dsl_dataset_space(dsl_dataset_t *ds, 1528 uint64_t *refdbytesp, uint64_t *availbytesp, 1529 uint64_t *usedobjsp, uint64_t *availobjsp) 1530 { 1531 *refdbytesp = ds->ds_phys->ds_used_bytes; 1532 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1533 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1534 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1535 } 1536 1537 /* ARGSUSED */ 1538 static int 1539 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 1540 { 1541 dsl_dataset_t *ds = arg1; 1542 char *newsnapname = arg2; 1543 dsl_dir_t *dd = ds->ds_dir; 1544 objset_t *mos = dd->dd_pool->dp_meta_objset; 1545 dsl_dataset_t *hds; 1546 uint64_t val; 1547 int err; 1548 1549 err = dsl_dataset_open_obj(dd->dd_pool, 1550 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); 1551 if (err) 1552 return (err); 1553 1554 /* new name better not be in use */ 1555 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, 1556 newsnapname, 8, 1, &val); 1557 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1558 1559 if (err == 0) 1560 err = EEXIST; 1561 else if (err == ENOENT) 1562 err = 0; 1563 1564 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1565 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 1566 err = ENAMETOOLONG; 1567 1568 return (err); 1569 } 1570 1571 static void 1572 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 1573 cred_t *cr, dmu_tx_t *tx) 1574 { 1575 dsl_dataset_t *ds = arg1; 1576 const char *newsnapname = arg2; 1577 dsl_dir_t *dd = ds->ds_dir; 1578 objset_t *mos = dd->dd_pool->dp_meta_objset; 1579 dsl_dataset_t *hds; 1580 int err; 1581 1582 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 1583 1584 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1585 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); 1586 1587 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1588 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, 1589 ds->ds_snapname, tx); 1590 ASSERT3U(err, ==, 0); 1591 mutex_enter(&ds->ds_lock); 1592 (void) strcpy(ds->ds_snapname, newsnapname); 1593 mutex_exit(&ds->ds_lock); 1594 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 1595 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 1596 ASSERT3U(err, ==, 0); 1597 1598 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 1599 cr, "dataset = %llu", ds->ds_object); 1600 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1601 } 1602 1603 struct renamearg { 1604 dsl_sync_task_group_t *dstg; 1605 char failed[MAXPATHLEN]; 1606 char *oldsnap; 1607 char *newsnap; 1608 }; 1609 1610 static int 1611 dsl_snapshot_rename_one(char *name, void *arg) 1612 { 1613 struct renamearg *ra = arg; 1614 dsl_dataset_t *ds = NULL; 1615 char *cp; 1616 int err; 1617 1618 cp = name + strlen(name); 1619 *cp = '@'; 1620 (void) strcpy(cp + 1, ra->oldsnap); 1621 1622 /* 1623 * For recursive snapshot renames the parent won't be changing 1624 * so we just pass name for both the to/from argument. 1625 */ 1626 if (err = zfs_secpolicy_rename_perms(name, name, CRED())) { 1627 (void) strcpy(ra->failed, name); 1628 return (err); 1629 } 1630 1631 err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD, 1632 ra->dstg, &ds); 1633 if (err == ENOENT) { 1634 *cp = '\0'; 1635 return (0); 1636 } 1637 if (err) { 1638 (void) strcpy(ra->failed, name); 1639 *cp = '\0'; 1640 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1641 return (err); 1642 } 1643 1644 #ifdef _KERNEL 1645 /* for all filesystems undergoing rename, we'll need to unmount it */ 1646 (void) zfs_unmount_snap(name, NULL); 1647 #endif 1648 1649 *cp = '\0'; 1650 1651 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 1652 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 1653 1654 return (0); 1655 } 1656 1657 static int 1658 dsl_recursive_rename(char *oldname, const char *newname) 1659 { 1660 int err; 1661 struct renamearg *ra; 1662 dsl_sync_task_t *dst; 1663 spa_t *spa; 1664 char *cp, *fsname = spa_strdup(oldname); 1665 int len = strlen(oldname); 1666 1667 /* truncate the snapshot name to get the fsname */ 1668 cp = strchr(fsname, '@'); 1669 *cp = '\0'; 1670 1671 cp = strchr(fsname, '/'); 1672 if (cp) { 1673 *cp = '\0'; 1674 err = spa_open(fsname, &spa, FTAG); 1675 *cp = '/'; 1676 } else { 1677 err = spa_open(fsname, &spa, FTAG); 1678 } 1679 if (err) { 1680 kmem_free(fsname, len + 1); 1681 return (err); 1682 } 1683 ra = kmem_alloc(sizeof (struct renamearg), KM_SLEEP); 1684 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 1685 1686 ra->oldsnap = strchr(oldname, '@') + 1; 1687 ra->newsnap = strchr(newname, '@') + 1; 1688 *ra->failed = '\0'; 1689 1690 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 1691 DS_FIND_CHILDREN); 1692 kmem_free(fsname, len + 1); 1693 1694 if (err == 0) { 1695 err = dsl_sync_task_group_wait(ra->dstg); 1696 } 1697 1698 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 1699 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 1700 dsl_dataset_t *ds = dst->dst_arg1; 1701 if (dst->dst_err) { 1702 dsl_dir_name(ds->ds_dir, ra->failed); 1703 (void) strcat(ra->failed, "@"); 1704 (void) strcat(ra->failed, ra->newsnap); 1705 } 1706 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1707 } 1708 1709 if (err) 1710 (void) strcpy(oldname, ra->failed); 1711 1712 dsl_sync_task_group_destroy(ra->dstg); 1713 kmem_free(ra, sizeof (struct renamearg)); 1714 spa_close(spa, FTAG); 1715 return (err); 1716 } 1717 1718 static int 1719 dsl_valid_rename(char *oldname, void *arg) 1720 { 1721 int delta = *(int *)arg; 1722 1723 if (strlen(oldname) + delta >= MAXNAMELEN) 1724 return (ENAMETOOLONG); 1725 1726 return (0); 1727 } 1728 1729 #pragma weak dmu_objset_rename = dsl_dataset_rename 1730 int 1731 dsl_dataset_rename(char *oldname, const char *newname, 1732 boolean_t recursive) 1733 { 1734 dsl_dir_t *dd; 1735 dsl_dataset_t *ds; 1736 const char *tail; 1737 int err; 1738 1739 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 1740 if (err) 1741 return (err); 1742 if (tail == NULL) { 1743 int delta = strlen(newname) - strlen(oldname); 1744 1745 /* if we're growing, validate child size lengths */ 1746 if (delta > 0) 1747 err = dmu_objset_find(oldname, dsl_valid_rename, 1748 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 1749 1750 if (!err) 1751 err = dsl_dir_rename(dd, newname); 1752 dsl_dir_close(dd, FTAG); 1753 return (err); 1754 } 1755 if (tail[0] != '@') { 1756 /* the name ended in a nonexistant component */ 1757 dsl_dir_close(dd, FTAG); 1758 return (ENOENT); 1759 } 1760 1761 dsl_dir_close(dd, FTAG); 1762 1763 /* new name must be snapshot in same filesystem */ 1764 tail = strchr(newname, '@'); 1765 if (tail == NULL) 1766 return (EINVAL); 1767 tail++; 1768 if (strncmp(oldname, newname, tail - newname) != 0) 1769 return (EXDEV); 1770 1771 if (recursive) { 1772 err = dsl_recursive_rename(oldname, newname); 1773 } else { 1774 err = dsl_dataset_open(oldname, 1775 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); 1776 if (err) 1777 return (err); 1778 1779 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1780 dsl_dataset_snapshot_rename_check, 1781 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 1782 1783 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 1784 } 1785 1786 return (err); 1787 } 1788 1789 struct promotearg { 1790 uint64_t used, comp, uncomp, unique; 1791 uint64_t newnext_obj, snapnames_obj; 1792 }; 1793 1794 /* ARGSUSED */ 1795 static int 1796 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 1797 { 1798 dsl_dataset_t *hds = arg1; 1799 struct promotearg *pa = arg2; 1800 dsl_dir_t *dd = hds->ds_dir; 1801 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1802 dsl_dir_t *pdd = NULL; 1803 dsl_dataset_t *ds = NULL; 1804 dsl_dataset_t *pivot_ds = NULL; 1805 dsl_dataset_t *newnext_ds = NULL; 1806 int err; 1807 char *name = NULL; 1808 uint64_t itor = 0; 1809 blkptr_t bp; 1810 1811 bzero(pa, sizeof (*pa)); 1812 1813 /* Check that it is a clone */ 1814 if (dd->dd_phys->dd_clone_parent_obj == 0) 1815 return (EINVAL); 1816 1817 /* Since this is so expensive, don't do the preliminary check */ 1818 if (!dmu_tx_is_syncing(tx)) 1819 return (0); 1820 1821 if (err = dsl_dataset_open_obj(dp, 1822 dd->dd_phys->dd_clone_parent_obj, 1823 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) 1824 goto out; 1825 pdd = pivot_ds->ds_dir; 1826 1827 { 1828 dsl_dataset_t *phds; 1829 if (err = dsl_dataset_open_obj(dd->dd_pool, 1830 pdd->dd_phys->dd_head_dataset_obj, 1831 NULL, DS_MODE_NONE, FTAG, &phds)) 1832 goto out; 1833 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; 1834 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); 1835 } 1836 1837 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 1838 err = EXDEV; 1839 goto out; 1840 } 1841 1842 /* find pivot point's new next ds */ 1843 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, 1844 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); 1845 while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { 1846 dsl_dataset_t *prev; 1847 1848 if (err = dsl_dataset_open_obj(dd->dd_pool, 1849 newnext_ds->ds_phys->ds_prev_snap_obj, 1850 NULL, DS_MODE_NONE, FTAG, &prev)) 1851 goto out; 1852 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1853 newnext_ds = prev; 1854 } 1855 pa->newnext_obj = newnext_ds->ds_object; 1856 1857 /* compute pivot point's new unique space */ 1858 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, 1859 &itor, &bp)) == 0) { 1860 if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) 1861 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); 1862 } 1863 if (err != ENOENT) 1864 goto out; 1865 1866 /* Walk the snapshots that we are moving */ 1867 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1868 ds = pivot_ds; 1869 /* CONSTCOND */ 1870 while (TRUE) { 1871 uint64_t val, dlused, dlcomp, dluncomp; 1872 dsl_dataset_t *prev; 1873 1874 /* Check that the snapshot name does not conflict */ 1875 dsl_dataset_name(ds, name); 1876 err = zap_lookup(dd->dd_pool->dp_meta_objset, 1877 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1878 8, 1, &val); 1879 if (err != ENOENT) { 1880 if (err == 0) 1881 err = EEXIST; 1882 goto out; 1883 } 1884 1885 /* 1886 * compute space to transfer. Each snapshot gave birth to: 1887 * (my used) - (prev's used) + (deadlist's used) 1888 */ 1889 pa->used += ds->ds_phys->ds_used_bytes; 1890 pa->comp += ds->ds_phys->ds_compressed_bytes; 1891 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; 1892 1893 /* If we reach the first snapshot, we're done. */ 1894 if (ds->ds_phys->ds_prev_snap_obj == 0) 1895 break; 1896 1897 if (err = bplist_space(&ds->ds_deadlist, 1898 &dlused, &dlcomp, &dluncomp)) 1899 goto out; 1900 if (err = dsl_dataset_open_obj(dd->dd_pool, 1901 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1902 FTAG, &prev)) 1903 goto out; 1904 pa->used += dlused - prev->ds_phys->ds_used_bytes; 1905 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; 1906 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; 1907 1908 /* 1909 * We could be a clone of a clone. If we reach our 1910 * parent's branch point, we're done. 1911 */ 1912 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1913 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1914 break; 1915 } 1916 if (ds != pivot_ds) 1917 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1918 ds = prev; 1919 } 1920 1921 /* Check that there is enough space here */ 1922 err = dsl_dir_transfer_possible(pdd, dd, pa->used); 1923 1924 out: 1925 if (ds && ds != pivot_ds) 1926 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1927 if (pivot_ds) 1928 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 1929 if (newnext_ds) 1930 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1931 if (name) 1932 kmem_free(name, MAXPATHLEN); 1933 return (err); 1934 } 1935 1936 static void 1937 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1938 { 1939 dsl_dataset_t *hds = arg1; 1940 struct promotearg *pa = arg2; 1941 dsl_dir_t *dd = hds->ds_dir; 1942 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1943 dsl_dir_t *pdd = NULL; 1944 dsl_dataset_t *ds, *pivot_ds; 1945 char *name; 1946 1947 ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); 1948 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 1949 1950 VERIFY(0 == dsl_dataset_open_obj(dp, 1951 dd->dd_phys->dd_clone_parent_obj, 1952 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); 1953 /* 1954 * We need to explicitly open pdd, since pivot_ds's pdd will be 1955 * changing. 1956 */ 1957 VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object, 1958 NULL, FTAG, &pdd)); 1959 1960 /* move snapshots to this dir */ 1961 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1962 ds = pivot_ds; 1963 /* CONSTCOND */ 1964 while (TRUE) { 1965 dsl_dataset_t *prev; 1966 1967 /* move snap name entry */ 1968 dsl_dataset_name(ds, name); 1969 VERIFY(0 == zap_remove(dp->dp_meta_objset, 1970 pa->snapnames_obj, ds->ds_snapname, tx)); 1971 VERIFY(0 == zap_add(dp->dp_meta_objset, 1972 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1973 8, 1, &ds->ds_object, tx)); 1974 1975 /* change containing dsl_dir */ 1976 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1977 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); 1978 ds->ds_phys->ds_dir_obj = dd->dd_object; 1979 ASSERT3P(ds->ds_dir, ==, pdd); 1980 dsl_dir_close(ds->ds_dir, ds); 1981 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 1982 NULL, ds, &ds->ds_dir)); 1983 1984 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 1985 1986 if (ds->ds_phys->ds_prev_snap_obj == 0) 1987 break; 1988 1989 VERIFY(0 == dsl_dataset_open_obj(dp, 1990 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1991 FTAG, &prev)); 1992 1993 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1994 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1995 break; 1996 } 1997 if (ds != pivot_ds) 1998 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1999 ds = prev; 2000 } 2001 if (ds != pivot_ds) 2002 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2003 2004 /* change pivot point's next snap */ 2005 dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); 2006 pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; 2007 2008 /* change clone_parent-age */ 2009 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2010 ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); 2011 dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; 2012 dmu_buf_will_dirty(pdd->dd_dbuf, tx); 2013 pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; 2014 2015 /* change space accounting */ 2016 dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); 2017 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); 2018 pivot_ds->ds_phys->ds_unique_bytes = pa->unique; 2019 2020 /* log history record */ 2021 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2022 cr, "dataset = %llu", ds->ds_object); 2023 2024 dsl_dir_close(pdd, FTAG); 2025 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 2026 kmem_free(name, MAXPATHLEN); 2027 } 2028 2029 int 2030 dsl_dataset_promote(const char *name) 2031 { 2032 dsl_dataset_t *ds; 2033 int err; 2034 dmu_object_info_t doi; 2035 struct promotearg pa; 2036 2037 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 2038 if (err) 2039 return (err); 2040 2041 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 2042 ds->ds_phys->ds_snapnames_zapobj, &doi); 2043 if (err) { 2044 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2045 return (err); 2046 } 2047 2048 /* 2049 * Add in 128x the snapnames zapobj size, since we will be moving 2050 * a bunch of snapnames to the promoted ds, and dirtying their 2051 * bonus buffers. 2052 */ 2053 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2054 dsl_dataset_promote_check, 2055 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); 2056 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2057 return (err); 2058 } 2059 2060 /* 2061 * Given a pool name and a dataset object number in that pool, 2062 * return the name of that dataset. 2063 */ 2064 int 2065 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2066 { 2067 spa_t *spa; 2068 dsl_pool_t *dp; 2069 dsl_dataset_t *ds = NULL; 2070 int error; 2071 2072 if ((error = spa_open(pname, &spa, FTAG)) != 0) 2073 return (error); 2074 dp = spa_get_dsl(spa); 2075 rw_enter(&dp->dp_config_rwlock, RW_READER); 2076 if ((error = dsl_dataset_open_obj(dp, obj, 2077 NULL, DS_MODE_NONE, FTAG, &ds)) != 0) { 2078 rw_exit(&dp->dp_config_rwlock); 2079 spa_close(spa, FTAG); 2080 return (error); 2081 } 2082 dsl_dataset_name(ds, buf); 2083 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2084 rw_exit(&dp->dp_config_rwlock); 2085 spa_close(spa, FTAG); 2086 2087 return (0); 2088 } 2089