/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>

static int dsl_dataset_destroy_begin_sync(dsl_dir_t *dd,
    void *arg, dmu_tx_t *tx);

#define DOS_REF_MAX (1ULL << 62)

#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE

#define BP_GET_UCSIZE(bp) \
    ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
    BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp));

/*
 * We use weighted reference counts to express the various forms of exclusion
 * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
 * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
 * This makes the exclusion logic simple: the total refcnt for all opens cannot
 * exceed DOS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
 * weight (DOS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
 * just over half of the refcnt space, so there can't be more than one, but it
 * can peacefully coexist with any number of STANDARD opens.
 */
static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
    0,                      /* DOS_MODE_NONE - invalid */
    1,                      /* DOS_MODE_STANDARD - unlimited number */
    (DOS_REF_MAX >> 1) + 1, /* DOS_MODE_PRIMARY - only one of these */
    DOS_REF_MAX             /* DOS_MODE_EXCLUSIVE - no other opens */
};


void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
    int used = BP_GET_ASIZE(bp);
    int compressed = BP_GET_PSIZE(bp);
    int uncompressed = BP_GET_UCSIZE(bp);

    dprintf_bp(bp, "born, ds=%p\n", ds);

    ASSERT(dmu_tx_is_syncing(tx));
    /* It could have been compressed away to nothing */
    if (BP_IS_HOLE(bp))
        return;
    ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
    ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
    if (ds == NULL) {
        /*
         * Account for the meta-objset space in its placeholder
         * dsl_dir.
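         * (A NULL ds here means the block belongs to the meta-objset
         * itself, so its space is charged to the pool's dp_mos_dir
         * below.)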
         */
        ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
        dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
            used, compressed, uncompressed, tx);
        dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
        return;
    }
    dmu_buf_will_dirty(ds->ds_dbuf, tx);
    mutex_enter(&ds->ds_lock);
    ds->ds_phys->ds_used_bytes += used;
    ds->ds_phys->ds_compressed_bytes += compressed;
    ds->ds_phys->ds_uncompressed_bytes += uncompressed;
    ds->ds_phys->ds_unique_bytes += used;
    mutex_exit(&ds->ds_lock);
    dsl_dir_diduse_space(ds->ds_dir,
        used, compressed, uncompressed, tx);
}

void
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
    int used = BP_GET_ASIZE(bp);
    int compressed = BP_GET_PSIZE(bp);
    int uncompressed = BP_GET_UCSIZE(bp);

    ASSERT(dmu_tx_is_syncing(tx));
    if (BP_IS_HOLE(bp))
        return;

    ASSERT(used > 0);
    if (ds == NULL) {
        /*
         * Account for the meta-objset space in its placeholder
         * dataset.
         */
        /* XXX this can fail, what do we do when it does? */
        (void) arc_free(NULL, tx->tx_pool->dp_spa,
            tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
        bzero(bp, sizeof (blkptr_t));

        dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
            -used, -compressed, -uncompressed, tx);
        dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
        return;
    }
    ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

    dmu_buf_will_dirty(ds->ds_dbuf, tx);

    if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
        dprintf_bp(bp, "freeing: %s", "");
        /* XXX check return code? */
        (void) arc_free(NULL, tx->tx_pool->dp_spa,
            tx->tx_txg, bp, NULL, NULL, ARC_WAIT);

        mutex_enter(&ds->ds_lock);
        /* XXX unique_bytes is not accurate for head datasets */
        /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
        ds->ds_phys->ds_unique_bytes -= used;
        mutex_exit(&ds->ds_lock);
        dsl_dir_diduse_space(ds->ds_dir,
            -used, -compressed, -uncompressed, tx);
    } else {
        dprintf_bp(bp, "putting on dead list: %s", "");
        VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
        /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
        if (ds->ds_phys->ds_prev_snap_obj != 0) {
            ASSERT3U(ds->ds_prev->ds_object, ==,
                ds->ds_phys->ds_prev_snap_obj);
            ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
            if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
                ds->ds_object &&
                bp->blk_birth >
                ds->ds_prev->ds_phys->ds_prev_snap_txg) {
                dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
                mutex_enter(&ds->ds_prev->ds_lock);
                ds->ds_prev->ds_phys->ds_unique_bytes +=
                    used;
                mutex_exit(&ds->ds_prev->ds_lock);
            }
        }
    }
    bzero(bp, sizeof (blkptr_t));
    mutex_enter(&ds->ds_lock);
    ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
    ds->ds_phys->ds_used_bytes -= used;
    ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
    ds->ds_phys->ds_compressed_bytes -= compressed;
    ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
    ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
    mutex_exit(&ds->ds_lock);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
    uint64_t txg;
    dsl_dir_t *dd;

    if (ds == NULL)
        return (0);
    /*
     * The snapshot creation could fail, but that would cause an
     * incorrect FALSE return, which would only result in an
     * overestimation of the amount of space that an operation would
     * consume, which is OK.
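     * (The FALSE return in question comes from
     * dsl_dataset_block_freeable(), below, which compares a block's
     * birth txg against the txg we return here.)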
     *
     * There's also a small window where we could miss a pending
     * snapshot, because we could set the sync task in the quiescing
     * phase.  So this should only be used as a guess.
     */
    dd = ds->ds_dir;
    mutex_enter(&dd->dd_lock);
    if (dd->dd_sync_func == dsl_dataset_snapshot_sync)
        txg = dd->dd_sync_txg;
    else
        txg = ds->ds_phys->ds_prev_snap_txg;
    mutex_exit(&dd->dd_lock);

    return (txg);
}

int
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
    return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}

/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
    dsl_dataset_t *ds = dsv;
    dsl_pool_t *dp = ds->ds_dir->dd_pool;

    /* open_refcount == DOS_REF_MAX when deleting */
    ASSERT(ds->ds_open_refcount == 0 ||
        ds->ds_open_refcount == DOS_REF_MAX);

    dprintf_ds(ds, "evicting %s\n", "");

    unique_remove(ds->ds_phys->ds_fsid_guid);

    if (ds->ds_user_ptr != NULL)
        ds->ds_user_evict_func(ds, ds->ds_user_ptr);

    if (ds->ds_prev) {
        dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
        ds->ds_prev = NULL;
    }

    bplist_close(&ds->ds_deadlist);
    dsl_dir_close(ds->ds_dir, ds);

    if (list_link_active(&ds->ds_synced_link))
        list_remove(&dp->dp_synced_objsets, ds);

    kmem_free(ds, sizeof (dsl_dataset_t));
}

static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
    dsl_dataset_phys_t *headphys;
    int err;
    dmu_buf_t *headdbuf;
    dsl_pool_t *dp = ds->ds_dir->dd_pool;
    objset_t *mos = dp->dp_meta_objset;

    if (ds->ds_snapname[0])
        return (0);
    if (ds->ds_phys->ds_next_snap_obj == 0)
        return (0);

    err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
        FTAG, &headdbuf);
    if (err)
        return (err);
    headphys = headdbuf->db_data;
    err = zap_value_search(dp->dp_meta_objset,
        headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
    dmu_buf_rele(headdbuf, FTAG);
    return (err);
}

int
dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
    int mode, void *tag, dsl_dataset_t **dsp)
{
    uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
    objset_t *mos = dp->dp_meta_objset;
    dmu_buf_t *dbuf;
    dsl_dataset_t *ds;
    int err;

    ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
        dsl_pool_sync_context(dp));

    err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
    if (err)
        return (err);
    ds = dmu_buf_get_user(dbuf);
    if (ds == NULL) {
        dsl_dataset_t *winner;

        ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
        ds->ds_dbuf = dbuf;
        ds->ds_object = dsobj;
        ds->ds_phys = dbuf->db_data;

        err = bplist_open(&ds->ds_deadlist,
            mos, ds->ds_phys->ds_deadlist_obj);
        if (err == 0) {
            err = dsl_dir_open_obj(dp,
                ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
        }
        if (err) {
            /*
             * we don't really need to close the blist if we
             * just opened it.
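             * (Nor was ds_dir opened if dsl_dir_open_obj()
             * failed, so freeing ds and releasing the bonus
             * buffer is sufficient cleanup here.)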
             */
            kmem_free(ds, sizeof (dsl_dataset_t));
            dmu_buf_rele(dbuf, tag);
            return (err);
        }

        if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
            ds->ds_snapname[0] = '\0';
            if (ds->ds_phys->ds_prev_snap_obj) {
                err = dsl_dataset_open_obj(dp,
                    ds->ds_phys->ds_prev_snap_obj, NULL,
                    DS_MODE_NONE, ds, &ds->ds_prev);
            }
        } else {
            if (snapname) {
#ifdef ZFS_DEBUG
                dsl_dataset_phys_t *headphys;
                dmu_buf_t *headdbuf;
                err = dmu_bonus_hold(mos,
                    ds->ds_dir->dd_phys->dd_head_dataset_obj,
                    FTAG, &headdbuf);
                if (err == 0) {
                    headphys = headdbuf->db_data;
                    uint64_t foundobj;
                    err = zap_lookup(dp->dp_meta_objset,
                        headphys->ds_snapnames_zapobj,
                        snapname, sizeof (foundobj), 1,
                        &foundobj);
                    ASSERT3U(foundobj, ==, dsobj);
                    dmu_buf_rele(headdbuf, FTAG);
                }
#endif
                (void) strcat(ds->ds_snapname, snapname);
            } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
                err = dsl_dataset_get_snapname(ds);
            }
        }

        if (err == 0) {
            winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
                dsl_dataset_evict);
        }
        if (err || winner) {
            bplist_close(&ds->ds_deadlist);
            if (ds->ds_prev) {
                dsl_dataset_close(ds->ds_prev,
                    DS_MODE_NONE, ds);
            }
            dsl_dir_close(ds->ds_dir, ds);
            kmem_free(ds, sizeof (dsl_dataset_t));
            if (err) {
                dmu_buf_rele(dbuf, tag);
                return (err);
            }
            ds = winner;
        } else {
            uint64_t new =
                unique_insert(ds->ds_phys->ds_fsid_guid);
            if (new != ds->ds_phys->ds_fsid_guid) {
                /* XXX it won't necessarily be synced... */
                ds->ds_phys->ds_fsid_guid = new;
            }
        }
    }
    ASSERT3P(ds->ds_dbuf, ==, dbuf);
    ASSERT3P(ds->ds_phys, ==, dbuf->db_data);

    mutex_enter(&ds->ds_lock);
    if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
        ds->ds_phys->ds_inconsistent && !DS_MODE_IS_INCONSISTENT(mode)) ||
        (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
        mutex_exit(&ds->ds_lock);
        dsl_dataset_close(ds, DS_MODE_NONE, tag);
        return (EBUSY);
    }
    ds->ds_open_refcount += weight;
    mutex_exit(&ds->ds_lock);

    *dsp = ds;
    return (0);
}

int
dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
    void *tag, dsl_dataset_t **dsp)
{
    dsl_dir_t *dd;
    dsl_pool_t *dp;
    const char *tail;
    uint64_t obj;
    dsl_dataset_t *ds = NULL;
    int err = 0;

    err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
    if (err)
        return (err);

    dp = dd->dd_pool;
    obj = dd->dd_phys->dd_head_dataset_obj;
    rw_enter(&dp->dp_config_rwlock, RW_READER);
    if (obj == 0) {
        /* A dataset with no associated objset */
        err = ENOENT;
        goto out;
    }

    if (tail != NULL) {
        objset_t *mos = dp->dp_meta_objset;

        err = dsl_dataset_open_obj(dp, obj, NULL,
            DS_MODE_NONE, tag, &ds);
        if (err)
            goto out;
        obj = ds->ds_phys->ds_snapnames_zapobj;
        dsl_dataset_close(ds, DS_MODE_NONE, tag);
        ds = NULL;

        if (tail[0] != '@') {
            err = ENOENT;
            goto out;
        }
        tail++;

        /* Look for a snapshot */
        if (!DS_MODE_IS_READONLY(mode)) {
            err = EROFS;
            goto out;
        }
        dprintf("looking for snapshot '%s'\n", tail);
        err = zap_lookup(mos, obj, tail, 8, 1, &obj);
        if (err)
            goto out;
    }
    err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);

out:
    rw_exit(&dp->dp_config_rwlock);
    dsl_dir_close(dd, FTAG);

    ASSERT3U((err == 0), ==, (ds != NULL));
    /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */

    *dsp = ds;
    return (err);
}

int
dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
{
    return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
}

void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
    if (ds == NULL) {
        (void) strcpy(name, "mos");
    } else {
        dsl_dir_name(ds->ds_dir, name);
        VERIFY(0 == dsl_dataset_get_snapname(ds));
        if (ds->ds_snapname[0]) {
            (void) strcat(name, "@");
            if (!MUTEX_HELD(&ds->ds_lock)) {
                /*
                 * We use a "recursive" mutex so that we
                 * can call dprintf_ds() with ds_lock held.
                 */
                mutex_enter(&ds->ds_lock);
                (void) strcat(name, ds->ds_snapname);
                mutex_exit(&ds->ds_lock);
            } else {
                (void) strcat(name, ds->ds_snapname);
            }
        }
    }
}

void
dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
{
    uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
    mutex_enter(&ds->ds_lock);
    ASSERT3U(ds->ds_open_refcount, >=, weight);
    ds->ds_open_refcount -= weight;
    dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
        mode, ds->ds_open_refcount);
    mutex_exit(&ds->ds_lock);

    dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
{
    objset_t *mos = dp->dp_meta_objset;
    dmu_buf_t *dbuf;
    dsl_dataset_phys_t *dsphys;
    dsl_dataset_t *ds;
    uint64_t dsobj;
    dsl_dir_t *dd;

    dsl_dir_create_root(mos, ddobjp, tx);
    VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));

    dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
        DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
    VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
    dmu_buf_will_dirty(dbuf, tx);
    dsphys = dbuf->db_data;
    dsphys->ds_dir_obj = dd->dd_object;
    dsphys->ds_fsid_guid = unique_create();
    unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
    (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
        sizeof (dsphys->ds_guid));
    dsphys->ds_snapnames_zapobj =
        zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
    dsphys->ds_creation_time = gethrestime_sec();
    dsphys->ds_creation_txg = tx->tx_txg;
    dsphys->ds_deadlist_obj =
        bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
    dmu_buf_rele(dbuf, FTAG);

    dmu_buf_will_dirty(dd->dd_dbuf, tx);
    dd->dd_phys->dd_head_dataset_obj = dsobj;
    dsl_dir_close(dd, FTAG);

    VERIFY(0 ==
        dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
    (void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx);
    dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
}

int
dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname,
    const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
{
    int err;
    dsl_pool_t *dp = pds->dd_pool;
    dmu_buf_t *dbuf;
    dsl_dataset_phys_t *dsphys;
    uint64_t dsobj;
    objset_t *mos = dp->dp_meta_objset;
    dsl_dir_t *dd;

    if (clone_parent != NULL) {
        /*
         * You can't clone across pools.
         */
        if (clone_parent->ds_dir->dd_pool != dp)
            return (EXDEV);

        /*
         * You can only clone snapshots, not the head datasets.
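         * (A snapshot always has at least one child -- the
         * dataset it was taken from -- so ds_num_children == 0
         * identifies a head dataset.)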
         */
        if (clone_parent->ds_phys->ds_num_children == 0)
            return (EINVAL);
    }

    ASSERT(lastname[0] != '@');
    ASSERT(dmu_tx_is_syncing(tx));

    err = dsl_dir_create_sync(pds, lastname, tx);
    if (err)
        return (err);
    VERIFY(0 == dsl_dir_open_spa(dp->dp_spa, fullname, FTAG, &dd, NULL));

    /* This is the point of no (unsuccessful) return */

    dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
        DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
    VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
    dmu_buf_will_dirty(dbuf, tx);
    dsphys = dbuf->db_data;
    dsphys->ds_dir_obj = dd->dd_object;
    dsphys->ds_fsid_guid = unique_create();
    unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
    (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
        sizeof (dsphys->ds_guid));
    dsphys->ds_snapnames_zapobj =
        zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
    dsphys->ds_creation_time = gethrestime_sec();
    dsphys->ds_creation_txg = tx->tx_txg;
    dsphys->ds_deadlist_obj =
        bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
    if (clone_parent) {
        dsphys->ds_prev_snap_obj = clone_parent->ds_object;
        dsphys->ds_prev_snap_txg =
            clone_parent->ds_phys->ds_creation_txg;
        dsphys->ds_used_bytes =
            clone_parent->ds_phys->ds_used_bytes;
        dsphys->ds_compressed_bytes =
            clone_parent->ds_phys->ds_compressed_bytes;
        dsphys->ds_uncompressed_bytes =
            clone_parent->ds_phys->ds_uncompressed_bytes;
        dsphys->ds_bp = clone_parent->ds_phys->ds_bp;

        dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
        clone_parent->ds_phys->ds_num_children++;

        dmu_buf_will_dirty(dd->dd_dbuf, tx);
        dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
    }
    dmu_buf_rele(dbuf, FTAG);

    dmu_buf_will_dirty(dd->dd_dbuf, tx);
    dd->dd_phys->dd_head_dataset_obj = dsobj;
    dsl_dir_close(dd, FTAG);

    return (0);
}

int
dsl_dataset_destroy(const char *name)
{
    int err;
    dsl_pool_t *dp;
    dsl_dir_t *dd;
    const char *tail;

    err = dsl_dir_open(name, FTAG, &dd, &tail);
    if (err)
        return (err);

    dp = dd->dd_pool;
    if (tail != NULL) {
        if (tail[0] != '@') {
            dsl_dir_close(dd, FTAG);
            return (ENOENT);
        }
        tail++;
        /* Just blow away the snapshot */
        do {
            txg_wait_synced(dp, 0);
            err = dsl_dir_sync_task(dd,
                dsl_dataset_destroy_sync, (void*)tail, 0);
        } while (err == EAGAIN);
        dsl_dir_close(dd, FTAG);
    } else {
        char buf[MAXNAMELEN];
        char *cp;
        objset_t *os;
        uint64_t obj;
        dsl_dir_t *pds;

        if (dd->dd_phys->dd_parent_obj == 0) {
            dsl_dir_close(dd, FTAG);
            return (EINVAL);
        }

        err = dmu_objset_open(name, DMU_OST_ANY,
            DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os);
        if (err) {
            dsl_dir_close(dd, FTAG);
            return (err);
        }

        /*
         * Check for errors and mark this ds as inconsistent, in
         * case we crash while freeing the objects.
         */
        err = dsl_dir_sync_task(os->os->os_dsl_dataset->ds_dir,
            dsl_dataset_destroy_begin_sync, os->os->os_dsl_dataset, 0);
        if (err) {
            dmu_objset_close(os);
            dsl_dir_close(dd, FTAG);
            return (err);
        }

        /*
         * remove the objects in open context, so that we won't
         * have too much to do in syncing context.
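         * (Any object we fail to free here, e.g. because
         * dmu_tx_assign() fails, is left for
         * dsl_dataset_destroy_sync() to clean up.)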
         */
        for (obj = 0; err == 0;
            err = dmu_object_next(os, &obj, FALSE)) {
            dmu_tx_t *tx = dmu_tx_create(os);
            dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
            dmu_tx_hold_bonus(tx, obj);
            err = dmu_tx_assign(tx, TXG_WAIT);
            if (err) {
                /*
                 * Perhaps there is not enough disk
                 * space.  Just deal with it from
                 * dsl_dataset_destroy_sync().
                 */
                dmu_tx_abort(tx);
                continue;
            }
            VERIFY(0 == dmu_object_free(os, obj, tx));
            dmu_tx_commit(tx);
        }
        /* Make sure it's not dirty before we finish destroying it. */
        txg_wait_synced(dd->dd_pool, 0);

        dmu_objset_close(os);
        if (err != ESRCH) {
            dsl_dir_close(dd, FTAG);
            return (err);
        }

        /*
         * Blow away the dsl_dir + head dataset.
         * dsl_dir_destroy_sync() will call
         * dsl_dataset_destroy_sync() to destroy the head dataset.
         */
        rw_enter(&dp->dp_config_rwlock, RW_READER);
        err = dsl_dir_open_obj(dd->dd_pool,
            dd->dd_phys->dd_parent_obj, NULL, FTAG, &pds);
        dsl_dir_close(dd, FTAG);
        rw_exit(&dp->dp_config_rwlock);
        if (err)
            return (err);

        (void) strcpy(buf, name);
        cp = strrchr(buf, '/') + 1;
        ASSERT(cp[0] != '\0');
        do {
            txg_wait_synced(dp, 0);
            err = dsl_dir_sync_task(pds,
                dsl_dir_destroy_sync, cp, 0);
        } while (err == EAGAIN);
        dsl_dir_close(pds, FTAG);
    }

    return (err);
}

int
dsl_dataset_rollback(const char *name)
{
    int err;
    dsl_dir_t *dd;
    const char *tail;

    err = dsl_dir_open(name, FTAG, &dd, &tail);
    if (err)
        return (err);

    if (tail != NULL) {
        dsl_dir_close(dd, FTAG);
        return (EINVAL);
    }
    do {
        txg_wait_synced(dd->dd_pool, 0);
        err = dsl_dir_sync_task(dd,
            dsl_dataset_rollback_sync, NULL, 0);
    } while (err == EAGAIN);
    dsl_dir_close(dd, FTAG);

    return (err);
}

void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
    void *old;

    mutex_enter(&ds->ds_lock);
    old = ds->ds_user_ptr;
    if (old == NULL) {
        ds->ds_user_ptr = p;
        ds->ds_user_evict_func = func;
    }
    mutex_exit(&ds->ds_lock);
    return (old);
}

void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
    return (ds->ds_user_ptr);
}


void
dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp)
{
    *bp = ds->ds_phys->ds_bp;
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
    ASSERT(dmu_tx_is_syncing(tx));
    /* If it's the meta-objset, set dp_meta_rootbp */
    if (ds == NULL) {
        tx->tx_pool->dp_meta_rootbp = *bp;
    } else {
        dmu_buf_will_dirty(ds->ds_dbuf, tx);
        ds->ds_phys->ds_bp = *bp;
    }
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
    return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
    dsl_pool_t *dp;

    if (ds == NULL) /* this is the meta-objset */
        return;

    ASSERT(ds->ds_user_ptr != NULL);
    ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

    dp = ds->ds_dir->dd_pool;

    if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
        /* up the hold count until we can be written out */
        dmu_buf_add_ref(ds->ds_dbuf, ds);
    }
}

struct killarg {
    uint64_t *usedp;
    uint64_t *compressedp;
    uint64_t *uncompressedp;
    zio_t *zio;
    dmu_tx_t *tx;
};

static int
kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
{
    struct killarg *ka = arg;
    blkptr_t *bp = &bc->bc_blkptr;

    ASSERT3U(bc->bc_errno, ==, 0);

    /*
     * Since this callback is not called concurrently, no lock is
     * needed on the accounting values.
     */
    *ka->usedp += BP_GET_ASIZE(bp);
    *ka->compressedp += BP_GET_PSIZE(bp);
    *ka->uncompressedp += BP_GET_UCSIZE(bp);
    /* XXX check for EIO? */
    (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
        ARC_NOWAIT);
    return (0);
}

/* ARGSUSED */
int
dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
{
    objset_t *mos = dd->dd_pool->dp_meta_objset;
    dsl_dataset_t *ds;
    int err;

    if (dd->dd_phys->dd_head_dataset_obj == 0)
        return (EINVAL);
    err = dsl_dataset_open_obj(dd->dd_pool,
        dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &ds);
    if (err)
        return (err);

    if (ds->ds_phys->ds_prev_snap_txg == 0) {
        /*
         * There's no previous snapshot.  I suppose we could
         * roll it back to being empty (and re-initialize the
         * upper (ZPL) layer).  But for now there's no way to do
         * this via the user interface.
         */
        dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
        return (EINVAL);
    }

    mutex_enter(&ds->ds_lock);
    if (ds->ds_open_refcount > 0) {
        mutex_exit(&ds->ds_lock);
        dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
        return (EBUSY);
    }

    /*
     * If we made changes this txg, traverse_dsl_dataset won't find
     * them.  Try again.
     */
    if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
        mutex_exit(&ds->ds_lock);
        dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
        return (EAGAIN);
    }

    /* THE POINT OF NO (unsuccessful) RETURN */
    ds->ds_open_refcount = DOS_REF_MAX;
    mutex_exit(&ds->ds_lock);

    dmu_buf_will_dirty(ds->ds_dbuf, tx);

    /* Zero out the deadlist. */
    dprintf("old deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
    bplist_close(&ds->ds_deadlist);
    bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
    ds->ds_phys->ds_deadlist_obj =
        bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
    VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
        ds->ds_phys->ds_deadlist_obj));
    dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);

    {
        /* Free blkptrs that we gave birth to */
        zio_t *zio;
        uint64_t used = 0, compressed = 0, uncompressed = 0;
        struct killarg ka;

        zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
            ZIO_FLAG_MUSTSUCCEED);
        ka.usedp = &used;
        ka.compressedp = &compressed;
        ka.uncompressedp = &uncompressed;
        ka.zio = zio;
        ka.tx = tx;
        (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
            ADVANCE_POST, kill_blkptr, &ka);
        (void) zio_wait(zio);

        dsl_dir_diduse_space(dd,
            -used, -compressed, -uncompressed, tx);
    }

    /* Change our contents to that of the prev snapshot (finally!) */
    ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
    ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
    ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
    ds->ds_phys->ds_compressed_bytes =
        ds->ds_prev->ds_phys->ds_compressed_bytes;
    ds->ds_phys->ds_uncompressed_bytes =
        ds->ds_prev->ds_phys->ds_uncompressed_bytes;
    ds->ds_phys->ds_inconsistent = ds->ds_prev->ds_phys->ds_inconsistent;
    ds->ds_phys->ds_unique_bytes = 0;

    dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
    ds->ds_prev->ds_phys->ds_unique_bytes = 0;

    dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
    ds->ds_open_refcount = 0;
    dsl_dataset_close(ds, DS_MODE_NONE, FTAG);

    return (0);
}

/* ARGSUSED */
static int
dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
{
    dsl_dataset_t *ds = arg;

    /*
     * Can't delete a head dataset if there are snapshots of it.
     * (Except if the only snapshots are from the branch we cloned
     * from.)
     */
    if (ds->ds_prev != NULL &&
        ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
        return (EINVAL);

    /* Mark it as inconsistent on-disk, in case we crash */
    dmu_buf_will_dirty(ds->ds_dbuf, tx);
    ds->ds_phys->ds_inconsistent = TRUE;

    return (0);
}

int
dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
{
    const char *snapname = arg;
    uint64_t used = 0, compressed = 0, uncompressed = 0;
    blkptr_t bp;
    zio_t *zio;
    int err;
    int after_branch_point = FALSE;
    int drop_lock = FALSE;
    dsl_pool_t *dp = dd->dd_pool;
    objset_t *mos = dp->dp_meta_objset;
    dsl_dataset_t *ds, *ds_prev = NULL;
    uint64_t obj;

    if (dd->dd_phys->dd_head_dataset_obj == 0)
        return (EINVAL);

    if (!RW_WRITE_HELD(&dp->dp_config_rwlock)) {
        rw_enter(&dp->dp_config_rwlock, RW_WRITER);
        drop_lock = TRUE;
    }

    err = dsl_dataset_open_obj(dd->dd_pool,
        dd->dd_phys->dd_head_dataset_obj, NULL,
        snapname ? DS_MODE_NONE : DS_MODE_EXCLUSIVE, FTAG, &ds);

    if (err == 0 && snapname) {
        err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
            snapname, 8, 1, &obj);
        dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
        if (err == 0) {
            err = dsl_dataset_open_obj(dd->dd_pool, obj, NULL,
                DS_MODE_EXCLUSIVE, FTAG, &ds);
        }
    }
    if (err) {
        if (drop_lock)
            rw_exit(&dp->dp_config_rwlock);
        return (err);
    }

    obj = ds->ds_object;

    /* Can't delete a branch point. */
    if (ds->ds_phys->ds_num_children > 1) {
        dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
        if (drop_lock)
            rw_exit(&dp->dp_config_rwlock);
        return (EINVAL);
    }

    /*
     * Can't delete a head dataset if there are snapshots of it.
     * (Except if the only snapshots are from the branch we cloned
     * from.)
     */
    if (ds->ds_prev != NULL &&
        ds->ds_prev->ds_phys->ds_next_snap_obj == obj) {
        dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
        if (drop_lock)
            rw_exit(&dp->dp_config_rwlock);
        return (EINVAL);
    }

    /*
     * If we made changes this txg, traverse_dsl_dataset won't find
     * them.  Try again.
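     * (Our caller txg_wait_synced()s and retries while we
     * return EAGAIN.)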
     */
    if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
        dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
        if (drop_lock)
            rw_exit(&dp->dp_config_rwlock);
        return (EAGAIN);
    }

    if (ds->ds_phys->ds_prev_snap_obj != 0) {
        if (ds->ds_prev) {
            ds_prev = ds->ds_prev;
        } else {
            err = dsl_dataset_open_obj(dd->dd_pool,
                ds->ds_phys->ds_prev_snap_obj, NULL,
                DS_MODE_NONE, FTAG, &ds_prev);
            if (err) {
                dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
                if (drop_lock)
                    rw_exit(&dp->dp_config_rwlock);
                return (err);
            }
        }
        after_branch_point =
            (ds_prev->ds_phys->ds_next_snap_obj != obj);

        dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
        if (after_branch_point &&
            ds->ds_phys->ds_next_snap_obj == 0) {
            /* This clone is toast. */
            ASSERT(ds_prev->ds_phys->ds_num_children > 1);
            ds_prev->ds_phys->ds_num_children--;
        } else if (!after_branch_point) {
            ds_prev->ds_phys->ds_next_snap_obj =
                ds->ds_phys->ds_next_snap_obj;
        }
    }

    /* THE POINT OF NO (unsuccessful) RETURN */

    ASSERT3P(tx->tx_pool, ==, dd->dd_pool);
    zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

    if (ds->ds_phys->ds_next_snap_obj != 0) {
        dsl_dataset_t *ds_next;
        uint64_t itor = 0;

        spa_scrub_restart(dp->dp_spa, tx->tx_txg);

        VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
            ds->ds_phys->ds_next_snap_obj, NULL,
            DS_MODE_NONE, FTAG, &ds_next));
        ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

        dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
        ds_next->ds_phys->ds_prev_snap_obj =
            ds->ds_phys->ds_prev_snap_obj;
        ds_next->ds_phys->ds_prev_snap_txg =
            ds->ds_phys->ds_prev_snap_txg;
        ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
            ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

        /*
         * Transfer to our deadlist (which will become next's
         * new deadlist) any entries from next's current
         * deadlist which were born before prev, and free the
         * other entries.
         *
         * XXX we're doing this long task with the config lock held
         */
        while (bplist_iterate(&ds_next->ds_deadlist, &itor,
            &bp) == 0) {
            if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
                VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
                    &bp, tx));
                if (ds_prev && !after_branch_point &&
                    bp.blk_birth >
                    ds_prev->ds_phys->ds_prev_snap_txg) {
                    ds_prev->ds_phys->ds_unique_bytes +=
                        BP_GET_ASIZE(&bp);
                }
            } else {
                used += BP_GET_ASIZE(&bp);
                compressed += BP_GET_PSIZE(&bp);
                uncompressed += BP_GET_UCSIZE(&bp);
                /* XXX check return value? */
                (void) arc_free(zio, dp->dp_spa, tx->tx_txg,
                    &bp, NULL, NULL, ARC_NOWAIT);
            }
        }

        /* free next's deadlist */
        bplist_close(&ds_next->ds_deadlist);
        bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

        /* set next's deadlist to our deadlist */
        ds_next->ds_phys->ds_deadlist_obj =
            ds->ds_phys->ds_deadlist_obj;
        VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
            ds_next->ds_phys->ds_deadlist_obj));
        ds->ds_phys->ds_deadlist_obj = 0;

        if (ds_next->ds_phys->ds_next_snap_obj != 0) {
            /*
             * Update next's unique to include blocks which
             * were previously shared by only this snapshot
             * and it.  Those blocks will be born after the
             * prev snap and before this snap, and will have
             * died after the next snap and before the one
             * after that (ie. be on the snap after next's
             * deadlist).
             *
             * XXX we're doing this long task with the
             * config lock held
             */
            dsl_dataset_t *ds_after_next;

            VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
                ds_next->ds_phys->ds_next_snap_obj, NULL,
                DS_MODE_NONE, FTAG, &ds_after_next));
            itor = 0;
            while (bplist_iterate(&ds_after_next->ds_deadlist,
                &itor, &bp) == 0) {
                if (bp.blk_birth >
                    ds->ds_phys->ds_prev_snap_txg &&
                    bp.blk_birth <=
                    ds->ds_phys->ds_creation_txg) {
                    ds_next->ds_phys->ds_unique_bytes +=
                        BP_GET_ASIZE(&bp);
                }
            }

            dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
            ASSERT3P(ds_next->ds_prev, ==, NULL);
        } else {
            /*
             * It would be nice to update the head dataset's
             * unique.  To do so we would have to traverse
             * it for blocks born after ds_prev, which is
             * pretty expensive just to maintain something
             * for debugging purposes.
             */
            ASSERT3P(ds_next->ds_prev, ==, ds);
            dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
                ds_next);
            if (ds_prev) {
                VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
                    ds->ds_phys->ds_prev_snap_obj, NULL,
                    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
            } else {
                ds_next->ds_prev = NULL;
            }
        }
        dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);

        /*
         * NB: unique_bytes is not accurate for head objsets
         * because we don't update it when we delete the most
         * recent snapshot -- see above comment.
         */
        ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
    } else {
        /*
         * There's no next snapshot, so this is a head dataset.
         * Destroy the deadlist.  Unless it's a clone, the
         * deadlist should be empty.  (If it's a clone, it's
         * safe to ignore the deadlist contents.)
         */
        struct killarg ka;

        ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
        bplist_close(&ds->ds_deadlist);
        bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
        ds->ds_phys->ds_deadlist_obj = 0;

        /*
         * Free everything that we point to (that's born after
         * the previous snapshot, if we are a clone)
         *
         * XXX we're doing this long task with the config lock held
         */
        ka.usedp = &used;
        ka.compressedp = &compressed;
        ka.uncompressedp = &uncompressed;
        ka.zio = zio;
        ka.tx = tx;
        err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
            ADVANCE_POST, kill_blkptr, &ka);
        ASSERT3U(err, ==, 0);
    }

    err = zio_wait(zio);
    ASSERT3U(err, ==, 0);

    dsl_dir_diduse_space(dd, -used, -compressed, -uncompressed, tx);

    if (ds->ds_phys->ds_snapnames_zapobj) {
        err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
        ASSERT(err == 0);
    }

    if (dd->dd_phys->dd_head_dataset_obj == ds->ds_object) {
        /* Erase the link in the dataset */
        dmu_buf_will_dirty(dd->dd_dbuf, tx);
        dd->dd_phys->dd_head_dataset_obj = 0;
        /*
         * dsl_dir_sync_destroy() called us, they'll destroy
         * the dataset.
         */
    } else {
        /* remove from snapshot namespace */
        dsl_dataset_t *ds_head;
        VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
            dd->dd_phys->dd_head_dataset_obj, NULL,
            DS_MODE_NONE, FTAG, &ds_head));
#ifdef ZFS_DEBUG
        {
            uint64_t val;
            err = zap_lookup(mos,
                ds_head->ds_phys->ds_snapnames_zapobj,
                snapname, 8, 1, &val);
            ASSERT3U(err, ==, 0);
            ASSERT3U(val, ==, obj);
        }
#endif
        err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
            snapname, tx);
        ASSERT(err == 0);
        dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
    }

    if (ds_prev && ds->ds_prev != ds_prev)
        dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);

    err = dmu_object_free(mos, obj, tx);
    ASSERT(err == 0);

    /*
     * Close the objset with mode NONE, thus leaving it with
     * DOS_REF_MAX set, so that no one can access it.
     */
    dsl_dataset_close(ds, DS_MODE_NONE, FTAG);

    if (drop_lock)
        rw_exit(&dp->dp_config_rwlock);
    return (0);
}

int
dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
{
    const char *snapname = arg;
    dsl_pool_t *dp = dd->dd_pool;
    dmu_buf_t *dbuf;
    dsl_dataset_phys_t *dsphys;
    uint64_t dsobj, value;
    objset_t *mos = dp->dp_meta_objset;
    dsl_dataset_t *ds;
    int err;

    ASSERT(dmu_tx_is_syncing(tx));

    if (dd->dd_phys->dd_head_dataset_obj == 0)
        return (EINVAL);
    err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_head_dataset_obj, NULL,
        DS_MODE_NONE, FTAG, &ds);
    if (err)
        return (err);

    err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
        snapname, 8, 1, &value);
    if (err == 0) {
        dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
        return (EEXIST);
    }
    ASSERT(err == ENOENT);

    /* The point of no (unsuccessful) return */

    dprintf_dd(dd, "taking snapshot %s in txg %llu\n",
        snapname, tx->tx_txg);

    spa_scrub_restart(dp->dp_spa, tx->tx_txg);

    rw_enter(&dp->dp_config_rwlock, RW_WRITER);

    dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
        DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
    VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
    dmu_buf_will_dirty(dbuf, tx);
    dsphys = dbuf->db_data;
    dsphys->ds_dir_obj = dd->dd_object;
    dsphys->ds_fsid_guid = unique_create();
    unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
    (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
        sizeof (dsphys->ds_guid));
    dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
    dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
    dsphys->ds_next_snap_obj = ds->ds_object;
    dsphys->ds_num_children = 1;
    dsphys->ds_creation_time = gethrestime_sec();
    dsphys->ds_creation_txg = tx->tx_txg;
    dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
    dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
    dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
    dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
    dsphys->ds_inconsistent = ds->ds_phys->ds_inconsistent;
    dsphys->ds_bp = ds->ds_phys->ds_bp;
    dmu_buf_rele(dbuf, FTAG);

    if (ds->ds_phys->ds_prev_snap_obj != 0) {
        dsl_dataset_t *ds_prev;

        VERIFY(0 == dsl_dataset_open_obj(dp,
            ds->ds_phys->ds_prev_snap_obj, NULL,
            DS_MODE_NONE, FTAG, &ds_prev));
        ASSERT(ds_prev->ds_phys->ds_next_snap_obj ==
            ds->ds_object ||
            ds_prev->ds_phys->ds_num_children > 1);
        if (ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
            dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
            ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
                ds_prev->ds_phys->ds_creation_txg);
            ds_prev->ds_phys->ds_next_snap_obj = dsobj;
        }
        dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
    } else {
        ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 0);
    }

    bplist_close(&ds->ds_deadlist);
    dmu_buf_will_dirty(ds->ds_dbuf, tx);
    ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
    ds->ds_phys->ds_prev_snap_obj = dsobj;
    ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
    ds->ds_phys->ds_unique_bytes = 0;
    ds->ds_phys->ds_deadlist_obj =
        bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
    VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
        ds->ds_phys->ds_deadlist_obj));

    dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
    err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
        snapname, 8, 1, &dsobj, tx);
    ASSERT(err == 0);

    if (ds->ds_prev)
        dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
    VERIFY(0 == dsl_dataset_open_obj(dp,
        ds->ds_phys->ds_prev_snap_obj, snapname,
        DS_MODE_NONE, ds, &ds->ds_prev));

    rw_exit(&dp->dp_config_rwlock);
    dsl_dataset_close(ds, DS_MODE_NONE, FTAG);

    return (0);
}

void
dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx)
{
    ASSERT(dmu_tx_is_syncing(tx));
    ASSERT(ds->ds_user_ptr != NULL);
    ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

    dmu_objset_sync(ds->ds_user_ptr, tx);
    dsl_dir_dirty(ds->ds_dir, tx);
    bplist_close(&ds->ds_deadlist);

    dmu_buf_rele(ds->ds_dbuf, ds);
}

void
dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
{
    /* fill in properties crap */
    dsl_dir_stats(ds->ds_dir, dds);

    if (ds->ds_phys->ds_num_children != 0) {
        dds->dds_is_snapshot = TRUE;
        dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
    }

    dds->dds_inconsistent = ds->ds_phys->ds_inconsistent;
    dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;

    dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
    dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used;

    /* We override the dataset's creation time... they should be the same */
    dds->dds_creation_time = ds->ds_phys->ds_creation_time;
    dds->dds_creation_txg = ds->ds_phys->ds_creation_txg;
    dds->dds_space_refd = ds->ds_phys->ds_used_bytes;
    dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid;

    if (ds->ds_phys->ds_next_snap_obj) {
        /*
         * This is a snapshot; override the dd's space used with
         * our unique space
         */
        dds->dds_space_used = ds->ds_phys->ds_unique_bytes;
        dds->dds_compressed_bytes =
            ds->ds_phys->ds_compressed_bytes;
        dds->dds_uncompressed_bytes =
            ds->ds_phys->ds_uncompressed_bytes;
    }
}

dsl_pool_t *
dsl_dataset_pool(dsl_dataset_t *ds)
{
    return (ds->ds_dir->dd_pool);
}

struct osrenamearg {
    const char *oldname;
    const char *newname;
};

static int
dsl_dataset_snapshot_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
{
    struct osrenamearg *ora = arg;
    objset_t *mos = dd->dd_pool->dp_meta_objset;
    dsl_dir_t *nds;
    const char *tail;
    int err;
    dsl_dataset_t *snds, *fsds;
    uint64_t val;

    err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, ora->oldname,
        DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &snds);
    if (err)
        return (err);

    if (snds->ds_dir != dd) {
        dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
        return (EINVAL);
    }

    /* better be changing a snapshot */
    if (snds->ds_phys->ds_next_snap_obj == 0) {
        dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
        return (EINVAL);
    }

    /* new fs better exist */
    err = dsl_dir_open_spa(dd->dd_pool->dp_spa, ora->newname,
        FTAG, &nds, &tail);
    if (err) {
        dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
        return (err);
    }

    dsl_dir_close(nds, FTAG);

    /* new name better be in same fs */
    if (nds != dd) {
        dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
        return (EINVAL);
    }

    /* new name better be a snapshot */
    if (tail == NULL || tail[0] != '@') {
        dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
        return (EINVAL);
    }

    tail++;

    err = dsl_dataset_open_obj(dd->dd_pool,
        dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &fsds);
    if (err) {
        dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
        return (err);
    }

    /* new name better not be in use */
    err = zap_lookup(mos, fsds->ds_phys->ds_snapnames_zapobj,
        tail, 8, 1, &val);
    if (err != ENOENT) {
        if (err == 0)
            err = EEXIST;
        dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
        dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
        return (EEXIST);
    }

    /* The point of no (unsuccessful) return */

    rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER);
    VERIFY(0 == dsl_dataset_get_snapname(snds));
    err = zap_remove(mos, fsds->ds_phys->ds_snapnames_zapobj,
        snds->ds_snapname, tx);
    ASSERT3U(err, ==, 0);
    mutex_enter(&snds->ds_lock);
    (void) strcpy(snds->ds_snapname, tail);
    mutex_exit(&snds->ds_lock);
    err = zap_add(mos, fsds->ds_phys->ds_snapnames_zapobj,
        snds->ds_snapname, 8, 1, &snds->ds_object, tx);
    ASSERT3U(err, ==, 0);
    rw_exit(&dd->dd_pool->dp_config_rwlock);

    dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
    dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
    return (0);
}

#pragma weak dmu_objset_rename = dsl_dataset_rename
int
dsl_dataset_rename(const char *osname, const char *newname)
{
    dsl_dir_t *dd;
    const char *tail;
    struct osrenamearg ora;
    int err;

    err = dsl_dir_open(osname, FTAG, &dd, &tail);
    if (err)
        return (err);
    if (tail == NULL) {
        err = dsl_dir_sync_task(dd,
            dsl_dir_rename_sync, (void*)newname, 1<<12);
        dsl_dir_close(dd, FTAG);
        return (err);
    }
    if (tail[0] != '@') {
        /* the name ended in a nonexistent component */
        dsl_dir_close(dd, FTAG);
        return (ENOENT);
    }

    ora.oldname = osname;
    ora.newname = newname;

    err = dsl_dir_sync_task(dd,
        dsl_dataset_snapshot_rename_sync, &ora, 1<<12);
    dsl_dir_close(dd, FTAG);
    return (err);
}