1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/dmu_objset.h> 30 #include <sys/dsl_dataset.h> 31 #include <sys/dsl_dir.h> 32 #include <sys/dmu_traverse.h> 33 #include <sys/dmu_tx.h> 34 #include <sys/arc.h> 35 #include <sys/zio.h> 36 #include <sys/zap.h> 37 #include <sys/unique.h> 38 #include <sys/zfs_context.h> 39 40 #define DOS_REF_MAX (1ULL << 62) 41 42 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 43 44 #define BP_GET_UCSIZE(bp) \ 45 ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \ 46 BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)); 47 48 /* 49 * We use weighted reference counts to express the various forms of exclusion 50 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open 51 * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. 52 * This makes the exclusion logic simple: the total refcnt for all opens cannot 53 * exceed DOS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their 54 * weight (DOS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume 55 * just over half of the refcnt space, so there can't be more than one, but it 56 * can peacefully coexist with any number of STANDARD opens. 57 */ 58 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { 59 0, /* DOS_MODE_NONE - invalid */ 60 1, /* DOS_MODE_STANDARD - unlimited number */ 61 (DOS_REF_MAX >> 1) + 1, /* DOS_MODE_PRIMARY - only one of these */ 62 DOS_REF_MAX /* DOS_MODE_EXCLUSIVE - no other opens */ 63 }; 64 65 66 void 67 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 68 { 69 int used = BP_GET_ASIZE(bp); 70 int compressed = BP_GET_PSIZE(bp); 71 int uncompressed = BP_GET_UCSIZE(bp); 72 73 dprintf_bp(bp, "born, ds=%p\n", ds); 74 75 ASSERT(dmu_tx_is_syncing(tx)); 76 /* It could have been compressed away to nothing */ 77 if (BP_IS_HOLE(bp)) 78 return; 79 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 80 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 81 if (ds == NULL) { 82 /* 83 * Account for the meta-objset space in its placeholder 84 * dsl_dir. 85 */ 86 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 87 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 88 used, compressed, uncompressed, tx); 89 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 90 return; 91 } 92 dmu_buf_will_dirty(ds->ds_dbuf, tx); 93 mutex_enter(&ds->ds_lock); 94 ds->ds_phys->ds_used_bytes += used; 95 ds->ds_phys->ds_compressed_bytes += compressed; 96 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 97 ds->ds_phys->ds_unique_bytes += used; 98 mutex_exit(&ds->ds_lock); 99 dsl_dir_diduse_space(ds->ds_dir, 100 used, compressed, uncompressed, tx); 101 } 102 103 void 104 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 105 { 106 int used = BP_GET_ASIZE(bp); 107 int compressed = BP_GET_PSIZE(bp); 108 int uncompressed = BP_GET_UCSIZE(bp); 109 110 ASSERT(dmu_tx_is_syncing(tx)); 111 if (BP_IS_HOLE(bp)) 112 return; 113 114 ASSERT(used > 0); 115 if (ds == NULL) { 116 /* 117 * Account for the meta-objset space in its placeholder 118 * dataset. 119 */ 120 /* XXX this can fail, what do we do when it does? */ 121 (void) arc_free(NULL, tx->tx_pool->dp_spa, 122 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); 123 bzero(bp, sizeof (blkptr_t)); 124 125 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 126 -used, -compressed, -uncompressed, tx); 127 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 128 return; 129 } 130 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 131 132 dmu_buf_will_dirty(ds->ds_dbuf, tx); 133 134 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 135 dprintf_bp(bp, "freeing: %s", ""); 136 /* XXX check return code? */ 137 (void) arc_free(NULL, tx->tx_pool->dp_spa, 138 tx->tx_txg, bp, NULL, NULL, ARC_WAIT); 139 140 mutex_enter(&ds->ds_lock); 141 /* XXX unique_bytes is not accurate for head datasets */ 142 /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */ 143 ds->ds_phys->ds_unique_bytes -= used; 144 mutex_exit(&ds->ds_lock); 145 dsl_dir_diduse_space(ds->ds_dir, 146 -used, -compressed, -uncompressed, tx); 147 } else { 148 dprintf_bp(bp, "putting on dead list: %s", ""); 149 bplist_enqueue(&ds->ds_deadlist, bp, tx); 150 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 151 if (ds->ds_phys->ds_prev_snap_obj != 0) { 152 ASSERT3U(ds->ds_prev->ds_object, ==, 153 ds->ds_phys->ds_prev_snap_obj); 154 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 155 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 156 ds->ds_object && 157 bp->blk_birth > 158 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 159 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 160 mutex_enter(&ds->ds_prev->ds_lock); 161 ds->ds_prev->ds_phys->ds_unique_bytes += 162 used; 163 mutex_exit(&ds->ds_prev->ds_lock); 164 } 165 } 166 } 167 bzero(bp, sizeof (blkptr_t)); 168 mutex_enter(&ds->ds_lock); 169 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 170 ds->ds_phys->ds_used_bytes -= used; 171 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 172 ds->ds_phys->ds_compressed_bytes -= compressed; 173 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 174 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 175 mutex_exit(&ds->ds_lock); 176 } 177 178 int 179 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth, dmu_tx_t *tx) 180 { 181 uint64_t prev_snap_txg; 182 dsl_dir_t *dd; 183 /* ASSERT that it is not a snapshot */ 184 if (ds == NULL) 185 return (TRUE); 186 /* 187 * The snapshot creation could fail, but that would cause an 188 * incorrect FALSE return, which would only result in an 189 * overestimation of the amount of space that an operation would 190 * consume, which is OK. 191 * 192 * There's also a small window where we could miss a pending 193 * snapshot, because we could set the sync task in the quiescing 194 * phase. So this should only be used as a guess. 195 */ 196 dd = ds->ds_dir; 197 mutex_enter(&dd->dd_lock); 198 if (dd->dd_sync_func == dsl_dataset_snapshot_sync && 199 dd->dd_sync_txg < tx->tx_txg) 200 prev_snap_txg = dd->dd_sync_txg; 201 else 202 prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 203 mutex_exit(&dd->dd_lock); 204 return (blk_birth > prev_snap_txg); 205 } 206 207 /* ARGSUSED */ 208 static void 209 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 210 { 211 dsl_dataset_t *ds = dsv; 212 dsl_pool_t *dp = ds->ds_dir->dd_pool; 213 214 /* open_refcount == DOS_REF_MAX when deleting */ 215 ASSERT(ds->ds_open_refcount == 0 || 216 ds->ds_open_refcount == DOS_REF_MAX); 217 218 dprintf_ds(ds, "evicting %s\n", ""); 219 220 unique_remove(ds->ds_phys->ds_fsid_guid); 221 222 if (ds->ds_user_ptr != NULL) 223 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 224 225 if (ds->ds_prev) { 226 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 227 ds->ds_prev = NULL; 228 } 229 230 bplist_close(&ds->ds_deadlist); 231 dsl_dir_close(ds->ds_dir, ds); 232 233 if (list_link_active(&ds->ds_synced_link)) 234 list_remove(&dp->dp_synced_objsets, ds); 235 236 kmem_free(ds, sizeof (dsl_dataset_t)); 237 } 238 239 static void 240 dsl_dataset_get_snapname(dsl_dataset_t *ds) 241 { 242 dsl_dataset_phys_t *headphys; 243 int err; 244 dmu_buf_t *headdbuf; 245 dsl_pool_t *dp = ds->ds_dir->dd_pool; 246 objset_t *mos = dp->dp_meta_objset; 247 248 if (ds->ds_snapname[0]) 249 return; 250 if (ds->ds_phys->ds_next_snap_obj == 0) 251 return; 252 253 headdbuf = dmu_bonus_hold_tag(mos, 254 ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG); 255 dmu_buf_read(headdbuf); 256 headphys = headdbuf->db_data; 257 err = zap_value_search(dp->dp_meta_objset, 258 headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname); 259 ASSERT(err == 0); 260 dmu_buf_rele_tag(headdbuf, FTAG); 261 } 262 263 dsl_dataset_t * 264 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, 265 int mode, void *tag) 266 { 267 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 268 objset_t *mos = dp->dp_meta_objset; 269 dmu_buf_t *dbuf; 270 dsl_dataset_t *ds; 271 272 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 273 dsl_pool_sync_context(dp)); 274 275 dbuf = dmu_bonus_hold_tag(mos, dsobj, tag); 276 dmu_buf_read(dbuf); 277 ds = dmu_buf_get_user(dbuf); 278 if (ds == NULL) { 279 dsl_dataset_t *winner; 280 281 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 282 ds->ds_dbuf = dbuf; 283 ds->ds_object = dsobj; 284 ds->ds_phys = dbuf->db_data; 285 ds->ds_dir = dsl_dir_open_obj(dp, 286 ds->ds_phys->ds_dir_obj, NULL, ds); 287 288 bplist_open(&ds->ds_deadlist, 289 mos, ds->ds_phys->ds_deadlist_obj); 290 291 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { 292 ds->ds_snapname[0] = '\0'; 293 if (ds->ds_phys->ds_prev_snap_obj) { 294 ds->ds_prev = 295 dsl_dataset_open_obj(dp, 296 ds->ds_phys->ds_prev_snap_obj, NULL, 297 DS_MODE_NONE, ds); 298 } 299 } else { 300 if (snapname) { 301 #ifdef ZFS_DEBUG 302 dsl_dataset_phys_t *headphys; 303 int err; 304 dmu_buf_t *headdbuf = dmu_bonus_hold_tag(mos, 305 ds->ds_dir->dd_phys-> 306 dd_head_dataset_obj, FTAG); 307 dmu_buf_read(headdbuf); 308 headphys = headdbuf->db_data; 309 uint64_t foundobj; 310 err = zap_lookup(dp->dp_meta_objset, 311 headphys->ds_snapnames_zapobj, 312 snapname, sizeof (foundobj), 1, &foundobj); 313 ASSERT3U(err, ==, 0); 314 ASSERT3U(foundobj, ==, dsobj); 315 dmu_buf_rele_tag(headdbuf, FTAG); 316 #endif 317 (void) strcat(ds->ds_snapname, snapname); 318 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 319 dsl_dataset_get_snapname(ds); 320 } 321 } 322 323 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 324 dsl_dataset_evict); 325 if (winner) { 326 bplist_close(&ds->ds_deadlist); 327 if (ds->ds_prev) { 328 dsl_dataset_close(ds->ds_prev, 329 DS_MODE_NONE, ds); 330 } 331 dsl_dir_close(ds->ds_dir, ds); 332 kmem_free(ds, sizeof (dsl_dataset_t)); 333 ds = winner; 334 } else { 335 uint64_t new = 336 unique_insert(ds->ds_phys->ds_fsid_guid); 337 if (new != ds->ds_phys->ds_fsid_guid) { 338 /* XXX it won't necessarily be synced... */ 339 ds->ds_phys->ds_fsid_guid = new; 340 } 341 } 342 } 343 ASSERT3P(ds->ds_dbuf, ==, dbuf); 344 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 345 346 mutex_enter(&ds->ds_lock); 347 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && 348 ds->ds_phys->ds_restoring && !DS_MODE_IS_RESTORE(mode)) || 349 (ds->ds_open_refcount + weight > DOS_REF_MAX)) { 350 mutex_exit(&ds->ds_lock); 351 dsl_dataset_close(ds, DS_MODE_NONE, tag); 352 return (NULL); 353 } 354 ds->ds_open_refcount += weight; 355 mutex_exit(&ds->ds_lock); 356 357 return (ds); 358 } 359 360 int 361 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, 362 void *tag, dsl_dataset_t **dsp) 363 { 364 dsl_dir_t *dd; 365 dsl_pool_t *dp; 366 const char *tail; 367 uint64_t obj; 368 dsl_dataset_t *ds = NULL; 369 int err = 0; 370 371 dd = dsl_dir_open_spa(spa, name, FTAG, &tail); 372 if (dd == NULL) 373 return (ENOENT); 374 375 dp = dd->dd_pool; 376 obj = dd->dd_phys->dd_head_dataset_obj; 377 rw_enter(&dp->dp_config_rwlock, RW_READER); 378 if (obj == 0) { 379 /* A dataset with no associated objset */ 380 err = ENOENT; 381 goto out; 382 } 383 384 if (tail != NULL) { 385 objset_t *mos = dp->dp_meta_objset; 386 387 ds = dsl_dataset_open_obj(dp, obj, NULL, DS_MODE_NONE, tag); 388 obj = ds->ds_phys->ds_snapnames_zapobj; 389 dsl_dataset_close(ds, DS_MODE_NONE, tag); 390 ds = NULL; 391 392 if (tail[0] != '@') { 393 err = ENOENT; 394 goto out; 395 } 396 tail++; 397 398 /* Look for a snapshot */ 399 if (!DS_MODE_IS_READONLY(mode)) { 400 err = EROFS; 401 goto out; 402 } 403 dprintf("looking for snapshot '%s'\n", tail); 404 err = zap_lookup(mos, obj, tail, 8, 1, &obj); 405 if (err) 406 goto out; 407 } 408 ds = dsl_dataset_open_obj(dp, obj, tail, mode, tag); 409 if (ds == NULL) 410 err = EBUSY; 411 412 out: 413 rw_exit(&dp->dp_config_rwlock); 414 dsl_dir_close(dd, FTAG); 415 416 ASSERT3U((err == 0), ==, (ds != NULL)); 417 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ 418 419 *dsp = ds; 420 return (err); 421 } 422 423 int 424 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) 425 { 426 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); 427 } 428 429 void 430 dsl_dataset_name(dsl_dataset_t *ds, char *name) 431 { 432 if (ds == NULL) { 433 (void) strcpy(name, "mos"); 434 } else { 435 dsl_dir_name(ds->ds_dir, name); 436 dsl_dataset_get_snapname(ds); 437 if (ds->ds_snapname[0]) { 438 (void) strcat(name, "@"); 439 if (!MUTEX_HELD(&ds->ds_lock)) { 440 /* 441 * We use a "recursive" mutex so that we 442 * can call dprintf_ds() with ds_lock held. 443 */ 444 mutex_enter(&ds->ds_lock); 445 (void) strcat(name, ds->ds_snapname); 446 mutex_exit(&ds->ds_lock); 447 } else { 448 (void) strcat(name, ds->ds_snapname); 449 } 450 } 451 } 452 } 453 454 void 455 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) 456 { 457 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 458 mutex_enter(&ds->ds_lock); 459 ASSERT3U(ds->ds_open_refcount, >=, weight); 460 ds->ds_open_refcount -= weight; 461 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", 462 mode, ds->ds_open_refcount); 463 mutex_exit(&ds->ds_lock); 464 465 dmu_buf_rele_tag(ds->ds_dbuf, tag); 466 } 467 468 void 469 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) 470 { 471 objset_t *mos = dp->dp_meta_objset; 472 dmu_buf_t *dbuf; 473 dsl_dataset_phys_t *dsphys; 474 dsl_dataset_t *ds; 475 uint64_t dsobj; 476 dsl_dir_t *dd; 477 478 dsl_dir_create_root(mos, ddobjp, tx); 479 dd = dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG); 480 ASSERT(dd != NULL); 481 482 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_OBJSET, 0, 483 DMU_OT_DSL_OBJSET, sizeof (dsl_dataset_phys_t), tx); 484 dbuf = dmu_bonus_hold(mos, dsobj); 485 dmu_buf_will_dirty(dbuf, tx); 486 dsphys = dbuf->db_data; 487 dsphys->ds_dir_obj = dd->dd_object; 488 dsphys->ds_fsid_guid = unique_create(); 489 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 490 sizeof (dsphys->ds_guid)); 491 dsphys->ds_snapnames_zapobj = 492 zap_create(mos, DMU_OT_DSL_OBJSET_SNAP_MAP, DMU_OT_NONE, 0, tx); 493 dsphys->ds_creation_time = gethrestime_sec(); 494 dsphys->ds_creation_txg = tx->tx_txg; 495 dsphys->ds_deadlist_obj = 496 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 497 dmu_buf_rele(dbuf); 498 499 dmu_buf_will_dirty(dd->dd_dbuf, tx); 500 dd->dd_phys->dd_head_dataset_obj = dsobj; 501 dsl_dir_close(dd, FTAG); 502 503 ds = dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG); 504 (void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx); 505 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 506 } 507 508 int 509 dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname, 510 const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) 511 { 512 int err; 513 dsl_pool_t *dp = pds->dd_pool; 514 dmu_buf_t *dbuf; 515 dsl_dataset_phys_t *dsphys; 516 uint64_t dsobj; 517 objset_t *mos = dp->dp_meta_objset; 518 dsl_dir_t *dd; 519 520 if (clone_parent != NULL) { 521 /* 522 * You can't clone across pools. 523 */ 524 if (clone_parent->ds_dir->dd_pool != dp) 525 return (EXDEV); 526 527 /* 528 * You can only clone snapshots, not the head datasets. 529 */ 530 if (clone_parent->ds_phys->ds_num_children == 0) 531 return (EINVAL); 532 } 533 534 ASSERT(lastname[0] != '@'); 535 ASSERT(dmu_tx_is_syncing(tx)); 536 537 err = dsl_dir_create_sync(pds, lastname, tx); 538 if (err) 539 return (err); 540 dd = dsl_dir_open_spa(dp->dp_spa, fullname, FTAG, NULL); 541 ASSERT(dd != NULL); 542 543 /* This is the point of no (unsuccessful) return */ 544 545 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_OBJSET, 0, 546 DMU_OT_DSL_OBJSET, sizeof (dsl_dataset_phys_t), tx); 547 dbuf = dmu_bonus_hold(mos, dsobj); 548 dmu_buf_will_dirty(dbuf, tx); 549 dsphys = dbuf->db_data; 550 dsphys->ds_dir_obj = dd->dd_object; 551 dsphys->ds_fsid_guid = unique_create(); 552 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 553 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 554 sizeof (dsphys->ds_guid)); 555 dsphys->ds_snapnames_zapobj = 556 zap_create(mos, DMU_OT_DSL_OBJSET_SNAP_MAP, DMU_OT_NONE, 0, tx); 557 dsphys->ds_creation_time = gethrestime_sec(); 558 dsphys->ds_creation_txg = tx->tx_txg; 559 dsphys->ds_deadlist_obj = 560 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 561 if (clone_parent) { 562 dsphys->ds_prev_snap_obj = clone_parent->ds_object; 563 dsphys->ds_prev_snap_txg = 564 clone_parent->ds_phys->ds_creation_txg; 565 dsphys->ds_used_bytes = 566 clone_parent->ds_phys->ds_used_bytes; 567 dsphys->ds_compressed_bytes = 568 clone_parent->ds_phys->ds_compressed_bytes; 569 dsphys->ds_uncompressed_bytes = 570 clone_parent->ds_phys->ds_uncompressed_bytes; 571 dsphys->ds_bp = clone_parent->ds_phys->ds_bp; 572 573 dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); 574 clone_parent->ds_phys->ds_num_children++; 575 576 dmu_buf_will_dirty(dd->dd_dbuf, tx); 577 dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; 578 } 579 dmu_buf_rele(dbuf); 580 581 dmu_buf_will_dirty(dd->dd_dbuf, tx); 582 dd->dd_phys->dd_head_dataset_obj = dsobj; 583 dsl_dir_close(dd, FTAG); 584 585 return (0); 586 } 587 588 589 int 590 dsl_dataset_destroy(const char *name) 591 { 592 int err; 593 dsl_pool_t *dp; 594 dsl_dir_t *dd; 595 const char *tail; 596 597 dd = dsl_dir_open(name, FTAG, &tail); 598 if (dd == NULL) 599 return (ENOENT); 600 601 dp = dd->dd_pool; 602 if (tail != NULL) { 603 if (tail[0] != '@') { 604 dsl_dir_close(dd, FTAG); 605 return (ENOENT); 606 } 607 tail++; 608 /* Just blow away the snapshot */ 609 do { 610 txg_wait_synced(dp, 0); 611 err = dsl_dir_sync_task(dd, 612 dsl_dataset_destroy_sync, (void*)tail, 0); 613 } while (err == EAGAIN); 614 dsl_dir_close(dd, FTAG); 615 } else { 616 char buf[MAXNAMELEN]; 617 char *cp; 618 619 dsl_dir_t *pds; 620 if (dd->dd_phys->dd_parent_obj == 0) { 621 dsl_dir_close(dd, FTAG); 622 return (EINVAL); 623 } 624 /* 625 * Make sure it's not dirty before we destroy it. 626 */ 627 txg_wait_synced(dd->dd_pool, 0); 628 /* 629 * Blow away the dsl_dir + head dataset. 630 * dsl_dir_destroy_sync() will call 631 * dsl_dataset_destroy_sync() to destroy the head dataset. 632 */ 633 rw_enter(&dp->dp_config_rwlock, RW_READER); 634 pds = dsl_dir_open_obj(dd->dd_pool, 635 dd->dd_phys->dd_parent_obj, NULL, FTAG); 636 dsl_dir_close(dd, FTAG); 637 rw_exit(&dp->dp_config_rwlock); 638 639 (void) strcpy(buf, name); 640 cp = strrchr(buf, '/') + 1; 641 ASSERT(cp[0] != '\0'); 642 do { 643 txg_wait_synced(dp, 0); 644 err = dsl_dir_sync_task(pds, 645 dsl_dir_destroy_sync, cp, 0); 646 } while (err == EAGAIN); 647 dsl_dir_close(pds, FTAG); 648 } 649 650 return (err); 651 } 652 653 int 654 dsl_dataset_rollback(const char *name) 655 { 656 int err; 657 dsl_dir_t *dd; 658 const char *tail; 659 660 dd = dsl_dir_open(name, FTAG, &tail); 661 if (dd == NULL) 662 return (ENOENT); 663 664 if (tail != NULL) { 665 dsl_dir_close(dd, FTAG); 666 return (EINVAL); 667 } 668 do { 669 txg_wait_synced(dd->dd_pool, 0); 670 err = dsl_dir_sync_task(dd, 671 dsl_dataset_rollback_sync, NULL, 0); 672 } while (err == EAGAIN); 673 dsl_dir_close(dd, FTAG); 674 675 return (err); 676 } 677 678 void * 679 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 680 void *p, dsl_dataset_evict_func_t func) 681 { 682 void *old; 683 684 mutex_enter(&ds->ds_lock); 685 old = ds->ds_user_ptr; 686 if (old == NULL) { 687 ds->ds_user_ptr = p; 688 ds->ds_user_evict_func = func; 689 } 690 mutex_exit(&ds->ds_lock); 691 return (old); 692 } 693 694 void * 695 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 696 { 697 return (ds->ds_user_ptr); 698 } 699 700 701 void 702 dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp) 703 { 704 *bp = ds->ds_phys->ds_bp; 705 } 706 707 void 708 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 709 { 710 ASSERT(dmu_tx_is_syncing(tx)); 711 /* If it's the meta-objset, set dp_meta_rootbp */ 712 if (ds == NULL) { 713 tx->tx_pool->dp_meta_rootbp = *bp; 714 } else { 715 dmu_buf_will_dirty(ds->ds_dbuf, tx); 716 ds->ds_phys->ds_bp = *bp; 717 } 718 } 719 720 spa_t * 721 dsl_dataset_get_spa(dsl_dataset_t *ds) 722 { 723 return (ds->ds_dir->dd_pool->dp_spa); 724 } 725 726 void 727 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 728 { 729 dsl_pool_t *dp; 730 731 if (ds == NULL) /* this is the meta-objset */ 732 return; 733 734 ASSERT(ds->ds_user_ptr != NULL); 735 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 736 737 dp = ds->ds_dir->dd_pool; 738 739 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 740 /* up the hold count until we can be written out */ 741 dmu_buf_add_ref(ds->ds_dbuf, ds); 742 } 743 } 744 745 struct killarg { 746 uint64_t *usedp; 747 uint64_t *compressedp; 748 uint64_t *uncompressedp; 749 zio_t *zio; 750 dmu_tx_t *tx; 751 }; 752 753 static int 754 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 755 { 756 struct killarg *ka = arg; 757 blkptr_t *bp = &bc->bc_blkptr; 758 759 ASSERT3U(bc->bc_errno, ==, 0); 760 761 /* 762 * Since this callback is not called concurrently, no lock is 763 * needed on the accounting values. 764 */ 765 *ka->usedp += BP_GET_ASIZE(bp); 766 *ka->compressedp += BP_GET_PSIZE(bp); 767 *ka->uncompressedp += BP_GET_UCSIZE(bp); 768 /* XXX check for EIO? */ 769 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, 770 ARC_NOWAIT); 771 return (0); 772 } 773 774 /* ARGSUSED */ 775 int 776 dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 777 { 778 objset_t *mos = dd->dd_pool->dp_meta_objset; 779 dsl_dataset_t *ds; 780 781 if (dd->dd_phys->dd_head_dataset_obj == 0) 782 return (EINVAL); 783 ds = dsl_dataset_open_obj(dd->dd_pool, 784 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG); 785 786 if (ds->ds_phys->ds_prev_snap_txg == 0) { 787 /* 788 * There's no previous snapshot. I suppose we could 789 * roll it back to being empty (and re-initialize the 790 * upper (ZPL) layer). But for now there's no way to do 791 * this via the user interface. 792 */ 793 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 794 return (EINVAL); 795 } 796 797 mutex_enter(&ds->ds_lock); 798 if (ds->ds_open_refcount > 0) { 799 mutex_exit(&ds->ds_lock); 800 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 801 return (EBUSY); 802 } 803 804 /* 805 * If we made changes this txg, traverse_dsl_dataset won't find 806 * them. Try again. 807 */ 808 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) { 809 mutex_exit(&ds->ds_lock); 810 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 811 return (EAGAIN); 812 } 813 814 /* THE POINT OF NO (unsuccessful) RETURN */ 815 ds->ds_open_refcount = DOS_REF_MAX; 816 mutex_exit(&ds->ds_lock); 817 818 dmu_buf_will_dirty(ds->ds_dbuf, tx); 819 820 /* Zero out the deadlist. */ 821 dprintf("old deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj); 822 bplist_close(&ds->ds_deadlist); 823 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 824 ds->ds_phys->ds_deadlist_obj = 825 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 826 bplist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 827 dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj); 828 829 { 830 /* Free blkptrs that we gave birth to */ 831 zio_t *zio; 832 uint64_t used = 0, compressed = 0, uncompressed = 0; 833 struct killarg ka; 834 835 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 836 ZIO_FLAG_MUSTSUCCEED); 837 ka.usedp = &used; 838 ka.compressedp = &compressed; 839 ka.uncompressedp = &uncompressed; 840 ka.zio = zio; 841 ka.tx = tx; 842 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 843 ADVANCE_POST, kill_blkptr, &ka); 844 (void) zio_wait(zio); 845 846 dsl_dir_diduse_space(dd, 847 -used, -compressed, -uncompressed, tx); 848 } 849 850 /* Change our contents to that of the prev snapshot (finally!) */ 851 ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); 852 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 853 ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; 854 ds->ds_phys->ds_compressed_bytes = 855 ds->ds_prev->ds_phys->ds_compressed_bytes; 856 ds->ds_phys->ds_uncompressed_bytes = 857 ds->ds_prev->ds_phys->ds_uncompressed_bytes; 858 ds->ds_phys->ds_restoring = ds->ds_prev->ds_phys->ds_restoring; 859 ds->ds_phys->ds_unique_bytes = 0; 860 861 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 862 ds->ds_prev->ds_phys->ds_unique_bytes = 0; 863 864 dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj); 865 ds->ds_open_refcount = 0; 866 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 867 868 return (0); 869 } 870 871 int 872 dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 873 { 874 const char *snapname = arg; 875 uint64_t used = 0, compressed = 0, uncompressed = 0; 876 blkptr_t bp; 877 zio_t *zio; 878 int err; 879 int after_branch_point = FALSE; 880 int drop_lock = FALSE; 881 dsl_pool_t *dp = dd->dd_pool; 882 objset_t *mos = dp->dp_meta_objset; 883 dsl_dataset_t *ds, *ds_prev = NULL; 884 uint64_t obj; 885 886 if (dd->dd_phys->dd_head_dataset_obj == 0) 887 return (EINVAL); 888 889 if (!RW_WRITE_HELD(&dp->dp_config_rwlock)) { 890 rw_enter(&dp->dp_config_rwlock, RW_WRITER); 891 drop_lock = TRUE; 892 } 893 894 ds = dsl_dataset_open_obj(dd->dd_pool, 895 dd->dd_phys->dd_head_dataset_obj, NULL, 896 snapname ? DS_MODE_NONE : DS_MODE_EXCLUSIVE, FTAG); 897 898 if (snapname) { 899 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 900 snapname, 8, 1, &obj); 901 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 902 if (err) { 903 if (drop_lock) 904 rw_exit(&dp->dp_config_rwlock); 905 return (err); 906 } 907 908 ds = dsl_dataset_open_obj(dd->dd_pool, obj, NULL, 909 DS_MODE_EXCLUSIVE, FTAG); 910 } 911 if (ds == NULL) { 912 if (drop_lock) 913 rw_exit(&dp->dp_config_rwlock); 914 return (EBUSY); 915 } 916 917 obj = ds->ds_object; 918 919 /* Can't delete a branch point. */ 920 if (ds->ds_phys->ds_num_children > 1) { 921 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 922 if (drop_lock) 923 rw_exit(&dp->dp_config_rwlock); 924 return (EINVAL); 925 } 926 927 /* 928 * Can't delete a head dataset if there are snapshots of it. 929 * (Except if the only snapshots are from the branch we cloned 930 * from.) 931 */ 932 if (ds->ds_prev != NULL && 933 ds->ds_prev->ds_phys->ds_next_snap_obj == obj) { 934 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 935 if (drop_lock) 936 rw_exit(&dp->dp_config_rwlock); 937 return (EINVAL); 938 } 939 940 /* 941 * If we made changes this txg, traverse_dsl_dataset won't find 942 * them. Try again. 943 */ 944 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) { 945 mutex_exit(&ds->ds_lock); 946 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 947 return (EAGAIN); 948 } 949 950 /* THE POINT OF NO (unsuccessful) RETURN */ 951 952 if (ds->ds_phys->ds_prev_snap_obj != 0) { 953 if (ds->ds_prev) { 954 ds_prev = ds->ds_prev; 955 } else { 956 ds_prev = dsl_dataset_open_obj(dd->dd_pool, 957 ds->ds_phys->ds_prev_snap_obj, NULL, 958 DS_MODE_NONE, FTAG); 959 } 960 after_branch_point = 961 (ds_prev->ds_phys->ds_next_snap_obj != obj); 962 963 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 964 if (after_branch_point && 965 ds->ds_phys->ds_next_snap_obj == 0) { 966 /* This clone is toast. */ 967 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 968 ds_prev->ds_phys->ds_num_children--; 969 } else if (!after_branch_point) { 970 ds_prev->ds_phys->ds_next_snap_obj = 971 ds->ds_phys->ds_next_snap_obj; 972 } 973 } 974 975 ASSERT3P(tx->tx_pool, ==, dd->dd_pool); 976 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 977 978 if (ds->ds_phys->ds_next_snap_obj != 0) { 979 dsl_dataset_t *ds_next; 980 uint64_t itor = 0; 981 982 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 983 984 ds_next = dsl_dataset_open_obj(dd->dd_pool, 985 ds->ds_phys->ds_next_snap_obj, NULL, DS_MODE_NONE, FTAG); 986 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 987 988 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 989 ds_next->ds_phys->ds_prev_snap_obj = 990 ds->ds_phys->ds_prev_snap_obj; 991 ds_next->ds_phys->ds_prev_snap_txg = 992 ds->ds_phys->ds_prev_snap_txg; 993 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 994 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 995 996 /* 997 * Transfer to our deadlist (which will become next's 998 * new deadlist) any entries from next's current 999 * deadlist which were born before prev, and free the 1000 * other entries. 1001 * 1002 * XXX we're doing this long task with the config lock held 1003 */ 1004 while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1005 &bp) == 0) { 1006 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1007 bplist_enqueue(&ds->ds_deadlist, &bp, tx); 1008 if (ds_prev && !after_branch_point && 1009 bp.blk_birth > 1010 ds_prev->ds_phys->ds_prev_snap_txg) { 1011 ds_prev->ds_phys->ds_unique_bytes += 1012 BP_GET_ASIZE(&bp); 1013 } 1014 } else { 1015 used += BP_GET_ASIZE(&bp); 1016 compressed += BP_GET_PSIZE(&bp); 1017 uncompressed += BP_GET_UCSIZE(&bp); 1018 /* XXX check return value? */ 1019 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1020 &bp, NULL, NULL, ARC_NOWAIT); 1021 } 1022 } 1023 1024 /* free next's deadlist */ 1025 bplist_close(&ds_next->ds_deadlist); 1026 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1027 1028 /* set next's deadlist to our deadlist */ 1029 ds_next->ds_phys->ds_deadlist_obj = 1030 ds->ds_phys->ds_deadlist_obj; 1031 bplist_open(&ds_next->ds_deadlist, mos, 1032 ds_next->ds_phys->ds_deadlist_obj); 1033 ds->ds_phys->ds_deadlist_obj = 0; 1034 1035 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1036 /* 1037 * Update next's unique to include blocks which 1038 * were previously shared by only this snapshot 1039 * and it. Those blocks will be born after the 1040 * prev snap and before this snap, and will have 1041 * died after the next snap and before the one 1042 * after that (ie. be on the snap after next's 1043 * deadlist). 1044 * 1045 * XXX we're doing this long task with the 1046 * config lock held 1047 */ 1048 dsl_dataset_t *ds_after_next; 1049 1050 ds_after_next = dsl_dataset_open_obj(dd->dd_pool, 1051 ds_next->ds_phys->ds_next_snap_obj, NULL, 1052 DS_MODE_NONE, FTAG); 1053 itor = 0; 1054 while (bplist_iterate(&ds_after_next->ds_deadlist, 1055 &itor, &bp) == 0) { 1056 if (bp.blk_birth > 1057 ds->ds_phys->ds_prev_snap_txg && 1058 bp.blk_birth <= 1059 ds->ds_phys->ds_creation_txg) { 1060 ds_next->ds_phys->ds_unique_bytes += 1061 BP_GET_ASIZE(&bp); 1062 } 1063 } 1064 1065 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1066 ASSERT3P(ds_next->ds_prev, ==, NULL); 1067 } else { 1068 /* 1069 * It would be nice to update the head dataset's 1070 * unique. To do so we would have to traverse 1071 * it for blocks born after ds_prev, which is 1072 * pretty expensive just to maintain something 1073 * for debugging purposes. 1074 */ 1075 ASSERT3P(ds_next->ds_prev, ==, ds); 1076 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1077 ds_next); 1078 if (ds_prev) { 1079 ds_next->ds_prev = dsl_dataset_open_obj( 1080 dd->dd_pool, ds->ds_phys->ds_prev_snap_obj, 1081 NULL, DS_MODE_NONE, ds_next); 1082 } else { 1083 ds_next->ds_prev = NULL; 1084 } 1085 } 1086 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1087 1088 /* 1089 * NB: unique_bytes is not accurate for head objsets 1090 * because we don't update it when we delete the most 1091 * recent snapshot -- see above comment. 1092 */ 1093 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1094 } else { 1095 /* 1096 * There's no next snapshot, so this is a head dataset. 1097 * Destroy the deadlist. Unless it's a clone, the 1098 * deadlist should be empty. (If it's a clone, it's 1099 * safe to ignore the deadlist contents.) 1100 */ 1101 struct killarg ka; 1102 1103 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1104 bplist_close(&ds->ds_deadlist); 1105 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1106 ds->ds_phys->ds_deadlist_obj = 0; 1107 1108 /* 1109 * Free everything that we point to (that's born after 1110 * the previous snapshot, if we are a clone) 1111 * 1112 * XXX we're doing this long task with the config lock held 1113 */ 1114 ka.usedp = &used; 1115 ka.compressedp = &compressed; 1116 ka.uncompressedp = &uncompressed; 1117 ka.zio = zio; 1118 ka.tx = tx; 1119 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1120 ADVANCE_POST, kill_blkptr, &ka); 1121 ASSERT3U(err, ==, 0); 1122 } 1123 1124 err = zio_wait(zio); 1125 ASSERT3U(err, ==, 0); 1126 1127 dsl_dir_diduse_space(dd, -used, -compressed, -uncompressed, tx); 1128 1129 if (ds->ds_phys->ds_snapnames_zapobj) { 1130 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1131 ASSERT(err == 0); 1132 } 1133 1134 if (dd->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1135 /* Erase the link in the dataset */ 1136 dmu_buf_will_dirty(dd->dd_dbuf, tx); 1137 dd->dd_phys->dd_head_dataset_obj = 0; 1138 /* 1139 * dsl_dir_sync_destroy() called us, they'll destroy 1140 * the dataset. 1141 */ 1142 } else { 1143 /* remove from snapshot namespace */ 1144 dsl_dataset_t *ds_head; 1145 ds_head = dsl_dataset_open_obj(dd->dd_pool, 1146 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG); 1147 #ifdef ZFS_DEBUG 1148 { 1149 uint64_t val; 1150 err = zap_lookup(mos, 1151 ds_head->ds_phys->ds_snapnames_zapobj, 1152 snapname, 8, 1, &val); 1153 ASSERT3U(err, ==, 0); 1154 ASSERT3U(val, ==, obj); 1155 } 1156 #endif 1157 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1158 snapname, tx); 1159 ASSERT(err == 0); 1160 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1161 } 1162 1163 if (ds_prev && ds->ds_prev != ds_prev) 1164 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1165 1166 err = dmu_object_free(mos, obj, tx); 1167 ASSERT(err == 0); 1168 1169 /* 1170 * Close the objset with mode NONE, thus leaving it with 1171 * DOS_REF_MAX set, so that noone can access it. 1172 */ 1173 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1174 1175 if (drop_lock) 1176 rw_exit(&dp->dp_config_rwlock); 1177 return (0); 1178 } 1179 1180 int 1181 dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1182 { 1183 const char *snapname = arg; 1184 dsl_pool_t *dp = dd->dd_pool; 1185 dmu_buf_t *dbuf; 1186 dsl_dataset_phys_t *dsphys; 1187 uint64_t dsobj, value; 1188 objset_t *mos = dp->dp_meta_objset; 1189 dsl_dataset_t *ds; 1190 int err; 1191 1192 ASSERT(dmu_tx_is_syncing(tx)); 1193 1194 if (dd->dd_phys->dd_head_dataset_obj == 0) 1195 return (EINVAL); 1196 ds = dsl_dataset_open_obj(dp, dd->dd_phys->dd_head_dataset_obj, NULL, 1197 DS_MODE_NONE, FTAG); 1198 1199 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1200 snapname, 8, 1, &value); 1201 if (err == 0) { 1202 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1203 return (EEXIST); 1204 } 1205 ASSERT(err == ENOENT); 1206 1207 /* The point of no (unsuccessful) return */ 1208 1209 dprintf_dd(dd, "taking snapshot %s in txg %llu\n", 1210 snapname, tx->tx_txg); 1211 1212 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1213 1214 rw_enter(&dp->dp_config_rwlock, RW_WRITER); 1215 1216 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_OBJSET, 0, 1217 DMU_OT_DSL_OBJSET, sizeof (dsl_dataset_phys_t), tx); 1218 dbuf = dmu_bonus_hold(mos, dsobj); 1219 dmu_buf_will_dirty(dbuf, tx); 1220 dsphys = dbuf->db_data; 1221 dsphys->ds_dir_obj = dd->dd_object; 1222 dsphys->ds_fsid_guid = unique_create(); 1223 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 1224 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1225 sizeof (dsphys->ds_guid)); 1226 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1227 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1228 dsphys->ds_next_snap_obj = ds->ds_object; 1229 dsphys->ds_num_children = 1; 1230 dsphys->ds_creation_time = gethrestime_sec(); 1231 dsphys->ds_creation_txg = tx->tx_txg; 1232 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1233 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1234 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1235 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1236 dsphys->ds_restoring = ds->ds_phys->ds_restoring; 1237 dsphys->ds_bp = ds->ds_phys->ds_bp; 1238 dmu_buf_rele(dbuf); 1239 1240 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1241 dsl_dataset_t *ds_prev; 1242 1243 ds_prev = dsl_dataset_open_obj(dp, 1244 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE, FTAG); 1245 ASSERT(ds_prev->ds_phys->ds_next_snap_obj == 1246 ds->ds_object || 1247 ds_prev->ds_phys->ds_num_children > 1); 1248 if (ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1249 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1250 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1251 ds_prev->ds_phys->ds_creation_txg); 1252 ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1253 } 1254 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1255 } else { 1256 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 0); 1257 } 1258 1259 bplist_close(&ds->ds_deadlist); 1260 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1261 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); 1262 ds->ds_phys->ds_prev_snap_obj = dsobj; 1263 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; 1264 ds->ds_phys->ds_unique_bytes = 0; 1265 ds->ds_phys->ds_deadlist_obj = 1266 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1267 bplist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 1268 1269 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1270 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1271 snapname, 8, 1, &dsobj, tx); 1272 ASSERT(err == 0); 1273 1274 if (ds->ds_prev) 1275 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1276 ds->ds_prev = dsl_dataset_open_obj(dp, 1277 ds->ds_phys->ds_prev_snap_obj, snapname, DS_MODE_NONE, ds); 1278 1279 rw_exit(&dp->dp_config_rwlock); 1280 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1281 1282 return (0); 1283 } 1284 1285 void 1286 dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx) 1287 { 1288 ASSERT(dmu_tx_is_syncing(tx)); 1289 ASSERT(ds->ds_user_ptr != NULL); 1290 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1291 1292 dmu_objset_sync(ds->ds_user_ptr, tx); 1293 dsl_dir_dirty(ds->ds_dir, tx); 1294 bplist_close(&ds->ds_deadlist); 1295 1296 dmu_buf_remove_ref(ds->ds_dbuf, ds); 1297 } 1298 1299 void 1300 dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds) 1301 { 1302 /* fill in properties crap */ 1303 dsl_dir_stats(ds->ds_dir, dds); 1304 1305 if (ds->ds_phys->ds_num_children != 0) { 1306 dds->dds_is_snapshot = TRUE; 1307 dds->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1308 } 1309 1310 dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth; 1311 1312 dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill; 1313 dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used; 1314 1315 /* We override the dataset's creation time... they should be the same */ 1316 dds->dds_creation_time = ds->ds_phys->ds_creation_time; 1317 dds->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1318 dds->dds_space_refd = ds->ds_phys->ds_used_bytes; 1319 dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid; 1320 dds->dds_guid = ds->ds_phys->ds_guid; 1321 1322 if (ds->ds_phys->ds_next_snap_obj) { 1323 /* 1324 * This is a snapshot; override the dd's space used with 1325 * our unique space 1326 */ 1327 dds->dds_space_used = ds->ds_phys->ds_unique_bytes; 1328 dds->dds_compressed_bytes = 1329 ds->ds_phys->ds_compressed_bytes; 1330 dds->dds_uncompressed_bytes = 1331 ds->ds_phys->ds_uncompressed_bytes; 1332 } 1333 1334 dds->dds_objset_obj = ds->ds_object; 1335 } 1336 1337 dsl_pool_t * 1338 dsl_dataset_pool(dsl_dataset_t *ds) 1339 { 1340 return (ds->ds_dir->dd_pool); 1341 } 1342 1343 struct osrenamearg { 1344 const char *oldname; 1345 const char *newname; 1346 }; 1347 1348 static int 1349 dsl_dataset_snapshot_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1350 { 1351 struct osrenamearg *ora = arg; 1352 objset_t *mos = dd->dd_pool->dp_meta_objset; 1353 dsl_dir_t *nds; 1354 const char *tail; 1355 int err; 1356 dsl_dataset_t *snds, *fsds; 1357 uint64_t val; 1358 1359 err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, ora->oldname, 1360 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &snds); 1361 if (err) 1362 return (err); 1363 1364 if (snds->ds_dir != dd) { 1365 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1366 return (EINVAL); 1367 } 1368 1369 /* better be changing a snapshot */ 1370 if (snds->ds_phys->ds_next_snap_obj == 0) { 1371 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1372 return (EINVAL); 1373 } 1374 1375 /* new fs better exist */ 1376 nds = dsl_dir_open_spa(dd->dd_pool->dp_spa, ora->newname, FTAG, &tail); 1377 if (nds == NULL) { 1378 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1379 return (ENOENT); 1380 } 1381 1382 dsl_dir_close(nds, FTAG); 1383 1384 /* new name better be in same fs */ 1385 if (nds != dd) { 1386 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1387 return (EINVAL); 1388 } 1389 1390 /* new name better be a snapshot */ 1391 if (tail == NULL || tail[0] != '@') { 1392 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1393 return (EINVAL); 1394 } 1395 1396 tail++; 1397 1398 fsds = dsl_dataset_open_obj(dd->dd_pool, 1399 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG); 1400 1401 /* new name better not be in use */ 1402 err = zap_lookup(mos, fsds->ds_phys->ds_snapnames_zapobj, 1403 tail, 8, 1, &val); 1404 if (err != ENOENT) { 1405 if (err == 0) 1406 err = EEXIST; 1407 dsl_dataset_close(fsds, DS_MODE_NONE, FTAG); 1408 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1409 return (EEXIST); 1410 } 1411 1412 /* The point of no (unsuccessful) return */ 1413 1414 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER); 1415 dsl_dataset_get_snapname(snds); 1416 err = zap_remove(mos, fsds->ds_phys->ds_snapnames_zapobj, 1417 snds->ds_snapname, tx); 1418 ASSERT3U(err, ==, 0); 1419 mutex_enter(&snds->ds_lock); 1420 (void) strcpy(snds->ds_snapname, tail); 1421 mutex_exit(&snds->ds_lock); 1422 err = zap_add(mos, fsds->ds_phys->ds_snapnames_zapobj, 1423 snds->ds_snapname, 8, 1, &snds->ds_object, tx); 1424 ASSERT3U(err, ==, 0); 1425 rw_exit(&dd->dd_pool->dp_config_rwlock); 1426 1427 dsl_dataset_close(fsds, DS_MODE_NONE, FTAG); 1428 dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1429 return (0); 1430 } 1431 1432 #pragma weak dmu_objset_rename = dsl_dataset_rename 1433 int 1434 dsl_dataset_rename(const char *osname, const char *newname) 1435 { 1436 dsl_dir_t *dd; 1437 const char *tail; 1438 struct osrenamearg ora; 1439 int err; 1440 1441 dd = dsl_dir_open(osname, FTAG, &tail); 1442 if (dd == NULL) 1443 return (ENOENT); 1444 if (tail == NULL) { 1445 err = dsl_dir_sync_task(dd, 1446 dsl_dir_rename_sync, (void*)newname, 1<<12); 1447 dsl_dir_close(dd, FTAG); 1448 return (err); 1449 } 1450 if (tail[0] != '@') { 1451 /* the name ended in a nonexistant component */ 1452 dsl_dir_close(dd, FTAG); 1453 return (ENOENT); 1454 } 1455 1456 ora.oldname = osname; 1457 ora.newname = newname; 1458 1459 err = dsl_dir_sync_task(dd, 1460 dsl_dataset_snapshot_rename_sync, &ora, 1<<12); 1461 dsl_dir_close(dd, FTAG); 1462 return (err); 1463 } 1464