1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/dmu_objset.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_prop.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dmu_traverse.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zio.h> 37 #include <sys/zap.h> 38 #include <sys/unique.h> 39 #include <sys/zfs_context.h> 40 #include <sys/zfs_ioctl.h> 41 #include <sys/spa.h> 42 #include <sys/sunddi.h> 43 44 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 45 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 46 static dsl_checkfunc_t dsl_dataset_rollback_check; 47 static dsl_syncfunc_t dsl_dataset_rollback_sync; 48 static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 49 50 #define DS_REF_MAX (1ULL << 62) 51 52 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 53 54 /* 55 * We use weighted reference counts to express the various forms of exclusion 56 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open 57 * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. 58 * This makes the exclusion logic simple: the total refcnt for all opens cannot 59 * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their 60 * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume 61 * just over half of the refcnt space, so there can't be more than one, but it 62 * can peacefully coexist with any number of STANDARD opens. 63 */ 64 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { 65 0, /* DS_MODE_NONE - invalid */ 66 1, /* DS_MODE_STANDARD - unlimited number */ 67 (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */ 68 DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */ 69 }; 70 71 /* 72 * Figure out how much of this delta should be propogated to the dsl_dir 73 * layer. If there's a refreservation, that space has already been 74 * partially accounted for in our ancestors. 75 */ 76 static int64_t 77 parent_delta(dsl_dataset_t *ds, int64_t delta) 78 { 79 uint64_t old_bytes, new_bytes; 80 81 if (ds->ds_reserved == 0) 82 return (delta); 83 84 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 85 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 86 87 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 88 return (new_bytes - old_bytes); 89 } 90 91 void 92 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 93 { 94 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 95 int compressed = BP_GET_PSIZE(bp); 96 int uncompressed = BP_GET_UCSIZE(bp); 97 int64_t delta; 98 99 dprintf_bp(bp, "born, ds=%p\n", ds); 100 101 ASSERT(dmu_tx_is_syncing(tx)); 102 /* It could have been compressed away to nothing */ 103 if (BP_IS_HOLE(bp)) 104 return; 105 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 106 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 107 if (ds == NULL) { 108 /* 109 * Account for the meta-objset space in its placeholder 110 * dsl_dir. 111 */ 112 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 113 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 114 used, compressed, uncompressed, tx); 115 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 116 return; 117 } 118 dmu_buf_will_dirty(ds->ds_dbuf, tx); 119 mutex_enter(&ds->ds_lock); 120 delta = parent_delta(ds, used); 121 ds->ds_phys->ds_used_bytes += used; 122 ds->ds_phys->ds_compressed_bytes += compressed; 123 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 124 ds->ds_phys->ds_unique_bytes += used; 125 mutex_exit(&ds->ds_lock); 126 dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx); 127 } 128 129 void 130 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 131 dmu_tx_t *tx) 132 { 133 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 134 int compressed = BP_GET_PSIZE(bp); 135 int uncompressed = BP_GET_UCSIZE(bp); 136 137 ASSERT(dmu_tx_is_syncing(tx)); 138 /* No block pointer => nothing to free */ 139 if (BP_IS_HOLE(bp)) 140 return; 141 142 ASSERT(used > 0); 143 if (ds == NULL) { 144 int err; 145 /* 146 * Account for the meta-objset space in its placeholder 147 * dataset. 148 */ 149 err = arc_free(pio, tx->tx_pool->dp_spa, 150 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 151 ASSERT(err == 0); 152 153 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 154 -used, -compressed, -uncompressed, tx); 155 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 156 return; 157 } 158 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 159 160 dmu_buf_will_dirty(ds->ds_dbuf, tx); 161 162 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 163 int err; 164 int64_t delta; 165 166 dprintf_bp(bp, "freeing: %s", ""); 167 err = arc_free(pio, tx->tx_pool->dp_spa, 168 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 169 ASSERT(err == 0); 170 171 mutex_enter(&ds->ds_lock); 172 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 173 !DS_UNIQUE_IS_ACCURATE(ds)); 174 delta = parent_delta(ds, -used); 175 ds->ds_phys->ds_unique_bytes -= used; 176 mutex_exit(&ds->ds_lock); 177 dsl_dir_diduse_space(ds->ds_dir, 178 delta, -compressed, -uncompressed, tx); 179 } else { 180 dprintf_bp(bp, "putting on dead list: %s", ""); 181 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 182 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 183 if (ds->ds_phys->ds_prev_snap_obj != 0) { 184 ASSERT3U(ds->ds_prev->ds_object, ==, 185 ds->ds_phys->ds_prev_snap_obj); 186 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 187 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 188 ds->ds_object && bp->blk_birth > 189 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 190 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 191 mutex_enter(&ds->ds_prev->ds_lock); 192 ds->ds_prev->ds_phys->ds_unique_bytes += 193 used; 194 mutex_exit(&ds->ds_prev->ds_lock); 195 } 196 } 197 } 198 mutex_enter(&ds->ds_lock); 199 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 200 ds->ds_phys->ds_used_bytes -= used; 201 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 202 ds->ds_phys->ds_compressed_bytes -= compressed; 203 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 204 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 205 mutex_exit(&ds->ds_lock); 206 } 207 208 uint64_t 209 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 210 { 211 uint64_t trysnap = 0; 212 213 if (ds == NULL) 214 return (0); 215 /* 216 * The snapshot creation could fail, but that would cause an 217 * incorrect FALSE return, which would only result in an 218 * overestimation of the amount of space that an operation would 219 * consume, which is OK. 220 * 221 * There's also a small window where we could miss a pending 222 * snapshot, because we could set the sync task in the quiescing 223 * phase. So this should only be used as a guess. 224 */ 225 if (ds->ds_trysnap_txg > 226 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 227 trysnap = ds->ds_trysnap_txg; 228 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 229 } 230 231 int 232 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 233 { 234 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 235 } 236 237 /* ARGSUSED */ 238 static void 239 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 240 { 241 dsl_dataset_t *ds = dsv; 242 243 /* open_refcount == DS_REF_MAX when deleting */ 244 ASSERT(ds->ds_open_refcount == 0 || 245 ds->ds_open_refcount == DS_REF_MAX); 246 247 dprintf_ds(ds, "evicting %s\n", ""); 248 249 unique_remove(ds->ds_fsid_guid); 250 251 if (ds->ds_user_ptr != NULL) 252 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 253 254 if (ds->ds_prev) { 255 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 256 ds->ds_prev = NULL; 257 } 258 259 bplist_close(&ds->ds_deadlist); 260 dsl_dir_close(ds->ds_dir, ds); 261 262 ASSERT(!list_link_active(&ds->ds_synced_link)); 263 264 mutex_destroy(&ds->ds_lock); 265 mutex_destroy(&ds->ds_opening_lock); 266 mutex_destroy(&ds->ds_deadlist.bpl_lock); 267 268 kmem_free(ds, sizeof (dsl_dataset_t)); 269 } 270 271 static int 272 dsl_dataset_get_snapname(dsl_dataset_t *ds) 273 { 274 dsl_dataset_phys_t *headphys; 275 int err; 276 dmu_buf_t *headdbuf; 277 dsl_pool_t *dp = ds->ds_dir->dd_pool; 278 objset_t *mos = dp->dp_meta_objset; 279 280 if (ds->ds_snapname[0]) 281 return (0); 282 if (ds->ds_phys->ds_next_snap_obj == 0) 283 return (0); 284 285 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 286 FTAG, &headdbuf); 287 if (err) 288 return (err); 289 headphys = headdbuf->db_data; 290 err = zap_value_search(dp->dp_meta_objset, 291 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 292 dmu_buf_rele(headdbuf, FTAG); 293 return (err); 294 } 295 296 int 297 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, 298 int mode, void *tag, dsl_dataset_t **dsp) 299 { 300 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 301 objset_t *mos = dp->dp_meta_objset; 302 dmu_buf_t *dbuf; 303 dsl_dataset_t *ds; 304 int err; 305 306 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 307 dsl_pool_sync_context(dp)); 308 309 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 310 if (err) 311 return (err); 312 ds = dmu_buf_get_user(dbuf); 313 if (ds == NULL) { 314 dsl_dataset_t *winner; 315 316 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 317 ds->ds_dbuf = dbuf; 318 ds->ds_object = dsobj; 319 ds->ds_phys = dbuf->db_data; 320 321 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 322 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 323 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, 324 NULL); 325 326 err = bplist_open(&ds->ds_deadlist, 327 mos, ds->ds_phys->ds_deadlist_obj); 328 if (err == 0) { 329 err = dsl_dir_open_obj(dp, 330 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 331 } 332 if (err) { 333 /* 334 * we don't really need to close the blist if we 335 * just opened it. 336 */ 337 mutex_destroy(&ds->ds_lock); 338 mutex_destroy(&ds->ds_opening_lock); 339 mutex_destroy(&ds->ds_deadlist.bpl_lock); 340 kmem_free(ds, sizeof (dsl_dataset_t)); 341 dmu_buf_rele(dbuf, tag); 342 return (err); 343 } 344 345 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { 346 ds->ds_snapname[0] = '\0'; 347 if (ds->ds_phys->ds_prev_snap_obj) { 348 err = dsl_dataset_open_obj(dp, 349 ds->ds_phys->ds_prev_snap_obj, NULL, 350 DS_MODE_NONE, ds, &ds->ds_prev); 351 } 352 } else { 353 if (snapname) { 354 #ifdef ZFS_DEBUG 355 dsl_dataset_phys_t *headphys; 356 dmu_buf_t *headdbuf; 357 err = dmu_bonus_hold(mos, 358 ds->ds_dir->dd_phys->dd_head_dataset_obj, 359 FTAG, &headdbuf); 360 if (err == 0) { 361 headphys = headdbuf->db_data; 362 uint64_t foundobj; 363 err = zap_lookup(dp->dp_meta_objset, 364 headphys->ds_snapnames_zapobj, 365 snapname, sizeof (foundobj), 1, 366 &foundobj); 367 ASSERT3U(foundobj, ==, dsobj); 368 dmu_buf_rele(headdbuf, FTAG); 369 } 370 #endif 371 (void) strcat(ds->ds_snapname, snapname); 372 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 373 err = dsl_dataset_get_snapname(ds); 374 } 375 } 376 377 if (err == 0) { 378 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 379 dsl_dataset_evict); 380 } 381 if (err || winner) { 382 bplist_close(&ds->ds_deadlist); 383 if (ds->ds_prev) { 384 dsl_dataset_close(ds->ds_prev, 385 DS_MODE_NONE, ds); 386 } 387 dsl_dir_close(ds->ds_dir, ds); 388 mutex_destroy(&ds->ds_lock); 389 mutex_destroy(&ds->ds_opening_lock); 390 mutex_destroy(&ds->ds_deadlist.bpl_lock); 391 kmem_free(ds, sizeof (dsl_dataset_t)); 392 if (err) { 393 dmu_buf_rele(dbuf, tag); 394 return (err); 395 } 396 ds = winner; 397 } else { 398 ds->ds_fsid_guid = 399 unique_insert(ds->ds_phys->ds_fsid_guid); 400 } 401 402 if (!dsl_dataset_is_snapshot(ds)) { 403 boolean_t need_lock = 404 !RW_LOCK_HELD(&dp->dp_config_rwlock); 405 406 if (need_lock) 407 rw_enter(&dp->dp_config_rwlock, RW_READER); 408 VERIFY(0 == dsl_prop_get_ds_locked(ds->ds_dir, 409 "refreservation", sizeof (uint64_t), 1, 410 &ds->ds_reserved, NULL)); 411 VERIFY(0 == dsl_prop_get_ds_locked(ds->ds_dir, 412 "refquota", sizeof (uint64_t), 1, &ds->ds_quota, 413 NULL)); 414 if (need_lock) 415 rw_exit(&dp->dp_config_rwlock); 416 } else { 417 ds->ds_reserved = ds->ds_quota = 0; 418 } 419 } 420 ASSERT3P(ds->ds_dbuf, ==, dbuf); 421 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 422 423 mutex_enter(&ds->ds_lock); 424 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && 425 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && 426 !DS_MODE_IS_INCONSISTENT(mode)) || 427 (ds->ds_open_refcount + weight > DS_REF_MAX)) { 428 mutex_exit(&ds->ds_lock); 429 dsl_dataset_close(ds, DS_MODE_NONE, tag); 430 return (EBUSY); 431 } 432 ds->ds_open_refcount += weight; 433 mutex_exit(&ds->ds_lock); 434 435 *dsp = ds; 436 return (0); 437 } 438 439 int 440 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, 441 void *tag, dsl_dataset_t **dsp) 442 { 443 dsl_dir_t *dd; 444 dsl_pool_t *dp; 445 const char *tail; 446 uint64_t obj; 447 dsl_dataset_t *ds = NULL; 448 int err = 0; 449 450 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); 451 if (err) 452 return (err); 453 454 dp = dd->dd_pool; 455 obj = dd->dd_phys->dd_head_dataset_obj; 456 rw_enter(&dp->dp_config_rwlock, RW_READER); 457 if (obj == 0) { 458 /* A dataset with no associated objset */ 459 err = ENOENT; 460 goto out; 461 } 462 463 if (tail != NULL) { 464 objset_t *mos = dp->dp_meta_objset; 465 466 err = dsl_dataset_open_obj(dp, obj, NULL, 467 DS_MODE_NONE, tag, &ds); 468 if (err) 469 goto out; 470 obj = ds->ds_phys->ds_snapnames_zapobj; 471 dsl_dataset_close(ds, DS_MODE_NONE, tag); 472 ds = NULL; 473 474 if (tail[0] != '@') { 475 err = ENOENT; 476 goto out; 477 } 478 tail++; 479 480 /* Look for a snapshot */ 481 if (!DS_MODE_IS_READONLY(mode)) { 482 err = EROFS; 483 goto out; 484 } 485 dprintf("looking for snapshot '%s'\n", tail); 486 err = zap_lookup(mos, obj, tail, 8, 1, &obj); 487 if (err) 488 goto out; 489 } 490 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); 491 492 out: 493 rw_exit(&dp->dp_config_rwlock); 494 dsl_dir_close(dd, FTAG); 495 496 ASSERT3U((err == 0), ==, (ds != NULL)); 497 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ 498 499 *dsp = ds; 500 return (err); 501 } 502 503 int 504 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) 505 { 506 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); 507 } 508 509 void 510 dsl_dataset_name(dsl_dataset_t *ds, char *name) 511 { 512 if (ds == NULL) { 513 (void) strcpy(name, "mos"); 514 } else { 515 dsl_dir_name(ds->ds_dir, name); 516 VERIFY(0 == dsl_dataset_get_snapname(ds)); 517 if (ds->ds_snapname[0]) { 518 (void) strcat(name, "@"); 519 if (!MUTEX_HELD(&ds->ds_lock)) { 520 /* 521 * We use a "recursive" mutex so that we 522 * can call dprintf_ds() with ds_lock held. 523 */ 524 mutex_enter(&ds->ds_lock); 525 (void) strcat(name, ds->ds_snapname); 526 mutex_exit(&ds->ds_lock); 527 } else { 528 (void) strcat(name, ds->ds_snapname); 529 } 530 } 531 } 532 } 533 534 static int 535 dsl_dataset_namelen(dsl_dataset_t *ds) 536 { 537 int result; 538 539 if (ds == NULL) { 540 result = 3; /* "mos" */ 541 } else { 542 result = dsl_dir_namelen(ds->ds_dir); 543 VERIFY(0 == dsl_dataset_get_snapname(ds)); 544 if (ds->ds_snapname[0]) { 545 ++result; /* adding one for the @-sign */ 546 if (!MUTEX_HELD(&ds->ds_lock)) { 547 /* see dsl_datset_name */ 548 mutex_enter(&ds->ds_lock); 549 result += strlen(ds->ds_snapname); 550 mutex_exit(&ds->ds_lock); 551 } else { 552 result += strlen(ds->ds_snapname); 553 } 554 } 555 } 556 557 return (result); 558 } 559 560 void 561 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) 562 { 563 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 564 mutex_enter(&ds->ds_lock); 565 ASSERT3U(ds->ds_open_refcount, >=, weight); 566 ds->ds_open_refcount -= weight; 567 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", 568 mode, ds->ds_open_refcount); 569 mutex_exit(&ds->ds_lock); 570 571 dmu_buf_rele(ds->ds_dbuf, tag); 572 } 573 574 void 575 dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode) 576 { 577 uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; 578 uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; 579 mutex_enter(&ds->ds_lock); 580 ASSERT3U(ds->ds_open_refcount, >=, oldweight); 581 ASSERT3U(oldweight, >=, newweight); 582 ds->ds_open_refcount -= oldweight; 583 ds->ds_open_refcount += newweight; 584 mutex_exit(&ds->ds_lock); 585 } 586 587 boolean_t 588 dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode) 589 { 590 boolean_t rv; 591 uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; 592 uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; 593 mutex_enter(&ds->ds_lock); 594 ASSERT3U(ds->ds_open_refcount, >=, oldweight); 595 ASSERT3U(newweight, >=, oldweight); 596 if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) { 597 rv = B_FALSE; 598 } else { 599 ds->ds_open_refcount -= oldweight; 600 ds->ds_open_refcount += newweight; 601 rv = B_TRUE; 602 } 603 mutex_exit(&ds->ds_lock); 604 return (rv); 605 } 606 607 void 608 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) 609 { 610 objset_t *mos = dp->dp_meta_objset; 611 dmu_buf_t *dbuf; 612 dsl_dataset_phys_t *dsphys; 613 dsl_dataset_t *ds; 614 uint64_t dsobj; 615 dsl_dir_t *dd; 616 617 dsl_dir_create_root(mos, ddobjp, tx); 618 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); 619 620 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 621 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 622 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 623 dmu_buf_will_dirty(dbuf, tx); 624 dsphys = dbuf->db_data; 625 dsphys->ds_dir_obj = dd->dd_object; 626 dsphys->ds_fsid_guid = unique_create(); 627 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 628 sizeof (dsphys->ds_guid)); 629 dsphys->ds_snapnames_zapobj = 630 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 631 dsphys->ds_creation_time = gethrestime_sec(); 632 dsphys->ds_creation_txg = tx->tx_txg; 633 dsphys->ds_deadlist_obj = 634 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 635 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 636 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 637 dmu_buf_rele(dbuf, FTAG); 638 639 dmu_buf_will_dirty(dd->dd_dbuf, tx); 640 dd->dd_phys->dd_head_dataset_obj = dsobj; 641 dsl_dir_close(dd, FTAG); 642 643 VERIFY(0 == 644 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); 645 (void) dmu_objset_create_impl(dp->dp_spa, ds, 646 &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx); 647 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 648 } 649 650 uint64_t 651 dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_tx_t *tx) 652 { 653 dsl_pool_t *dp = dd->dd_pool; 654 dmu_buf_t *dbuf; 655 dsl_dataset_phys_t *dsphys; 656 uint64_t dsobj; 657 objset_t *mos = dp->dp_meta_objset; 658 659 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 660 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 661 ASSERT(dmu_tx_is_syncing(tx)); 662 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 663 664 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 665 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 666 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 667 dmu_buf_will_dirty(dbuf, tx); 668 dsphys = dbuf->db_data; 669 dsphys->ds_dir_obj = dd->dd_object; 670 dsphys->ds_fsid_guid = unique_create(); 671 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 672 sizeof (dsphys->ds_guid)); 673 dsphys->ds_snapnames_zapobj = 674 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 675 dsphys->ds_creation_time = gethrestime_sec(); 676 dsphys->ds_creation_txg = tx->tx_txg; 677 dsphys->ds_deadlist_obj = 678 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 679 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 680 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 681 682 if (origin) { 683 dsphys->ds_prev_snap_obj = origin->ds_object; 684 dsphys->ds_prev_snap_txg = 685 origin->ds_phys->ds_creation_txg; 686 dsphys->ds_used_bytes = 687 origin->ds_phys->ds_used_bytes; 688 dsphys->ds_compressed_bytes = 689 origin->ds_phys->ds_compressed_bytes; 690 dsphys->ds_uncompressed_bytes = 691 origin->ds_phys->ds_uncompressed_bytes; 692 dsphys->ds_bp = origin->ds_phys->ds_bp; 693 694 dmu_buf_will_dirty(origin->ds_dbuf, tx); 695 origin->ds_phys->ds_num_children++; 696 697 dmu_buf_will_dirty(dd->dd_dbuf, tx); 698 dd->dd_phys->dd_origin_obj = origin->ds_object; 699 } 700 dmu_buf_rele(dbuf, FTAG); 701 702 dmu_buf_will_dirty(dd->dd_dbuf, tx); 703 dd->dd_phys->dd_head_dataset_obj = dsobj; 704 705 return (dsobj); 706 } 707 708 uint64_t 709 dsl_dataset_create_sync(dsl_dir_t *pdd, 710 const char *lastname, dsl_dataset_t *origin, cred_t *cr, dmu_tx_t *tx) 711 { 712 dsl_pool_t *dp = pdd->dd_pool; 713 uint64_t dsobj, ddobj; 714 dsl_dir_t *dd; 715 716 ASSERT(lastname[0] != '@'); 717 718 ddobj = dsl_dir_create_sync(pdd, lastname, tx); 719 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 720 721 dsobj = dsl_dataset_create_sync_impl(dd, origin, tx); 722 723 dsl_deleg_set_create_perms(dd, tx, cr); 724 725 dsl_dir_close(dd, FTAG); 726 727 return (dsobj); 728 } 729 730 struct destroyarg { 731 dsl_sync_task_group_t *dstg; 732 char *snapname; 733 char *failed; 734 }; 735 736 static int 737 dsl_snapshot_destroy_one(char *name, void *arg) 738 { 739 struct destroyarg *da = arg; 740 dsl_dataset_t *ds; 741 char *cp; 742 int err; 743 744 (void) strcat(name, "@"); 745 (void) strcat(name, da->snapname); 746 err = dsl_dataset_open(name, 747 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 748 da->dstg, &ds); 749 cp = strchr(name, '@'); 750 *cp = '\0'; 751 if (err == ENOENT) 752 return (0); 753 if (err) { 754 (void) strcpy(da->failed, name); 755 return (err); 756 } 757 758 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 759 dsl_dataset_destroy_sync, ds, da->dstg, 0); 760 return (0); 761 } 762 763 /* 764 * Destroy 'snapname' in all descendants of 'fsname'. 765 */ 766 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 767 int 768 dsl_snapshots_destroy(char *fsname, char *snapname) 769 { 770 int err; 771 struct destroyarg da; 772 dsl_sync_task_t *dst; 773 spa_t *spa; 774 775 err = spa_open(fsname, &spa, FTAG); 776 if (err) 777 return (err); 778 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 779 da.snapname = snapname; 780 da.failed = fsname; 781 782 err = dmu_objset_find(fsname, 783 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 784 785 if (err == 0) 786 err = dsl_sync_task_group_wait(da.dstg); 787 788 for (dst = list_head(&da.dstg->dstg_tasks); dst; 789 dst = list_next(&da.dstg->dstg_tasks, dst)) { 790 dsl_dataset_t *ds = dst->dst_arg1; 791 if (dst->dst_err) { 792 dsl_dataset_name(ds, fsname); 793 *strchr(fsname, '@') = '\0'; 794 } 795 /* 796 * If it was successful, destroy_sync would have 797 * closed the ds 798 */ 799 if (err) 800 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg); 801 } 802 803 dsl_sync_task_group_destroy(da.dstg); 804 spa_close(spa, FTAG); 805 return (err); 806 } 807 808 /* 809 * ds must be opened EXCLUSIVE or PRIMARY. on return (whether 810 * successful or not), ds will be closed and caller can no longer 811 * dereference it. 812 */ 813 int 814 dsl_dataset_destroy(dsl_dataset_t *ds, void *tag) 815 { 816 int err; 817 dsl_sync_task_group_t *dstg; 818 objset_t *os; 819 dsl_dir_t *dd; 820 uint64_t obj; 821 822 if (ds->ds_open_refcount != DS_REF_MAX) { 823 if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY, 824 DS_MODE_EXCLUSIVE) == 0) { 825 dsl_dataset_close(ds, DS_MODE_PRIMARY, tag); 826 return (EBUSY); 827 } 828 } 829 830 if (dsl_dataset_is_snapshot(ds)) { 831 /* Destroying a snapshot is simpler */ 832 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 833 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 834 ds, tag, 0); 835 goto out; 836 } 837 838 dd = ds->ds_dir; 839 840 /* 841 * Check for errors and mark this ds as inconsistent, in 842 * case we crash while freeing the objects. 843 */ 844 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 845 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 846 if (err) 847 goto out; 848 849 err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); 850 if (err) 851 goto out; 852 853 /* 854 * remove the objects in open context, so that we won't 855 * have too much to do in syncing context. 856 */ 857 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 858 ds->ds_phys->ds_prev_snap_txg)) { 859 dmu_tx_t *tx = dmu_tx_create(os); 860 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); 861 dmu_tx_hold_bonus(tx, obj); 862 err = dmu_tx_assign(tx, TXG_WAIT); 863 if (err) { 864 /* 865 * Perhaps there is not enough disk 866 * space. Just deal with it from 867 * dsl_dataset_destroy_sync(). 868 */ 869 dmu_tx_abort(tx); 870 continue; 871 } 872 VERIFY(0 == dmu_object_free(os, obj, tx)); 873 dmu_tx_commit(tx); 874 } 875 /* Make sure it's not dirty before we finish destroying it. */ 876 txg_wait_synced(dd->dd_pool, 0); 877 878 dmu_objset_close(os); 879 if (err != ESRCH) 880 goto out; 881 882 if (ds->ds_user_ptr) { 883 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 884 ds->ds_user_ptr = NULL; 885 } 886 887 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 888 err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); 889 rw_exit(&dd->dd_pool->dp_config_rwlock); 890 891 if (err) 892 goto out; 893 894 /* 895 * Blow away the dsl_dir + head dataset. 896 */ 897 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 898 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 899 dsl_dataset_destroy_sync, ds, tag, 0); 900 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 901 dsl_dir_destroy_sync, dd, FTAG, 0); 902 err = dsl_sync_task_group_wait(dstg); 903 dsl_sync_task_group_destroy(dstg); 904 /* if it is successful, *destroy_sync will close the ds+dd */ 905 if (err) 906 dsl_dir_close(dd, FTAG); 907 out: 908 if (err) 909 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 910 return (err); 911 } 912 913 int 914 dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost) 915 { 916 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 917 918 return (dsl_sync_task_do(ds->ds_dir->dd_pool, 919 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, 920 ds, &ost, 0)); 921 } 922 923 void * 924 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 925 void *p, dsl_dataset_evict_func_t func) 926 { 927 void *old; 928 929 mutex_enter(&ds->ds_lock); 930 old = ds->ds_user_ptr; 931 if (old == NULL) { 932 ds->ds_user_ptr = p; 933 ds->ds_user_evict_func = func; 934 } 935 mutex_exit(&ds->ds_lock); 936 return (old); 937 } 938 939 void * 940 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 941 { 942 return (ds->ds_user_ptr); 943 } 944 945 946 blkptr_t * 947 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 948 { 949 return (&ds->ds_phys->ds_bp); 950 } 951 952 void 953 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 954 { 955 ASSERT(dmu_tx_is_syncing(tx)); 956 /* If it's the meta-objset, set dp_meta_rootbp */ 957 if (ds == NULL) { 958 tx->tx_pool->dp_meta_rootbp = *bp; 959 } else { 960 dmu_buf_will_dirty(ds->ds_dbuf, tx); 961 ds->ds_phys->ds_bp = *bp; 962 } 963 } 964 965 spa_t * 966 dsl_dataset_get_spa(dsl_dataset_t *ds) 967 { 968 return (ds->ds_dir->dd_pool->dp_spa); 969 } 970 971 void 972 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 973 { 974 dsl_pool_t *dp; 975 976 if (ds == NULL) /* this is the meta-objset */ 977 return; 978 979 ASSERT(ds->ds_user_ptr != NULL); 980 981 if (ds->ds_phys->ds_next_snap_obj != 0) 982 panic("dirtying snapshot!"); 983 984 dp = ds->ds_dir->dd_pool; 985 986 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 987 /* up the hold count until we can be written out */ 988 dmu_buf_add_ref(ds->ds_dbuf, ds); 989 } 990 } 991 992 /* 993 * The unique space in the head dataset can be calculated by subtracting 994 * the space used in the most recent snapshot, that is still being used 995 * in this file system, from the space currently in use. To figure out 996 * the space in the most recent snapshot still in use, we need to take 997 * the total space used in the snapshot and subtract out the space that 998 * has been freed up since the snapshot was taken. 999 */ 1000 static void 1001 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1002 { 1003 uint64_t mrs_used; 1004 uint64_t dlused, dlcomp, dluncomp; 1005 1006 ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj); 1007 1008 if (ds->ds_phys->ds_prev_snap_obj != 0) 1009 mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; 1010 else 1011 mrs_used = 0; 1012 1013 VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp, 1014 &dluncomp)); 1015 1016 ASSERT3U(dlused, <=, mrs_used); 1017 ds->ds_phys->ds_unique_bytes = 1018 ds->ds_phys->ds_used_bytes - (mrs_used - dlused); 1019 1020 if (!DS_UNIQUE_IS_ACCURATE(ds) && 1021 spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1022 SPA_VERSION_UNIQUE_ACCURATE) 1023 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1024 } 1025 1026 static uint64_t 1027 dsl_dataset_unique(dsl_dataset_t *ds) 1028 { 1029 if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds)) 1030 dsl_dataset_recalc_head_uniq(ds); 1031 1032 return (ds->ds_phys->ds_unique_bytes); 1033 } 1034 1035 struct killarg { 1036 int64_t *usedp; 1037 int64_t *compressedp; 1038 int64_t *uncompressedp; 1039 zio_t *zio; 1040 dmu_tx_t *tx; 1041 }; 1042 1043 static int 1044 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 1045 { 1046 struct killarg *ka = arg; 1047 blkptr_t *bp = &bc->bc_blkptr; 1048 1049 ASSERT3U(bc->bc_errno, ==, 0); 1050 1051 /* 1052 * Since this callback is not called concurrently, no lock is 1053 * needed on the accounting values. 1054 */ 1055 *ka->usedp += bp_get_dasize(spa, bp); 1056 *ka->compressedp += BP_GET_PSIZE(bp); 1057 *ka->uncompressedp += BP_GET_UCSIZE(bp); 1058 /* XXX check for EIO? */ 1059 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, 1060 ARC_NOWAIT); 1061 return (0); 1062 } 1063 1064 /* ARGSUSED */ 1065 static int 1066 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) 1067 { 1068 dsl_dataset_t *ds = arg1; 1069 dmu_objset_type_t *ost = arg2; 1070 1071 /* 1072 * We can only roll back to emptyness if it is a ZPL objset. 1073 */ 1074 if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0) 1075 return (EINVAL); 1076 1077 /* 1078 * This must not be a snapshot. 1079 */ 1080 if (ds->ds_phys->ds_next_snap_obj != 0) 1081 return (EINVAL); 1082 1083 /* 1084 * If we made changes this txg, traverse_dsl_dataset won't find 1085 * them. Try again. 1086 */ 1087 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1088 return (EAGAIN); 1089 1090 return (0); 1091 } 1092 1093 /* ARGSUSED */ 1094 static void 1095 dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1096 { 1097 dsl_dataset_t *ds = arg1; 1098 dmu_objset_type_t *ost = arg2; 1099 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1100 1101 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1102 1103 /* 1104 * Before the roll back destroy the zil. 1105 */ 1106 if (ds->ds_user_ptr != NULL) { 1107 zil_rollback_destroy( 1108 ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx); 1109 1110 /* 1111 * We need to make sure that the objset_impl_t is reopened after 1112 * we do the rollback, otherwise it will have the wrong 1113 * objset_phys_t. Normally this would happen when this 1114 * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the 1115 * dataset to be immediately evicted. But when doing "zfs recv 1116 * -F", we reopen the objset before that, so that there is no 1117 * window where the dataset is closed and inconsistent. 1118 */ 1119 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 1120 ds->ds_user_ptr = NULL; 1121 } 1122 1123 /* Zero out the deadlist. */ 1124 bplist_close(&ds->ds_deadlist); 1125 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1126 ds->ds_phys->ds_deadlist_obj = 1127 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1128 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1129 ds->ds_phys->ds_deadlist_obj)); 1130 1131 { 1132 /* Free blkptrs that we gave birth to */ 1133 zio_t *zio; 1134 int64_t used = 0, compressed = 0, uncompressed = 0; 1135 struct killarg ka; 1136 1137 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 1138 ZIO_FLAG_MUSTSUCCEED); 1139 ka.usedp = &used; 1140 ka.compressedp = &compressed; 1141 ka.uncompressedp = &uncompressed; 1142 ka.zio = zio; 1143 ka.tx = tx; 1144 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1145 ADVANCE_POST, kill_blkptr, &ka); 1146 (void) zio_wait(zio); 1147 1148 dsl_dir_diduse_space(ds->ds_dir, 1149 -used, -compressed, -uncompressed, tx); 1150 } 1151 1152 if (ds->ds_prev) { 1153 /* Change our contents to that of the prev snapshot */ 1154 ASSERT3U(ds->ds_prev->ds_object, ==, 1155 ds->ds_phys->ds_prev_snap_obj); 1156 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 1157 ds->ds_phys->ds_used_bytes = 1158 ds->ds_prev->ds_phys->ds_used_bytes; 1159 ds->ds_phys->ds_compressed_bytes = 1160 ds->ds_prev->ds_phys->ds_compressed_bytes; 1161 ds->ds_phys->ds_uncompressed_bytes = 1162 ds->ds_prev->ds_phys->ds_uncompressed_bytes; 1163 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; 1164 ds->ds_phys->ds_unique_bytes = 0; 1165 1166 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1167 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1168 ds->ds_prev->ds_phys->ds_unique_bytes = 0; 1169 } 1170 } else { 1171 /* Zero out our contents, recreate objset */ 1172 bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t)); 1173 ds->ds_phys->ds_used_bytes = 0; 1174 ds->ds_phys->ds_compressed_bytes = 0; 1175 ds->ds_phys->ds_uncompressed_bytes = 0; 1176 ds->ds_phys->ds_flags = 0; 1177 ds->ds_phys->ds_unique_bytes = 0; 1178 (void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds, 1179 &ds->ds_phys->ds_bp, *ost, tx); 1180 } 1181 1182 spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa, 1183 tx, cr, "dataset = %llu", ds->ds_object); 1184 } 1185 1186 /* ARGSUSED */ 1187 static int 1188 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1189 { 1190 dsl_dataset_t *ds = arg1; 1191 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1192 uint64_t count; 1193 int err; 1194 1195 /* 1196 * Can't delete a head dataset if there are snapshots of it. 1197 * (Except if the only snapshots are from the branch we cloned 1198 * from.) 1199 */ 1200 if (ds->ds_prev != NULL && 1201 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1202 return (EINVAL); 1203 1204 /* 1205 * This is really a dsl_dir thing, but check it here so that 1206 * we'll be less likely to leave this dataset inconsistent & 1207 * nearly destroyed. 1208 */ 1209 err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); 1210 if (err) 1211 return (err); 1212 if (count != 0) 1213 return (EEXIST); 1214 1215 return (0); 1216 } 1217 1218 /* ARGSUSED */ 1219 static void 1220 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1221 { 1222 dsl_dataset_t *ds = arg1; 1223 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1224 1225 /* Mark it as inconsistent on-disk, in case we crash */ 1226 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1227 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1228 1229 spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1230 cr, "dataset = %llu", ds->ds_object); 1231 } 1232 1233 /* ARGSUSED */ 1234 int 1235 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1236 { 1237 dsl_dataset_t *ds = arg1; 1238 1239 /* Can't delete a branch point. */ 1240 if (ds->ds_phys->ds_num_children > 1) 1241 return (EEXIST); 1242 1243 /* 1244 * Can't delete a head dataset if there are snapshots of it. 1245 * (Except if the only snapshots are from the branch we cloned 1246 * from.) 1247 */ 1248 if (ds->ds_prev != NULL && 1249 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1250 return (EINVAL); 1251 1252 /* 1253 * If we made changes this txg, traverse_dsl_dataset won't find 1254 * them. Try again. 1255 */ 1256 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1257 return (EAGAIN); 1258 1259 /* XXX we should do some i/o error checking... */ 1260 return (0); 1261 } 1262 1263 void 1264 dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 1265 { 1266 dsl_dataset_t *ds = arg1; 1267 int64_t used = 0, compressed = 0, uncompressed = 0; 1268 zio_t *zio; 1269 int err; 1270 int after_branch_point = FALSE; 1271 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1272 objset_t *mos = dp->dp_meta_objset; 1273 dsl_dataset_t *ds_prev = NULL; 1274 uint64_t obj; 1275 1276 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 1277 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 1278 ASSERT(ds->ds_prev == NULL || 1279 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1280 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1281 1282 /* Remove our reservation */ 1283 if (ds->ds_reserved != 0) { 1284 uint64_t val = 0; 1285 dsl_dataset_set_reservation_sync(ds, &val, cr, tx); 1286 ASSERT3U(ds->ds_reserved, ==, 0); 1287 } 1288 1289 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1290 1291 obj = ds->ds_object; 1292 1293 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1294 if (ds->ds_prev) { 1295 ds_prev = ds->ds_prev; 1296 } else { 1297 VERIFY(0 == dsl_dataset_open_obj(dp, 1298 ds->ds_phys->ds_prev_snap_obj, NULL, 1299 DS_MODE_NONE, FTAG, &ds_prev)); 1300 } 1301 after_branch_point = 1302 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1303 1304 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1305 if (after_branch_point && 1306 ds->ds_phys->ds_next_snap_obj == 0) { 1307 /* This clone is toast. */ 1308 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1309 ds_prev->ds_phys->ds_num_children--; 1310 } else if (!after_branch_point) { 1311 ds_prev->ds_phys->ds_next_snap_obj = 1312 ds->ds_phys->ds_next_snap_obj; 1313 } 1314 } 1315 1316 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1317 1318 if (ds->ds_phys->ds_next_snap_obj != 0) { 1319 blkptr_t bp; 1320 dsl_dataset_t *ds_next; 1321 uint64_t itor = 0; 1322 uint64_t old_unique; 1323 1324 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1325 1326 VERIFY(0 == dsl_dataset_open_obj(dp, 1327 ds->ds_phys->ds_next_snap_obj, NULL, 1328 DS_MODE_NONE, FTAG, &ds_next)); 1329 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1330 1331 old_unique = dsl_dataset_unique(ds_next); 1332 1333 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1334 ds_next->ds_phys->ds_prev_snap_obj = 1335 ds->ds_phys->ds_prev_snap_obj; 1336 ds_next->ds_phys->ds_prev_snap_txg = 1337 ds->ds_phys->ds_prev_snap_txg; 1338 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1339 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1340 1341 /* 1342 * Transfer to our deadlist (which will become next's 1343 * new deadlist) any entries from next's current 1344 * deadlist which were born before prev, and free the 1345 * other entries. 1346 * 1347 * XXX we're doing this long task with the config lock held 1348 */ 1349 while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1350 &bp) == 0) { 1351 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1352 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1353 &bp, tx)); 1354 if (ds_prev && !after_branch_point && 1355 bp.blk_birth > 1356 ds_prev->ds_phys->ds_prev_snap_txg) { 1357 ds_prev->ds_phys->ds_unique_bytes += 1358 bp_get_dasize(dp->dp_spa, &bp); 1359 } 1360 } else { 1361 used += bp_get_dasize(dp->dp_spa, &bp); 1362 compressed += BP_GET_PSIZE(&bp); 1363 uncompressed += BP_GET_UCSIZE(&bp); 1364 /* XXX check return value? */ 1365 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1366 &bp, NULL, NULL, ARC_NOWAIT); 1367 } 1368 } 1369 1370 /* free next's deadlist */ 1371 bplist_close(&ds_next->ds_deadlist); 1372 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1373 1374 /* set next's deadlist to our deadlist */ 1375 ds_next->ds_phys->ds_deadlist_obj = 1376 ds->ds_phys->ds_deadlist_obj; 1377 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1378 ds_next->ds_phys->ds_deadlist_obj)); 1379 ds->ds_phys->ds_deadlist_obj = 0; 1380 1381 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1382 /* 1383 * Update next's unique to include blocks which 1384 * were previously shared by only this snapshot 1385 * and it. Those blocks will be born after the 1386 * prev snap and before this snap, and will have 1387 * died after the next snap and before the one 1388 * after that (ie. be on the snap after next's 1389 * deadlist). 1390 * 1391 * XXX we're doing this long task with the 1392 * config lock held 1393 */ 1394 dsl_dataset_t *ds_after_next; 1395 1396 VERIFY(0 == dsl_dataset_open_obj(dp, 1397 ds_next->ds_phys->ds_next_snap_obj, NULL, 1398 DS_MODE_NONE, FTAG, &ds_after_next)); 1399 itor = 0; 1400 while (bplist_iterate(&ds_after_next->ds_deadlist, 1401 &itor, &bp) == 0) { 1402 if (bp.blk_birth > 1403 ds->ds_phys->ds_prev_snap_txg && 1404 bp.blk_birth <= 1405 ds->ds_phys->ds_creation_txg) { 1406 ds_next->ds_phys->ds_unique_bytes += 1407 bp_get_dasize(dp->dp_spa, &bp); 1408 } 1409 } 1410 1411 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1412 ASSERT3P(ds_next->ds_prev, ==, NULL); 1413 } else { 1414 ASSERT3P(ds_next->ds_prev, ==, ds); 1415 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1416 ds_next); 1417 if (ds_prev) { 1418 VERIFY(0 == dsl_dataset_open_obj(dp, 1419 ds->ds_phys->ds_prev_snap_obj, NULL, 1420 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); 1421 } else { 1422 ds_next->ds_prev = NULL; 1423 } 1424 1425 dsl_dataset_recalc_head_uniq(ds_next); 1426 1427 /* 1428 * Reduce the amount of our unconsmed refreservation 1429 * being charged to our parent by the amount of 1430 * new unique data we have gained. 1431 */ 1432 if (old_unique < ds_next->ds_reserved) { 1433 int64_t mrsdelta; 1434 uint64_t new_unique = 1435 ds_next->ds_phys->ds_unique_bytes; 1436 1437 ASSERT(old_unique <= new_unique); 1438 mrsdelta = MIN(new_unique - old_unique, 1439 ds_next->ds_reserved - old_unique); 1440 dsl_dir_diduse_space(ds->ds_dir, -mrsdelta, 1441 0, 0, tx); 1442 } 1443 } 1444 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1445 1446 /* 1447 * NB: unique_bytes might not be accurate for the head objset. 1448 * Before SPA_VERSION 9, we didn't update its value when we 1449 * deleted the most recent snapshot. 1450 */ 1451 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1452 } else { 1453 /* 1454 * There's no next snapshot, so this is a head dataset. 1455 * Destroy the deadlist. Unless it's a clone, the 1456 * deadlist should be empty. (If it's a clone, it's 1457 * safe to ignore the deadlist contents.) 1458 */ 1459 struct killarg ka; 1460 1461 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1462 bplist_close(&ds->ds_deadlist); 1463 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1464 ds->ds_phys->ds_deadlist_obj = 0; 1465 1466 /* 1467 * Free everything that we point to (that's born after 1468 * the previous snapshot, if we are a clone) 1469 * 1470 * XXX we're doing this long task with the config lock held 1471 */ 1472 ka.usedp = &used; 1473 ka.compressedp = &compressed; 1474 ka.uncompressedp = &uncompressed; 1475 ka.zio = zio; 1476 ka.tx = tx; 1477 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1478 ADVANCE_POST, kill_blkptr, &ka); 1479 ASSERT3U(err, ==, 0); 1480 ASSERT(spa_version(dp->dp_spa) < 1481 SPA_VERSION_UNIQUE_ACCURATE || 1482 used == ds->ds_phys->ds_unique_bytes); 1483 } 1484 1485 err = zio_wait(zio); 1486 ASSERT3U(err, ==, 0); 1487 1488 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); 1489 1490 if (ds->ds_phys->ds_snapnames_zapobj) { 1491 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1492 ASSERT(err == 0); 1493 } 1494 1495 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1496 /* Erase the link in the dataset */ 1497 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1498 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1499 /* 1500 * dsl_dir_sync_destroy() called us, they'll destroy 1501 * the dataset. 1502 */ 1503 } else { 1504 /* remove from snapshot namespace */ 1505 dsl_dataset_t *ds_head; 1506 VERIFY(0 == dsl_dataset_open_obj(dp, 1507 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, 1508 DS_MODE_NONE, FTAG, &ds_head)); 1509 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1510 #ifdef ZFS_DEBUG 1511 { 1512 uint64_t val; 1513 err = zap_lookup(mos, 1514 ds_head->ds_phys->ds_snapnames_zapobj, 1515 ds->ds_snapname, 8, 1, &val); 1516 ASSERT3U(err, ==, 0); 1517 ASSERT3U(val, ==, obj); 1518 } 1519 #endif 1520 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1521 ds->ds_snapname, tx); 1522 ASSERT(err == 0); 1523 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1524 } 1525 1526 if (ds_prev && ds->ds_prev != ds_prev) 1527 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1528 1529 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1530 spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx, 1531 cr, "dataset = %llu", ds->ds_object); 1532 1533 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 1534 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1535 1536 } 1537 1538 static int 1539 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1540 { 1541 uint64_t asize; 1542 1543 if (!dmu_tx_is_syncing(tx)) 1544 return (0); 1545 1546 /* 1547 * If there's an fs-only reservation, any blocks that might become 1548 * owned by the snapshot dataset must be accommodated by space 1549 * outside of the reservation. 1550 */ 1551 asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1552 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE)) 1553 return (ENOSPC); 1554 1555 /* 1556 * Propogate any reserved space for this snapshot to other 1557 * snapshot checks in this sync group. 1558 */ 1559 if (asize > 0) 1560 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1561 1562 return (0); 1563 } 1564 1565 /* ARGSUSED */ 1566 int 1567 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1568 { 1569 dsl_dataset_t *ds = arg1; 1570 const char *snapname = arg2; 1571 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1572 int err; 1573 uint64_t value; 1574 1575 /* 1576 * We don't allow multiple snapshots of the same txg. If there 1577 * is already one, try again. 1578 */ 1579 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1580 return (EAGAIN); 1581 1582 /* 1583 * Check for conflicting name snapshot name. 1584 */ 1585 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1586 snapname, 8, 1, &value); 1587 if (err == 0) 1588 return (EEXIST); 1589 if (err != ENOENT) 1590 return (err); 1591 1592 /* 1593 * Check that the dataset's name is not too long. Name consists 1594 * of the dataset's length + 1 for the @-sign + snapshot name's length 1595 */ 1596 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 1597 return (ENAMETOOLONG); 1598 1599 err = dsl_dataset_snapshot_reserve_space(ds, tx); 1600 if (err) 1601 return (err); 1602 1603 ds->ds_trysnap_txg = tx->tx_txg; 1604 return (0); 1605 } 1606 1607 void 1608 dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1609 { 1610 dsl_dataset_t *ds = arg1; 1611 const char *snapname = arg2; 1612 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1613 dmu_buf_t *dbuf; 1614 dsl_dataset_phys_t *dsphys; 1615 uint64_t dsobj; 1616 objset_t *mos = dp->dp_meta_objset; 1617 int err; 1618 1619 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1620 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1621 1622 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1623 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1624 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1625 dmu_buf_will_dirty(dbuf, tx); 1626 dsphys = dbuf->db_data; 1627 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1628 dsphys->ds_fsid_guid = unique_create(); 1629 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1630 sizeof (dsphys->ds_guid)); 1631 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1632 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1633 dsphys->ds_next_snap_obj = ds->ds_object; 1634 dsphys->ds_num_children = 1; 1635 dsphys->ds_creation_time = gethrestime_sec(); 1636 dsphys->ds_creation_txg = tx->tx_txg; 1637 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1638 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1639 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1640 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1641 dsphys->ds_flags = ds->ds_phys->ds_flags; 1642 dsphys->ds_bp = ds->ds_phys->ds_bp; 1643 dmu_buf_rele(dbuf, FTAG); 1644 1645 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1646 if (ds->ds_prev) { 1647 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1648 ds->ds_object || 1649 ds->ds_prev->ds_phys->ds_num_children > 1); 1650 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1651 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1652 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1653 ds->ds_prev->ds_phys->ds_creation_txg); 1654 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1655 } 1656 } 1657 1658 /* 1659 * If we have a reference-reservation on this dataset, we will 1660 * need to increase the amount of refreservation being charged 1661 * since our unique space is going to zero. 1662 */ 1663 if (ds->ds_reserved) { 1664 int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1665 dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx); 1666 } 1667 1668 bplist_close(&ds->ds_deadlist); 1669 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1670 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); 1671 ds->ds_phys->ds_prev_snap_obj = dsobj; 1672 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; 1673 ds->ds_phys->ds_unique_bytes = 0; 1674 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1675 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1676 ds->ds_phys->ds_deadlist_obj = 1677 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1678 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1679 ds->ds_phys->ds_deadlist_obj)); 1680 1681 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1682 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1683 snapname, 8, 1, &dsobj, tx); 1684 ASSERT(err == 0); 1685 1686 if (ds->ds_prev) 1687 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1688 VERIFY(0 == dsl_dataset_open_obj(dp, 1689 ds->ds_phys->ds_prev_snap_obj, snapname, 1690 DS_MODE_NONE, ds, &ds->ds_prev)); 1691 1692 spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, 1693 "dataset = %llu", dsobj); 1694 } 1695 1696 void 1697 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1698 { 1699 ASSERT(dmu_tx_is_syncing(tx)); 1700 ASSERT(ds->ds_user_ptr != NULL); 1701 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1702 1703 /* 1704 * in case we had to change ds_fsid_guid when we opened it, 1705 * sync it out now. 1706 */ 1707 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1708 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1709 1710 dsl_dir_dirty(ds->ds_dir, tx); 1711 dmu_objset_sync(ds->ds_user_ptr, zio, tx); 1712 } 1713 1714 void 1715 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1716 { 1717 uint64_t refd, avail, uobjs, aobjs; 1718 1719 dsl_dir_stats(ds->ds_dir, nv); 1720 1721 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1722 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1723 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1724 1725 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1726 ds->ds_phys->ds_creation_time); 1727 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1728 ds->ds_phys->ds_creation_txg); 1729 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1730 ds->ds_quota); 1731 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1732 ds->ds_reserved); 1733 1734 if (ds->ds_phys->ds_next_snap_obj) { 1735 /* 1736 * This is a snapshot; override the dd's space used with 1737 * our unique space and compression ratio. 1738 */ 1739 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1740 ds->ds_phys->ds_unique_bytes); 1741 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 1742 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1743 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1744 ds->ds_phys->ds_compressed_bytes)); 1745 } 1746 } 1747 1748 void 1749 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1750 { 1751 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1752 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1753 stat->dds_guid = ds->ds_phys->ds_guid; 1754 if (ds->ds_phys->ds_next_snap_obj) { 1755 stat->dds_is_snapshot = B_TRUE; 1756 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1757 } 1758 1759 /* clone origin is really a dsl_dir thing... */ 1760 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 1761 if (ds->ds_dir->dd_phys->dd_origin_obj) { 1762 dsl_dataset_t *ods; 1763 1764 VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, 1765 ds->ds_dir->dd_phys->dd_origin_obj, 1766 NULL, DS_MODE_NONE, FTAG, &ods)); 1767 dsl_dataset_name(ods, stat->dds_origin); 1768 dsl_dataset_close(ods, DS_MODE_NONE, FTAG); 1769 } 1770 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 1771 } 1772 1773 uint64_t 1774 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1775 { 1776 return (ds->ds_fsid_guid); 1777 } 1778 1779 void 1780 dsl_dataset_space(dsl_dataset_t *ds, 1781 uint64_t *refdbytesp, uint64_t *availbytesp, 1782 uint64_t *usedobjsp, uint64_t *availobjsp) 1783 { 1784 *refdbytesp = ds->ds_phys->ds_used_bytes; 1785 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1786 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 1787 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 1788 if (ds->ds_quota != 0) { 1789 /* 1790 * Adjust available bytes according to refquota 1791 */ 1792 if (*refdbytesp < ds->ds_quota) 1793 *availbytesp = MIN(*availbytesp, 1794 ds->ds_quota - *refdbytesp); 1795 else 1796 *availbytesp = 0; 1797 } 1798 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1799 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1800 } 1801 1802 boolean_t 1803 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 1804 { 1805 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1806 1807 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 1808 dsl_pool_sync_context(dp)); 1809 if (ds->ds_prev == NULL) 1810 return (B_FALSE); 1811 if (ds->ds_phys->ds_bp.blk_birth > 1812 ds->ds_prev->ds_phys->ds_creation_txg) 1813 return (B_TRUE); 1814 return (B_FALSE); 1815 } 1816 1817 /* ARGSUSED */ 1818 static int 1819 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 1820 { 1821 dsl_dataset_t *ds = arg1; 1822 char *newsnapname = arg2; 1823 dsl_dir_t *dd = ds->ds_dir; 1824 objset_t *mos = dd->dd_pool->dp_meta_objset; 1825 dsl_dataset_t *hds; 1826 uint64_t val; 1827 int err; 1828 1829 err = dsl_dataset_open_obj(dd->dd_pool, 1830 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); 1831 if (err) 1832 return (err); 1833 1834 /* new name better not be in use */ 1835 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, 1836 newsnapname, 8, 1, &val); 1837 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1838 1839 if (err == 0) 1840 err = EEXIST; 1841 else if (err == ENOENT) 1842 err = 0; 1843 1844 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1845 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 1846 err = ENAMETOOLONG; 1847 1848 return (err); 1849 } 1850 1851 static void 1852 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 1853 cred_t *cr, dmu_tx_t *tx) 1854 { 1855 dsl_dataset_t *ds = arg1; 1856 const char *newsnapname = arg2; 1857 dsl_dir_t *dd = ds->ds_dir; 1858 objset_t *mos = dd->dd_pool->dp_meta_objset; 1859 dsl_dataset_t *hds; 1860 int err; 1861 1862 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 1863 1864 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1865 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); 1866 1867 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1868 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, 1869 ds->ds_snapname, tx); 1870 ASSERT3U(err, ==, 0); 1871 mutex_enter(&ds->ds_lock); 1872 (void) strcpy(ds->ds_snapname, newsnapname); 1873 mutex_exit(&ds->ds_lock); 1874 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 1875 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 1876 ASSERT3U(err, ==, 0); 1877 1878 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 1879 cr, "dataset = %llu", ds->ds_object); 1880 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1881 } 1882 1883 struct renamesnaparg { 1884 dsl_sync_task_group_t *dstg; 1885 char failed[MAXPATHLEN]; 1886 char *oldsnap; 1887 char *newsnap; 1888 }; 1889 1890 static int 1891 dsl_snapshot_rename_one(char *name, void *arg) 1892 { 1893 struct renamesnaparg *ra = arg; 1894 dsl_dataset_t *ds = NULL; 1895 char *cp; 1896 int err; 1897 1898 cp = name + strlen(name); 1899 *cp = '@'; 1900 (void) strcpy(cp + 1, ra->oldsnap); 1901 1902 /* 1903 * For recursive snapshot renames the parent won't be changing 1904 * so we just pass name for both the to/from argument. 1905 */ 1906 if (err = zfs_secpolicy_rename_perms(name, name, CRED())) { 1907 (void) strcpy(ra->failed, name); 1908 return (err); 1909 } 1910 1911 err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD, 1912 ra->dstg, &ds); 1913 if (err == ENOENT) { 1914 *cp = '\0'; 1915 return (0); 1916 } 1917 if (err) { 1918 (void) strcpy(ra->failed, name); 1919 *cp = '\0'; 1920 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1921 return (err); 1922 } 1923 1924 #ifdef _KERNEL 1925 /* for all filesystems undergoing rename, we'll need to unmount it */ 1926 (void) zfs_unmount_snap(name, NULL); 1927 #endif 1928 1929 *cp = '\0'; 1930 1931 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 1932 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 1933 1934 return (0); 1935 } 1936 1937 static int 1938 dsl_recursive_rename(char *oldname, const char *newname) 1939 { 1940 int err; 1941 struct renamesnaparg *ra; 1942 dsl_sync_task_t *dst; 1943 spa_t *spa; 1944 char *cp, *fsname = spa_strdup(oldname); 1945 int len = strlen(oldname); 1946 1947 /* truncate the snapshot name to get the fsname */ 1948 cp = strchr(fsname, '@'); 1949 *cp = '\0'; 1950 1951 err = spa_open(fsname, &spa, FTAG); 1952 if (err) { 1953 kmem_free(fsname, len + 1); 1954 return (err); 1955 } 1956 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 1957 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 1958 1959 ra->oldsnap = strchr(oldname, '@') + 1; 1960 ra->newsnap = strchr(newname, '@') + 1; 1961 *ra->failed = '\0'; 1962 1963 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 1964 DS_FIND_CHILDREN); 1965 kmem_free(fsname, len + 1); 1966 1967 if (err == 0) { 1968 err = dsl_sync_task_group_wait(ra->dstg); 1969 } 1970 1971 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 1972 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 1973 dsl_dataset_t *ds = dst->dst_arg1; 1974 if (dst->dst_err) { 1975 dsl_dir_name(ds->ds_dir, ra->failed); 1976 (void) strcat(ra->failed, "@"); 1977 (void) strcat(ra->failed, ra->newsnap); 1978 } 1979 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1980 } 1981 1982 if (err) 1983 (void) strcpy(oldname, ra->failed); 1984 1985 dsl_sync_task_group_destroy(ra->dstg); 1986 kmem_free(ra, sizeof (struct renamesnaparg)); 1987 spa_close(spa, FTAG); 1988 return (err); 1989 } 1990 1991 static int 1992 dsl_valid_rename(char *oldname, void *arg) 1993 { 1994 int delta = *(int *)arg; 1995 1996 if (strlen(oldname) + delta >= MAXNAMELEN) 1997 return (ENAMETOOLONG); 1998 1999 return (0); 2000 } 2001 2002 #pragma weak dmu_objset_rename = dsl_dataset_rename 2003 int 2004 dsl_dataset_rename(char *oldname, const char *newname, 2005 boolean_t recursive) 2006 { 2007 dsl_dir_t *dd; 2008 dsl_dataset_t *ds; 2009 const char *tail; 2010 int err; 2011 2012 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2013 if (err) 2014 return (err); 2015 if (tail == NULL) { 2016 int delta = strlen(newname) - strlen(oldname); 2017 2018 /* if we're growing, validate child size lengths */ 2019 if (delta > 0) 2020 err = dmu_objset_find(oldname, dsl_valid_rename, 2021 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 2022 2023 if (!err) 2024 err = dsl_dir_rename(dd, newname); 2025 dsl_dir_close(dd, FTAG); 2026 return (err); 2027 } 2028 if (tail[0] != '@') { 2029 /* the name ended in a nonexistant component */ 2030 dsl_dir_close(dd, FTAG); 2031 return (ENOENT); 2032 } 2033 2034 dsl_dir_close(dd, FTAG); 2035 2036 /* new name must be snapshot in same filesystem */ 2037 tail = strchr(newname, '@'); 2038 if (tail == NULL) 2039 return (EINVAL); 2040 tail++; 2041 if (strncmp(oldname, newname, tail - newname) != 0) 2042 return (EXDEV); 2043 2044 if (recursive) { 2045 err = dsl_recursive_rename(oldname, newname); 2046 } else { 2047 err = dsl_dataset_open(oldname, 2048 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); 2049 if (err) 2050 return (err); 2051 2052 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2053 dsl_dataset_snapshot_rename_check, 2054 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2055 2056 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2057 } 2058 2059 return (err); 2060 } 2061 2062 struct promotearg { 2063 uint64_t used, comp, uncomp, unique; 2064 uint64_t newnext_obj, snapnames_obj; 2065 }; 2066 2067 /* ARGSUSED */ 2068 static int 2069 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 2070 { 2071 dsl_dataset_t *hds = arg1; 2072 struct promotearg *pa = arg2; 2073 dsl_dir_t *dd = hds->ds_dir; 2074 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2075 dsl_dir_t *odd = NULL; 2076 dsl_dataset_t *ds = NULL; 2077 dsl_dataset_t *origin_ds = NULL; 2078 dsl_dataset_t *newnext_ds = NULL; 2079 int err; 2080 char *name = NULL; 2081 uint64_t itor = 0; 2082 blkptr_t bp; 2083 2084 bzero(pa, sizeof (*pa)); 2085 2086 /* Check that it is a clone */ 2087 if (dd->dd_phys->dd_origin_obj == 0) 2088 return (EINVAL); 2089 2090 /* Since this is so expensive, don't do the preliminary check */ 2091 if (!dmu_tx_is_syncing(tx)) 2092 return (0); 2093 2094 if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, 2095 NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)) 2096 goto out; 2097 odd = origin_ds->ds_dir; 2098 2099 { 2100 dsl_dataset_t *phds; 2101 if (err = dsl_dataset_open_obj(dd->dd_pool, 2102 odd->dd_phys->dd_head_dataset_obj, 2103 NULL, DS_MODE_NONE, FTAG, &phds)) 2104 goto out; 2105 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; 2106 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); 2107 } 2108 2109 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 2110 err = EXDEV; 2111 goto out; 2112 } 2113 2114 /* find origin's new next ds */ 2115 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, 2116 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); 2117 while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) { 2118 dsl_dataset_t *prev; 2119 2120 if (err = dsl_dataset_open_obj(dd->dd_pool, 2121 newnext_ds->ds_phys->ds_prev_snap_obj, 2122 NULL, DS_MODE_NONE, FTAG, &prev)) 2123 goto out; 2124 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 2125 newnext_ds = prev; 2126 } 2127 pa->newnext_obj = newnext_ds->ds_object; 2128 2129 /* compute origin's new unique space */ 2130 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, 2131 &itor, &bp)) == 0) { 2132 if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg) 2133 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); 2134 } 2135 if (err != ENOENT) 2136 goto out; 2137 2138 /* Walk the snapshots that we are moving */ 2139 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2140 ds = origin_ds; 2141 /* CONSTCOND */ 2142 while (TRUE) { 2143 uint64_t val, dlused, dlcomp, dluncomp; 2144 dsl_dataset_t *prev; 2145 2146 /* Check that the snapshot name does not conflict */ 2147 dsl_dataset_name(ds, name); 2148 err = zap_lookup(dd->dd_pool->dp_meta_objset, 2149 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2150 8, 1, &val); 2151 if (err != ENOENT) { 2152 if (err == 0) 2153 err = EEXIST; 2154 goto out; 2155 } 2156 2157 /* 2158 * compute space to transfer. Each snapshot gave birth to: 2159 * (my used) - (prev's used) + (deadlist's used) 2160 */ 2161 pa->used += ds->ds_phys->ds_used_bytes; 2162 pa->comp += ds->ds_phys->ds_compressed_bytes; 2163 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; 2164 2165 /* If we reach the first snapshot, we're done. */ 2166 if (ds->ds_phys->ds_prev_snap_obj == 0) 2167 break; 2168 2169 if (err = bplist_space(&ds->ds_deadlist, 2170 &dlused, &dlcomp, &dluncomp)) 2171 goto out; 2172 if (err = dsl_dataset_open_obj(dd->dd_pool, 2173 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 2174 FTAG, &prev)) 2175 goto out; 2176 pa->used += dlused - prev->ds_phys->ds_used_bytes; 2177 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; 2178 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; 2179 2180 /* 2181 * We could be a clone of a clone. If we reach our 2182 * parent's branch point, we're done. 2183 */ 2184 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 2185 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 2186 break; 2187 } 2188 if (ds != origin_ds) 2189 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2190 ds = prev; 2191 } 2192 2193 /* Check that there is enough space here */ 2194 err = dsl_dir_transfer_possible(odd, dd, pa->used); 2195 2196 out: 2197 if (ds && ds != origin_ds) 2198 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2199 if (origin_ds) 2200 dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); 2201 if (newnext_ds) 2202 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 2203 if (name) 2204 kmem_free(name, MAXPATHLEN); 2205 return (err); 2206 } 2207 2208 static void 2209 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2210 { 2211 dsl_dataset_t *hds = arg1; 2212 struct promotearg *pa = arg2; 2213 dsl_dir_t *dd = hds->ds_dir; 2214 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2215 dsl_dir_t *odd = NULL; 2216 dsl_dataset_t *ds, *origin_ds; 2217 char *name; 2218 2219 ASSERT(dd->dd_phys->dd_origin_obj != 0); 2220 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 2221 2222 VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, 2223 NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)); 2224 /* 2225 * We need to explicitly open odd, since origin_ds's dd will be 2226 * changing. 2227 */ 2228 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2229 NULL, FTAG, &odd)); 2230 2231 /* move snapshots to this dir */ 2232 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2233 ds = origin_ds; 2234 /* CONSTCOND */ 2235 while (TRUE) { 2236 dsl_dataset_t *prev; 2237 2238 /* move snap name entry */ 2239 dsl_dataset_name(ds, name); 2240 VERIFY(0 == zap_remove(dp->dp_meta_objset, 2241 pa->snapnames_obj, ds->ds_snapname, tx)); 2242 VERIFY(0 == zap_add(dp->dp_meta_objset, 2243 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2244 8, 1, &ds->ds_object, tx)); 2245 2246 /* change containing dsl_dir */ 2247 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2248 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2249 ds->ds_phys->ds_dir_obj = dd->dd_object; 2250 ASSERT3P(ds->ds_dir, ==, odd); 2251 dsl_dir_close(ds->ds_dir, ds); 2252 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2253 NULL, ds, &ds->ds_dir)); 2254 2255 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2256 2257 if (ds->ds_phys->ds_prev_snap_obj == 0) 2258 break; 2259 2260 VERIFY(0 == dsl_dataset_open_obj(dp, 2261 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 2262 FTAG, &prev)); 2263 2264 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 2265 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 2266 break; 2267 } 2268 if (ds != origin_ds) 2269 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2270 ds = prev; 2271 } 2272 if (ds != origin_ds) 2273 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2274 2275 /* change origin's next snap */ 2276 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2277 origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; 2278 2279 /* change origin */ 2280 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2281 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2282 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2283 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2284 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2285 2286 /* change space accounting */ 2287 dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx); 2288 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); 2289 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2290 2291 /* log history record */ 2292 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2293 cr, "dataset = %llu", ds->ds_object); 2294 2295 dsl_dir_close(odd, FTAG); 2296 dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); 2297 kmem_free(name, MAXPATHLEN); 2298 } 2299 2300 int 2301 dsl_dataset_promote(const char *name) 2302 { 2303 dsl_dataset_t *ds; 2304 int err; 2305 dmu_object_info_t doi; 2306 struct promotearg pa; 2307 2308 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 2309 if (err) 2310 return (err); 2311 2312 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 2313 ds->ds_phys->ds_snapnames_zapobj, &doi); 2314 if (err) { 2315 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2316 return (err); 2317 } 2318 2319 /* 2320 * Add in 128x the snapnames zapobj size, since we will be moving 2321 * a bunch of snapnames to the promoted ds, and dirtying their 2322 * bonus buffers. 2323 */ 2324 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2325 dsl_dataset_promote_check, 2326 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); 2327 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2328 return (err); 2329 } 2330 2331 struct cloneswaparg { 2332 dsl_dataset_t *cds; /* clone dataset */ 2333 dsl_dataset_t *ohds; /* origin's head dataset */ 2334 boolean_t force; 2335 }; 2336 2337 /* ARGSUSED */ 2338 static int 2339 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2340 { 2341 struct cloneswaparg *csa = arg1; 2342 2343 if (csa->ohds->ds_reserved != 0) 2344 return (EINVAL); 2345 2346 /* they should both be heads */ 2347 if (dsl_dataset_is_snapshot(csa->cds) || 2348 dsl_dataset_is_snapshot(csa->ohds)) 2349 return (EINVAL); 2350 2351 /* the branch point should be just before them */ 2352 if (csa->cds->ds_prev != csa->ohds->ds_prev) 2353 return (EINVAL); 2354 2355 /* cds should be the clone */ 2356 if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != 2357 csa->ohds->ds_object) 2358 return (EINVAL); 2359 2360 /* the clone should be a child of the origin */ 2361 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 2362 return (EINVAL); 2363 2364 /* ohds shouldn't be modified unless 'force' */ 2365 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 2366 return (ETXTBSY); 2367 return (0); 2368 } 2369 2370 /* ARGSUSED */ 2371 static void 2372 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2373 { 2374 struct cloneswaparg *csa = arg1; 2375 dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; 2376 uint64_t itor = 0; 2377 blkptr_t bp; 2378 uint64_t unique = 0; 2379 int err; 2380 2381 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 2382 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 2383 dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); 2384 2385 if (csa->cds->ds_user_ptr != NULL) { 2386 csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); 2387 csa->cds->ds_user_ptr = NULL; 2388 } 2389 2390 if (csa->ohds->ds_user_ptr != NULL) { 2391 csa->ohds->ds_user_evict_func(csa->ohds, 2392 csa->ohds->ds_user_ptr); 2393 csa->ohds->ds_user_ptr = NULL; 2394 } 2395 2396 /* compute unique space */ 2397 while ((err = bplist_iterate(&csa->cds->ds_deadlist, 2398 &itor, &bp)) == 0) { 2399 if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg) 2400 unique += bp_get_dasize(dp->dp_spa, &bp); 2401 } 2402 VERIFY(err == ENOENT); 2403 2404 /* undo any accounting due to a refreservation */ 2405 if (csa->ohds->ds_reserved > csa->ohds->ds_phys->ds_unique_bytes) { 2406 dsl_dir_diduse_space(csa->ohds->ds_dir, 2407 csa->ohds->ds_phys->ds_unique_bytes - 2408 csa->ohds->ds_reserved, 0, 0, tx); 2409 } 2410 2411 /* reset origin's unique bytes */ 2412 csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique; 2413 2414 /* swap blkptrs */ 2415 { 2416 blkptr_t tmp; 2417 tmp = csa->ohds->ds_phys->ds_bp; 2418 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 2419 csa->cds->ds_phys->ds_bp = tmp; 2420 } 2421 2422 /* set dd_*_bytes */ 2423 { 2424 int64_t dused, dcomp, duncomp; 2425 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2426 uint64_t odl_used, odl_comp, odl_uncomp; 2427 2428 VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2429 &cdl_comp, &cdl_uncomp)); 2430 VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2431 &odl_comp, &odl_uncomp)); 2432 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 2433 (csa->ohds->ds_phys->ds_used_bytes + odl_used); 2434 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 2435 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 2436 duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 2437 cdl_uncomp - 2438 (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2439 2440 dsl_dir_diduse_space(csa->ohds->ds_dir, 2441 dused, dcomp, duncomp, tx); 2442 dsl_dir_diduse_space(csa->cds->ds_dir, 2443 -dused, -dcomp, -duncomp, tx); 2444 } 2445 2446 #define SWITCH64(x, y) \ 2447 { \ 2448 uint64_t __tmp = (x); \ 2449 (x) = (y); \ 2450 (y) = __tmp; \ 2451 } 2452 2453 /* redo any accounting due to a refreservation */ 2454 if (csa->ohds->ds_reserved > csa->ohds->ds_phys->ds_unique_bytes) { 2455 dsl_dir_diduse_space(csa->ohds->ds_dir, 2456 csa->ohds->ds_reserved - 2457 csa->ohds->ds_phys->ds_unique_bytes, 0, 0, tx); 2458 } 2459 2460 /* swap ds_*_bytes */ 2461 SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 2462 csa->cds->ds_phys->ds_used_bytes); 2463 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 2464 csa->cds->ds_phys->ds_compressed_bytes); 2465 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 2466 csa->cds->ds_phys->ds_uncompressed_bytes); 2467 2468 /* swap deadlists */ 2469 bplist_close(&csa->cds->ds_deadlist); 2470 bplist_close(&csa->ohds->ds_deadlist); 2471 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 2472 csa->cds->ds_phys->ds_deadlist_obj); 2473 VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 2474 csa->cds->ds_phys->ds_deadlist_obj)); 2475 VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 2476 csa->ohds->ds_phys->ds_deadlist_obj)); 2477 /* fix up clone's unique */ 2478 dsl_dataset_recalc_head_uniq(csa->cds); 2479 2480 } 2481 2482 /* 2483 * Swap the clone "cosname" with its origin head file system. 2484 */ 2485 int 2486 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 2487 boolean_t force) 2488 { 2489 struct cloneswaparg csa; 2490 2491 ASSERT(clone->ds_open_refcount == DS_REF_MAX); 2492 ASSERT(origin_head->ds_open_refcount == DS_REF_MAX); 2493 2494 csa.cds = clone; 2495 csa.ohds = origin_head; 2496 csa.force = force; 2497 return (dsl_sync_task_do(clone->ds_dir->dd_pool, 2498 dsl_dataset_clone_swap_check, 2499 dsl_dataset_clone_swap_sync, &csa, NULL, 9)); 2500 } 2501 2502 /* 2503 * Given a pool name and a dataset object number in that pool, 2504 * return the name of that dataset. 2505 */ 2506 int 2507 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2508 { 2509 spa_t *spa; 2510 dsl_pool_t *dp; 2511 dsl_dataset_t *ds = NULL; 2512 int error; 2513 2514 if ((error = spa_open(pname, &spa, FTAG)) != 0) 2515 return (error); 2516 dp = spa_get_dsl(spa); 2517 rw_enter(&dp->dp_config_rwlock, RW_READER); 2518 if ((error = dsl_dataset_open_obj(dp, obj, 2519 NULL, DS_MODE_NONE, FTAG, &ds)) != 0) { 2520 rw_exit(&dp->dp_config_rwlock); 2521 spa_close(spa, FTAG); 2522 return (error); 2523 } 2524 dsl_dataset_name(ds, buf); 2525 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2526 rw_exit(&dp->dp_config_rwlock); 2527 spa_close(spa, FTAG); 2528 2529 return (0); 2530 } 2531 2532 int 2533 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2534 uint64_t asize, uint64_t inflight, uint64_t *used) 2535 { 2536 int error = 0; 2537 2538 ASSERT3S(asize, >, 0); 2539 2540 mutex_enter(&ds->ds_lock); 2541 /* 2542 * Make a space adjustment for reserved bytes. 2543 */ 2544 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2545 ASSERT3U(*used, >=, 2546 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2547 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2548 } 2549 2550 if (!check_quota || ds->ds_quota == 0) { 2551 mutex_exit(&ds->ds_lock); 2552 return (0); 2553 } 2554 /* 2555 * If they are requesting more space, and our current estimate 2556 * is over quota, they get to try again unless the actual 2557 * on-disk is over quota and there are no pending changes (which 2558 * may free up space for us). 2559 */ 2560 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 2561 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 2562 error = ERESTART; 2563 else 2564 error = EDQUOT; 2565 } 2566 mutex_exit(&ds->ds_lock); 2567 2568 return (error); 2569 } 2570 2571 /* ARGSUSED */ 2572 static int 2573 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 2574 { 2575 dsl_dataset_t *ds = arg1; 2576 uint64_t *quotap = arg2; 2577 uint64_t new_quota = *quotap; 2578 2579 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 2580 return (ENOTSUP); 2581 2582 if (new_quota == 0) 2583 return (0); 2584 2585 if (new_quota < ds->ds_phys->ds_used_bytes || 2586 new_quota < ds->ds_reserved) 2587 return (ENOSPC); 2588 2589 return (0); 2590 } 2591 2592 /* ARGSUSED */ 2593 void 2594 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2595 { 2596 dsl_dataset_t *ds = arg1; 2597 uint64_t *quotap = arg2; 2598 uint64_t new_quota = *quotap; 2599 2600 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2601 2602 mutex_enter(&ds->ds_lock); 2603 ds->ds_quota = new_quota; 2604 mutex_exit(&ds->ds_lock); 2605 2606 dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 2607 2608 spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 2609 tx, cr, "%lld dataset = %llu ", 2610 (longlong_t)new_quota, ds->ds_dir->dd_phys->dd_head_dataset_obj); 2611 } 2612 2613 int 2614 dsl_dataset_set_quota(const char *dsname, uint64_t quota) 2615 { 2616 dsl_dataset_t *ds; 2617 int err; 2618 2619 err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); 2620 if (err) 2621 return (err); 2622 2623 /* 2624 * If someone removes a file, then tries to set the quota, we 2625 * want to make sure the file freeing takes effect. 2626 */ 2627 txg_wait_open(ds->ds_dir->dd_pool, 0); 2628 2629 err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_set_quota_check, 2630 dsl_dataset_set_quota_sync, ds, "a, 0); 2631 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2632 return (err); 2633 } 2634 2635 static int 2636 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 2637 { 2638 dsl_dataset_t *ds = arg1; 2639 uint64_t *reservationp = arg2; 2640 uint64_t new_reservation = *reservationp; 2641 int64_t delta; 2642 uint64_t unique; 2643 2644 if (new_reservation > INT64_MAX) 2645 return (EOVERFLOW); 2646 2647 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 2648 SPA_VERSION_REFRESERVATION) 2649 return (ENOTSUP); 2650 2651 if (dsl_dataset_is_snapshot(ds)) 2652 return (EINVAL); 2653 2654 /* 2655 * If we are doing the preliminary check in open context, the 2656 * space estimates may be inaccurate. 2657 */ 2658 if (!dmu_tx_is_syncing(tx)) 2659 return (0); 2660 2661 mutex_enter(&ds->ds_lock); 2662 unique = dsl_dataset_unique(ds); 2663 delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved); 2664 mutex_exit(&ds->ds_lock); 2665 2666 if (delta > 0 && 2667 delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 2668 return (ENOSPC); 2669 if (delta > 0 && ds->ds_quota > 0 && 2670 new_reservation > ds->ds_quota) 2671 return (ENOSPC); 2672 2673 return (0); 2674 } 2675 2676 /* ARGSUSED */ 2677 static void 2678 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 2679 dmu_tx_t *tx) 2680 { 2681 dsl_dataset_t *ds = arg1; 2682 uint64_t *reservationp = arg2; 2683 uint64_t new_reservation = *reservationp; 2684 uint64_t unique; 2685 int64_t delta; 2686 2687 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2688 2689 mutex_enter(&ds->ds_lock); 2690 unique = dsl_dataset_unique(ds); 2691 delta = MAX(0, (int64_t)(new_reservation - unique)) - 2692 MAX(0, (int64_t)(ds->ds_reserved - unique)); 2693 ds->ds_reserved = new_reservation; 2694 mutex_exit(&ds->ds_lock); 2695 2696 dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation", 2697 new_reservation, cr, tx); 2698 2699 dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx); 2700 2701 spa_history_internal_log(LOG_DS_REFRESERV, 2702 ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 2703 (longlong_t)new_reservation, 2704 ds->ds_dir->dd_phys->dd_head_dataset_obj); 2705 } 2706 2707 int 2708 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 2709 { 2710 dsl_dataset_t *ds; 2711 int err; 2712 2713 err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); 2714 if (err) 2715 return (err); 2716 2717 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2718 dsl_dataset_set_reservation_check, 2719 dsl_dataset_set_reservation_sync, ds, &reservation, 0); 2720 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2721 return (err); 2722 } 2723