1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/dmu_objset.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_prop.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dmu_traverse.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zio.h> 37 #include <sys/zap.h> 38 #include <sys/unique.h> 39 #include <sys/zfs_context.h> 40 #include <sys/zfs_ioctl.h> 41 #include <sys/spa.h> 42 #include <sys/sunddi.h> 43 44 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 45 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 46 static dsl_checkfunc_t dsl_dataset_rollback_check; 47 static dsl_syncfunc_t dsl_dataset_rollback_sync; 48 static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 49 50 #define DS_REF_MAX (1ULL << 62) 51 52 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 53 54 /* 55 * We use weighted reference counts to express the various forms of exclusion 56 * between different open modes. 
A STANDARD open is 1 point, an EXCLUSIVE open 57 * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. 58 * This makes the exclusion logic simple: the total refcnt for all opens cannot 59 * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their 60 * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume 61 * just over half of the refcnt space, so there can't be more than one, but it 62 * can peacefully coexist with any number of STANDARD opens. 63 */ 64 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { 65 0, /* DS_MODE_NONE - invalid */ 66 1, /* DS_MODE_STANDARD - unlimited number */ 67 (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */ 68 DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */ 69 }; 70 71 /* 72 * Figure out how much of this delta should be propogated to the dsl_dir 73 * layer. If there's a refreservation, that space has already been 74 * partially accounted for in our ancestors. 75 */ 76 static int64_t 77 parent_delta(dsl_dataset_t *ds, int64_t delta) 78 { 79 uint64_t old_bytes, new_bytes; 80 81 if (ds->ds_reserved == 0) 82 return (delta); 83 84 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 85 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 86 87 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 88 return (new_bytes - old_bytes); 89 } 90 91 void 92 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 93 { 94 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 95 int compressed = BP_GET_PSIZE(bp); 96 int uncompressed = BP_GET_UCSIZE(bp); 97 int64_t delta; 98 99 dprintf_bp(bp, "born, ds=%p\n", ds); 100 101 ASSERT(dmu_tx_is_syncing(tx)); 102 /* It could have been compressed away to nothing */ 103 if (BP_IS_HOLE(bp)) 104 return; 105 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 106 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 107 if (ds == NULL) { 108 /* 109 * Account for the meta-objset space in its 
placeholder 110 * dsl_dir. 111 */ 112 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 113 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 114 used, compressed, uncompressed, tx); 115 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 116 return; 117 } 118 dmu_buf_will_dirty(ds->ds_dbuf, tx); 119 mutex_enter(&ds->ds_lock); 120 delta = parent_delta(ds, used); 121 ds->ds_phys->ds_used_bytes += used; 122 ds->ds_phys->ds_compressed_bytes += compressed; 123 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 124 ds->ds_phys->ds_unique_bytes += used; 125 mutex_exit(&ds->ds_lock); 126 dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx); 127 } 128 129 void 130 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 131 dmu_tx_t *tx) 132 { 133 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 134 int compressed = BP_GET_PSIZE(bp); 135 int uncompressed = BP_GET_UCSIZE(bp); 136 137 ASSERT(dmu_tx_is_syncing(tx)); 138 /* No block pointer => nothing to free */ 139 if (BP_IS_HOLE(bp)) 140 return; 141 142 ASSERT(used > 0); 143 if (ds == NULL) { 144 int err; 145 /* 146 * Account for the meta-objset space in its placeholder 147 * dataset. 148 */ 149 err = arc_free(pio, tx->tx_pool->dp_spa, 150 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 151 ASSERT(err == 0); 152 153 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 154 -used, -compressed, -uncompressed, tx); 155 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 156 return; 157 } 158 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 159 160 dmu_buf_will_dirty(ds->ds_dbuf, tx); 161 162 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 163 int err; 164 int64_t delta; 165 166 dprintf_bp(bp, "freeing: %s", ""); 167 err = arc_free(pio, tx->tx_pool->dp_spa, 168 tx->tx_txg, bp, NULL, NULL, pio ? 
ARC_NOWAIT: ARC_WAIT); 169 ASSERT(err == 0); 170 171 mutex_enter(&ds->ds_lock); 172 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 173 !DS_UNIQUE_IS_ACCURATE(ds)); 174 delta = parent_delta(ds, -used); 175 ds->ds_phys->ds_unique_bytes -= used; 176 mutex_exit(&ds->ds_lock); 177 dsl_dir_diduse_space(ds->ds_dir, 178 delta, -compressed, -uncompressed, tx); 179 } else { 180 dprintf_bp(bp, "putting on dead list: %s", ""); 181 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 182 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 183 if (ds->ds_phys->ds_prev_snap_obj != 0) { 184 ASSERT3U(ds->ds_prev->ds_object, ==, 185 ds->ds_phys->ds_prev_snap_obj); 186 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 187 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 188 ds->ds_object && bp->blk_birth > 189 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 190 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 191 mutex_enter(&ds->ds_prev->ds_lock); 192 ds->ds_prev->ds_phys->ds_unique_bytes += 193 used; 194 mutex_exit(&ds->ds_prev->ds_lock); 195 } 196 } 197 } 198 mutex_enter(&ds->ds_lock); 199 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 200 ds->ds_phys->ds_used_bytes -= used; 201 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 202 ds->ds_phys->ds_compressed_bytes -= compressed; 203 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 204 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 205 mutex_exit(&ds->ds_lock); 206 } 207 208 uint64_t 209 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 210 { 211 uint64_t trysnap = 0; 212 213 if (ds == NULL) 214 return (0); 215 /* 216 * The snapshot creation could fail, but that would cause an 217 * incorrect FALSE return, which would only result in an 218 * overestimation of the amount of space that an operation would 219 * consume, which is OK. 220 * 221 * There's also a small window where we could miss a pending 222 * snapshot, because we could set the sync task in the quiescing 223 * phase. 
So this should only be used as a guess. 224 */ 225 if (ds->ds_trysnap_txg > 226 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 227 trysnap = ds->ds_trysnap_txg; 228 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 229 } 230 231 int 232 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 233 { 234 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 235 } 236 237 /* ARGSUSED */ 238 static void 239 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 240 { 241 dsl_dataset_t *ds = dsv; 242 243 /* open_refcount == DS_REF_MAX when deleting */ 244 ASSERT(ds->ds_open_refcount == 0 || 245 ds->ds_open_refcount == DS_REF_MAX); 246 247 dprintf_ds(ds, "evicting %s\n", ""); 248 249 unique_remove(ds->ds_fsid_guid); 250 251 if (ds->ds_user_ptr != NULL) 252 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 253 254 if (ds->ds_prev) { 255 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 256 ds->ds_prev = NULL; 257 } 258 259 bplist_close(&ds->ds_deadlist); 260 dsl_dir_close(ds->ds_dir, ds); 261 262 ASSERT(!list_link_active(&ds->ds_synced_link)); 263 264 mutex_destroy(&ds->ds_lock); 265 mutex_destroy(&ds->ds_opening_lock); 266 mutex_destroy(&ds->ds_deadlist.bpl_lock); 267 268 kmem_free(ds, sizeof (dsl_dataset_t)); 269 } 270 271 static int 272 dsl_dataset_get_snapname(dsl_dataset_t *ds) 273 { 274 dsl_dataset_phys_t *headphys; 275 int err; 276 dmu_buf_t *headdbuf; 277 dsl_pool_t *dp = ds->ds_dir->dd_pool; 278 objset_t *mos = dp->dp_meta_objset; 279 280 if (ds->ds_snapname[0]) 281 return (0); 282 if (ds->ds_phys->ds_next_snap_obj == 0) 283 return (0); 284 285 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 286 FTAG, &headdbuf); 287 if (err) 288 return (err); 289 headphys = headdbuf->db_data; 290 err = zap_value_search(dp->dp_meta_objset, 291 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 292 dmu_buf_rele(headdbuf, FTAG); 293 return (err); 294 } 295 296 int 297 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, 298 
int mode, void *tag, dsl_dataset_t **dsp)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		/*
		 * No in-core dataset is attached to this buffer yet, so
		 * build one.  'winner' is only assigned on the success
		 * path below; start it out NULL so it is never read
		 * uninitialized.
		 */
		dsl_dataset_t *winner = NULL;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
		    NULL);

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the bplist if we
			 * just opened it.
336 */ 337 mutex_destroy(&ds->ds_lock); 338 mutex_destroy(&ds->ds_opening_lock); 339 mutex_destroy(&ds->ds_deadlist.bpl_lock); 340 kmem_free(ds, sizeof (dsl_dataset_t)); 341 dmu_buf_rele(dbuf, tag); 342 return (err); 343 } 344 345 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { 346 ds->ds_snapname[0] = '\0'; 347 if (ds->ds_phys->ds_prev_snap_obj) { 348 err = dsl_dataset_open_obj(dp, 349 ds->ds_phys->ds_prev_snap_obj, NULL, 350 DS_MODE_NONE, ds, &ds->ds_prev); 351 } 352 } else { 353 if (snapname) { 354 #ifdef ZFS_DEBUG 355 dsl_dataset_phys_t *headphys; 356 dmu_buf_t *headdbuf; 357 err = dmu_bonus_hold(mos, 358 ds->ds_dir->dd_phys->dd_head_dataset_obj, 359 FTAG, &headdbuf); 360 if (err == 0) { 361 headphys = headdbuf->db_data; 362 uint64_t foundobj; 363 err = zap_lookup(dp->dp_meta_objset, 364 headphys->ds_snapnames_zapobj, 365 snapname, sizeof (foundobj), 1, 366 &foundobj); 367 ASSERT3U(foundobj, ==, dsobj); 368 dmu_buf_rele(headdbuf, FTAG); 369 } 370 #endif 371 (void) strcat(ds->ds_snapname, snapname); 372 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 373 err = dsl_dataset_get_snapname(ds); 374 } 375 } 376 377 if (!dsl_dataset_is_snapshot(ds)) { 378 boolean_t need_lock = 379 !RW_LOCK_HELD(&dp->dp_config_rwlock); 380 381 if (need_lock) 382 rw_enter(&dp->dp_config_rwlock, RW_READER); 383 384 err = dsl_prop_get_ds_locked(ds->ds_dir, 385 "refreservation", sizeof (uint64_t), 1, 386 &ds->ds_reserved, NULL); 387 if (err == 0) { 388 err = dsl_prop_get_ds_locked(ds->ds_dir, 389 "refquota", sizeof (uint64_t), 1, 390 &ds->ds_quota, NULL); 391 } 392 393 if (need_lock) 394 rw_exit(&dp->dp_config_rwlock); 395 } else { 396 ds->ds_reserved = ds->ds_quota = 0; 397 } 398 399 if (err == 0) { 400 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 401 dsl_dataset_evict); 402 } 403 if (err || winner) { 404 bplist_close(&ds->ds_deadlist); 405 if (ds->ds_prev) { 406 dsl_dataset_close(ds->ds_prev, 407 DS_MODE_NONE, ds); 408 } 409 dsl_dir_close(ds->ds_dir, ds); 410 
mutex_destroy(&ds->ds_lock); 411 mutex_destroy(&ds->ds_opening_lock); 412 mutex_destroy(&ds->ds_deadlist.bpl_lock); 413 kmem_free(ds, sizeof (dsl_dataset_t)); 414 if (err) { 415 dmu_buf_rele(dbuf, tag); 416 return (err); 417 } 418 ds = winner; 419 } else { 420 ds->ds_fsid_guid = 421 unique_insert(ds->ds_phys->ds_fsid_guid); 422 } 423 } 424 ASSERT3P(ds->ds_dbuf, ==, dbuf); 425 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 426 427 mutex_enter(&ds->ds_lock); 428 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && 429 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && 430 !DS_MODE_IS_INCONSISTENT(mode)) || 431 (ds->ds_open_refcount + weight > DS_REF_MAX)) { 432 mutex_exit(&ds->ds_lock); 433 dsl_dataset_close(ds, DS_MODE_NONE, tag); 434 return (EBUSY); 435 } 436 ds->ds_open_refcount += weight; 437 mutex_exit(&ds->ds_lock); 438 439 *dsp = ds; 440 return (0); 441 } 442 443 int 444 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, 445 void *tag, dsl_dataset_t **dsp) 446 { 447 dsl_dir_t *dd; 448 dsl_pool_t *dp; 449 const char *tail; 450 uint64_t obj; 451 dsl_dataset_t *ds = NULL; 452 int err = 0; 453 454 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); 455 if (err) 456 return (err); 457 458 dp = dd->dd_pool; 459 obj = dd->dd_phys->dd_head_dataset_obj; 460 rw_enter(&dp->dp_config_rwlock, RW_READER); 461 if (obj == 0) { 462 /* A dataset with no associated objset */ 463 err = ENOENT; 464 goto out; 465 } 466 467 if (tail != NULL) { 468 objset_t *mos = dp->dp_meta_objset; 469 470 err = dsl_dataset_open_obj(dp, obj, NULL, 471 DS_MODE_NONE, tag, &ds); 472 if (err) 473 goto out; 474 obj = ds->ds_phys->ds_snapnames_zapobj; 475 dsl_dataset_close(ds, DS_MODE_NONE, tag); 476 ds = NULL; 477 478 if (tail[0] != '@') { 479 err = ENOENT; 480 goto out; 481 } 482 tail++; 483 484 /* Look for a snapshot */ 485 if (!DS_MODE_IS_READONLY(mode)) { 486 err = EROFS; 487 goto out; 488 } 489 dprintf("looking for snapshot '%s'\n", tail); 490 err = zap_lookup(mos, obj, tail, 8, 1, &obj); 491 if 
(err) 492 goto out; 493 } 494 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); 495 496 out: 497 rw_exit(&dp->dp_config_rwlock); 498 dsl_dir_close(dd, FTAG); 499 500 ASSERT3U((err == 0), ==, (ds != NULL)); 501 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ 502 503 *dsp = ds; 504 return (err); 505 } 506 507 int 508 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) 509 { 510 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); 511 } 512 513 void 514 dsl_dataset_name(dsl_dataset_t *ds, char *name) 515 { 516 if (ds == NULL) { 517 (void) strcpy(name, "mos"); 518 } else { 519 dsl_dir_name(ds->ds_dir, name); 520 VERIFY(0 == dsl_dataset_get_snapname(ds)); 521 if (ds->ds_snapname[0]) { 522 (void) strcat(name, "@"); 523 if (!MUTEX_HELD(&ds->ds_lock)) { 524 /* 525 * We use a "recursive" mutex so that we 526 * can call dprintf_ds() with ds_lock held. 527 */ 528 mutex_enter(&ds->ds_lock); 529 (void) strcat(name, ds->ds_snapname); 530 mutex_exit(&ds->ds_lock); 531 } else { 532 (void) strcat(name, ds->ds_snapname); 533 } 534 } 535 } 536 } 537 538 static int 539 dsl_dataset_namelen(dsl_dataset_t *ds) 540 { 541 int result; 542 543 if (ds == NULL) { 544 result = 3; /* "mos" */ 545 } else { 546 result = dsl_dir_namelen(ds->ds_dir); 547 VERIFY(0 == dsl_dataset_get_snapname(ds)); 548 if (ds->ds_snapname[0]) { 549 ++result; /* adding one for the @-sign */ 550 if (!MUTEX_HELD(&ds->ds_lock)) { 551 /* see dsl_datset_name */ 552 mutex_enter(&ds->ds_lock); 553 result += strlen(ds->ds_snapname); 554 mutex_exit(&ds->ds_lock); 555 } else { 556 result += strlen(ds->ds_snapname); 557 } 558 } 559 } 560 561 return (result); 562 } 563 564 void 565 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) 566 { 567 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 568 mutex_enter(&ds->ds_lock); 569 ASSERT3U(ds->ds_open_refcount, >=, weight); 570 ds->ds_open_refcount -= weight; 571 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", 572 
mode, ds->ds_open_refcount); 573 mutex_exit(&ds->ds_lock); 574 575 dmu_buf_rele(ds->ds_dbuf, tag); 576 } 577 578 void 579 dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode) 580 { 581 uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; 582 uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; 583 mutex_enter(&ds->ds_lock); 584 ASSERT3U(ds->ds_open_refcount, >=, oldweight); 585 ASSERT3U(oldweight, >=, newweight); 586 ds->ds_open_refcount -= oldweight; 587 ds->ds_open_refcount += newweight; 588 mutex_exit(&ds->ds_lock); 589 } 590 591 boolean_t 592 dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode) 593 { 594 boolean_t rv; 595 uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; 596 uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; 597 mutex_enter(&ds->ds_lock); 598 ASSERT3U(ds->ds_open_refcount, >=, oldweight); 599 ASSERT3U(newweight, >=, oldweight); 600 if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) { 601 rv = B_FALSE; 602 } else { 603 ds->ds_open_refcount -= oldweight; 604 ds->ds_open_refcount += newweight; 605 rv = B_TRUE; 606 } 607 mutex_exit(&ds->ds_lock); 608 return (rv); 609 } 610 611 void 612 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) 613 { 614 objset_t *mos = dp->dp_meta_objset; 615 dmu_buf_t *dbuf; 616 dsl_dataset_phys_t *dsphys; 617 dsl_dataset_t *ds; 618 uint64_t dsobj; 619 dsl_dir_t *dd; 620 621 dsl_dir_create_root(mos, ddobjp, tx); 622 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); 623 624 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 625 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 626 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 627 dmu_buf_will_dirty(dbuf, tx); 628 dsphys = dbuf->db_data; 629 dsphys->ds_dir_obj = dd->dd_object; 630 dsphys->ds_fsid_guid = unique_create(); 631 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 632 sizeof (dsphys->ds_guid)); 633 dsphys->ds_snapnames_zapobj = 
634 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 635 dsphys->ds_creation_time = gethrestime_sec(); 636 dsphys->ds_creation_txg = tx->tx_txg; 637 dsphys->ds_deadlist_obj = 638 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 639 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 640 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 641 dmu_buf_rele(dbuf, FTAG); 642 643 dmu_buf_will_dirty(dd->dd_dbuf, tx); 644 dd->dd_phys->dd_head_dataset_obj = dsobj; 645 dsl_dir_close(dd, FTAG); 646 647 VERIFY(0 == 648 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); 649 (void) dmu_objset_create_impl(dp->dp_spa, ds, 650 &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx); 651 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 652 } 653 654 uint64_t 655 dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_tx_t *tx) 656 { 657 dsl_pool_t *dp = dd->dd_pool; 658 dmu_buf_t *dbuf; 659 dsl_dataset_phys_t *dsphys; 660 uint64_t dsobj; 661 objset_t *mos = dp->dp_meta_objset; 662 663 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 664 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 665 ASSERT(dmu_tx_is_syncing(tx)); 666 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 667 668 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 669 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 670 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 671 dmu_buf_will_dirty(dbuf, tx); 672 dsphys = dbuf->db_data; 673 dsphys->ds_dir_obj = dd->dd_object; 674 dsphys->ds_fsid_guid = unique_create(); 675 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 676 sizeof (dsphys->ds_guid)); 677 dsphys->ds_snapnames_zapobj = 678 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 679 dsphys->ds_creation_time = gethrestime_sec(); 680 dsphys->ds_creation_txg = tx->tx_txg; 681 dsphys->ds_deadlist_obj = 682 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 683 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 684 dsphys->ds_flags |= 
DS_FLAG_UNIQUE_ACCURATE; 685 686 if (origin) { 687 dsphys->ds_prev_snap_obj = origin->ds_object; 688 dsphys->ds_prev_snap_txg = 689 origin->ds_phys->ds_creation_txg; 690 dsphys->ds_used_bytes = 691 origin->ds_phys->ds_used_bytes; 692 dsphys->ds_compressed_bytes = 693 origin->ds_phys->ds_compressed_bytes; 694 dsphys->ds_uncompressed_bytes = 695 origin->ds_phys->ds_uncompressed_bytes; 696 dsphys->ds_bp = origin->ds_phys->ds_bp; 697 698 dmu_buf_will_dirty(origin->ds_dbuf, tx); 699 origin->ds_phys->ds_num_children++; 700 701 dmu_buf_will_dirty(dd->dd_dbuf, tx); 702 dd->dd_phys->dd_origin_obj = origin->ds_object; 703 } 704 dmu_buf_rele(dbuf, FTAG); 705 706 dmu_buf_will_dirty(dd->dd_dbuf, tx); 707 dd->dd_phys->dd_head_dataset_obj = dsobj; 708 709 return (dsobj); 710 } 711 712 uint64_t 713 dsl_dataset_create_sync(dsl_dir_t *pdd, 714 const char *lastname, dsl_dataset_t *origin, cred_t *cr, dmu_tx_t *tx) 715 { 716 dsl_pool_t *dp = pdd->dd_pool; 717 uint64_t dsobj, ddobj; 718 dsl_dir_t *dd; 719 720 ASSERT(lastname[0] != '@'); 721 722 ddobj = dsl_dir_create_sync(pdd, lastname, tx); 723 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 724 725 dsobj = dsl_dataset_create_sync_impl(dd, origin, tx); 726 727 dsl_deleg_set_create_perms(dd, tx, cr); 728 729 dsl_dir_close(dd, FTAG); 730 731 return (dsobj); 732 } 733 734 struct destroyarg { 735 dsl_sync_task_group_t *dstg; 736 char *snapname; 737 char *failed; 738 }; 739 740 static int 741 dsl_snapshot_destroy_one(char *name, void *arg) 742 { 743 struct destroyarg *da = arg; 744 dsl_dataset_t *ds; 745 char *cp; 746 int err; 747 748 (void) strcat(name, "@"); 749 (void) strcat(name, da->snapname); 750 err = dsl_dataset_open(name, 751 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 752 da->dstg, &ds); 753 cp = strchr(name, '@'); 754 *cp = '\0'; 755 if (err == ENOENT) 756 return (0); 757 if (err) { 758 (void) strcpy(da->failed, name); 759 return (err); 760 } 761 762 dsl_sync_task_create(da->dstg, 
dsl_dataset_destroy_check, 763 dsl_dataset_destroy_sync, ds, da->dstg, 0); 764 return (0); 765 } 766 767 /* 768 * Destroy 'snapname' in all descendants of 'fsname'. 769 */ 770 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 771 int 772 dsl_snapshots_destroy(char *fsname, char *snapname) 773 { 774 int err; 775 struct destroyarg da; 776 dsl_sync_task_t *dst; 777 spa_t *spa; 778 779 err = spa_open(fsname, &spa, FTAG); 780 if (err) 781 return (err); 782 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 783 da.snapname = snapname; 784 da.failed = fsname; 785 786 err = dmu_objset_find(fsname, 787 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 788 789 if (err == 0) 790 err = dsl_sync_task_group_wait(da.dstg); 791 792 for (dst = list_head(&da.dstg->dstg_tasks); dst; 793 dst = list_next(&da.dstg->dstg_tasks, dst)) { 794 dsl_dataset_t *ds = dst->dst_arg1; 795 if (dst->dst_err) { 796 dsl_dataset_name(ds, fsname); 797 *strchr(fsname, '@') = '\0'; 798 } 799 /* 800 * If it was successful, destroy_sync would have 801 * closed the ds 802 */ 803 if (err) 804 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg); 805 } 806 807 dsl_sync_task_group_destroy(da.dstg); 808 spa_close(spa, FTAG); 809 return (err); 810 } 811 812 /* 813 * ds must be opened EXCLUSIVE or PRIMARY. on return (whether 814 * successful or not), ds will be closed and caller can no longer 815 * dereference it. 
816 */ 817 int 818 dsl_dataset_destroy(dsl_dataset_t *ds, void *tag) 819 { 820 int err; 821 dsl_sync_task_group_t *dstg; 822 objset_t *os; 823 dsl_dir_t *dd; 824 uint64_t obj; 825 826 if (ds->ds_open_refcount != DS_REF_MAX) { 827 if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY, 828 DS_MODE_EXCLUSIVE) == 0) { 829 dsl_dataset_close(ds, DS_MODE_PRIMARY, tag); 830 return (EBUSY); 831 } 832 } 833 834 if (dsl_dataset_is_snapshot(ds)) { 835 /* Destroying a snapshot is simpler */ 836 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 837 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 838 ds, tag, 0); 839 goto out; 840 } 841 842 dd = ds->ds_dir; 843 844 /* 845 * Check for errors and mark this ds as inconsistent, in 846 * case we crash while freeing the objects. 847 */ 848 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 849 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 850 if (err) 851 goto out; 852 853 err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); 854 if (err) 855 goto out; 856 857 /* 858 * remove the objects in open context, so that we won't 859 * have too much to do in syncing context. 860 */ 861 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 862 ds->ds_phys->ds_prev_snap_txg)) { 863 dmu_tx_t *tx = dmu_tx_create(os); 864 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); 865 dmu_tx_hold_bonus(tx, obj); 866 err = dmu_tx_assign(tx, TXG_WAIT); 867 if (err) { 868 /* 869 * Perhaps there is not enough disk 870 * space. Just deal with it from 871 * dsl_dataset_destroy_sync(). 872 */ 873 dmu_tx_abort(tx); 874 continue; 875 } 876 VERIFY(0 == dmu_object_free(os, obj, tx)); 877 dmu_tx_commit(tx); 878 } 879 /* Make sure it's not dirty before we finish destroying it. 
*/ 880 txg_wait_synced(dd->dd_pool, 0); 881 882 dmu_objset_close(os); 883 if (err != ESRCH) 884 goto out; 885 886 if (ds->ds_user_ptr) { 887 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 888 ds->ds_user_ptr = NULL; 889 } 890 891 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 892 err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); 893 rw_exit(&dd->dd_pool->dp_config_rwlock); 894 895 if (err) 896 goto out; 897 898 /* 899 * Blow away the dsl_dir + head dataset. 900 */ 901 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 902 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 903 dsl_dataset_destroy_sync, ds, tag, 0); 904 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 905 dsl_dir_destroy_sync, dd, FTAG, 0); 906 err = dsl_sync_task_group_wait(dstg); 907 dsl_sync_task_group_destroy(dstg); 908 /* if it is successful, *destroy_sync will close the ds+dd */ 909 if (err) 910 dsl_dir_close(dd, FTAG); 911 out: 912 if (err) 913 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 914 return (err); 915 } 916 917 int 918 dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost) 919 { 920 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 921 922 return (dsl_sync_task_do(ds->ds_dir->dd_pool, 923 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, 924 ds, &ost, 0)); 925 } 926 927 void * 928 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 929 void *p, dsl_dataset_evict_func_t func) 930 { 931 void *old; 932 933 mutex_enter(&ds->ds_lock); 934 old = ds->ds_user_ptr; 935 if (old == NULL) { 936 ds->ds_user_ptr = p; 937 ds->ds_user_evict_func = func; 938 } 939 mutex_exit(&ds->ds_lock); 940 return (old); 941 } 942 943 void * 944 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 945 { 946 return (ds->ds_user_ptr); 947 } 948 949 950 blkptr_t * 951 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 952 { 953 return (&ds->ds_phys->ds_bp); 954 } 955 956 void 957 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 958 { 959 ASSERT(dmu_tx_is_syncing(tx)); 960 /* If 
it's the meta-objset, set dp_meta_rootbp */ 961 if (ds == NULL) { 962 tx->tx_pool->dp_meta_rootbp = *bp; 963 } else { 964 dmu_buf_will_dirty(ds->ds_dbuf, tx); 965 ds->ds_phys->ds_bp = *bp; 966 } 967 } 968 969 spa_t * 970 dsl_dataset_get_spa(dsl_dataset_t *ds) 971 { 972 return (ds->ds_dir->dd_pool->dp_spa); 973 } 974 975 void 976 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 977 { 978 dsl_pool_t *dp; 979 980 if (ds == NULL) /* this is the meta-objset */ 981 return; 982 983 ASSERT(ds->ds_user_ptr != NULL); 984 985 if (ds->ds_phys->ds_next_snap_obj != 0) 986 panic("dirtying snapshot!"); 987 988 dp = ds->ds_dir->dd_pool; 989 990 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 991 /* up the hold count until we can be written out */ 992 dmu_buf_add_ref(ds->ds_dbuf, ds); 993 } 994 } 995 996 /* 997 * The unique space in the head dataset can be calculated by subtracting 998 * the space used in the most recent snapshot, that is still being used 999 * in this file system, from the space currently in use. To figure out 1000 * the space in the most recent snapshot still in use, we need to take 1001 * the total space used in the snapshot and subtract out the space that 1002 * has been freed up since the snapshot was taken. 
 */
static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	/* Only meaningful for the head dataset of its dsl_dir. */
	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);

	/* Space referenced by the previous snapshot, or 0 if there is none. */
	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
	else
		mrs_used = 0;

	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
	    &dluncomp));

	/*
	 * unique = our used - (previous snapshot's used - deadlist used).
	 */
	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);

	/*
	 * Flag the freshly computed value as accurate, but only on pools
	 * whose version is at least SPA_VERSION_UNIQUE_ACCURATE.
	 */
	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

/*
 * Return ds_unique_bytes for the dataset.  If the value is not flagged
 * accurate and the dataset is not a snapshot, it is recomputed first via
 * dsl_dataset_recalc_head_uniq().
 */
static uint64_t
dsl_dataset_unique(dsl_dataset_t *ds)
{
	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
		dsl_dataset_recalc_head_uniq(ds);

	return (ds->ds_phys->ds_unique_bytes);
}

/*
 * Argument block for kill_blkptr(): pointers to the running space totals
 * to update, plus the parent zio and the tx under which blocks are freed.
 */
struct killarg {
	int64_t *usedp;
	int64_t *compressedp;
	int64_t *uncompressedp;
	zio_t *zio;
	dmu_tx_t *tx;
};

/*
 * Traversal callback (passed to traverse_dsl_dataset() below): add the
 * visited block's sizes to the totals referenced by arg and free the block
 * with arc_free() under ka->zio.  Always returns 0.
 */
static int
kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
{
	struct killarg *ka = arg;
	blkptr_t *bp = &bc->bc_blkptr;

	ASSERT3U(bc->bc_errno, ==, 0);

	/*
	 * Since this callback is not called concurrently, no lock is
	 * needed on the accounting values.
	 */
	*ka->usedp += bp_get_dasize(spa, bp);
	*ka->compressedp += BP_GET_PSIZE(bp);
	*ka->uncompressedp += BP_GET_UCSIZE(bp);
	/* XXX check for EIO? */
	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
	    ARC_NOWAIT);
	return (0);
}

/*
 * Check function for a rollback.  arg1 is the dataset, arg2 points to the
 * objset type that will be used if the dataset has to be recreated.
 *
 * Returns EINVAL if the dataset has no previous snapshot and the type is
 * not DMU_OST_ZFS, or if the dataset is itself a snapshot; EAGAIN if the
 * dataset was modified in this txg; 0 otherwise.
 */
/* ARGSUSED */
static int
dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;

	/*
	 * We can only roll back to emptiness if it is a ZPL objset.
	 */
	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
		return (EINVAL);

	/*
	 * This must not be a snapshot.
	 */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	return (0);
}

/*
 * Sync function for a rollback.  Replaces the deadlist with a new empty
 * one, frees every block born after the previous snapshot, and then either
 * copies the previous snapshot's contents into this dataset or, when there
 * is no previous snapshot, zeroes the dataset and recreates its objset
 * with type *arg2.
 */
/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/*
	 * Before the roll back destroy the zil.
	 */
	if (ds->ds_user_ptr != NULL) {
		zil_rollback_destroy(
		    ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);

		/*
		 * We need to make sure that the objset_impl_t is reopened after
		 * we do the rollback, otherwise it will have the wrong
		 * objset_phys_t.  Normally this would happen when this
		 * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the
		 * dataset to be immediately evicted.  But when doing "zfs recv
		 * -F", we reopen the objset before that, so that there is no
		 * window where the dataset is closed and inconsistent.
		 */
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/* Free blkptrs that we gave birth to */
		zio_t *zio;
		int64_t used = 0, compressed = 0, uncompressed = 0;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		/*
		 * NOTE(review): the results of the traversal and of
		 * zio_wait() are both discarded here.
		 */
		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);

		/* Give the freed space back to the containing dsl_dir. */
		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	}

	if (ds->ds_prev) {
		/* Change our contents to that of the prev snapshot */
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
		ds->ds_phys->ds_used_bytes =
		    ds->ds_prev->ds_phys->ds_used_bytes;
		ds->ds_phys->ds_compressed_bytes =
		    ds->ds_prev->ds_phys->ds_compressed_bytes;
		ds->ds_phys->ds_uncompressed_bytes =
		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
		ds->ds_phys->ds_unique_bytes = 0;

		/*
		 * Only reset prev's unique bytes when we are prev's next
		 * snapshot.
		 */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
		}
	} else {
		/* Zero out our contents, recreate objset */
		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
		ds->ds_phys->ds_used_bytes = 0;
		ds->ds_phys->ds_compressed_bytes = 0;
		ds->ds_phys->ds_uncompressed_bytes = 0;
		ds->ds_phys->ds_flags = 0;
		ds->ds_phys->ds_unique_bytes = 0;
		(void)
		    dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
		    &ds->ds_phys->ds_bp, *ost, tx);
	}

	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
	    tx, cr, "dataset = %llu", ds->ds_object);
}

/*
 * Check function for the first phase of a destroy.  Returns EINVAL if the
 * dataset has snapshots of its own, EEXIST if its dsl_dir still has child
 * directories, an error from zap_count(), or 0.
 */
/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * This is really a dsl_dir thing, but check it here so that
	 * we'll be less likely to leave this dataset inconsistent &
	 * nearly destroyed.
	 */
	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
	if (err)
		return (err);
	if (count != 0)
		return (EEXIST);

	return (0);
}

/*
 * Sync function for the first phase of a destroy: set
 * DS_FLAG_INCONSISTENT on the dataset and log the event.
 */
/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
}

/*
 * Check function for the actual destroy.  Returns EEXIST if the dataset is
 * a branch point (ds_num_children > 1), EINVAL if it has snapshots of its
 * own, EAGAIN if it was modified in this txg, and 0 otherwise.
 */
/* ARGSUSED */
int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* Can't delete a branch point. */
	if (ds->ds_phys->ds_num_children > 1)
		return (EEXIST);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	/* XXX we should do some i/o error checking... */
	return (0);
}

/*
 * Sync function that destroys the dataset arg1.  The dataset must be held
 * with a total open refcount of DS_REF_MAX; that hold is released here
 * using `tag', after which the dataset object is freed from the MOS.
 *
 * Outline:
 *  - drop any refreservation;
 *  - unlink the dataset from its previous snapshot;
 *  - if there is a next snapshot, merge deadlists into it and fix up the
 *    neighbours' unique-bytes accounting; otherwise (head dataset) destroy
 *    the deadlist and free every block born after the previous snapshot;
 *  - return the freed space to the dsl_dir;
 *  - remove the dataset from the head pointer or the snapshot namespace.
 */
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	int64_t used = 0, compressed = 0, uncompressed = 0;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		uint64_t val = 0;
		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
		ASSERT3U(ds->ds_reserved, ==, 0);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/* Remember the object number; ds is closed before it is freed. */
	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		/*
		 * Use the already-open ds_prev if there is one; otherwise
		 * open it here (it is closed again near the end).
		 */
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_prev));
		}
		/* True when prev's next snapshot is not us. */
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			/* Splice ourselves out of the snapshot chain. */
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	/* Parent zio for all block frees issued below. */
	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;
		uint64_t old_unique;

		spa_scrub_restart(dp->dp_spa, tx->tx_txg);

		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		/* Sampled before next's accounting is changed below. */
		old_unique = dsl_dataset_unique(ds_next);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
		    &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;

			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_after_next));
			itor = 0;
			while (bplist_iterate(&ds_after_next->ds_deadlist,
			    &itor, &bp) == 0) {
				if (bp.blk_birth >
				    ds->ds_phys->ds_prev_snap_txg &&
				    bp.blk_birth <=
				    ds->ds_phys->ds_creation_txg) {
					ds_next->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			}

			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			/*
			 * next has no next snapshot of its own.  Re-point
			 * its ds_prev hold from us to our previous
			 * snapshot (or to nothing).
			 */
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
			    ds_next);
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
			} else {
				ds_next->ds_prev = NULL;
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir, -mrsdelta,
				    0, 0, tx);
			}
		}
		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);

		/*
		 * NB: unique_bytes might not be accurate for the head objset.
		 * Before SPA_VERSION 9, we didn't update its value when we
		 * deleted the most recent snapshot.
		 */
		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
		ASSERT(spa_version(dp->dp_spa) <
		    SPA_VERSION_UNIQUE_ACCURATE ||
		    used == ds->ds_phys->ds_unique_bytes);
	}

	/* Wait for every free issued above. */
	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);

	if (ds->ds_phys->ds_snapnames_zapobj) {
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	}

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dataset */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		/*
		 * dsl_dir_sync_destroy() called us, they'll destroy
		 * the dataset.
		 */
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			/* Debug only: the name must map to this object. */
			uint64_t val;
			err = zap_lookup(mos,
			    ds_head->ds_phys->ds_snapnames_zapobj,
			    ds->ds_snapname, 8, 1, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
		    ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
	}

	/* Close ds_prev only if it was opened locally above. */
	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);

	/* Drop the caller's exclusive hold, then free the object itself. */
	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));

}

/*
 * In syncing context, verify that the dsl_dir has room for
 * MIN(unique bytes, refreservation) and, if so, record that amount with
 * dsl_dir_willuse_space().  Returns ENOSPC when the space is not
 * available, and 0 otherwise (always 0 outside syncing context).
 */
static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
		return (ENOSPC);

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

/*
 * Check function for taking a snapshot.  arg1 is the dataset, arg2 the
 * snapshot name.  Returns EAGAIN if a snapshot already exists for this
 * txg, EEXIST if the name is taken, ENAMETOOLONG if the full name would
 * not fit in MAXNAMELEN, ENOSPC or a zap_lookup() error, or 0.  On
 * success ds_trysnap_txg is set to this txg.
 */
/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	/*
	 * Check that the dataset's name is not too long.  Name consists
	 * of the dataset's length + 1 for the @-sign + snapshot name's length
	 */
	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
		return (ENAMETOOLONG);

	err = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (err)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

/*
 * Sync function for taking a snapshot.  arg1 is the dataset being
 * snapshotted and arg2 the snapshot name.  A new dataset object is
 * allocated and filled in from the current state of ds; ds then gets a
 * fresh deadlist and the new snapshot becomes its previous snapshot.
 */
void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/* Allocate the snapshot's dataset object and fill in its bonus. */
	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	/* The snapshot slots in between ds's old previous snapshot and ds. */
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	/* The snapshot takes over ds's current deadlist and space totals. */
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	/*
	 * NOTE(review): dsphys->ds_creation_txg is still read further down,
	 * after this release.  Presumably safe because the buffer was
	 * dirtied in this tx -- confirm.
	 */
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		/*
		 * If the old previous snapshot pointed at us, it now points
		 * at the new snapshot.
		 */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx);
	}

	/* Give ds a new, empty deadlist and link it to the snapshot. */
	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	/* Enter the snapshot into ds's snapshot-name ZAP. */
	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	/* Swap the in-core ds_prev hold over to the new snapshot. */
	if (ds->ds_prev)
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
	VERIFY(0 == dsl_dataset_open_obj(dp,
	    ds->ds_phys->ds_prev_snap_obj, snapname,
	    DS_MODE_NONE, ds, &ds->ds_prev));

	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
	    "dataset = %llu", dsobj);
}

/*
 * Write out a dirty dataset in syncing context: persist ds_fsid_guid,
 * dirty the containing dsl_dir, and sync the objset under `zio'.  The
 * dataset must have an open objset (ds_user_ptr) and must not be a
 * snapshot.
 */
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

	dsl_dir_dirty(ds->ds_dir, tx);
	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
}

/*
 * Add this dataset's properties to nv: the dsl_dir's stats first, then
 * available/referenced space, creation time and txg, refquota and
 * refreservation.  For a snapshot, USED and COMPRESSRATIO are added from
 * the dataset's own numbers.
 */
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	uint64_t refd, avail, uobjs, aobjs;

	dsl_dir_stats(ds->ds_dir, nv);

	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
	    ds->ds_phys->ds_creation_time);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
	    ds->ds_phys->ds_creation_txg);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
	    ds->ds_quota);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
	    ds->ds_reserved);

	if (ds->ds_phys->ds_next_snap_obj) {
		/*
		 * This is a snapshot; override the dd's space used with
		 * our unique space and compression ratio.
		 */
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
		    ds->ds_phys->ds_unique_bytes);
		/* Ratio is scaled by 100; 100 when nothing is stored. */
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
		    ds->ds_phys->ds_compressed_bytes));
	}
}

/*
 * Fill in the cheap-to-compute fields of *stat.
 *
 * NOTE(review): dds_is_snapshot, dds_num_clones and dds_origin are only
 * written on the paths below; callers presumably pre-zero *stat -- confirm.
 */
void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	stat->dds_guid = ds->ds_phys->ds_guid;
	if (ds->ds_phys->ds_next_snap_obj) {
		stat->dds_is_snapshot = B_TRUE;
		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	}

	/* clone origin is really a dsl_dir thing... */
	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
	if (ds->ds_dir->dd_phys->dd_origin_obj) {
		dsl_dataset_t *ods;

		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
		    ds->ds_dir->dd_phys->dd_origin_obj,
		    NULL, DS_MODE_NONE, FTAG, &ods));
		dsl_dataset_name(ods, stat->dds_origin);
		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
	}
	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}

/* Return the in-core fsid guid of the dataset. */
uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}

/*
 * Report space usage for the dataset:
 *   *refdbytesp  - bytes referenced (ds_used_bytes)
 *   *availbytesp - bytes available in the dsl_dir, plus any unconsumed
 *                  refreservation, capped by what remains of the refquota
 *   *usedobjsp   - blk_fill of the dataset's root block pointer
 *   *availobjsp  - DN_MAX_OBJECT minus *usedobjsp
 */
void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_used_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	/* The unconsumed part of the refreservation is also available. */
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}

/*
 * Return B_TRUE if the dataset's root block was born after its previous
 * snapshot was created; B_FALSE otherwise, including when there is no
 * previous snapshot.  The caller must hold dp_config_rwlock or be in
 * sync context.
 */
boolean_t
dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));
	if (ds->ds_prev == NULL)
		return (B_FALSE);
	if (ds->ds_phys->ds_bp.blk_birth >
	    ds->ds_prev->ds_phys->ds_creation_txg)
		return (B_TRUE);
	return (B_FALSE);
}

/*
 * Check function for renaming a snapshot.  arg1 is the snapshot dataset,
 * arg2 the new snapshot name (the part after the '@').  Returns EEXIST if
 * the new name is already present in the head dataset's snapshot-name
 * ZAP, ENAMETOOLONG if the full name would not fit in MAXNAMELEN, another
 * error from opening the head dataset or from zap_lookup(), or 0.
 */
/* ARGSUSED */
static int
dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
dsl_dataset_t *hds; 1830 uint64_t val; 1831 int err; 1832 1833 err = dsl_dataset_open_obj(dd->dd_pool, 1834 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); 1835 if (err) 1836 return (err); 1837 1838 /* new name better not be in use */ 1839 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, 1840 newsnapname, 8, 1, &val); 1841 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1842 1843 if (err == 0) 1844 err = EEXIST; 1845 else if (err == ENOENT) 1846 err = 0; 1847 1848 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1849 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 1850 err = ENAMETOOLONG; 1851 1852 return (err); 1853 } 1854 1855 static void 1856 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 1857 cred_t *cr, dmu_tx_t *tx) 1858 { 1859 dsl_dataset_t *ds = arg1; 1860 const char *newsnapname = arg2; 1861 dsl_dir_t *dd = ds->ds_dir; 1862 objset_t *mos = dd->dd_pool->dp_meta_objset; 1863 dsl_dataset_t *hds; 1864 int err; 1865 1866 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 1867 1868 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1869 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); 1870 1871 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1872 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, 1873 ds->ds_snapname, tx); 1874 ASSERT3U(err, ==, 0); 1875 mutex_enter(&ds->ds_lock); 1876 (void) strcpy(ds->ds_snapname, newsnapname); 1877 mutex_exit(&ds->ds_lock); 1878 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 1879 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 1880 ASSERT3U(err, ==, 0); 1881 1882 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 1883 cr, "dataset = %llu", ds->ds_object); 1884 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1885 } 1886 1887 struct renamesnaparg { 1888 dsl_sync_task_group_t *dstg; 1889 char failed[MAXPATHLEN]; 1890 char *oldsnap; 1891 char *newsnap; 1892 }; 1893 1894 static int 1895 dsl_snapshot_rename_one(char *name, void *arg) 
1896 { 1897 struct renamesnaparg *ra = arg; 1898 dsl_dataset_t *ds = NULL; 1899 char *cp; 1900 int err; 1901 1902 cp = name + strlen(name); 1903 *cp = '@'; 1904 (void) strcpy(cp + 1, ra->oldsnap); 1905 1906 /* 1907 * For recursive snapshot renames the parent won't be changing 1908 * so we just pass name for both the to/from argument. 1909 */ 1910 if (err = zfs_secpolicy_rename_perms(name, name, CRED())) { 1911 (void) strcpy(ra->failed, name); 1912 return (err); 1913 } 1914 1915 err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD, 1916 ra->dstg, &ds); 1917 if (err == ENOENT) { 1918 *cp = '\0'; 1919 return (0); 1920 } 1921 if (err) { 1922 (void) strcpy(ra->failed, name); 1923 *cp = '\0'; 1924 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1925 return (err); 1926 } 1927 1928 #ifdef _KERNEL 1929 /* for all filesystems undergoing rename, we'll need to unmount it */ 1930 (void) zfs_unmount_snap(name, NULL); 1931 #endif 1932 1933 *cp = '\0'; 1934 1935 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 1936 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 1937 1938 return (0); 1939 } 1940 1941 static int 1942 dsl_recursive_rename(char *oldname, const char *newname) 1943 { 1944 int err; 1945 struct renamesnaparg *ra; 1946 dsl_sync_task_t *dst; 1947 spa_t *spa; 1948 char *cp, *fsname = spa_strdup(oldname); 1949 int len = strlen(oldname); 1950 1951 /* truncate the snapshot name to get the fsname */ 1952 cp = strchr(fsname, '@'); 1953 *cp = '\0'; 1954 1955 err = spa_open(fsname, &spa, FTAG); 1956 if (err) { 1957 kmem_free(fsname, len + 1); 1958 return (err); 1959 } 1960 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 1961 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 1962 1963 ra->oldsnap = strchr(oldname, '@') + 1; 1964 ra->newsnap = strchr(newname, '@') + 1; 1965 *ra->failed = '\0'; 1966 1967 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 1968 DS_FIND_CHILDREN); 1969 kmem_free(fsname, len + 1); 1970 1971 
	/* Only run the queued tasks if every filesystem was set up. */
	if (err == 0) {
		err = dsl_sync_task_group_wait(ra->dstg);
	}

	/*
	 * Release the hold that dsl_snapshot_rename_one() took on each
	 * dataset, recording the name of any task that failed.
	 */
	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			dsl_dir_name(ds->ds_dir, ra->failed);
			(void) strcat(ra->failed, "@");
			(void) strcat(ra->failed, ra->newsnap);
		}
		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
	}

	/* Report the failing snapshot back through the caller's buffer. */
	if (err)
		(void) strcpy(oldname, ra->failed);

	dsl_sync_task_group_destroy(ra->dstg);
	kmem_free(ra, sizeof (struct renamesnaparg));
	spa_close(spa, FTAG);
	return (err);
}

/*
 * dmu_objset_find() callback: arg points to an int holding the number of
 * characters the rename adds.  Returns ENAMETOOLONG if oldname grown by
 * that amount would not fit in MAXNAMELEN, 0 otherwise.
 */
static int
dsl_valid_rename(char *oldname, void *arg)
{
	int delta = *(int *)arg;

	if (strlen(oldname) + delta >= MAXNAMELEN)
		return (ENAMETOOLONG);

	return (0);
}

/*
 * Rename a filesystem or a snapshot.
 *
 * If oldname names a dsl_dir, the rename is handed to dsl_dir_rename()
 * after checking, when the name grows, that every child and snapshot
 * name still fits.  If oldname names a snapshot, newname must be a
 * snapshot of the same filesystem (EINVAL if it has no '@', EXDEV if the
 * filesystem part differs); `recursive' selects renaming the snapshot in
 * all descendant filesystems as well.  Returns ENOENT if oldname ends in
 * a component that is neither.
 */
#pragma weak dmu_objset_rename = dsl_dataset_rename
int
dsl_dataset_rename(char *oldname, const char *newname,
    boolean_t recursive)
{
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	const char *tail;
	int err;

	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
	if (err)
		return (err);
	/* No leftover component: oldname is a dsl_dir. */
	if (tail == NULL) {
		int delta = strlen(newname) - strlen(oldname);

		/* if we're growing, validate child size lengths */
		if (delta > 0)
			err = dmu_objset_find(oldname, dsl_valid_rename,
			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);

		if (!err)
			err = dsl_dir_rename(dd, newname);
		dsl_dir_close(dd, FTAG);
		return (err);
	}
	if (tail[0] != '@') {
		/* the name ended in a nonexistent component */
		dsl_dir_close(dd, FTAG);
		return (ENOENT);
	}

	dsl_dir_close(dd, FTAG);

	/* new name must be snapshot in same filesystem */
	tail = strchr(newname, '@');
	if (tail == NULL)
		return (EINVAL);
	tail++;
	/* Compare everything up to and including the '@'. */
	if (strncmp(oldname, newname, tail - newname) != 0)
		return (EXDEV);

	if (recursive) {
		err = dsl_recursive_rename(oldname, newname);
	} else {
		err = dsl_dataset_open(oldname,
		    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
		if (err)
			return (err);

		/* tail now points at the new snapshot name (after '@'). */
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_snapshot_rename_check,
		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);

		dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
	}

	return (err);
}

/*
 * Values computed by dsl_dataset_promote_check() and passed on as arg2 to
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	/* space totals for the snapshots being moved, and origin's unique */
	uint64_t used, comp, uncomp, unique;
	/* origin's new next snapshot; origin head's snapshot-name ZAP */
	uint64_t newnext_obj, snapnames_obj;
};

/*
 * Check function for promoting a clone.  arg1 is the clone's head
 * dataset; arg2 is a struct promotearg, zeroed and then filled in here.
 *
 * Returns EINVAL if the dataset is not a clone.  Outside syncing context
 * nothing further is checked and 0 is returned.  In syncing context:
 * EXDEV if DS_FLAG_NOPROMOTE is set, EEXIST if one of the snapshots to be
 * moved has a name already present in the clone's snapshot-name ZAP, an
 * error from opening a dataset or reading a deadlist, or the result of
 * dsl_dir_transfer_possible().
 */
/* ARGSUSED */
static int
dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *odd = NULL;
	dsl_dataset_t *ds = NULL;
	dsl_dataset_t *origin_ds = NULL;
	dsl_dataset_t *newnext_ds = NULL;
	int err;
	char *name = NULL;
	uint64_t itor = 0;
	blkptr_t bp;

	bzero(pa, sizeof (*pa));

	/* Check that it is a clone */
	if (dd->dd_phys->dd_origin_obj == 0)
		return (EINVAL);

	/* Since this is so expensive, don't do the preliminary check */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds))
		goto out;
	odd = origin_ds->ds_dir;

	{
		/* Remember the origin head's snapshot-name ZAP object. */
		dsl_dataset_t *phds;
		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    odd->dd_phys->dd_head_dataset_obj,
		    NULL, DS_MODE_NONE, FTAG, &phds))
			goto out;
		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
	}

	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
		err = EXDEV;
		goto out;
	}

	/* find origin's new next ds */
	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
	/* Walk back from the clone's head until prev is the origin. */
	while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) {
		dsl_dataset_t *prev;

		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    newnext_ds->ds_phys->ds_prev_snap_obj,
		    NULL, DS_MODE_NONE, FTAG, &prev))
			goto out;
		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
		newnext_ds = prev;
	}
	pa->newnext_obj = newnext_ds->ds_object;

	/* compute origin's new unique space */
	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
	    &itor, &bp)) == 0) {
		if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg)
			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
	}
	/* ENOENT is the only non-error way out of the loop above. */
	if (err != ENOENT)
		goto out;

	/* Walk the snapshots that we are moving */
	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	ds = origin_ds;
	/* CONSTCOND */
	while (TRUE) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *prev;

		/* Check that the snapshot name does not conflict */
		/*
		 * NOTE(review): `name' itself is not read afterwards; the
		 * call is presumably made so that ds->ds_snapname is
		 * populated for the lookup -- confirm.
		 */
		dsl_dataset_name(ds, name);
		err = zap_lookup(dd->dd_pool->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &val);
		if (err != ENOENT) {
			if (err == 0)
				err = EEXIST;
			goto out;
		}

		/*
		 * compute space to transfer.  Each snapshot gave birth to:
		 * (my used) - (prev's used) + (deadlist's used)
		 */
		pa->used += ds->ds_phys->ds_used_bytes;
		pa->comp += ds->ds_phys->ds_compressed_bytes;
		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;

		/* If we reach the first snapshot, we're done. */
		if (ds->ds_phys->ds_prev_snap_obj == 0)
			break;

		if (err = bplist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp))
			goto out;
		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
		    FTAG, &prev))
			goto out;
		pa->used += dlused - prev->ds_phys->ds_used_bytes;
		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;

		/*
		 * We could be a clone of a clone.  If we reach our
		 * parent's branch point, we're done.
		 */
		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
			break;
		}
		/* origin_ds stays open until `out'; close the rest as we go */
		if (ds != origin_ds)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		ds = prev;
	}

	/* Check that there is enough space here */
	err = dsl_dir_transfer_possible(odd, dd, pa->used);

out:
	/* Release whatever is still held, on success and failure alike. */
	if (ds && ds != origin_ds)
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
	if (origin_ds)
		dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
	if (newnext_ds)
		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
	if (name)
		kmem_free(name, MAXPATHLEN);
	return (err);
}

static void
dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *odd = NULL;
	dsl_dataset_t *ds, *origin_ds;
	char *name;

	ASSERT(dd->dd_phys->dd_origin_obj != 0);
	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));

	VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds));
	/*
	 * We need to explicitly open odd, since origin_ds's dd will be
	 * changing.
2231 */ 2232 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2233 NULL, FTAG, &odd)); 2234 2235 /* move snapshots to this dir */ 2236 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2237 ds = origin_ds; 2238 /* CONSTCOND */ 2239 while (TRUE) { 2240 dsl_dataset_t *prev; 2241 2242 /* move snap name entry */ 2243 dsl_dataset_name(ds, name); 2244 VERIFY(0 == zap_remove(dp->dp_meta_objset, 2245 pa->snapnames_obj, ds->ds_snapname, tx)); 2246 VERIFY(0 == zap_add(dp->dp_meta_objset, 2247 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2248 8, 1, &ds->ds_object, tx)); 2249 2250 /* change containing dsl_dir */ 2251 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2252 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2253 ds->ds_phys->ds_dir_obj = dd->dd_object; 2254 ASSERT3P(ds->ds_dir, ==, odd); 2255 dsl_dir_close(ds->ds_dir, ds); 2256 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2257 NULL, ds, &ds->ds_dir)); 2258 2259 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2260 2261 if (ds->ds_phys->ds_prev_snap_obj == 0) 2262 break; 2263 2264 VERIFY(0 == dsl_dataset_open_obj(dp, 2265 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 2266 FTAG, &prev)); 2267 2268 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 2269 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 2270 break; 2271 } 2272 if (ds != origin_ds) 2273 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2274 ds = prev; 2275 } 2276 if (ds != origin_ds) 2277 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2278 2279 /* change origin's next snap */ 2280 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2281 origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; 2282 2283 /* change origin */ 2284 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2285 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2286 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2287 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2288 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2289 2290 /* change space accounting */ 2291 
dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx); 2292 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); 2293 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2294 2295 /* log history record */ 2296 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2297 cr, "dataset = %llu", ds->ds_object); 2298 2299 dsl_dir_close(odd, FTAG); 2300 dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); 2301 kmem_free(name, MAXPATHLEN); 2302 } 2303 2304 int 2305 dsl_dataset_promote(const char *name) 2306 { 2307 dsl_dataset_t *ds; 2308 int err; 2309 dmu_object_info_t doi; 2310 struct promotearg pa; 2311 2312 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 2313 if (err) 2314 return (err); 2315 2316 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 2317 ds->ds_phys->ds_snapnames_zapobj, &doi); 2318 if (err) { 2319 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2320 return (err); 2321 } 2322 2323 /* 2324 * Add in 128x the snapnames zapobj size, since we will be moving 2325 * a bunch of snapnames to the promoted ds, and dirtying their 2326 * bonus buffers. 
2327 */ 2328 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2329 dsl_dataset_promote_check, 2330 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); 2331 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2332 return (err); 2333 } 2334 2335 struct cloneswaparg { 2336 dsl_dataset_t *cds; /* clone dataset */ 2337 dsl_dataset_t *ohds; /* origin's head dataset */ 2338 boolean_t force; 2339 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 2340 }; 2341 2342 /* ARGSUSED */ 2343 static int 2344 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2345 { 2346 struct cloneswaparg *csa = arg1; 2347 2348 /* they should both be heads */ 2349 if (dsl_dataset_is_snapshot(csa->cds) || 2350 dsl_dataset_is_snapshot(csa->ohds)) 2351 return (EINVAL); 2352 2353 /* the branch point should be just before them */ 2354 if (csa->cds->ds_prev != csa->ohds->ds_prev) 2355 return (EINVAL); 2356 2357 /* cds should be the clone */ 2358 if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != 2359 csa->ohds->ds_object) 2360 return (EINVAL); 2361 2362 /* the clone should be a child of the origin */ 2363 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 2364 return (EINVAL); 2365 2366 /* ohds shouldn't be modified unless 'force' */ 2367 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 2368 return (ETXTBSY); 2369 2370 /* adjust amount of any unconsumed refreservation */ 2371 csa->unused_refres_delta = 2372 (int64_t)MIN(csa->ohds->ds_reserved, 2373 csa->ohds->ds_phys->ds_unique_bytes) - 2374 (int64_t)MIN(csa->ohds->ds_reserved, 2375 csa->cds->ds_phys->ds_unique_bytes); 2376 2377 if (csa->unused_refres_delta > 0 && 2378 csa->unused_refres_delta > 2379 dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 2380 return (ENOSPC); 2381 2382 return (0); 2383 } 2384 2385 /* ARGSUSED */ 2386 static void 2387 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2388 { 2389 struct cloneswaparg *csa = arg1; 2390 dsl_pool_t *dp 
= csa->cds->ds_dir->dd_pool; 2391 uint64_t itor = 0; 2392 blkptr_t bp; 2393 uint64_t unique = 0; 2394 int err; 2395 2396 ASSERT(csa->cds->ds_reserved == 0); 2397 ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); 2398 2399 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 2400 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 2401 dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); 2402 2403 if (csa->cds->ds_user_ptr != NULL) { 2404 csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); 2405 csa->cds->ds_user_ptr = NULL; 2406 } 2407 2408 if (csa->ohds->ds_user_ptr != NULL) { 2409 csa->ohds->ds_user_evict_func(csa->ohds, 2410 csa->ohds->ds_user_ptr); 2411 csa->ohds->ds_user_ptr = NULL; 2412 } 2413 2414 /* compute unique space */ 2415 while ((err = bplist_iterate(&csa->cds->ds_deadlist, 2416 &itor, &bp)) == 0) { 2417 if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg) 2418 unique += bp_get_dasize(dp->dp_spa, &bp); 2419 } 2420 VERIFY(err == ENOENT); 2421 2422 /* reset origin's unique bytes */ 2423 csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique; 2424 2425 /* swap blkptrs */ 2426 { 2427 blkptr_t tmp; 2428 tmp = csa->ohds->ds_phys->ds_bp; 2429 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 2430 csa->cds->ds_phys->ds_bp = tmp; 2431 } 2432 2433 /* set dd_*_bytes */ 2434 { 2435 int64_t dused, dcomp, duncomp; 2436 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2437 uint64_t odl_used, odl_comp, odl_uncomp; 2438 2439 VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2440 &cdl_comp, &cdl_uncomp)); 2441 VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2442 &odl_comp, &odl_uncomp)); 2443 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 2444 (csa->ohds->ds_phys->ds_used_bytes + odl_used); 2445 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 2446 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 2447 duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 2448 cdl_uncomp - 2449 (csa->ohds->ds_phys->ds_uncompressed_bytes + 
odl_uncomp); 2450 2451 dsl_dir_diduse_space(csa->ohds->ds_dir, 2452 dused, dcomp, duncomp, tx); 2453 dsl_dir_diduse_space(csa->cds->ds_dir, 2454 -dused, -dcomp, -duncomp, tx); 2455 } 2456 2457 #define SWITCH64(x, y) \ 2458 { \ 2459 uint64_t __tmp = (x); \ 2460 (x) = (y); \ 2461 (y) = __tmp; \ 2462 } 2463 2464 /* swap ds_*_bytes */ 2465 SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 2466 csa->cds->ds_phys->ds_used_bytes); 2467 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 2468 csa->cds->ds_phys->ds_compressed_bytes); 2469 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 2470 csa->cds->ds_phys->ds_uncompressed_bytes); 2471 SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 2472 csa->cds->ds_phys->ds_unique_bytes); 2473 2474 /* apply any parent delta for change in unconsumed refreservation */ 2475 dsl_dir_diduse_space(csa->ohds->ds_dir, csa->unused_refres_delta, 2476 0, 0, tx); 2477 2478 /* swap deadlists */ 2479 bplist_close(&csa->cds->ds_deadlist); 2480 bplist_close(&csa->ohds->ds_deadlist); 2481 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 2482 csa->cds->ds_phys->ds_deadlist_obj); 2483 VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 2484 csa->cds->ds_phys->ds_deadlist_obj)); 2485 VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 2486 csa->ohds->ds_phys->ds_deadlist_obj)); 2487 } 2488 2489 /* 2490 * Swap 'clone' with its origin head file system. 
2491 */ 2492 int 2493 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 2494 boolean_t force) 2495 { 2496 struct cloneswaparg csa; 2497 2498 ASSERT(clone->ds_open_refcount == DS_REF_MAX); 2499 ASSERT(origin_head->ds_open_refcount == DS_REF_MAX); 2500 2501 csa.cds = clone; 2502 csa.ohds = origin_head; 2503 csa.force = force; 2504 return (dsl_sync_task_do(clone->ds_dir->dd_pool, 2505 dsl_dataset_clone_swap_check, 2506 dsl_dataset_clone_swap_sync, &csa, NULL, 9)); 2507 } 2508 2509 /* 2510 * Given a pool name and a dataset object number in that pool, 2511 * return the name of that dataset. 2512 */ 2513 int 2514 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2515 { 2516 spa_t *spa; 2517 dsl_pool_t *dp; 2518 dsl_dataset_t *ds = NULL; 2519 int error; 2520 2521 if ((error = spa_open(pname, &spa, FTAG)) != 0) 2522 return (error); 2523 dp = spa_get_dsl(spa); 2524 rw_enter(&dp->dp_config_rwlock, RW_READER); 2525 if ((error = dsl_dataset_open_obj(dp, obj, 2526 NULL, DS_MODE_NONE, FTAG, &ds)) != 0) { 2527 rw_exit(&dp->dp_config_rwlock); 2528 spa_close(spa, FTAG); 2529 return (error); 2530 } 2531 dsl_dataset_name(ds, buf); 2532 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2533 rw_exit(&dp->dp_config_rwlock); 2534 spa_close(spa, FTAG); 2535 2536 return (0); 2537 } 2538 2539 int 2540 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2541 uint64_t asize, uint64_t inflight, uint64_t *used) 2542 { 2543 int error = 0; 2544 2545 ASSERT3S(asize, >, 0); 2546 2547 mutex_enter(&ds->ds_lock); 2548 /* 2549 * Make a space adjustment for reserved bytes. 
2550 */ 2551 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2552 ASSERT3U(*used, >=, 2553 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2554 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2555 } 2556 2557 if (!check_quota || ds->ds_quota == 0) { 2558 mutex_exit(&ds->ds_lock); 2559 return (0); 2560 } 2561 /* 2562 * If they are requesting more space, and our current estimate 2563 * is over quota, they get to try again unless the actual 2564 * on-disk is over quota and there are no pending changes (which 2565 * may free up space for us). 2566 */ 2567 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 2568 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 2569 error = ERESTART; 2570 else 2571 error = EDQUOT; 2572 } 2573 mutex_exit(&ds->ds_lock); 2574 2575 return (error); 2576 } 2577 2578 /* ARGSUSED */ 2579 static int 2580 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 2581 { 2582 dsl_dataset_t *ds = arg1; 2583 uint64_t *quotap = arg2; 2584 uint64_t new_quota = *quotap; 2585 2586 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 2587 return (ENOTSUP); 2588 2589 if (new_quota == 0) 2590 return (0); 2591 2592 if (new_quota < ds->ds_phys->ds_used_bytes || 2593 new_quota < ds->ds_reserved) 2594 return (ENOSPC); 2595 2596 return (0); 2597 } 2598 2599 /* ARGSUSED */ 2600 void 2601 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2602 { 2603 dsl_dataset_t *ds = arg1; 2604 uint64_t *quotap = arg2; 2605 uint64_t new_quota = *quotap; 2606 2607 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2608 2609 mutex_enter(&ds->ds_lock); 2610 ds->ds_quota = new_quota; 2611 mutex_exit(&ds->ds_lock); 2612 2613 dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 2614 2615 spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 2616 tx, cr, "%lld dataset = %llu ", 2617 (longlong_t)new_quota, ds->ds_dir->dd_phys->dd_head_dataset_obj); 2618 } 2619 2620 int 2621 
dsl_dataset_set_quota(const char *dsname, uint64_t quota) 2622 { 2623 dsl_dataset_t *ds; 2624 int err; 2625 2626 err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); 2627 if (err) 2628 return (err); 2629 2630 if (quota != ds->ds_quota) { 2631 /* 2632 * If someone removes a file, then tries to set the quota, we 2633 * want to make sure the file freeing takes effect. 2634 */ 2635 txg_wait_open(ds->ds_dir->dd_pool, 0); 2636 2637 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2638 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 2639 ds, "a, 0); 2640 } 2641 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2642 return (err); 2643 } 2644 2645 static int 2646 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 2647 { 2648 dsl_dataset_t *ds = arg1; 2649 uint64_t *reservationp = arg2; 2650 uint64_t new_reservation = *reservationp; 2651 int64_t delta; 2652 uint64_t unique; 2653 2654 if (new_reservation > INT64_MAX) 2655 return (EOVERFLOW); 2656 2657 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 2658 SPA_VERSION_REFRESERVATION) 2659 return (ENOTSUP); 2660 2661 if (dsl_dataset_is_snapshot(ds)) 2662 return (EINVAL); 2663 2664 /* 2665 * If we are doing the preliminary check in open context, the 2666 * space estimates may be inaccurate. 
2667 */ 2668 if (!dmu_tx_is_syncing(tx)) 2669 return (0); 2670 2671 mutex_enter(&ds->ds_lock); 2672 unique = dsl_dataset_unique(ds); 2673 delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved); 2674 mutex_exit(&ds->ds_lock); 2675 2676 if (delta > 0 && 2677 delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 2678 return (ENOSPC); 2679 if (delta > 0 && ds->ds_quota > 0 && 2680 new_reservation > ds->ds_quota) 2681 return (ENOSPC); 2682 2683 return (0); 2684 } 2685 2686 /* ARGSUSED */ 2687 static void 2688 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 2689 dmu_tx_t *tx) 2690 { 2691 dsl_dataset_t *ds = arg1; 2692 uint64_t *reservationp = arg2; 2693 uint64_t new_reservation = *reservationp; 2694 uint64_t unique; 2695 int64_t delta; 2696 2697 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2698 2699 mutex_enter(&ds->ds_lock); 2700 unique = dsl_dataset_unique(ds); 2701 delta = MAX(0, (int64_t)(new_reservation - unique)) - 2702 MAX(0, (int64_t)(ds->ds_reserved - unique)); 2703 ds->ds_reserved = new_reservation; 2704 mutex_exit(&ds->ds_lock); 2705 2706 dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation", 2707 new_reservation, cr, tx); 2708 2709 dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx); 2710 2711 spa_history_internal_log(LOG_DS_REFRESERV, 2712 ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 2713 (longlong_t)new_reservation, 2714 ds->ds_dir->dd_phys->dd_head_dataset_obj); 2715 } 2716 2717 int 2718 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 2719 { 2720 dsl_dataset_t *ds; 2721 int err; 2722 2723 err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); 2724 if (err) 2725 return (err); 2726 2727 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2728 dsl_dataset_set_reservation_check, 2729 dsl_dataset_set_reservation_sync, ds, &reservation, 0); 2730 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2731 return (err); 2732 } 2733