1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/dmu_objset.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_prop.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dmu_traverse.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zio.h> 37 #include <sys/zap.h> 38 #include <sys/unique.h> 39 #include <sys/zfs_context.h> 40 #include <sys/zfs_ioctl.h> 41 #include <sys/spa.h> 42 #include <sys/sunddi.h> 43 44 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 45 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 46 static dsl_checkfunc_t dsl_dataset_rollback_check; 47 static dsl_syncfunc_t dsl_dataset_rollback_sync; 48 static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 49 50 #define DS_REF_MAX (1ULL << 62) 51 52 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 53 54 /* 55 * We use weighted reference counts to express the various forms of exclusion 56 * between different open modes. 
A STANDARD open is 1 point, an EXCLUSIVE open 57 * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. 58 * This makes the exclusion logic simple: the total refcnt for all opens cannot 59 * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their 60 * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume 61 * just over half of the refcnt space, so there can't be more than one, but it 62 * can peacefully coexist with any number of STANDARD opens. 63 */ 64 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { 65 0, /* DS_MODE_NONE - invalid */ 66 1, /* DS_MODE_STANDARD - unlimited number */ 67 (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */ 68 DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */ 69 }; 70 71 /* 72 * Figure out how much of this delta should be propogated to the dsl_dir 73 * layer. If there's a refreservation, that space has already been 74 * partially accounted for in our ancestors. 75 */ 76 static int64_t 77 parent_delta(dsl_dataset_t *ds, int64_t delta) 78 { 79 uint64_t old_bytes, new_bytes; 80 81 if (ds->ds_reserved == 0) 82 return (delta); 83 84 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 85 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 86 87 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 88 return (new_bytes - old_bytes); 89 } 90 91 void 92 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 93 { 94 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 95 int compressed = BP_GET_PSIZE(bp); 96 int uncompressed = BP_GET_UCSIZE(bp); 97 int64_t delta; 98 99 dprintf_bp(bp, "born, ds=%p\n", ds); 100 101 ASSERT(dmu_tx_is_syncing(tx)); 102 /* It could have been compressed away to nothing */ 103 if (BP_IS_HOLE(bp)) 104 return; 105 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 106 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 107 if (ds == NULL) { 108 /* 109 * Account for the meta-objset space in its 
placeholder 110 * dsl_dir. 111 */ 112 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 113 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 114 used, compressed, uncompressed, tx); 115 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 116 return; 117 } 118 dmu_buf_will_dirty(ds->ds_dbuf, tx); 119 mutex_enter(&ds->ds_lock); 120 delta = parent_delta(ds, used); 121 ds->ds_phys->ds_used_bytes += used; 122 ds->ds_phys->ds_compressed_bytes += compressed; 123 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 124 ds->ds_phys->ds_unique_bytes += used; 125 mutex_exit(&ds->ds_lock); 126 dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx); 127 } 128 129 void 130 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 131 dmu_tx_t *tx) 132 { 133 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 134 int compressed = BP_GET_PSIZE(bp); 135 int uncompressed = BP_GET_UCSIZE(bp); 136 137 ASSERT(dmu_tx_is_syncing(tx)); 138 /* No block pointer => nothing to free */ 139 if (BP_IS_HOLE(bp)) 140 return; 141 142 ASSERT(used > 0); 143 if (ds == NULL) { 144 int err; 145 /* 146 * Account for the meta-objset space in its placeholder 147 * dataset. 148 */ 149 err = arc_free(pio, tx->tx_pool->dp_spa, 150 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 151 ASSERT(err == 0); 152 153 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 154 -used, -compressed, -uncompressed, tx); 155 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 156 return; 157 } 158 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 159 160 dmu_buf_will_dirty(ds->ds_dbuf, tx); 161 162 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 163 int err; 164 int64_t delta; 165 166 dprintf_bp(bp, "freeing: %s", ""); 167 err = arc_free(pio, tx->tx_pool->dp_spa, 168 tx->tx_txg, bp, NULL, NULL, pio ? 
ARC_NOWAIT: ARC_WAIT); 169 ASSERT(err == 0); 170 171 mutex_enter(&ds->ds_lock); 172 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 173 !DS_UNIQUE_IS_ACCURATE(ds)); 174 delta = parent_delta(ds, -used); 175 ds->ds_phys->ds_unique_bytes -= used; 176 mutex_exit(&ds->ds_lock); 177 dsl_dir_diduse_space(ds->ds_dir, 178 delta, -compressed, -uncompressed, tx); 179 } else { 180 dprintf_bp(bp, "putting on dead list: %s", ""); 181 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 182 ASSERT3U(ds->ds_prev->ds_object, ==, 183 ds->ds_phys->ds_prev_snap_obj); 184 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 185 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 186 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 187 ds->ds_object && bp->blk_birth > 188 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 189 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 190 mutex_enter(&ds->ds_prev->ds_lock); 191 ds->ds_prev->ds_phys->ds_unique_bytes += used; 192 mutex_exit(&ds->ds_prev->ds_lock); 193 } 194 } 195 mutex_enter(&ds->ds_lock); 196 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 197 ds->ds_phys->ds_used_bytes -= used; 198 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 199 ds->ds_phys->ds_compressed_bytes -= compressed; 200 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 201 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 202 mutex_exit(&ds->ds_lock); 203 } 204 205 uint64_t 206 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 207 { 208 uint64_t trysnap = 0; 209 210 if (ds == NULL) 211 return (0); 212 /* 213 * The snapshot creation could fail, but that would cause an 214 * incorrect FALSE return, which would only result in an 215 * overestimation of the amount of space that an operation would 216 * consume, which is OK. 217 * 218 * There's also a small window where we could miss a pending 219 * snapshot, because we could set the sync task in the quiescing 220 * phase. So this should only be used as a guess. 
221 */ 222 if (ds->ds_trysnap_txg > 223 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 224 trysnap = ds->ds_trysnap_txg; 225 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 226 } 227 228 int 229 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 230 { 231 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 232 } 233 234 /* ARGSUSED */ 235 static void 236 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 237 { 238 dsl_dataset_t *ds = dsv; 239 240 /* open_refcount == DS_REF_MAX when deleting */ 241 ASSERT(ds->ds_open_refcount == 0 || 242 ds->ds_open_refcount == DS_REF_MAX); 243 244 dprintf_ds(ds, "evicting %s\n", ""); 245 246 unique_remove(ds->ds_fsid_guid); 247 248 if (ds->ds_user_ptr != NULL) 249 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 250 251 if (ds->ds_prev) { 252 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 253 ds->ds_prev = NULL; 254 } 255 256 bplist_close(&ds->ds_deadlist); 257 dsl_dir_close(ds->ds_dir, ds); 258 259 ASSERT(!list_link_active(&ds->ds_synced_link)); 260 261 mutex_destroy(&ds->ds_lock); 262 mutex_destroy(&ds->ds_opening_lock); 263 mutex_destroy(&ds->ds_deadlist.bpl_lock); 264 265 kmem_free(ds, sizeof (dsl_dataset_t)); 266 } 267 268 static int 269 dsl_dataset_get_snapname(dsl_dataset_t *ds) 270 { 271 dsl_dataset_phys_t *headphys; 272 int err; 273 dmu_buf_t *headdbuf; 274 dsl_pool_t *dp = ds->ds_dir->dd_pool; 275 objset_t *mos = dp->dp_meta_objset; 276 277 if (ds->ds_snapname[0]) 278 return (0); 279 if (ds->ds_phys->ds_next_snap_obj == 0) 280 return (0); 281 282 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 283 FTAG, &headdbuf); 284 if (err) 285 return (err); 286 headphys = headdbuf->db_data; 287 err = zap_value_search(dp->dp_meta_objset, 288 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 289 dmu_buf_rele(headdbuf, FTAG); 290 return (err); 291 } 292 293 int 294 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, 295 int mode, void *tag, dsl_dataset_t **dsp) 
296 { 297 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 298 objset_t *mos = dp->dp_meta_objset; 299 dmu_buf_t *dbuf; 300 dsl_dataset_t *ds; 301 int err; 302 303 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 304 dsl_pool_sync_context(dp)); 305 306 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 307 if (err) 308 return (err); 309 ds = dmu_buf_get_user(dbuf); 310 if (ds == NULL) { 311 dsl_dataset_t *winner; 312 313 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 314 ds->ds_dbuf = dbuf; 315 ds->ds_object = dsobj; 316 ds->ds_phys = dbuf->db_data; 317 318 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 319 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 320 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, 321 NULL); 322 323 err = bplist_open(&ds->ds_deadlist, 324 mos, ds->ds_phys->ds_deadlist_obj); 325 if (err == 0) { 326 err = dsl_dir_open_obj(dp, 327 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 328 } 329 if (err) { 330 /* 331 * we don't really need to close the blist if we 332 * just opened it. 
333 */ 334 mutex_destroy(&ds->ds_lock); 335 mutex_destroy(&ds->ds_opening_lock); 336 mutex_destroy(&ds->ds_deadlist.bpl_lock); 337 kmem_free(ds, sizeof (dsl_dataset_t)); 338 dmu_buf_rele(dbuf, tag); 339 return (err); 340 } 341 342 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { 343 ds->ds_snapname[0] = '\0'; 344 if (ds->ds_phys->ds_prev_snap_obj) { 345 err = dsl_dataset_open_obj(dp, 346 ds->ds_phys->ds_prev_snap_obj, NULL, 347 DS_MODE_NONE, ds, &ds->ds_prev); 348 } 349 } else { 350 if (snapname) { 351 #ifdef ZFS_DEBUG 352 dsl_dataset_phys_t *headphys; 353 dmu_buf_t *headdbuf; 354 err = dmu_bonus_hold(mos, 355 ds->ds_dir->dd_phys->dd_head_dataset_obj, 356 FTAG, &headdbuf); 357 if (err == 0) { 358 headphys = headdbuf->db_data; 359 uint64_t foundobj; 360 err = zap_lookup(dp->dp_meta_objset, 361 headphys->ds_snapnames_zapobj, 362 snapname, sizeof (foundobj), 1, 363 &foundobj); 364 ASSERT3U(foundobj, ==, dsobj); 365 dmu_buf_rele(headdbuf, FTAG); 366 } 367 #endif 368 (void) strcat(ds->ds_snapname, snapname); 369 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 370 err = dsl_dataset_get_snapname(ds); 371 } 372 } 373 374 if (!dsl_dataset_is_snapshot(ds)) { 375 /* 376 * In sync context, we're called with either no lock 377 * or with the write lock. If we're not syncing, 378 * we're always called with the read lock held. 
379 */ 380 boolean_t need_lock = 381 !RW_WRITE_HELD(&dp->dp_config_rwlock) && 382 dsl_pool_sync_context(dp); 383 384 if (need_lock) 385 rw_enter(&dp->dp_config_rwlock, RW_READER); 386 387 err = dsl_prop_get_ds_locked(ds->ds_dir, 388 "refreservation", sizeof (uint64_t), 1, 389 &ds->ds_reserved, NULL); 390 if (err == 0) { 391 err = dsl_prop_get_ds_locked(ds->ds_dir, 392 "refquota", sizeof (uint64_t), 1, 393 &ds->ds_quota, NULL); 394 } 395 396 if (need_lock) 397 rw_exit(&dp->dp_config_rwlock); 398 } else { 399 ds->ds_reserved = ds->ds_quota = 0; 400 } 401 402 if (err == 0) { 403 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 404 dsl_dataset_evict); 405 } 406 if (err || winner) { 407 bplist_close(&ds->ds_deadlist); 408 if (ds->ds_prev) { 409 dsl_dataset_close(ds->ds_prev, 410 DS_MODE_NONE, ds); 411 } 412 dsl_dir_close(ds->ds_dir, ds); 413 mutex_destroy(&ds->ds_lock); 414 mutex_destroy(&ds->ds_opening_lock); 415 mutex_destroy(&ds->ds_deadlist.bpl_lock); 416 kmem_free(ds, sizeof (dsl_dataset_t)); 417 if (err) { 418 dmu_buf_rele(dbuf, tag); 419 return (err); 420 } 421 ds = winner; 422 } else { 423 ds->ds_fsid_guid = 424 unique_insert(ds->ds_phys->ds_fsid_guid); 425 } 426 } 427 ASSERT3P(ds->ds_dbuf, ==, dbuf); 428 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 429 430 mutex_enter(&ds->ds_lock); 431 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && 432 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && 433 !DS_MODE_IS_INCONSISTENT(mode)) || 434 (ds->ds_open_refcount + weight > DS_REF_MAX)) { 435 mutex_exit(&ds->ds_lock); 436 dsl_dataset_close(ds, DS_MODE_NONE, tag); 437 return (EBUSY); 438 } 439 ds->ds_open_refcount += weight; 440 mutex_exit(&ds->ds_lock); 441 442 *dsp = ds; 443 return (0); 444 } 445 446 int 447 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, 448 void *tag, dsl_dataset_t **dsp) 449 { 450 dsl_dir_t *dd; 451 dsl_pool_t *dp; 452 const char *tail; 453 uint64_t obj; 454 dsl_dataset_t *ds = NULL; 455 int err = 0; 456 457 err = dsl_dir_open_spa(spa, 
name, FTAG, &dd, &tail); 458 if (err) 459 return (err); 460 461 dp = dd->dd_pool; 462 obj = dd->dd_phys->dd_head_dataset_obj; 463 rw_enter(&dp->dp_config_rwlock, RW_READER); 464 if (obj == 0) { 465 /* A dataset with no associated objset */ 466 err = ENOENT; 467 goto out; 468 } 469 470 if (tail != NULL) { 471 objset_t *mos = dp->dp_meta_objset; 472 473 err = dsl_dataset_open_obj(dp, obj, NULL, 474 DS_MODE_NONE, tag, &ds); 475 if (err) 476 goto out; 477 obj = ds->ds_phys->ds_snapnames_zapobj; 478 dsl_dataset_close(ds, DS_MODE_NONE, tag); 479 ds = NULL; 480 481 if (tail[0] != '@') { 482 err = ENOENT; 483 goto out; 484 } 485 tail++; 486 487 /* Look for a snapshot */ 488 if (!DS_MODE_IS_READONLY(mode)) { 489 err = EROFS; 490 goto out; 491 } 492 dprintf("looking for snapshot '%s'\n", tail); 493 err = zap_lookup(mos, obj, tail, 8, 1, &obj); 494 if (err) 495 goto out; 496 } 497 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); 498 499 out: 500 rw_exit(&dp->dp_config_rwlock); 501 dsl_dir_close(dd, FTAG); 502 503 ASSERT3U((err == 0), ==, (ds != NULL)); 504 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ 505 506 *dsp = ds; 507 return (err); 508 } 509 510 int 511 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) 512 { 513 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); 514 } 515 516 void 517 dsl_dataset_name(dsl_dataset_t *ds, char *name) 518 { 519 if (ds == NULL) { 520 (void) strcpy(name, "mos"); 521 } else { 522 dsl_dir_name(ds->ds_dir, name); 523 VERIFY(0 == dsl_dataset_get_snapname(ds)); 524 if (ds->ds_snapname[0]) { 525 (void) strcat(name, "@"); 526 if (!MUTEX_HELD(&ds->ds_lock)) { 527 /* 528 * We use a "recursive" mutex so that we 529 * can call dprintf_ds() with ds_lock held. 
530 */ 531 mutex_enter(&ds->ds_lock); 532 (void) strcat(name, ds->ds_snapname); 533 mutex_exit(&ds->ds_lock); 534 } else { 535 (void) strcat(name, ds->ds_snapname); 536 } 537 } 538 } 539 } 540 541 static int 542 dsl_dataset_namelen(dsl_dataset_t *ds) 543 { 544 int result; 545 546 if (ds == NULL) { 547 result = 3; /* "mos" */ 548 } else { 549 result = dsl_dir_namelen(ds->ds_dir); 550 VERIFY(0 == dsl_dataset_get_snapname(ds)); 551 if (ds->ds_snapname[0]) { 552 ++result; /* adding one for the @-sign */ 553 if (!MUTEX_HELD(&ds->ds_lock)) { 554 /* see dsl_datset_name */ 555 mutex_enter(&ds->ds_lock); 556 result += strlen(ds->ds_snapname); 557 mutex_exit(&ds->ds_lock); 558 } else { 559 result += strlen(ds->ds_snapname); 560 } 561 } 562 } 563 564 return (result); 565 } 566 567 void 568 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) 569 { 570 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 571 mutex_enter(&ds->ds_lock); 572 ASSERT3U(ds->ds_open_refcount, >=, weight); 573 ds->ds_open_refcount -= weight; 574 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", 575 mode, ds->ds_open_refcount); 576 mutex_exit(&ds->ds_lock); 577 578 dmu_buf_rele(ds->ds_dbuf, tag); 579 } 580 581 void 582 dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode) 583 { 584 uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; 585 uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; 586 mutex_enter(&ds->ds_lock); 587 ASSERT3U(ds->ds_open_refcount, >=, oldweight); 588 ASSERT3U(oldweight, >=, newweight); 589 ds->ds_open_refcount -= oldweight; 590 ds->ds_open_refcount += newweight; 591 mutex_exit(&ds->ds_lock); 592 } 593 594 boolean_t 595 dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode) 596 { 597 boolean_t rv; 598 uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; 599 uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; 600 mutex_enter(&ds->ds_lock); 601 ASSERT3U(ds->ds_open_refcount, >=, oldweight); 602 
ASSERT3U(newweight, >=, oldweight); 603 if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) { 604 rv = B_FALSE; 605 } else { 606 ds->ds_open_refcount -= oldweight; 607 ds->ds_open_refcount += newweight; 608 rv = B_TRUE; 609 } 610 mutex_exit(&ds->ds_lock); 611 return (rv); 612 } 613 614 void 615 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) 616 { 617 objset_t *mos = dp->dp_meta_objset; 618 dmu_buf_t *dbuf; 619 dsl_dataset_phys_t *dsphys; 620 dsl_dataset_t *ds; 621 uint64_t dsobj; 622 dsl_dir_t *dd; 623 624 dsl_dir_create_root(mos, ddobjp, tx); 625 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); 626 627 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 628 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 629 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 630 dmu_buf_will_dirty(dbuf, tx); 631 dsphys = dbuf->db_data; 632 dsphys->ds_dir_obj = dd->dd_object; 633 dsphys->ds_fsid_guid = unique_create(); 634 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 635 sizeof (dsphys->ds_guid)); 636 dsphys->ds_snapnames_zapobj = 637 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 638 dsphys->ds_creation_time = gethrestime_sec(); 639 dsphys->ds_creation_txg = tx->tx_txg; 640 dsphys->ds_deadlist_obj = 641 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 642 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 643 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 644 dmu_buf_rele(dbuf, FTAG); 645 646 dmu_buf_will_dirty(dd->dd_dbuf, tx); 647 dd->dd_phys->dd_head_dataset_obj = dsobj; 648 dsl_dir_close(dd, FTAG); 649 650 VERIFY(0 == 651 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); 652 (void) dmu_objset_create_impl(dp->dp_spa, ds, 653 &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx); 654 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 655 } 656 657 uint64_t 658 dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_tx_t *tx) 659 { 660 dsl_pool_t *dp = dd->dd_pool; 661 dmu_buf_t *dbuf; 
662 dsl_dataset_phys_t *dsphys; 663 uint64_t dsobj; 664 objset_t *mos = dp->dp_meta_objset; 665 666 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 667 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 668 ASSERT(dmu_tx_is_syncing(tx)); 669 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 670 671 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 672 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 673 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 674 dmu_buf_will_dirty(dbuf, tx); 675 dsphys = dbuf->db_data; 676 dsphys->ds_dir_obj = dd->dd_object; 677 dsphys->ds_fsid_guid = unique_create(); 678 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 679 sizeof (dsphys->ds_guid)); 680 dsphys->ds_snapnames_zapobj = 681 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 682 dsphys->ds_creation_time = gethrestime_sec(); 683 dsphys->ds_creation_txg = tx->tx_txg; 684 dsphys->ds_deadlist_obj = 685 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 686 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 687 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 688 689 if (origin) { 690 dsphys->ds_prev_snap_obj = origin->ds_object; 691 dsphys->ds_prev_snap_txg = 692 origin->ds_phys->ds_creation_txg; 693 dsphys->ds_used_bytes = 694 origin->ds_phys->ds_used_bytes; 695 dsphys->ds_compressed_bytes = 696 origin->ds_phys->ds_compressed_bytes; 697 dsphys->ds_uncompressed_bytes = 698 origin->ds_phys->ds_uncompressed_bytes; 699 dsphys->ds_bp = origin->ds_phys->ds_bp; 700 701 dmu_buf_will_dirty(origin->ds_dbuf, tx); 702 origin->ds_phys->ds_num_children++; 703 704 dmu_buf_will_dirty(dd->dd_dbuf, tx); 705 dd->dd_phys->dd_origin_obj = origin->ds_object; 706 } 707 dmu_buf_rele(dbuf, FTAG); 708 709 dmu_buf_will_dirty(dd->dd_dbuf, tx); 710 dd->dd_phys->dd_head_dataset_obj = dsobj; 711 712 return (dsobj); 713 } 714 715 uint64_t 716 dsl_dataset_create_sync(dsl_dir_t *pdd, 717 const char *lastname, dsl_dataset_t *origin, cred_t *cr, dmu_tx_t *tx) 718 
{ 719 dsl_pool_t *dp = pdd->dd_pool; 720 uint64_t dsobj, ddobj; 721 dsl_dir_t *dd; 722 723 ASSERT(lastname[0] != '@'); 724 725 ddobj = dsl_dir_create_sync(pdd, lastname, tx); 726 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 727 728 dsobj = dsl_dataset_create_sync_impl(dd, origin, tx); 729 730 dsl_deleg_set_create_perms(dd, tx, cr); 731 732 dsl_dir_close(dd, FTAG); 733 734 return (dsobj); 735 } 736 737 struct destroyarg { 738 dsl_sync_task_group_t *dstg; 739 char *snapname; 740 char *failed; 741 }; 742 743 static int 744 dsl_snapshot_destroy_one(char *name, void *arg) 745 { 746 struct destroyarg *da = arg; 747 dsl_dataset_t *ds; 748 char *cp; 749 int err; 750 751 (void) strcat(name, "@"); 752 (void) strcat(name, da->snapname); 753 err = dsl_dataset_open(name, 754 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 755 da->dstg, &ds); 756 cp = strchr(name, '@'); 757 *cp = '\0'; 758 if (err == ENOENT) 759 return (0); 760 if (err) { 761 (void) strcpy(da->failed, name); 762 return (err); 763 } 764 765 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 766 dsl_dataset_destroy_sync, ds, da->dstg, 0); 767 return (0); 768 } 769 770 /* 771 * Destroy 'snapname' in all descendants of 'fsname'. 
772 */ 773 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 774 int 775 dsl_snapshots_destroy(char *fsname, char *snapname) 776 { 777 int err; 778 struct destroyarg da; 779 dsl_sync_task_t *dst; 780 spa_t *spa; 781 782 err = spa_open(fsname, &spa, FTAG); 783 if (err) 784 return (err); 785 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 786 da.snapname = snapname; 787 da.failed = fsname; 788 789 err = dmu_objset_find(fsname, 790 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 791 792 if (err == 0) 793 err = dsl_sync_task_group_wait(da.dstg); 794 795 for (dst = list_head(&da.dstg->dstg_tasks); dst; 796 dst = list_next(&da.dstg->dstg_tasks, dst)) { 797 dsl_dataset_t *ds = dst->dst_arg1; 798 if (dst->dst_err) { 799 dsl_dataset_name(ds, fsname); 800 *strchr(fsname, '@') = '\0'; 801 } 802 /* 803 * If it was successful, destroy_sync would have 804 * closed the ds 805 */ 806 if (err) 807 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg); 808 } 809 810 dsl_sync_task_group_destroy(da.dstg); 811 spa_close(spa, FTAG); 812 return (err); 813 } 814 815 /* 816 * ds must be opened EXCLUSIVE or PRIMARY. on return (whether 817 * successful or not), ds will be closed and caller can no longer 818 * dereference it. 819 */ 820 int 821 dsl_dataset_destroy(dsl_dataset_t *ds, void *tag) 822 { 823 int err; 824 dsl_sync_task_group_t *dstg; 825 objset_t *os; 826 dsl_dir_t *dd; 827 uint64_t obj; 828 829 if (ds->ds_open_refcount != DS_REF_MAX) { 830 if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY, 831 DS_MODE_EXCLUSIVE) == 0) { 832 dsl_dataset_close(ds, DS_MODE_PRIMARY, tag); 833 return (EBUSY); 834 } 835 } 836 837 if (dsl_dataset_is_snapshot(ds)) { 838 /* Destroying a snapshot is simpler */ 839 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 840 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 841 ds, tag, 0); 842 goto out; 843 } 844 845 dd = ds->ds_dir; 846 847 /* 848 * Check for errors and mark this ds as inconsistent, in 849 * case we crash while freeing the objects. 
850 */ 851 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 852 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 853 if (err) 854 goto out; 855 856 err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); 857 if (err) 858 goto out; 859 860 /* 861 * remove the objects in open context, so that we won't 862 * have too much to do in syncing context. 863 */ 864 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 865 ds->ds_phys->ds_prev_snap_txg)) { 866 dmu_tx_t *tx = dmu_tx_create(os); 867 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); 868 dmu_tx_hold_bonus(tx, obj); 869 err = dmu_tx_assign(tx, TXG_WAIT); 870 if (err) { 871 /* 872 * Perhaps there is not enough disk 873 * space. Just deal with it from 874 * dsl_dataset_destroy_sync(). 875 */ 876 dmu_tx_abort(tx); 877 continue; 878 } 879 VERIFY(0 == dmu_object_free(os, obj, tx)); 880 dmu_tx_commit(tx); 881 } 882 /* Make sure it's not dirty before we finish destroying it. */ 883 txg_wait_synced(dd->dd_pool, 0); 884 885 dmu_objset_close(os); 886 if (err != ESRCH) 887 goto out; 888 889 if (ds->ds_user_ptr) { 890 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 891 ds->ds_user_ptr = NULL; 892 } 893 894 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 895 err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); 896 rw_exit(&dd->dd_pool->dp_config_rwlock); 897 898 if (err) 899 goto out; 900 901 /* 902 * Blow away the dsl_dir + head dataset. 
903 */ 904 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 905 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 906 dsl_dataset_destroy_sync, ds, tag, 0); 907 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 908 dsl_dir_destroy_sync, dd, FTAG, 0); 909 err = dsl_sync_task_group_wait(dstg); 910 dsl_sync_task_group_destroy(dstg); 911 /* if it is successful, *destroy_sync will close the ds+dd */ 912 if (err) 913 dsl_dir_close(dd, FTAG); 914 out: 915 if (err) 916 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 917 return (err); 918 } 919 920 int 921 dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost) 922 { 923 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 924 925 return (dsl_sync_task_do(ds->ds_dir->dd_pool, 926 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, 927 ds, &ost, 0)); 928 } 929 930 void * 931 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 932 void *p, dsl_dataset_evict_func_t func) 933 { 934 void *old; 935 936 mutex_enter(&ds->ds_lock); 937 old = ds->ds_user_ptr; 938 if (old == NULL) { 939 ds->ds_user_ptr = p; 940 ds->ds_user_evict_func = func; 941 } 942 mutex_exit(&ds->ds_lock); 943 return (old); 944 } 945 946 void * 947 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 948 { 949 return (ds->ds_user_ptr); 950 } 951 952 953 blkptr_t * 954 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 955 { 956 return (&ds->ds_phys->ds_bp); 957 } 958 959 void 960 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 961 { 962 ASSERT(dmu_tx_is_syncing(tx)); 963 /* If it's the meta-objset, set dp_meta_rootbp */ 964 if (ds == NULL) { 965 tx->tx_pool->dp_meta_rootbp = *bp; 966 } else { 967 dmu_buf_will_dirty(ds->ds_dbuf, tx); 968 ds->ds_phys->ds_bp = *bp; 969 } 970 } 971 972 spa_t * 973 dsl_dataset_get_spa(dsl_dataset_t *ds) 974 { 975 return (ds->ds_dir->dd_pool->dp_spa); 976 } 977 978 void 979 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 980 { 981 dsl_pool_t *dp; 982 983 if (ds == NULL) /* this is the meta-objset */ 984 return; 985 986 
ASSERT(ds->ds_user_ptr != NULL); 987 988 if (ds->ds_phys->ds_next_snap_obj != 0) 989 panic("dirtying snapshot!"); 990 991 dp = ds->ds_dir->dd_pool; 992 993 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 994 /* up the hold count until we can be written out */ 995 dmu_buf_add_ref(ds->ds_dbuf, ds); 996 } 997 } 998 999 /* 1000 * The unique space in the head dataset can be calculated by subtracting 1001 * the space used in the most recent snapshot, that is still being used 1002 * in this file system, from the space currently in use. To figure out 1003 * the space in the most recent snapshot still in use, we need to take 1004 * the total space used in the snapshot and subtract out the space that 1005 * has been freed up since the snapshot was taken. 1006 */ 1007 static void 1008 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1009 { 1010 uint64_t mrs_used; 1011 uint64_t dlused, dlcomp, dluncomp; 1012 1013 ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj); 1014 1015 if (ds->ds_phys->ds_prev_snap_obj != 0) 1016 mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; 1017 else 1018 mrs_used = 0; 1019 1020 VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp, 1021 &dluncomp)); 1022 1023 ASSERT3U(dlused, <=, mrs_used); 1024 ds->ds_phys->ds_unique_bytes = 1025 ds->ds_phys->ds_used_bytes - (mrs_used - dlused); 1026 1027 if (!DS_UNIQUE_IS_ACCURATE(ds) && 1028 spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1029 SPA_VERSION_UNIQUE_ACCURATE) 1030 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1031 } 1032 1033 static uint64_t 1034 dsl_dataset_unique(dsl_dataset_t *ds) 1035 { 1036 if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds)) 1037 dsl_dataset_recalc_head_uniq(ds); 1038 1039 return (ds->ds_phys->ds_unique_bytes); 1040 } 1041 1042 struct killarg { 1043 int64_t *usedp; 1044 int64_t *compressedp; 1045 int64_t *uncompressedp; 1046 zio_t *zio; 1047 dmu_tx_t *tx; 1048 }; 1049 1050 static int 1051 kill_blkptr(traverse_blk_cache_t *bc, spa_t 
*spa, void *arg) 1052 { 1053 struct killarg *ka = arg; 1054 blkptr_t *bp = &bc->bc_blkptr; 1055 1056 ASSERT3U(bc->bc_errno, ==, 0); 1057 1058 /* 1059 * Since this callback is not called concurrently, no lock is 1060 * needed on the accounting values. 1061 */ 1062 *ka->usedp += bp_get_dasize(spa, bp); 1063 *ka->compressedp += BP_GET_PSIZE(bp); 1064 *ka->uncompressedp += BP_GET_UCSIZE(bp); 1065 /* XXX check for EIO? */ 1066 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, 1067 ARC_NOWAIT); 1068 return (0); 1069 } 1070 1071 /* ARGSUSED */ 1072 static int 1073 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) 1074 { 1075 dsl_dataset_t *ds = arg1; 1076 dmu_objset_type_t *ost = arg2; 1077 1078 /* 1079 * We can only roll back to emptyness if it is a ZPL objset. 1080 */ 1081 if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0) 1082 return (EINVAL); 1083 1084 /* 1085 * This must not be a snapshot. 1086 */ 1087 if (ds->ds_phys->ds_next_snap_obj != 0) 1088 return (EINVAL); 1089 1090 /* 1091 * If we made changes this txg, traverse_dsl_dataset won't find 1092 * them. Try again. 1093 */ 1094 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1095 return (EAGAIN); 1096 1097 return (0); 1098 } 1099 1100 /* ARGSUSED */ 1101 static void 1102 dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1103 { 1104 dsl_dataset_t *ds = arg1; 1105 dmu_objset_type_t *ost = arg2; 1106 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1107 1108 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1109 1110 /* 1111 * Before the roll back destroy the zil. 1112 */ 1113 if (ds->ds_user_ptr != NULL) { 1114 zil_rollback_destroy( 1115 ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx); 1116 1117 /* 1118 * We need to make sure that the objset_impl_t is reopened after 1119 * we do the rollback, otherwise it will have the wrong 1120 * objset_phys_t. 
Normally this would happen when this 1121 * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the 1122 * dataset to be immediately evicted. But when doing "zfs recv 1123 * -F", we reopen the objset before that, so that there is no 1124 * window where the dataset is closed and inconsistent. 1125 */ 1126 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 1127 ds->ds_user_ptr = NULL; 1128 } 1129 1130 /* Zero out the deadlist. */ 1131 bplist_close(&ds->ds_deadlist); 1132 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1133 ds->ds_phys->ds_deadlist_obj = 1134 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1135 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1136 ds->ds_phys->ds_deadlist_obj)); 1137 1138 { 1139 /* Free blkptrs that we gave birth to */ 1140 zio_t *zio; 1141 int64_t used = 0, compressed = 0, uncompressed = 0; 1142 struct killarg ka; 1143 int64_t delta; 1144 1145 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 1146 ZIO_FLAG_MUSTSUCCEED); 1147 ka.usedp = &used; 1148 ka.compressedp = &compressed; 1149 ka.uncompressedp = &uncompressed; 1150 ka.zio = zio; 1151 ka.tx = tx; 1152 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1153 ADVANCE_POST, kill_blkptr, &ka); 1154 (void) zio_wait(zio); 1155 1156 /* only deduct space beyond any refreservation */ 1157 delta = parent_delta(ds, -used); 1158 dsl_dir_diduse_space(ds->ds_dir, 1159 delta, -compressed, -uncompressed, tx); 1160 } 1161 1162 if (ds->ds_prev) { 1163 /* Change our contents to that of the prev snapshot */ 1164 ASSERT3U(ds->ds_prev->ds_object, ==, 1165 ds->ds_phys->ds_prev_snap_obj); 1166 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 1167 ds->ds_phys->ds_used_bytes = 1168 ds->ds_prev->ds_phys->ds_used_bytes; 1169 ds->ds_phys->ds_compressed_bytes = 1170 ds->ds_prev->ds_phys->ds_compressed_bytes; 1171 ds->ds_phys->ds_uncompressed_bytes = 1172 ds->ds_prev->ds_phys->ds_uncompressed_bytes; 1173 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; 1174 ds->ds_phys->ds_unique_bytes = 0; 
1175 1176 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1177 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1178 ds->ds_prev->ds_phys->ds_unique_bytes = 0; 1179 } 1180 } else { 1181 /* Zero out our contents, recreate objset */ 1182 bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t)); 1183 ds->ds_phys->ds_used_bytes = 0; 1184 ds->ds_phys->ds_compressed_bytes = 0; 1185 ds->ds_phys->ds_uncompressed_bytes = 0; 1186 ds->ds_phys->ds_flags = 0; 1187 ds->ds_phys->ds_unique_bytes = 0; 1188 (void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds, 1189 &ds->ds_phys->ds_bp, *ost, tx); 1190 } 1191 1192 spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa, 1193 tx, cr, "dataset = %llu", ds->ds_object); 1194 } 1195 1196 /* ARGSUSED */ 1197 static int 1198 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1199 { 1200 dsl_dataset_t *ds = arg1; 1201 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1202 uint64_t count; 1203 int err; 1204 1205 /* 1206 * Can't delete a head dataset if there are snapshots of it. 1207 * (Except if the only snapshots are from the branch we cloned 1208 * from.) 1209 */ 1210 if (ds->ds_prev != NULL && 1211 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1212 return (EINVAL); 1213 1214 /* 1215 * This is really a dsl_dir thing, but check it here so that 1216 * we'll be less likely to leave this dataset inconsistent & 1217 * nearly destroyed. 
1218 */ 1219 err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); 1220 if (err) 1221 return (err); 1222 if (count != 0) 1223 return (EEXIST); 1224 1225 return (0); 1226 } 1227 1228 /* ARGSUSED */ 1229 static void 1230 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1231 { 1232 dsl_dataset_t *ds = arg1; 1233 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1234 1235 /* Mark it as inconsistent on-disk, in case we crash */ 1236 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1237 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1238 1239 spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1240 cr, "dataset = %llu", ds->ds_object); 1241 } 1242 1243 /* ARGSUSED */ 1244 int 1245 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1246 { 1247 dsl_dataset_t *ds = arg1; 1248 1249 /* Can't delete a branch point. */ 1250 if (ds->ds_phys->ds_num_children > 1) 1251 return (EEXIST); 1252 1253 /* 1254 * Can't delete a head dataset if there are snapshots of it. 1255 * (Except if the only snapshots are from the branch we cloned 1256 * from.) 1257 */ 1258 if (ds->ds_prev != NULL && 1259 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1260 return (EINVAL); 1261 1262 /* 1263 * If we made changes this txg, traverse_dsl_dataset won't find 1264 * them. Try again. 1265 */ 1266 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1267 return (EAGAIN); 1268 1269 /* XXX we should do some i/o error checking... 
*/ 1270 return (0); 1271 } 1272 1273 void 1274 dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 1275 { 1276 dsl_dataset_t *ds = arg1; 1277 int64_t used = 0, compressed = 0, uncompressed = 0; 1278 zio_t *zio; 1279 int err; 1280 int after_branch_point = FALSE; 1281 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1282 objset_t *mos = dp->dp_meta_objset; 1283 dsl_dataset_t *ds_prev = NULL; 1284 uint64_t obj; 1285 1286 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 1287 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 1288 ASSERT(ds->ds_prev == NULL || 1289 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1290 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1291 1292 /* Remove our reservation */ 1293 if (ds->ds_reserved != 0) { 1294 uint64_t val = 0; 1295 dsl_dataset_set_reservation_sync(ds, &val, cr, tx); 1296 ASSERT3U(ds->ds_reserved, ==, 0); 1297 } 1298 1299 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1300 1301 obj = ds->ds_object; 1302 1303 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1304 if (ds->ds_prev) { 1305 ds_prev = ds->ds_prev; 1306 } else { 1307 VERIFY(0 == dsl_dataset_open_obj(dp, 1308 ds->ds_phys->ds_prev_snap_obj, NULL, 1309 DS_MODE_NONE, FTAG, &ds_prev)); 1310 } 1311 after_branch_point = 1312 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1313 1314 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1315 if (after_branch_point && 1316 ds->ds_phys->ds_next_snap_obj == 0) { 1317 /* This clone is toast. 
*/ 1318 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1319 ds_prev->ds_phys->ds_num_children--; 1320 } else if (!after_branch_point) { 1321 ds_prev->ds_phys->ds_next_snap_obj = 1322 ds->ds_phys->ds_next_snap_obj; 1323 } 1324 } 1325 1326 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1327 1328 if (ds->ds_phys->ds_next_snap_obj != 0) { 1329 blkptr_t bp; 1330 dsl_dataset_t *ds_next; 1331 uint64_t itor = 0; 1332 uint64_t old_unique; 1333 1334 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1335 1336 VERIFY(0 == dsl_dataset_open_obj(dp, 1337 ds->ds_phys->ds_next_snap_obj, NULL, 1338 DS_MODE_NONE, FTAG, &ds_next)); 1339 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1340 1341 old_unique = dsl_dataset_unique(ds_next); 1342 1343 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1344 ds_next->ds_phys->ds_prev_snap_obj = 1345 ds->ds_phys->ds_prev_snap_obj; 1346 ds_next->ds_phys->ds_prev_snap_txg = 1347 ds->ds_phys->ds_prev_snap_txg; 1348 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1349 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1350 1351 /* 1352 * Transfer to our deadlist (which will become next's 1353 * new deadlist) any entries from next's current 1354 * deadlist which were born before prev, and free the 1355 * other entries. 1356 * 1357 * XXX we're doing this long task with the config lock held 1358 */ 1359 while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1360 &bp) == 0) { 1361 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1362 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1363 &bp, tx)); 1364 if (ds_prev && !after_branch_point && 1365 bp.blk_birth > 1366 ds_prev->ds_phys->ds_prev_snap_txg) { 1367 ds_prev->ds_phys->ds_unique_bytes += 1368 bp_get_dasize(dp->dp_spa, &bp); 1369 } 1370 } else { 1371 used += bp_get_dasize(dp->dp_spa, &bp); 1372 compressed += BP_GET_PSIZE(&bp); 1373 uncompressed += BP_GET_UCSIZE(&bp); 1374 /* XXX check return value? 
*/ 1375 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1376 &bp, NULL, NULL, ARC_NOWAIT); 1377 } 1378 } 1379 1380 /* free next's deadlist */ 1381 bplist_close(&ds_next->ds_deadlist); 1382 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1383 1384 /* set next's deadlist to our deadlist */ 1385 ds_next->ds_phys->ds_deadlist_obj = 1386 ds->ds_phys->ds_deadlist_obj; 1387 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1388 ds_next->ds_phys->ds_deadlist_obj)); 1389 ds->ds_phys->ds_deadlist_obj = 0; 1390 1391 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1392 /* 1393 * Update next's unique to include blocks which 1394 * were previously shared by only this snapshot 1395 * and it. Those blocks will be born after the 1396 * prev snap and before this snap, and will have 1397 * died after the next snap and before the one 1398 * after that (ie. be on the snap after next's 1399 * deadlist). 1400 * 1401 * XXX we're doing this long task with the 1402 * config lock held 1403 */ 1404 dsl_dataset_t *ds_after_next; 1405 1406 VERIFY(0 == dsl_dataset_open_obj(dp, 1407 ds_next->ds_phys->ds_next_snap_obj, NULL, 1408 DS_MODE_NONE, FTAG, &ds_after_next)); 1409 itor = 0; 1410 while (bplist_iterate(&ds_after_next->ds_deadlist, 1411 &itor, &bp) == 0) { 1412 if (bp.blk_birth > 1413 ds->ds_phys->ds_prev_snap_txg && 1414 bp.blk_birth <= 1415 ds->ds_phys->ds_creation_txg) { 1416 ds_next->ds_phys->ds_unique_bytes += 1417 bp_get_dasize(dp->dp_spa, &bp); 1418 } 1419 } 1420 1421 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1422 ASSERT3P(ds_next->ds_prev, ==, NULL); 1423 } else { 1424 ASSERT3P(ds_next->ds_prev, ==, ds); 1425 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1426 ds_next); 1427 if (ds_prev) { 1428 VERIFY(0 == dsl_dataset_open_obj(dp, 1429 ds->ds_phys->ds_prev_snap_obj, NULL, 1430 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); 1431 } else { 1432 ds_next->ds_prev = NULL; 1433 } 1434 1435 dsl_dataset_recalc_head_uniq(ds_next); 1436 1437 /* 1438 * Reduce the amount of 
our unconsmed refreservation 1439 * being charged to our parent by the amount of 1440 * new unique data we have gained. 1441 */ 1442 if (old_unique < ds_next->ds_reserved) { 1443 int64_t mrsdelta; 1444 uint64_t new_unique = 1445 ds_next->ds_phys->ds_unique_bytes; 1446 1447 ASSERT(old_unique <= new_unique); 1448 mrsdelta = MIN(new_unique - old_unique, 1449 ds_next->ds_reserved - old_unique); 1450 dsl_dir_diduse_space(ds->ds_dir, -mrsdelta, 1451 0, 0, tx); 1452 } 1453 } 1454 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1455 1456 /* 1457 * NB: unique_bytes might not be accurate for the head objset. 1458 * Before SPA_VERSION 9, we didn't update its value when we 1459 * deleted the most recent snapshot. 1460 */ 1461 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1462 } else { 1463 /* 1464 * There's no next snapshot, so this is a head dataset. 1465 * Destroy the deadlist. Unless it's a clone, the 1466 * deadlist should be empty. (If it's a clone, it's 1467 * safe to ignore the deadlist contents.) 
1468 */ 1469 struct killarg ka; 1470 1471 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1472 bplist_close(&ds->ds_deadlist); 1473 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1474 ds->ds_phys->ds_deadlist_obj = 0; 1475 1476 /* 1477 * Free everything that we point to (that's born after 1478 * the previous snapshot, if we are a clone) 1479 * 1480 * XXX we're doing this long task with the config lock held 1481 */ 1482 ka.usedp = &used; 1483 ka.compressedp = &compressed; 1484 ka.uncompressedp = &uncompressed; 1485 ka.zio = zio; 1486 ka.tx = tx; 1487 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1488 ADVANCE_POST, kill_blkptr, &ka); 1489 ASSERT3U(err, ==, 0); 1490 ASSERT(spa_version(dp->dp_spa) < 1491 SPA_VERSION_UNIQUE_ACCURATE || 1492 used == ds->ds_phys->ds_unique_bytes); 1493 } 1494 1495 err = zio_wait(zio); 1496 ASSERT3U(err, ==, 0); 1497 1498 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); 1499 1500 if (ds->ds_phys->ds_snapnames_zapobj) { 1501 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1502 ASSERT(err == 0); 1503 } 1504 1505 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1506 /* Erase the link in the dataset */ 1507 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1508 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1509 /* 1510 * dsl_dir_sync_destroy() called us, they'll destroy 1511 * the dataset. 
1512 */ 1513 } else { 1514 /* remove from snapshot namespace */ 1515 dsl_dataset_t *ds_head; 1516 VERIFY(0 == dsl_dataset_open_obj(dp, 1517 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, 1518 DS_MODE_NONE, FTAG, &ds_head)); 1519 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1520 #ifdef ZFS_DEBUG 1521 { 1522 uint64_t val; 1523 err = zap_lookup(mos, 1524 ds_head->ds_phys->ds_snapnames_zapobj, 1525 ds->ds_snapname, 8, 1, &val); 1526 ASSERT3U(err, ==, 0); 1527 ASSERT3U(val, ==, obj); 1528 } 1529 #endif 1530 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1531 ds->ds_snapname, tx); 1532 ASSERT(err == 0); 1533 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1534 } 1535 1536 if (ds_prev && ds->ds_prev != ds_prev) 1537 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1538 1539 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1540 spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx, 1541 cr, "dataset = %llu", ds->ds_object); 1542 1543 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 1544 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1545 1546 } 1547 1548 static int 1549 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1550 { 1551 uint64_t asize; 1552 1553 if (!dmu_tx_is_syncing(tx)) 1554 return (0); 1555 1556 /* 1557 * If there's an fs-only reservation, any blocks that might become 1558 * owned by the snapshot dataset must be accommodated by space 1559 * outside of the reservation. 1560 */ 1561 asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1562 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE)) 1563 return (ENOSPC); 1564 1565 /* 1566 * Propogate any reserved space for this snapshot to other 1567 * snapshot checks in this sync group. 
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

/*
 * Check function for taking a snapshot: arg1 is the dataset to snapshot,
 * arg2 is the new snapshot's name.
 *
 * Returns:
 *	EAGAIN		ds_prev_snap_txg is already >= this tx's txg
 *	EEXIST		the name is already present in the dataset's
 *			snapnames ZAP object
 *	ENAMETOOLONG	dataset name + '@' + snapshot name would not fit
 *			in MAXNAMELEN
 *	other		any other zap_lookup() error, or the error from
 *			dsl_dataset_snapshot_reserve_space()
 *	0		all checks passed; ds_trysnap_txg has been set to
 *			this tx's txg
 */
/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	/*
	 * Check that the dataset's name is not too long.  Name consists
	 * of the dataset's length + 1 for the @-sign + snapshot name's length
	 */
	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
		return (ENAMETOOLONG);

	err = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (err)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

/*
 * Sync function for taking a snapshot: arg1 is the dataset being
 * snapshotted, arg2 is the snapshot name.  Allocates a new
 * DMU_OT_DSL_DATASET object in the meta-objset and fills in its
 * dsl_dataset_phys_t from the current state of ds.
 */
void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/* the new snapshot's phys data lives in the object's bonus buffer */
	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
dsphys->ds_fsid_guid = unique_create(); 1639 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1640 sizeof (dsphys->ds_guid)); 1641 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1642 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1643 dsphys->ds_next_snap_obj = ds->ds_object; 1644 dsphys->ds_num_children = 1; 1645 dsphys->ds_creation_time = gethrestime_sec(); 1646 dsphys->ds_creation_txg = tx->tx_txg; 1647 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1648 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1649 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1650 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1651 dsphys->ds_flags = ds->ds_phys->ds_flags; 1652 dsphys->ds_bp = ds->ds_phys->ds_bp; 1653 dmu_buf_rele(dbuf, FTAG); 1654 1655 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1656 if (ds->ds_prev) { 1657 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1658 ds->ds_object || 1659 ds->ds_prev->ds_phys->ds_num_children > 1); 1660 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1661 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1662 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1663 ds->ds_prev->ds_phys->ds_creation_txg); 1664 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1665 } 1666 } 1667 1668 /* 1669 * If we have a reference-reservation on this dataset, we will 1670 * need to increase the amount of refreservation being charged 1671 * since our unique space is going to zero. 
1672 */ 1673 if (ds->ds_reserved) { 1674 int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1675 dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx); 1676 } 1677 1678 bplist_close(&ds->ds_deadlist); 1679 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1680 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 1681 ds->ds_phys->ds_prev_snap_obj = dsobj; 1682 ds->ds_phys->ds_prev_snap_txg = tx->tx_txg; 1683 ds->ds_phys->ds_unique_bytes = 0; 1684 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1685 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1686 ds->ds_phys->ds_deadlist_obj = 1687 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1688 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1689 ds->ds_phys->ds_deadlist_obj)); 1690 1691 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1692 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1693 snapname, 8, 1, &dsobj, tx); 1694 ASSERT(err == 0); 1695 1696 if (ds->ds_prev) 1697 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1698 VERIFY(0 == dsl_dataset_open_obj(dp, 1699 ds->ds_phys->ds_prev_snap_obj, snapname, 1700 DS_MODE_NONE, ds, &ds->ds_prev)); 1701 1702 spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, 1703 "dataset = %llu", dsobj); 1704 } 1705 1706 void 1707 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1708 { 1709 ASSERT(dmu_tx_is_syncing(tx)); 1710 ASSERT(ds->ds_user_ptr != NULL); 1711 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1712 1713 /* 1714 * in case we had to change ds_fsid_guid when we opened it, 1715 * sync it out now. 
1716 */ 1717 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1718 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1719 1720 dsl_dir_dirty(ds->ds_dir, tx); 1721 dmu_objset_sync(ds->ds_user_ptr, zio, tx); 1722 } 1723 1724 void 1725 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1726 { 1727 uint64_t refd, avail, uobjs, aobjs; 1728 1729 dsl_dir_stats(ds->ds_dir, nv); 1730 1731 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1732 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1733 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1734 1735 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1736 ds->ds_phys->ds_creation_time); 1737 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1738 ds->ds_phys->ds_creation_txg); 1739 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1740 ds->ds_quota); 1741 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1742 ds->ds_reserved); 1743 1744 if (ds->ds_phys->ds_next_snap_obj) { 1745 /* 1746 * This is a snapshot; override the dd's space used with 1747 * our unique space and compression ratio. 1748 */ 1749 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1750 ds->ds_phys->ds_unique_bytes); 1751 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 1752 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1753 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1754 ds->ds_phys->ds_compressed_bytes)); 1755 } 1756 } 1757 1758 void 1759 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1760 { 1761 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1762 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1763 stat->dds_guid = ds->ds_phys->ds_guid; 1764 if (ds->ds_phys->ds_next_snap_obj) { 1765 stat->dds_is_snapshot = B_TRUE; 1766 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1767 } 1768 1769 /* clone origin is really a dsl_dir thing... 
*/
	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
	if (ds->ds_dir->dd_phys->dd_origin_obj) {
		dsl_dataset_t *ods;

		/* briefly open the origin just to copy out its name */
		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
		    ds->ds_dir->dd_phys->dd_origin_obj,
		    NULL, DS_MODE_NONE, FTAG, &ods));
		dsl_dataset_name(ods, stat->dds_origin);
		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
	}
	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}

/*
 * Return the in-core fsid guid (ds_fsid_guid) of the dataset.
 */
uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}

/*
 * Report space and object usage for a dataset through the four out
 * parameters:
 *	*refdbytesp	ds_used_bytes of the dataset
 *	*availbytesp	space available in the dataset's dsl_dir, plus any
 *			part of ds_reserved not yet covered by
 *			ds_unique_bytes, then capped by what is left under
 *			ds_quota when a quota is set (0 if already at or
 *			over the quota)
 *	*usedobjsp	blk_fill of the dataset's root block pointer
 *	*availobjsp	DN_MAX_OBJECT minus *usedobjsp
 */
void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_used_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}

/*
 * Return B_TRUE if the dataset's root block pointer was born after its
 * previous snapshot (ds_prev) was created; B_FALSE otherwise, including
 * when there is no ds_prev.  The caller must hold dp_config_rwlock or be
 * in sync context (asserted).
 */
boolean_t
dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));
	if (ds->ds_prev == NULL)
		return (B_FALSE);
	if (ds->ds_phys->ds_bp.blk_birth >
	    ds->ds_prev->ds_phys->ds_creation_txg)
		return (B_TRUE);
	return (B_FALSE);
}

/*
 * Check function for renaming a snapshot: arg1 is the snapshot dataset,
 * arg2 is the new snapshot name (the part after the '@').
 */
/* ARGSUSED */
static int
dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
dsl_dataset_t *hds; 1836 uint64_t val; 1837 int err; 1838 1839 err = dsl_dataset_open_obj(dd->dd_pool, 1840 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); 1841 if (err) 1842 return (err); 1843 1844 /* new name better not be in use */ 1845 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, 1846 newsnapname, 8, 1, &val); 1847 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1848 1849 if (err == 0) 1850 err = EEXIST; 1851 else if (err == ENOENT) 1852 err = 0; 1853 1854 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1855 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 1856 err = ENAMETOOLONG; 1857 1858 return (err); 1859 } 1860 1861 static void 1862 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 1863 cred_t *cr, dmu_tx_t *tx) 1864 { 1865 dsl_dataset_t *ds = arg1; 1866 const char *newsnapname = arg2; 1867 dsl_dir_t *dd = ds->ds_dir; 1868 objset_t *mos = dd->dd_pool->dp_meta_objset; 1869 dsl_dataset_t *hds; 1870 int err; 1871 1872 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 1873 1874 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1875 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); 1876 1877 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1878 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, 1879 ds->ds_snapname, tx); 1880 ASSERT3U(err, ==, 0); 1881 mutex_enter(&ds->ds_lock); 1882 (void) strcpy(ds->ds_snapname, newsnapname); 1883 mutex_exit(&ds->ds_lock); 1884 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 1885 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 1886 ASSERT3U(err, ==, 0); 1887 1888 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 1889 cr, "dataset = %llu", ds->ds_object); 1890 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1891 } 1892 1893 struct renamesnaparg { 1894 dsl_sync_task_group_t *dstg; 1895 char failed[MAXPATHLEN]; 1896 char *oldsnap; 1897 char *newsnap; 1898 }; 1899 1900 static int 1901 dsl_snapshot_rename_one(char *name, void *arg) 
1902 { 1903 struct renamesnaparg *ra = arg; 1904 dsl_dataset_t *ds = NULL; 1905 char *cp; 1906 int err; 1907 1908 cp = name + strlen(name); 1909 *cp = '@'; 1910 (void) strcpy(cp + 1, ra->oldsnap); 1911 1912 /* 1913 * For recursive snapshot renames the parent won't be changing 1914 * so we just pass name for both the to/from argument. 1915 */ 1916 if (err = zfs_secpolicy_rename_perms(name, name, CRED())) { 1917 (void) strcpy(ra->failed, name); 1918 return (err); 1919 } 1920 1921 err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD, 1922 ra->dstg, &ds); 1923 if (err == ENOENT) { 1924 *cp = '\0'; 1925 return (0); 1926 } 1927 if (err) { 1928 (void) strcpy(ra->failed, name); 1929 *cp = '\0'; 1930 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1931 return (err); 1932 } 1933 1934 #ifdef _KERNEL 1935 /* for all filesystems undergoing rename, we'll need to unmount it */ 1936 (void) zfs_unmount_snap(name, NULL); 1937 #endif 1938 1939 *cp = '\0'; 1940 1941 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 1942 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 1943 1944 return (0); 1945 } 1946 1947 static int 1948 dsl_recursive_rename(char *oldname, const char *newname) 1949 { 1950 int err; 1951 struct renamesnaparg *ra; 1952 dsl_sync_task_t *dst; 1953 spa_t *spa; 1954 char *cp, *fsname = spa_strdup(oldname); 1955 int len = strlen(oldname); 1956 1957 /* truncate the snapshot name to get the fsname */ 1958 cp = strchr(fsname, '@'); 1959 *cp = '\0'; 1960 1961 err = spa_open(fsname, &spa, FTAG); 1962 if (err) { 1963 kmem_free(fsname, len + 1); 1964 return (err); 1965 } 1966 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 1967 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 1968 1969 ra->oldsnap = strchr(oldname, '@') + 1; 1970 ra->newsnap = strchr(newname, '@') + 1; 1971 *ra->failed = '\0'; 1972 1973 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 1974 DS_FIND_CHILDREN); 1975 kmem_free(fsname, len + 1); 1976 1977 
	if (err == 0) {
		err = dsl_sync_task_group_wait(ra->dstg);
	}

	/*
	 * Close every dataset that was opened for a task in the group,
	 * recording the new name of any snapshot whose task failed.
	 */
	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			dsl_dir_name(ds->ds_dir, ra->failed);
			(void) strcat(ra->failed, "@");
			(void) strcat(ra->failed, ra->newsnap);
		}
		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
	}

	/* hand the failing name back to the caller through oldname */
	if (err)
		(void) strcpy(oldname, ra->failed);

	dsl_sync_task_group_destroy(ra->dstg);
	kmem_free(ra, sizeof (struct renamesnaparg));
	spa_close(spa, FTAG);
	return (err);
}

/*
 * dmu_objset_find() callback used by dsl_dataset_rename(): arg points to
 * an int holding the number of characters the rename adds to each name.
 * Returns ENAMETOOLONG if oldname grown by that amount would reach
 * MAXNAMELEN, 0 otherwise.
 */
static int
dsl_valid_rename(char *oldname, void *arg)
{
	int delta = *(int *)arg;

	if (strlen(oldname) + delta >= MAXNAMELEN)
		return (ENAMETOOLONG);

	return (0);
}

/*
 * Rename a dataset or a snapshot (also exported as dmu_objset_rename via
 * the weak alias below).
 *
 * If oldname resolves fully to a dsl_dir, the directory is renamed with
 * dsl_dir_rename() after checking, when the name grows, that no child or
 * snapshot name would become too long.  Otherwise oldname must name a
 * snapshot ("...@snap"), and newname must name a snapshot of the same
 * filesystem.
 *
 * Returns ENOENT if oldname ends in a component that does not exist,
 * EINVAL if newname has no '@', EXDEV if the two names differ before the
 * snapshot part, or the error from the underlying open/rename.
 */
#pragma weak dmu_objset_rename = dsl_dataset_rename
int
dsl_dataset_rename(char *oldname, const char *newname,
    boolean_t recursive)
{
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	const char *tail;
	int err;

	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
	if (err)
		return (err);
	if (tail == NULL) {
		int delta = strlen(newname) - strlen(oldname);

		/* if we're growing, validate child size lengths */
		if (delta > 0)
			err = dmu_objset_find(oldname, dsl_valid_rename,
			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);

		if (!err)
			err = dsl_dir_rename(dd, newname);
		dsl_dir_close(dd, FTAG);
		return (err);
	}
	if (tail[0] != '@') {
		/* the name ended in a nonexistent component */
		dsl_dir_close(dd, FTAG);
		return (ENOENT);
	}

	dsl_dir_close(dd, FTAG);

	/* new name must be snapshot in same filesystem */
	tail = strchr(newname, '@');
	if (tail == NULL)
		return (EINVAL);
	tail++;
	/* compare everything up to and including the '@' */
	if (strncmp(oldname, newname, tail - newname) != 0)
		return (EXDEV);

2054 if (recursive) { 2055 err = dsl_recursive_rename(oldname, newname); 2056 } else { 2057 err = dsl_dataset_open(oldname, 2058 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); 2059 if (err) 2060 return (err); 2061 2062 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2063 dsl_dataset_snapshot_rename_check, 2064 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2065 2066 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2067 } 2068 2069 return (err); 2070 } 2071 2072 struct promotearg { 2073 uint64_t used, comp, uncomp, unique; 2074 uint64_t newnext_obj, snapnames_obj; 2075 }; 2076 2077 /* ARGSUSED */ 2078 static int 2079 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 2080 { 2081 dsl_dataset_t *hds = arg1; 2082 struct promotearg *pa = arg2; 2083 dsl_dir_t *dd = hds->ds_dir; 2084 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2085 dsl_dir_t *odd = NULL; 2086 dsl_dataset_t *ds = NULL; 2087 dsl_dataset_t *origin_ds = NULL; 2088 dsl_dataset_t *newnext_ds = NULL; 2089 int err; 2090 char *name = NULL; 2091 uint64_t itor = 0; 2092 blkptr_t bp; 2093 2094 bzero(pa, sizeof (*pa)); 2095 2096 /* Check that it is a clone */ 2097 if (dd->dd_phys->dd_origin_obj == 0) 2098 return (EINVAL); 2099 2100 /* Since this is so expensive, don't do the preliminary check */ 2101 if (!dmu_tx_is_syncing(tx)) 2102 return (0); 2103 2104 if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, 2105 NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)) 2106 goto out; 2107 odd = origin_ds->ds_dir; 2108 2109 { 2110 dsl_dataset_t *phds; 2111 if (err = dsl_dataset_open_obj(dd->dd_pool, 2112 odd->dd_phys->dd_head_dataset_obj, 2113 NULL, DS_MODE_NONE, FTAG, &phds)) 2114 goto out; 2115 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; 2116 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); 2117 } 2118 2119 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 2120 err = EXDEV; 2121 goto out; 2122 } 2123 2124 /* find origin's new next ds */ 2125 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 
hds->ds_object, 2126 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); 2127 while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) { 2128 dsl_dataset_t *prev; 2129 2130 if (err = dsl_dataset_open_obj(dd->dd_pool, 2131 newnext_ds->ds_phys->ds_prev_snap_obj, 2132 NULL, DS_MODE_NONE, FTAG, &prev)) 2133 goto out; 2134 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 2135 newnext_ds = prev; 2136 } 2137 pa->newnext_obj = newnext_ds->ds_object; 2138 2139 /* compute origin's new unique space */ 2140 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, 2141 &itor, &bp)) == 0) { 2142 if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg) 2143 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); 2144 } 2145 if (err != ENOENT) 2146 goto out; 2147 2148 /* Walk the snapshots that we are moving */ 2149 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2150 ds = origin_ds; 2151 /* CONSTCOND */ 2152 while (TRUE) { 2153 uint64_t val, dlused, dlcomp, dluncomp; 2154 dsl_dataset_t *prev; 2155 2156 /* Check that the snapshot name does not conflict */ 2157 dsl_dataset_name(ds, name); 2158 err = zap_lookup(dd->dd_pool->dp_meta_objset, 2159 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2160 8, 1, &val); 2161 if (err != ENOENT) { 2162 if (err == 0) 2163 err = EEXIST; 2164 goto out; 2165 } 2166 2167 /* 2168 * compute space to transfer. Each snapshot gave birth to: 2169 * (my used) - (prev's used) + (deadlist's used) 2170 */ 2171 pa->used += ds->ds_phys->ds_used_bytes; 2172 pa->comp += ds->ds_phys->ds_compressed_bytes; 2173 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; 2174 2175 /* If we reach the first snapshot, we're done. 
*/ 2176 if (ds->ds_phys->ds_prev_snap_obj == 0) 2177 break; 2178 2179 if (err = bplist_space(&ds->ds_deadlist, 2180 &dlused, &dlcomp, &dluncomp)) 2181 goto out; 2182 if (err = dsl_dataset_open_obj(dd->dd_pool, 2183 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 2184 FTAG, &prev)) 2185 goto out; 2186 pa->used += dlused - prev->ds_phys->ds_used_bytes; 2187 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; 2188 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; 2189 2190 /* 2191 * We could be a clone of a clone. If we reach our 2192 * parent's branch point, we're done. 2193 */ 2194 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 2195 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 2196 break; 2197 } 2198 if (ds != origin_ds) 2199 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2200 ds = prev; 2201 } 2202 2203 /* Check that there is enough space here */ 2204 err = dsl_dir_transfer_possible(odd, dd, pa->used); 2205 2206 out: 2207 if (ds && ds != origin_ds) 2208 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2209 if (origin_ds) 2210 dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); 2211 if (newnext_ds) 2212 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 2213 if (name) 2214 kmem_free(name, MAXPATHLEN); 2215 return (err); 2216 } 2217 2218 static void 2219 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2220 { 2221 dsl_dataset_t *hds = arg1; 2222 struct promotearg *pa = arg2; 2223 dsl_dir_t *dd = hds->ds_dir; 2224 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2225 dsl_dir_t *odd = NULL; 2226 dsl_dataset_t *ds, *origin_ds; 2227 char *name; 2228 2229 ASSERT(dd->dd_phys->dd_origin_obj != 0); 2230 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 2231 2232 VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, 2233 NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)); 2234 /* 2235 * We need to explicitly open odd, since origin_ds's dd will be 2236 * changing. 
2237 */ 2238 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2239 NULL, FTAG, &odd)); 2240 2241 /* move snapshots to this dir */ 2242 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2243 ds = origin_ds; 2244 /* CONSTCOND */ 2245 while (TRUE) { 2246 dsl_dataset_t *prev; 2247 2248 /* move snap name entry */ 2249 dsl_dataset_name(ds, name); 2250 VERIFY(0 == zap_remove(dp->dp_meta_objset, 2251 pa->snapnames_obj, ds->ds_snapname, tx)); 2252 VERIFY(0 == zap_add(dp->dp_meta_objset, 2253 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2254 8, 1, &ds->ds_object, tx)); 2255 2256 /* change containing dsl_dir */ 2257 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2258 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2259 ds->ds_phys->ds_dir_obj = dd->dd_object; 2260 ASSERT3P(ds->ds_dir, ==, odd); 2261 dsl_dir_close(ds->ds_dir, ds); 2262 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2263 NULL, ds, &ds->ds_dir)); 2264 2265 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2266 2267 if (ds->ds_phys->ds_prev_snap_obj == 0) 2268 break; 2269 2270 VERIFY(0 == dsl_dataset_open_obj(dp, 2271 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 2272 FTAG, &prev)); 2273 2274 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 2275 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 2276 break; 2277 } 2278 if (ds != origin_ds) 2279 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2280 ds = prev; 2281 } 2282 if (ds != origin_ds) 2283 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2284 2285 /* change origin's next snap */ 2286 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2287 origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; 2288 2289 /* change origin */ 2290 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2291 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2292 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2293 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2294 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2295 2296 /* change space accounting */ 2297 
dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx); 2298 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); 2299 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2300 2301 /* log history record */ 2302 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2303 cr, "dataset = %llu", ds->ds_object); 2304 2305 dsl_dir_close(odd, FTAG); 2306 dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); 2307 kmem_free(name, MAXPATHLEN); 2308 } 2309 2310 int 2311 dsl_dataset_promote(const char *name) 2312 { 2313 dsl_dataset_t *ds; 2314 int err; 2315 dmu_object_info_t doi; 2316 struct promotearg pa; 2317 2318 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 2319 if (err) 2320 return (err); 2321 2322 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 2323 ds->ds_phys->ds_snapnames_zapobj, &doi); 2324 if (err) { 2325 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2326 return (err); 2327 } 2328 2329 /* 2330 * Add in 128x the snapnames zapobj size, since we will be moving 2331 * a bunch of snapnames to the promoted ds, and dirtying their 2332 * bonus buffers. 
2333 */ 2334 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2335 dsl_dataset_promote_check, 2336 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); 2337 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2338 return (err); 2339 } 2340 2341 struct cloneswaparg { 2342 dsl_dataset_t *cds; /* clone dataset */ 2343 dsl_dataset_t *ohds; /* origin's head dataset */ 2344 boolean_t force; 2345 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 2346 }; 2347 2348 /* ARGSUSED */ 2349 static int 2350 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2351 { 2352 struct cloneswaparg *csa = arg1; 2353 2354 /* they should both be heads */ 2355 if (dsl_dataset_is_snapshot(csa->cds) || 2356 dsl_dataset_is_snapshot(csa->ohds)) 2357 return (EINVAL); 2358 2359 /* the branch point should be just before them */ 2360 if (csa->cds->ds_prev != csa->ohds->ds_prev) 2361 return (EINVAL); 2362 2363 /* cds should be the clone */ 2364 if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != 2365 csa->ohds->ds_object) 2366 return (EINVAL); 2367 2368 /* the clone should be a child of the origin */ 2369 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 2370 return (EINVAL); 2371 2372 /* ohds shouldn't be modified unless 'force' */ 2373 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 2374 return (ETXTBSY); 2375 2376 /* adjust amount of any unconsumed refreservation */ 2377 csa->unused_refres_delta = 2378 (int64_t)MIN(csa->ohds->ds_reserved, 2379 csa->ohds->ds_phys->ds_unique_bytes) - 2380 (int64_t)MIN(csa->ohds->ds_reserved, 2381 csa->cds->ds_phys->ds_unique_bytes); 2382 2383 if (csa->unused_refres_delta > 0 && 2384 csa->unused_refres_delta > 2385 dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 2386 return (ENOSPC); 2387 2388 return (0); 2389 } 2390 2391 /* ARGSUSED */ 2392 static void 2393 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2394 { 2395 struct cloneswaparg *csa = arg1; 2396 dsl_pool_t *dp 
= csa->cds->ds_dir->dd_pool; 2397 uint64_t itor = 0; 2398 blkptr_t bp; 2399 uint64_t unique = 0; 2400 int err; 2401 2402 ASSERT(csa->cds->ds_reserved == 0); 2403 ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); 2404 2405 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 2406 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 2407 dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); 2408 2409 if (csa->cds->ds_user_ptr != NULL) { 2410 csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); 2411 csa->cds->ds_user_ptr = NULL; 2412 } 2413 2414 if (csa->ohds->ds_user_ptr != NULL) { 2415 csa->ohds->ds_user_evict_func(csa->ohds, 2416 csa->ohds->ds_user_ptr); 2417 csa->ohds->ds_user_ptr = NULL; 2418 } 2419 2420 /* compute unique space */ 2421 while ((err = bplist_iterate(&csa->cds->ds_deadlist, 2422 &itor, &bp)) == 0) { 2423 if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg) 2424 unique += bp_get_dasize(dp->dp_spa, &bp); 2425 } 2426 VERIFY(err == ENOENT); 2427 2428 /* reset origin's unique bytes */ 2429 csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique; 2430 2431 /* swap blkptrs */ 2432 { 2433 blkptr_t tmp; 2434 tmp = csa->ohds->ds_phys->ds_bp; 2435 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 2436 csa->cds->ds_phys->ds_bp = tmp; 2437 } 2438 2439 /* set dd_*_bytes */ 2440 { 2441 int64_t dused, dcomp, duncomp; 2442 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2443 uint64_t odl_used, odl_comp, odl_uncomp; 2444 2445 VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2446 &cdl_comp, &cdl_uncomp)); 2447 VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2448 &odl_comp, &odl_uncomp)); 2449 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 2450 (csa->ohds->ds_phys->ds_used_bytes + odl_used); 2451 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 2452 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 2453 duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 2454 cdl_uncomp - 2455 (csa->ohds->ds_phys->ds_uncompressed_bytes + 
odl_uncomp); 2456 2457 dsl_dir_diduse_space(csa->ohds->ds_dir, 2458 dused, dcomp, duncomp, tx); 2459 dsl_dir_diduse_space(csa->cds->ds_dir, 2460 -dused, -dcomp, -duncomp, tx); 2461 } 2462 2463 #define SWITCH64(x, y) \ 2464 { \ 2465 uint64_t __tmp = (x); \ 2466 (x) = (y); \ 2467 (y) = __tmp; \ 2468 } 2469 2470 /* swap ds_*_bytes */ 2471 SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 2472 csa->cds->ds_phys->ds_used_bytes); 2473 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 2474 csa->cds->ds_phys->ds_compressed_bytes); 2475 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 2476 csa->cds->ds_phys->ds_uncompressed_bytes); 2477 SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 2478 csa->cds->ds_phys->ds_unique_bytes); 2479 2480 /* apply any parent delta for change in unconsumed refreservation */ 2481 dsl_dir_diduse_space(csa->ohds->ds_dir, csa->unused_refres_delta, 2482 0, 0, tx); 2483 2484 /* swap deadlists */ 2485 bplist_close(&csa->cds->ds_deadlist); 2486 bplist_close(&csa->ohds->ds_deadlist); 2487 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 2488 csa->cds->ds_phys->ds_deadlist_obj); 2489 VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 2490 csa->cds->ds_phys->ds_deadlist_obj)); 2491 VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 2492 csa->ohds->ds_phys->ds_deadlist_obj)); 2493 } 2494 2495 /* 2496 * Swap 'clone' with its origin head file system. 
2497 */ 2498 int 2499 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 2500 boolean_t force) 2501 { 2502 struct cloneswaparg csa; 2503 2504 ASSERT(clone->ds_open_refcount == DS_REF_MAX); 2505 ASSERT(origin_head->ds_open_refcount == DS_REF_MAX); 2506 2507 csa.cds = clone; 2508 csa.ohds = origin_head; 2509 csa.force = force; 2510 return (dsl_sync_task_do(clone->ds_dir->dd_pool, 2511 dsl_dataset_clone_swap_check, 2512 dsl_dataset_clone_swap_sync, &csa, NULL, 9)); 2513 } 2514 2515 /* 2516 * Given a pool name and a dataset object number in that pool, 2517 * return the name of that dataset. 2518 */ 2519 int 2520 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2521 { 2522 spa_t *spa; 2523 dsl_pool_t *dp; 2524 dsl_dataset_t *ds = NULL; 2525 int error; 2526 2527 if ((error = spa_open(pname, &spa, FTAG)) != 0) 2528 return (error); 2529 dp = spa_get_dsl(spa); 2530 rw_enter(&dp->dp_config_rwlock, RW_READER); 2531 if ((error = dsl_dataset_open_obj(dp, obj, 2532 NULL, DS_MODE_NONE, FTAG, &ds)) != 0) { 2533 rw_exit(&dp->dp_config_rwlock); 2534 spa_close(spa, FTAG); 2535 return (error); 2536 } 2537 dsl_dataset_name(ds, buf); 2538 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2539 rw_exit(&dp->dp_config_rwlock); 2540 spa_close(spa, FTAG); 2541 2542 return (0); 2543 } 2544 2545 int 2546 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2547 uint64_t asize, uint64_t inflight, uint64_t *used) 2548 { 2549 int error = 0; 2550 2551 ASSERT3S(asize, >, 0); 2552 2553 mutex_enter(&ds->ds_lock); 2554 /* 2555 * Make a space adjustment for reserved bytes. 
2556 */ 2557 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2558 ASSERT3U(*used, >=, 2559 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2560 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2561 } 2562 2563 if (!check_quota || ds->ds_quota == 0) { 2564 mutex_exit(&ds->ds_lock); 2565 return (0); 2566 } 2567 /* 2568 * If they are requesting more space, and our current estimate 2569 * is over quota, they get to try again unless the actual 2570 * on-disk is over quota and there are no pending changes (which 2571 * may free up space for us). 2572 */ 2573 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 2574 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 2575 error = ERESTART; 2576 else 2577 error = EDQUOT; 2578 } 2579 mutex_exit(&ds->ds_lock); 2580 2581 return (error); 2582 } 2583 2584 /* ARGSUSED */ 2585 static int 2586 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 2587 { 2588 dsl_dataset_t *ds = arg1; 2589 uint64_t *quotap = arg2; 2590 uint64_t new_quota = *quotap; 2591 2592 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 2593 return (ENOTSUP); 2594 2595 if (new_quota == 0) 2596 return (0); 2597 2598 if (new_quota < ds->ds_phys->ds_used_bytes || 2599 new_quota < ds->ds_reserved) 2600 return (ENOSPC); 2601 2602 return (0); 2603 } 2604 2605 /* ARGSUSED */ 2606 void 2607 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2608 { 2609 dsl_dataset_t *ds = arg1; 2610 uint64_t *quotap = arg2; 2611 uint64_t new_quota = *quotap; 2612 2613 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2614 2615 mutex_enter(&ds->ds_lock); 2616 ds->ds_quota = new_quota; 2617 mutex_exit(&ds->ds_lock); 2618 2619 dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 2620 2621 spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 2622 tx, cr, "%lld dataset = %llu ", 2623 (longlong_t)new_quota, ds->ds_dir->dd_phys->dd_head_dataset_obj); 2624 } 2625 2626 int 2627 
dsl_dataset_set_quota(const char *dsname, uint64_t quota) 2628 { 2629 dsl_dataset_t *ds; 2630 int err; 2631 2632 err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); 2633 if (err) 2634 return (err); 2635 2636 if (quota != ds->ds_quota) { 2637 /* 2638 * If someone removes a file, then tries to set the quota, we 2639 * want to make sure the file freeing takes effect. 2640 */ 2641 txg_wait_open(ds->ds_dir->dd_pool, 0); 2642 2643 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2644 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 2645 ds, "a, 0); 2646 } 2647 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2648 return (err); 2649 } 2650 2651 static int 2652 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 2653 { 2654 dsl_dataset_t *ds = arg1; 2655 uint64_t *reservationp = arg2; 2656 uint64_t new_reservation = *reservationp; 2657 int64_t delta; 2658 uint64_t unique; 2659 2660 if (new_reservation > INT64_MAX) 2661 return (EOVERFLOW); 2662 2663 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 2664 SPA_VERSION_REFRESERVATION) 2665 return (ENOTSUP); 2666 2667 if (dsl_dataset_is_snapshot(ds)) 2668 return (EINVAL); 2669 2670 /* 2671 * If we are doing the preliminary check in open context, the 2672 * space estimates may be inaccurate. 
2673 */ 2674 if (!dmu_tx_is_syncing(tx)) 2675 return (0); 2676 2677 mutex_enter(&ds->ds_lock); 2678 unique = dsl_dataset_unique(ds); 2679 delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved); 2680 mutex_exit(&ds->ds_lock); 2681 2682 if (delta > 0 && 2683 delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 2684 return (ENOSPC); 2685 if (delta > 0 && ds->ds_quota > 0 && 2686 new_reservation > ds->ds_quota) 2687 return (ENOSPC); 2688 2689 return (0); 2690 } 2691 2692 /* ARGSUSED */ 2693 static void 2694 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 2695 dmu_tx_t *tx) 2696 { 2697 dsl_dataset_t *ds = arg1; 2698 uint64_t *reservationp = arg2; 2699 uint64_t new_reservation = *reservationp; 2700 uint64_t unique; 2701 int64_t delta; 2702 2703 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2704 2705 mutex_enter(&ds->ds_lock); 2706 unique = dsl_dataset_unique(ds); 2707 delta = MAX(0, (int64_t)(new_reservation - unique)) - 2708 MAX(0, (int64_t)(ds->ds_reserved - unique)); 2709 ds->ds_reserved = new_reservation; 2710 mutex_exit(&ds->ds_lock); 2711 2712 dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation", 2713 new_reservation, cr, tx); 2714 2715 dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx); 2716 2717 spa_history_internal_log(LOG_DS_REFRESERV, 2718 ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 2719 (longlong_t)new_reservation, 2720 ds->ds_dir->dd_phys->dd_head_dataset_obj); 2721 } 2722 2723 int 2724 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 2725 { 2726 dsl_dataset_t *ds; 2727 int err; 2728 2729 err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); 2730 if (err) 2731 return (err); 2732 2733 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2734 dsl_dataset_set_reservation_check, 2735 dsl_dataset_set_reservation_sync, ds, &reservation, 0); 2736 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2737 return (err); 2738 } 2739