1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/dmu_objset.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_prop.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dmu_traverse.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zio.h> 37 #include <sys/zap.h> 38 #include <sys/unique.h> 39 #include <sys/zfs_context.h> 40 #include <sys/zfs_ioctl.h> 41 #include <sys/spa.h> 42 #include <sys/sunddi.h> 43 44 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 45 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 46 static dsl_checkfunc_t dsl_dataset_rollback_check; 47 static dsl_syncfunc_t dsl_dataset_rollback_sync; 48 static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 49 50 #define DS_REF_MAX (1ULL << 62) 51 52 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 53 54 /* 55 * We use weighted reference counts to express the various forms of exclusion 56 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open 57 * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. 58 * This makes the exclusion logic simple: the total refcnt for all opens cannot 59 * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their 60 * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume 61 * just over half of the refcnt space, so there can't be more than one, but it 62 * can peacefully coexist with any number of STANDARD opens. 63 */ 64 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { 65 0, /* DS_MODE_NONE - invalid */ 66 1, /* DS_MODE_STANDARD - unlimited number */ 67 (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */ 68 DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */ 69 }; 70 71 /* 72 * Figure out how much of this delta should be propogated to the dsl_dir 73 * layer. If there's a refreservation, that space has already been 74 * partially accounted for in our ancestors. 75 */ 76 static int64_t 77 parent_delta(dsl_dataset_t *ds, int64_t delta) 78 { 79 uint64_t old_bytes, new_bytes; 80 81 if (ds->ds_reserved == 0) 82 return (delta); 83 84 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 85 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 86 87 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 88 return (new_bytes - old_bytes); 89 } 90 91 void 92 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 93 { 94 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 95 int compressed = BP_GET_PSIZE(bp); 96 int uncompressed = BP_GET_UCSIZE(bp); 97 int64_t delta; 98 99 dprintf_bp(bp, "born, ds=%p\n", ds); 100 101 ASSERT(dmu_tx_is_syncing(tx)); 102 /* It could have been compressed away to nothing */ 103 if (BP_IS_HOLE(bp)) 104 return; 105 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 106 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 107 if (ds == NULL) { 108 /* 109 * Account for the meta-objset space in its placeholder 110 * dsl_dir. 111 */ 112 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 113 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 114 used, compressed, uncompressed, tx); 115 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 116 return; 117 } 118 dmu_buf_will_dirty(ds->ds_dbuf, tx); 119 mutex_enter(&ds->ds_lock); 120 delta = parent_delta(ds, used); 121 ds->ds_phys->ds_used_bytes += used; 122 ds->ds_phys->ds_compressed_bytes += compressed; 123 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 124 ds->ds_phys->ds_unique_bytes += used; 125 mutex_exit(&ds->ds_lock); 126 dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx); 127 } 128 129 void 130 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 131 dmu_tx_t *tx) 132 { 133 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 134 int compressed = BP_GET_PSIZE(bp); 135 int uncompressed = BP_GET_UCSIZE(bp); 136 137 ASSERT(dmu_tx_is_syncing(tx)); 138 /* No block pointer => nothing to free */ 139 if (BP_IS_HOLE(bp)) 140 return; 141 142 ASSERT(used > 0); 143 if (ds == NULL) { 144 int err; 145 /* 146 * Account for the meta-objset space in its placeholder 147 * dataset. 148 */ 149 err = arc_free(pio, tx->tx_pool->dp_spa, 150 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 151 ASSERT(err == 0); 152 153 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 154 -used, -compressed, -uncompressed, tx); 155 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 156 return; 157 } 158 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 159 160 dmu_buf_will_dirty(ds->ds_dbuf, tx); 161 162 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 163 int err; 164 int64_t delta; 165 166 dprintf_bp(bp, "freeing: %s", ""); 167 err = arc_free(pio, tx->tx_pool->dp_spa, 168 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 169 ASSERT(err == 0); 170 171 mutex_enter(&ds->ds_lock); 172 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 173 !DS_UNIQUE_IS_ACCURATE(ds)); 174 delta = parent_delta(ds, -used); 175 ds->ds_phys->ds_unique_bytes -= used; 176 mutex_exit(&ds->ds_lock); 177 dsl_dir_diduse_space(ds->ds_dir, 178 delta, -compressed, -uncompressed, tx); 179 } else { 180 dprintf_bp(bp, "putting on dead list: %s", ""); 181 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 182 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 183 if (ds->ds_phys->ds_prev_snap_obj != 0) { 184 ASSERT3U(ds->ds_prev->ds_object, ==, 185 ds->ds_phys->ds_prev_snap_obj); 186 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 187 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 188 ds->ds_object && bp->blk_birth > 189 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 190 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 191 mutex_enter(&ds->ds_prev->ds_lock); 192 ds->ds_prev->ds_phys->ds_unique_bytes += 193 used; 194 mutex_exit(&ds->ds_prev->ds_lock); 195 } 196 } 197 } 198 mutex_enter(&ds->ds_lock); 199 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 200 ds->ds_phys->ds_used_bytes -= used; 201 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 202 ds->ds_phys->ds_compressed_bytes -= compressed; 203 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 204 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 205 mutex_exit(&ds->ds_lock); 206 } 207 208 uint64_t 209 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 210 { 211 uint64_t trysnap = 0; 212 213 if (ds == NULL) 214 return (0); 215 /* 216 * The snapshot creation could fail, but that would cause an 217 * incorrect FALSE return, which would only result in an 218 * overestimation of the amount of space that an operation would 219 * consume, which is OK. 220 * 221 * There's also a small window where we could miss a pending 222 * snapshot, because we could set the sync task in the quiescing 223 * phase. So this should only be used as a guess. 224 */ 225 if (ds->ds_trysnap_txg > 226 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 227 trysnap = ds->ds_trysnap_txg; 228 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 229 } 230 231 int 232 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 233 { 234 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 235 } 236 237 /* ARGSUSED */ 238 static void 239 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 240 { 241 dsl_dataset_t *ds = dsv; 242 243 /* open_refcount == DS_REF_MAX when deleting */ 244 ASSERT(ds->ds_open_refcount == 0 || 245 ds->ds_open_refcount == DS_REF_MAX); 246 247 dprintf_ds(ds, "evicting %s\n", ""); 248 249 unique_remove(ds->ds_fsid_guid); 250 251 if (ds->ds_user_ptr != NULL) 252 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 253 254 if (ds->ds_prev) { 255 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 256 ds->ds_prev = NULL; 257 } 258 259 bplist_close(&ds->ds_deadlist); 260 dsl_dir_close(ds->ds_dir, ds); 261 262 ASSERT(!list_link_active(&ds->ds_synced_link)); 263 264 mutex_destroy(&ds->ds_lock); 265 mutex_destroy(&ds->ds_opening_lock); 266 mutex_destroy(&ds->ds_deadlist.bpl_lock); 267 268 kmem_free(ds, sizeof (dsl_dataset_t)); 269 } 270 271 static int 272 dsl_dataset_get_snapname(dsl_dataset_t *ds) 273 { 274 dsl_dataset_phys_t *headphys; 275 int err; 276 dmu_buf_t *headdbuf; 277 dsl_pool_t *dp = ds->ds_dir->dd_pool; 278 objset_t *mos = dp->dp_meta_objset; 279 280 if (ds->ds_snapname[0]) 281 return (0); 282 if (ds->ds_phys->ds_next_snap_obj == 0) 283 return (0); 284 285 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 286 FTAG, &headdbuf); 287 if (err) 288 return (err); 289 headphys = headdbuf->db_data; 290 err = zap_value_search(dp->dp_meta_objset, 291 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 292 dmu_buf_rele(headdbuf, FTAG); 293 return (err); 294 } 295 296 int 297 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, 298 int mode, void *tag, dsl_dataset_t **dsp) 299 { 300 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 301 objset_t *mos = dp->dp_meta_objset; 302 dmu_buf_t *dbuf; 303 dsl_dataset_t *ds; 304 int err; 305 306 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 307 dsl_pool_sync_context(dp)); 308 309 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 310 if (err) 311 return (err); 312 ds = dmu_buf_get_user(dbuf); 313 if (ds == NULL) { 314 dsl_dataset_t *winner; 315 316 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 317 ds->ds_dbuf = dbuf; 318 ds->ds_object = dsobj; 319 ds->ds_phys = dbuf->db_data; 320 321 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 322 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 323 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, 324 NULL); 325 326 err = bplist_open(&ds->ds_deadlist, 327 mos, ds->ds_phys->ds_deadlist_obj); 328 if (err == 0) { 329 err = dsl_dir_open_obj(dp, 330 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 331 } 332 if (err) { 333 /* 334 * we don't really need to close the blist if we 335 * just opened it. 336 */ 337 mutex_destroy(&ds->ds_lock); 338 mutex_destroy(&ds->ds_opening_lock); 339 mutex_destroy(&ds->ds_deadlist.bpl_lock); 340 kmem_free(ds, sizeof (dsl_dataset_t)); 341 dmu_buf_rele(dbuf, tag); 342 return (err); 343 } 344 345 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { 346 ds->ds_snapname[0] = '\0'; 347 if (ds->ds_phys->ds_prev_snap_obj) { 348 err = dsl_dataset_open_obj(dp, 349 ds->ds_phys->ds_prev_snap_obj, NULL, 350 DS_MODE_NONE, ds, &ds->ds_prev); 351 } 352 } else { 353 if (snapname) { 354 #ifdef ZFS_DEBUG 355 dsl_dataset_phys_t *headphys; 356 dmu_buf_t *headdbuf; 357 err = dmu_bonus_hold(mos, 358 ds->ds_dir->dd_phys->dd_head_dataset_obj, 359 FTAG, &headdbuf); 360 if (err == 0) { 361 headphys = headdbuf->db_data; 362 uint64_t foundobj; 363 err = zap_lookup(dp->dp_meta_objset, 364 headphys->ds_snapnames_zapobj, 365 snapname, sizeof (foundobj), 1, 366 &foundobj); 367 ASSERT3U(foundobj, ==, dsobj); 368 dmu_buf_rele(headdbuf, FTAG); 369 } 370 #endif 371 (void) strcat(ds->ds_snapname, snapname); 372 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 373 err = dsl_dataset_get_snapname(ds); 374 } 375 } 376 377 if (!dsl_dataset_is_snapshot(ds)) { 378 boolean_t need_lock = 379 !RW_LOCK_HELD(&dp->dp_config_rwlock); 380 381 if (need_lock) 382 rw_enter(&dp->dp_config_rwlock, RW_READER); 383 384 err = dsl_prop_get_ds_locked(ds->ds_dir, 385 "refreservation", sizeof (uint64_t), 1, 386 &ds->ds_reserved, NULL); 387 if (err == 0) { 388 err = dsl_prop_get_ds_locked(ds->ds_dir, 389 "refquota", sizeof (uint64_t), 1, 390 &ds->ds_quota, NULL); 391 } 392 393 if (need_lock) 394 rw_exit(&dp->dp_config_rwlock); 395 } else { 396 ds->ds_reserved = ds->ds_quota = 0; 397 } 398 399 if (err == 0) { 400 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 401 dsl_dataset_evict); 402 } 403 if (err || winner) { 404 bplist_close(&ds->ds_deadlist); 405 if (ds->ds_prev) { 406 dsl_dataset_close(ds->ds_prev, 407 DS_MODE_NONE, ds); 408 } 409 dsl_dir_close(ds->ds_dir, ds); 410 mutex_destroy(&ds->ds_lock); 411 mutex_destroy(&ds->ds_opening_lock); 412 mutex_destroy(&ds->ds_deadlist.bpl_lock); 413 kmem_free(ds, sizeof (dsl_dataset_t)); 414 if (err) { 415 dmu_buf_rele(dbuf, tag); 416 return (err); 417 } 418 ds = winner; 419 } else { 420 ds->ds_fsid_guid = 421 unique_insert(ds->ds_phys->ds_fsid_guid); 422 } 423 } 424 ASSERT3P(ds->ds_dbuf, ==, dbuf); 425 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 426 427 mutex_enter(&ds->ds_lock); 428 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && 429 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && 430 !DS_MODE_IS_INCONSISTENT(mode)) || 431 (ds->ds_open_refcount + weight > DS_REF_MAX)) { 432 mutex_exit(&ds->ds_lock); 433 dsl_dataset_close(ds, DS_MODE_NONE, tag); 434 return (EBUSY); 435 } 436 ds->ds_open_refcount += weight; 437 mutex_exit(&ds->ds_lock); 438 439 *dsp = ds; 440 return (0); 441 } 442 443 int 444 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, 445 void *tag, dsl_dataset_t **dsp) 446 { 447 dsl_dir_t *dd; 448 dsl_pool_t *dp; 449 const char *tail; 450 uint64_t obj; 451 dsl_dataset_t *ds = NULL; 452 int err = 0; 453 454 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); 455 if (err) 456 return (err); 457 458 dp = dd->dd_pool; 459 obj = dd->dd_phys->dd_head_dataset_obj; 460 rw_enter(&dp->dp_config_rwlock, RW_READER); 461 if (obj == 0) { 462 /* A dataset with no associated objset */ 463 err = ENOENT; 464 goto out; 465 } 466 467 if (tail != NULL) { 468 objset_t *mos = dp->dp_meta_objset; 469 470 err = dsl_dataset_open_obj(dp, obj, NULL, 471 DS_MODE_NONE, tag, &ds); 472 if (err) 473 goto out; 474 obj = ds->ds_phys->ds_snapnames_zapobj; 475 dsl_dataset_close(ds, DS_MODE_NONE, tag); 476 ds = NULL; 477 478 if (tail[0] != '@') { 479 err = ENOENT; 480 goto out; 481 } 482 tail++; 483 484 /* Look for a snapshot */ 485 if (!DS_MODE_IS_READONLY(mode)) { 486 err = EROFS; 487 goto out; 488 } 489 dprintf("looking for snapshot '%s'\n", tail); 490 err = zap_lookup(mos, obj, tail, 8, 1, &obj); 491 if (err) 492 goto out; 493 } 494 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); 495 496 out: 497 rw_exit(&dp->dp_config_rwlock); 498 dsl_dir_close(dd, FTAG); 499 500 ASSERT3U((err == 0), ==, (ds != NULL)); 501 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ 502 503 *dsp = ds; 504 return (err); 505 } 506 507 int 508 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) 509 { 510 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); 511 } 512 513 void 514 dsl_dataset_name(dsl_dataset_t *ds, char *name) 515 { 516 if (ds == NULL) { 517 (void) strcpy(name, "mos"); 518 } else { 519 dsl_dir_name(ds->ds_dir, name); 520 VERIFY(0 == dsl_dataset_get_snapname(ds)); 521 if (ds->ds_snapname[0]) { 522 (void) strcat(name, "@"); 523 if (!MUTEX_HELD(&ds->ds_lock)) { 524 /* 525 * We use a "recursive" mutex so that we 526 * can call dprintf_ds() with ds_lock held. 527 */ 528 mutex_enter(&ds->ds_lock); 529 (void) strcat(name, ds->ds_snapname); 530 mutex_exit(&ds->ds_lock); 531 } else { 532 (void) strcat(name, ds->ds_snapname); 533 } 534 } 535 } 536 } 537 538 static int 539 dsl_dataset_namelen(dsl_dataset_t *ds) 540 { 541 int result; 542 543 if (ds == NULL) { 544 result = 3; /* "mos" */ 545 } else { 546 result = dsl_dir_namelen(ds->ds_dir); 547 VERIFY(0 == dsl_dataset_get_snapname(ds)); 548 if (ds->ds_snapname[0]) { 549 ++result; /* adding one for the @-sign */ 550 if (!MUTEX_HELD(&ds->ds_lock)) { 551 /* see dsl_datset_name */ 552 mutex_enter(&ds->ds_lock); 553 result += strlen(ds->ds_snapname); 554 mutex_exit(&ds->ds_lock); 555 } else { 556 result += strlen(ds->ds_snapname); 557 } 558 } 559 } 560 561 return (result); 562 } 563 564 void 565 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) 566 { 567 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 568 mutex_enter(&ds->ds_lock); 569 ASSERT3U(ds->ds_open_refcount, >=, weight); 570 ds->ds_open_refcount -= weight; 571 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", 572 mode, ds->ds_open_refcount); 573 mutex_exit(&ds->ds_lock); 574 575 dmu_buf_rele(ds->ds_dbuf, tag); 576 } 577 578 void 579 dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode) 580 { 581 uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; 582 uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; 583 mutex_enter(&ds->ds_lock); 584 ASSERT3U(ds->ds_open_refcount, >=, oldweight); 585 ASSERT3U(oldweight, >=, newweight); 586 ds->ds_open_refcount -= oldweight; 587 ds->ds_open_refcount += newweight; 588 mutex_exit(&ds->ds_lock); 589 } 590 591 boolean_t 592 dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode) 593 { 594 boolean_t rv; 595 uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; 596 uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; 597 mutex_enter(&ds->ds_lock); 598 ASSERT3U(ds->ds_open_refcount, >=, oldweight); 599 ASSERT3U(newweight, >=, oldweight); 600 if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) { 601 rv = B_FALSE; 602 } else { 603 ds->ds_open_refcount -= oldweight; 604 ds->ds_open_refcount += newweight; 605 rv = B_TRUE; 606 } 607 mutex_exit(&ds->ds_lock); 608 return (rv); 609 } 610 611 void 612 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) 613 { 614 objset_t *mos = dp->dp_meta_objset; 615 dmu_buf_t *dbuf; 616 dsl_dataset_phys_t *dsphys; 617 dsl_dataset_t *ds; 618 uint64_t dsobj; 619 dsl_dir_t *dd; 620 621 dsl_dir_create_root(mos, ddobjp, tx); 622 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); 623 624 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 625 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 626 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 627 dmu_buf_will_dirty(dbuf, tx); 628 dsphys = dbuf->db_data; 629 dsphys->ds_dir_obj = dd->dd_object; 630 dsphys->ds_fsid_guid = unique_create(); 631 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 632 sizeof (dsphys->ds_guid)); 633 dsphys->ds_snapnames_zapobj = 634 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 635 dsphys->ds_creation_time = gethrestime_sec(); 636 dsphys->ds_creation_txg = tx->tx_txg; 637 dsphys->ds_deadlist_obj = 638 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 639 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 640 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 641 dmu_buf_rele(dbuf, FTAG); 642 643 dmu_buf_will_dirty(dd->dd_dbuf, tx); 644 dd->dd_phys->dd_head_dataset_obj = dsobj; 645 dsl_dir_close(dd, FTAG); 646 647 VERIFY(0 == 648 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); 649 (void) dmu_objset_create_impl(dp->dp_spa, ds, 650 &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx); 651 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 652 } 653 654 uint64_t 655 dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_tx_t *tx) 656 { 657 dsl_pool_t *dp = dd->dd_pool; 658 dmu_buf_t *dbuf; 659 dsl_dataset_phys_t *dsphys; 660 uint64_t dsobj; 661 objset_t *mos = dp->dp_meta_objset; 662 663 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 664 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 665 ASSERT(dmu_tx_is_syncing(tx)); 666 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 667 668 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 669 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 670 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 671 dmu_buf_will_dirty(dbuf, tx); 672 dsphys = dbuf->db_data; 673 dsphys->ds_dir_obj = dd->dd_object; 674 dsphys->ds_fsid_guid = unique_create(); 675 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 676 sizeof (dsphys->ds_guid)); 677 dsphys->ds_snapnames_zapobj = 678 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 679 dsphys->ds_creation_time = gethrestime_sec(); 680 dsphys->ds_creation_txg = tx->tx_txg; 681 dsphys->ds_deadlist_obj = 682 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 683 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 684 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 685 686 if (origin) { 687 dsphys->ds_prev_snap_obj = origin->ds_object; 688 dsphys->ds_prev_snap_txg = 689 origin->ds_phys->ds_creation_txg; 690 dsphys->ds_used_bytes = 691 origin->ds_phys->ds_used_bytes; 692 dsphys->ds_compressed_bytes = 693 origin->ds_phys->ds_compressed_bytes; 694 dsphys->ds_uncompressed_bytes = 695 origin->ds_phys->ds_uncompressed_bytes; 696 dsphys->ds_bp = origin->ds_phys->ds_bp; 697 698 dmu_buf_will_dirty(origin->ds_dbuf, tx); 699 origin->ds_phys->ds_num_children++; 700 701 dmu_buf_will_dirty(dd->dd_dbuf, tx); 702 dd->dd_phys->dd_origin_obj = origin->ds_object; 703 } 704 dmu_buf_rele(dbuf, FTAG); 705 706 dmu_buf_will_dirty(dd->dd_dbuf, tx); 707 dd->dd_phys->dd_head_dataset_obj = dsobj; 708 709 return (dsobj); 710 } 711 712 uint64_t 713 dsl_dataset_create_sync(dsl_dir_t *pdd, 714 const char *lastname, dsl_dataset_t *origin, cred_t *cr, dmu_tx_t *tx) 715 { 716 dsl_pool_t *dp = pdd->dd_pool; 717 uint64_t dsobj, ddobj; 718 dsl_dir_t *dd; 719 720 ASSERT(lastname[0] != '@'); 721 722 ddobj = dsl_dir_create_sync(pdd, lastname, tx); 723 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 724 725 dsobj = dsl_dataset_create_sync_impl(dd, origin, tx); 726 727 dsl_deleg_set_create_perms(dd, tx, cr); 728 729 dsl_dir_close(dd, FTAG); 730 731 return (dsobj); 732 } 733 734 struct destroyarg { 735 dsl_sync_task_group_t *dstg; 736 char *snapname; 737 char *failed; 738 }; 739 740 static int 741 dsl_snapshot_destroy_one(char *name, void *arg) 742 { 743 struct destroyarg *da = arg; 744 dsl_dataset_t *ds; 745 char *cp; 746 int err; 747 748 (void) strcat(name, "@"); 749 (void) strcat(name, da->snapname); 750 err = dsl_dataset_open(name, 751 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 752 da->dstg, &ds); 753 cp = strchr(name, '@'); 754 *cp = '\0'; 755 if (err == ENOENT) 756 return (0); 757 if (err) { 758 (void) strcpy(da->failed, name); 759 return (err); 760 } 761 762 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 763 dsl_dataset_destroy_sync, ds, da->dstg, 0); 764 return (0); 765 } 766 767 /* 768 * Destroy 'snapname' in all descendants of 'fsname'. 769 */ 770 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 771 int 772 dsl_snapshots_destroy(char *fsname, char *snapname) 773 { 774 int err; 775 struct destroyarg da; 776 dsl_sync_task_t *dst; 777 spa_t *spa; 778 779 err = spa_open(fsname, &spa, FTAG); 780 if (err) 781 return (err); 782 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 783 da.snapname = snapname; 784 da.failed = fsname; 785 786 err = dmu_objset_find(fsname, 787 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 788 789 if (err == 0) 790 err = dsl_sync_task_group_wait(da.dstg); 791 792 for (dst = list_head(&da.dstg->dstg_tasks); dst; 793 dst = list_next(&da.dstg->dstg_tasks, dst)) { 794 dsl_dataset_t *ds = dst->dst_arg1; 795 if (dst->dst_err) { 796 dsl_dataset_name(ds, fsname); 797 *strchr(fsname, '@') = '\0'; 798 } 799 /* 800 * If it was successful, destroy_sync would have 801 * closed the ds 802 */ 803 if (err) 804 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg); 805 } 806 807 dsl_sync_task_group_destroy(da.dstg); 808 spa_close(spa, FTAG); 809 return (err); 810 } 811 812 /* 813 * ds must be opened EXCLUSIVE or PRIMARY. on return (whether 814 * successful or not), ds will be closed and caller can no longer 815 * dereference it. 816 */ 817 int 818 dsl_dataset_destroy(dsl_dataset_t *ds, void *tag) 819 { 820 int err; 821 dsl_sync_task_group_t *dstg; 822 objset_t *os; 823 dsl_dir_t *dd; 824 uint64_t obj; 825 826 if (ds->ds_open_refcount != DS_REF_MAX) { 827 if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY, 828 DS_MODE_EXCLUSIVE) == 0) { 829 dsl_dataset_close(ds, DS_MODE_PRIMARY, tag); 830 return (EBUSY); 831 } 832 } 833 834 if (dsl_dataset_is_snapshot(ds)) { 835 /* Destroying a snapshot is simpler */ 836 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 837 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 838 ds, tag, 0); 839 goto out; 840 } 841 842 dd = ds->ds_dir; 843 844 /* 845 * Check for errors and mark this ds as inconsistent, in 846 * case we crash while freeing the objects. 847 */ 848 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 849 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 850 if (err) 851 goto out; 852 853 err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); 854 if (err) 855 goto out; 856 857 /* 858 * remove the objects in open context, so that we won't 859 * have too much to do in syncing context. 860 */ 861 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 862 ds->ds_phys->ds_prev_snap_txg)) { 863 dmu_tx_t *tx = dmu_tx_create(os); 864 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); 865 dmu_tx_hold_bonus(tx, obj); 866 err = dmu_tx_assign(tx, TXG_WAIT); 867 if (err) { 868 /* 869 * Perhaps there is not enough disk 870 * space. Just deal with it from 871 * dsl_dataset_destroy_sync(). 872 */ 873 dmu_tx_abort(tx); 874 continue; 875 } 876 VERIFY(0 == dmu_object_free(os, obj, tx)); 877 dmu_tx_commit(tx); 878 } 879 /* Make sure it's not dirty before we finish destroying it. */ 880 txg_wait_synced(dd->dd_pool, 0); 881 882 dmu_objset_close(os); 883 if (err != ESRCH) 884 goto out; 885 886 if (ds->ds_user_ptr) { 887 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 888 ds->ds_user_ptr = NULL; 889 } 890 891 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 892 err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); 893 rw_exit(&dd->dd_pool->dp_config_rwlock); 894 895 if (err) 896 goto out; 897 898 /* 899 * Blow away the dsl_dir + head dataset. 900 */ 901 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 902 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 903 dsl_dataset_destroy_sync, ds, tag, 0); 904 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 905 dsl_dir_destroy_sync, dd, FTAG, 0); 906 err = dsl_sync_task_group_wait(dstg); 907 dsl_sync_task_group_destroy(dstg); 908 /* if it is successful, *destroy_sync will close the ds+dd */ 909 if (err) 910 dsl_dir_close(dd, FTAG); 911 out: 912 if (err) 913 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 914 return (err); 915 } 916 917 int 918 dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost) 919 { 920 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 921 922 return (dsl_sync_task_do(ds->ds_dir->dd_pool, 923 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, 924 ds, &ost, 0)); 925 } 926 927 void * 928 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 929 void *p, dsl_dataset_evict_func_t func) 930 { 931 void *old; 932 933 mutex_enter(&ds->ds_lock); 934 old = ds->ds_user_ptr; 935 if (old == NULL) { 936 ds->ds_user_ptr = p; 937 ds->ds_user_evict_func = func; 938 } 939 mutex_exit(&ds->ds_lock); 940 return (old); 941 } 942 943 void * 944 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 945 { 946 return (ds->ds_user_ptr); 947 } 948 949 950 blkptr_t * 951 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 952 { 953 return (&ds->ds_phys->ds_bp); 954 } 955 956 void 957 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 958 { 959 ASSERT(dmu_tx_is_syncing(tx)); 960 /* If it's the meta-objset, set dp_meta_rootbp */ 961 if (ds == NULL) { 962 tx->tx_pool->dp_meta_rootbp = *bp; 963 } else { 964 dmu_buf_will_dirty(ds->ds_dbuf, tx); 965 ds->ds_phys->ds_bp = *bp; 966 } 967 } 968 969 spa_t * 970 dsl_dataset_get_spa(dsl_dataset_t *ds) 971 { 972 return (ds->ds_dir->dd_pool->dp_spa); 973 } 974 975 void 976 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 977 { 978 dsl_pool_t *dp; 979 980 if (ds == NULL) /* this is the meta-objset */ 981 return; 982 983 ASSERT(ds->ds_user_ptr != NULL); 984 985 if (ds->ds_phys->ds_next_snap_obj != 0) 986 panic("dirtying snapshot!"); 987 988 dp = ds->ds_dir->dd_pool; 989 990 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 991 /* up the hold count until we can be written out */ 992 dmu_buf_add_ref(ds->ds_dbuf, ds); 993 } 994 } 995 996 /* 997 * The unique space in the head dataset can be calculated by subtracting 998 * the space used in the most recent snapshot, that is still being used 999 * in this file system, from the space currently in use. To figure out 1000 * the space in the most recent snapshot still in use, we need to take 1001 * the total space used in the snapshot and subtract out the space that 1002 * has been freed up since the snapshot was taken. 1003 */ 1004 static void 1005 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1006 { 1007 uint64_t mrs_used; 1008 uint64_t dlused, dlcomp, dluncomp; 1009 1010 ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj); 1011 1012 if (ds->ds_phys->ds_prev_snap_obj != 0) 1013 mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; 1014 else 1015 mrs_used = 0; 1016 1017 VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp, 1018 &dluncomp)); 1019 1020 ASSERT3U(dlused, <=, mrs_used); 1021 ds->ds_phys->ds_unique_bytes = 1022 ds->ds_phys->ds_used_bytes - (mrs_used - dlused); 1023 1024 if (!DS_UNIQUE_IS_ACCURATE(ds) && 1025 spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1026 SPA_VERSION_UNIQUE_ACCURATE) 1027 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1028 } 1029 1030 static uint64_t 1031 dsl_dataset_unique(dsl_dataset_t *ds) 1032 { 1033 if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds)) 1034 dsl_dataset_recalc_head_uniq(ds); 1035 1036 return (ds->ds_phys->ds_unique_bytes); 1037 } 1038 1039 struct killarg { 1040 int64_t *usedp; 1041 int64_t *compressedp; 1042 int64_t *uncompressedp; 1043 zio_t *zio; 1044 dmu_tx_t *tx; 1045 }; 1046 1047 static int 1048 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 1049 { 1050 struct killarg *ka = arg; 1051 blkptr_t *bp = &bc->bc_blkptr; 1052 1053 ASSERT3U(bc->bc_errno, ==, 0); 1054 1055 /* 1056 * Since this callback is not called concurrently, no lock is 1057 * needed on the accounting values. 1058 */ 1059 *ka->usedp += bp_get_dasize(spa, bp); 1060 *ka->compressedp += BP_GET_PSIZE(bp); 1061 *ka->uncompressedp += BP_GET_UCSIZE(bp); 1062 /* XXX check for EIO? */ 1063 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, 1064 ARC_NOWAIT); 1065 return (0); 1066 } 1067 1068 /* ARGSUSED */ 1069 static int 1070 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) 1071 { 1072 dsl_dataset_t *ds = arg1; 1073 dmu_objset_type_t *ost = arg2; 1074 1075 /* 1076 * We can only roll back to emptyness if it is a ZPL objset. 1077 */ 1078 if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0) 1079 return (EINVAL); 1080 1081 /* 1082 * This must not be a snapshot. 1083 */ 1084 if (ds->ds_phys->ds_next_snap_obj != 0) 1085 return (EINVAL); 1086 1087 /* 1088 * If we made changes this txg, traverse_dsl_dataset won't find 1089 * them. Try again. 1090 */ 1091 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1092 return (EAGAIN); 1093 1094 return (0); 1095 } 1096 1097 /* ARGSUSED */ 1098 static void 1099 dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1100 { 1101 dsl_dataset_t *ds = arg1; 1102 dmu_objset_type_t *ost = arg2; 1103 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1104 1105 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1106 1107 /* 1108 * Before the roll back destroy the zil. 1109 */ 1110 if (ds->ds_user_ptr != NULL) { 1111 zil_rollback_destroy( 1112 ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx); 1113 1114 /* 1115 * We need to make sure that the objset_impl_t is reopened after 1116 * we do the rollback, otherwise it will have the wrong 1117 * objset_phys_t. Normally this would happen when this 1118 * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the 1119 * dataset to be immediately evicted. But when doing "zfs recv 1120 * -F", we reopen the objset before that, so that there is no 1121 * window where the dataset is closed and inconsistent. 1122 */ 1123 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 1124 ds->ds_user_ptr = NULL; 1125 } 1126 1127 /* Zero out the deadlist. */ 1128 bplist_close(&ds->ds_deadlist); 1129 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1130 ds->ds_phys->ds_deadlist_obj = 1131 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1132 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1133 ds->ds_phys->ds_deadlist_obj)); 1134 1135 { 1136 /* Free blkptrs that we gave birth to */ 1137 zio_t *zio; 1138 int64_t used = 0, compressed = 0, uncompressed = 0; 1139 struct killarg ka; 1140 int64_t delta; 1141 1142 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 1143 ZIO_FLAG_MUSTSUCCEED); 1144 ka.usedp = &used; 1145 ka.compressedp = &compressed; 1146 ka.uncompressedp = &uncompressed; 1147 ka.zio = zio; 1148 ka.tx = tx; 1149 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1150 ADVANCE_POST, kill_blkptr, &ka); 1151 (void) zio_wait(zio); 1152 1153 /* only deduct space beyond any refreservation */ 1154 delta = parent_delta(ds, -used); 1155 dsl_dir_diduse_space(ds->ds_dir, 1156 delta, -compressed, -uncompressed, tx); 1157 } 1158 1159 if (ds->ds_prev) { 1160 /* Change our contents to that of the prev snapshot */ 1161 ASSERT3U(ds->ds_prev->ds_object, ==, 1162 ds->ds_phys->ds_prev_snap_obj); 1163 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 1164 ds->ds_phys->ds_used_bytes = 1165 ds->ds_prev->ds_phys->ds_used_bytes; 1166 ds->ds_phys->ds_compressed_bytes = 1167 ds->ds_prev->ds_phys->ds_compressed_bytes; 1168 ds->ds_phys->ds_uncompressed_bytes = 1169 ds->ds_prev->ds_phys->ds_uncompressed_bytes; 1170 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; 1171 ds->ds_phys->ds_unique_bytes = 0; 1172 1173 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1174 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1175 ds->ds_prev->ds_phys->ds_unique_bytes = 0; 1176 } 1177 } else { 1178 /* Zero out our contents, recreate objset */ 1179 bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t)); 1180 ds->ds_phys->ds_used_bytes = 0; 1181 ds->ds_phys->ds_compressed_bytes = 0; 1182 ds->ds_phys->ds_uncompressed_bytes = 0; 1183 ds->ds_phys->ds_flags = 0; 1184 ds->ds_phys->ds_unique_bytes = 0; 1185 (void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds, 1186 &ds->ds_phys->ds_bp, *ost, tx); 1187 } 1188 1189 spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa, 1190 tx, cr, "dataset = %llu", ds->ds_object); 1191 } 1192 1193 /* ARGSUSED */ 1194 static int 1195 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1196 { 1197 dsl_dataset_t *ds = arg1; 1198 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1199 uint64_t count; 1200 int err; 1201 1202 /* 1203 * Can't delete a head dataset if there are snapshots of it. 1204 * (Except if the only snapshots are from the branch we cloned 1205 * from.) 1206 */ 1207 if (ds->ds_prev != NULL && 1208 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1209 return (EINVAL); 1210 1211 /* 1212 * This is really a dsl_dir thing, but check it here so that 1213 * we'll be less likely to leave this dataset inconsistent & 1214 * nearly destroyed. 1215 */ 1216 err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); 1217 if (err) 1218 return (err); 1219 if (count != 0) 1220 return (EEXIST); 1221 1222 return (0); 1223 } 1224 1225 /* ARGSUSED */ 1226 static void 1227 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1228 { 1229 dsl_dataset_t *ds = arg1; 1230 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1231 1232 /* Mark it as inconsistent on-disk, in case we crash */ 1233 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1234 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1235 1236 spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1237 cr, "dataset = %llu", ds->ds_object); 1238 } 1239 1240 /* ARGSUSED */ 1241 int 1242 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1243 { 1244 dsl_dataset_t *ds = arg1; 1245 1246 /* Can't delete a branch point. */ 1247 if (ds->ds_phys->ds_num_children > 1) 1248 return (EEXIST); 1249 1250 /* 1251 * Can't delete a head dataset if there are snapshots of it. 1252 * (Except if the only snapshots are from the branch we cloned 1253 * from.) 1254 */ 1255 if (ds->ds_prev != NULL && 1256 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1257 return (EINVAL); 1258 1259 /* 1260 * If we made changes this txg, traverse_dsl_dataset won't find 1261 * them. Try again. 1262 */ 1263 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1264 return (EAGAIN); 1265 1266 /* XXX we should do some i/o error checking... */ 1267 return (0); 1268 } 1269 1270 void 1271 dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 1272 { 1273 dsl_dataset_t *ds = arg1; 1274 int64_t used = 0, compressed = 0, uncompressed = 0; 1275 zio_t *zio; 1276 int err; 1277 int after_branch_point = FALSE; 1278 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1279 objset_t *mos = dp->dp_meta_objset; 1280 dsl_dataset_t *ds_prev = NULL; 1281 uint64_t obj; 1282 1283 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 1284 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 1285 ASSERT(ds->ds_prev == NULL || 1286 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1287 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1288 1289 /* Remove our reservation */ 1290 if (ds->ds_reserved != 0) { 1291 uint64_t val = 0; 1292 dsl_dataset_set_reservation_sync(ds, &val, cr, tx); 1293 ASSERT3U(ds->ds_reserved, ==, 0); 1294 } 1295 1296 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1297 1298 obj = ds->ds_object; 1299 1300 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1301 if (ds->ds_prev) { 1302 ds_prev = ds->ds_prev; 1303 } else { 1304 VERIFY(0 == dsl_dataset_open_obj(dp, 1305 ds->ds_phys->ds_prev_snap_obj, NULL, 1306 DS_MODE_NONE, FTAG, &ds_prev)); 1307 } 1308 after_branch_point = 1309 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1310 1311 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1312 if (after_branch_point && 1313 ds->ds_phys->ds_next_snap_obj == 0) { 1314 /* This clone is toast. */ 1315 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1316 ds_prev->ds_phys->ds_num_children--; 1317 } else if (!after_branch_point) { 1318 ds_prev->ds_phys->ds_next_snap_obj = 1319 ds->ds_phys->ds_next_snap_obj; 1320 } 1321 } 1322 1323 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1324 1325 if (ds->ds_phys->ds_next_snap_obj != 0) { 1326 blkptr_t bp; 1327 dsl_dataset_t *ds_next; 1328 uint64_t itor = 0; 1329 uint64_t old_unique; 1330 1331 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1332 1333 VERIFY(0 == dsl_dataset_open_obj(dp, 1334 ds->ds_phys->ds_next_snap_obj, NULL, 1335 DS_MODE_NONE, FTAG, &ds_next)); 1336 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1337 1338 old_unique = dsl_dataset_unique(ds_next); 1339 1340 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1341 ds_next->ds_phys->ds_prev_snap_obj = 1342 ds->ds_phys->ds_prev_snap_obj; 1343 ds_next->ds_phys->ds_prev_snap_txg = 1344 ds->ds_phys->ds_prev_snap_txg; 1345 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1346 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1347 1348 /* 1349 * Transfer to our deadlist (which will become next's 1350 * new deadlist) any entries from next's current 1351 * deadlist which were born before prev, and free the 1352 * other entries. 1353 * 1354 * XXX we're doing this long task with the config lock held 1355 */ 1356 while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1357 &bp) == 0) { 1358 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1359 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1360 &bp, tx)); 1361 if (ds_prev && !after_branch_point && 1362 bp.blk_birth > 1363 ds_prev->ds_phys->ds_prev_snap_txg) { 1364 ds_prev->ds_phys->ds_unique_bytes += 1365 bp_get_dasize(dp->dp_spa, &bp); 1366 } 1367 } else { 1368 used += bp_get_dasize(dp->dp_spa, &bp); 1369 compressed += BP_GET_PSIZE(&bp); 1370 uncompressed += BP_GET_UCSIZE(&bp); 1371 /* XXX check return value? */ 1372 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1373 &bp, NULL, NULL, ARC_NOWAIT); 1374 } 1375 } 1376 1377 /* free next's deadlist */ 1378 bplist_close(&ds_next->ds_deadlist); 1379 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1380 1381 /* set next's deadlist to our deadlist */ 1382 ds_next->ds_phys->ds_deadlist_obj = 1383 ds->ds_phys->ds_deadlist_obj; 1384 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1385 ds_next->ds_phys->ds_deadlist_obj)); 1386 ds->ds_phys->ds_deadlist_obj = 0; 1387 1388 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1389 /* 1390 * Update next's unique to include blocks which 1391 * were previously shared by only this snapshot 1392 * and it. Those blocks will be born after the 1393 * prev snap and before this snap, and will have 1394 * died after the next snap and before the one 1395 * after that (ie. be on the snap after next's 1396 * deadlist). 1397 * 1398 * XXX we're doing this long task with the 1399 * config lock held 1400 */ 1401 dsl_dataset_t *ds_after_next; 1402 1403 VERIFY(0 == dsl_dataset_open_obj(dp, 1404 ds_next->ds_phys->ds_next_snap_obj, NULL, 1405 DS_MODE_NONE, FTAG, &ds_after_next)); 1406 itor = 0; 1407 while (bplist_iterate(&ds_after_next->ds_deadlist, 1408 &itor, &bp) == 0) { 1409 if (bp.blk_birth > 1410 ds->ds_phys->ds_prev_snap_txg && 1411 bp.blk_birth <= 1412 ds->ds_phys->ds_creation_txg) { 1413 ds_next->ds_phys->ds_unique_bytes += 1414 bp_get_dasize(dp->dp_spa, &bp); 1415 } 1416 } 1417 1418 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1419 ASSERT3P(ds_next->ds_prev, ==, NULL); 1420 } else { 1421 ASSERT3P(ds_next->ds_prev, ==, ds); 1422 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1423 ds_next); 1424 if (ds_prev) { 1425 VERIFY(0 == dsl_dataset_open_obj(dp, 1426 ds->ds_phys->ds_prev_snap_obj, NULL, 1427 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); 1428 } else { 1429 ds_next->ds_prev = NULL; 1430 } 1431 1432 dsl_dataset_recalc_head_uniq(ds_next); 1433 1434 /* 1435 * Reduce the amount of our unconsmed refreservation 1436 * being charged to our parent by the amount of 1437 * new unique data we have gained. 1438 */ 1439 if (old_unique < ds_next->ds_reserved) { 1440 int64_t mrsdelta; 1441 uint64_t new_unique = 1442 ds_next->ds_phys->ds_unique_bytes; 1443 1444 ASSERT(old_unique <= new_unique); 1445 mrsdelta = MIN(new_unique - old_unique, 1446 ds_next->ds_reserved - old_unique); 1447 dsl_dir_diduse_space(ds->ds_dir, -mrsdelta, 1448 0, 0, tx); 1449 } 1450 } 1451 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1452 1453 /* 1454 * NB: unique_bytes might not be accurate for the head objset. 1455 * Before SPA_VERSION 9, we didn't update its value when we 1456 * deleted the most recent snapshot. 1457 */ 1458 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1459 } else { 1460 /* 1461 * There's no next snapshot, so this is a head dataset. 1462 * Destroy the deadlist. Unless it's a clone, the 1463 * deadlist should be empty. (If it's a clone, it's 1464 * safe to ignore the deadlist contents.) 1465 */ 1466 struct killarg ka; 1467 1468 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1469 bplist_close(&ds->ds_deadlist); 1470 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1471 ds->ds_phys->ds_deadlist_obj = 0; 1472 1473 /* 1474 * Free everything that we point to (that's born after 1475 * the previous snapshot, if we are a clone) 1476 * 1477 * XXX we're doing this long task with the config lock held 1478 */ 1479 ka.usedp = &used; 1480 ka.compressedp = &compressed; 1481 ka.uncompressedp = &uncompressed; 1482 ka.zio = zio; 1483 ka.tx = tx; 1484 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1485 ADVANCE_POST, kill_blkptr, &ka); 1486 ASSERT3U(err, ==, 0); 1487 ASSERT(spa_version(dp->dp_spa) < 1488 SPA_VERSION_UNIQUE_ACCURATE || 1489 used == ds->ds_phys->ds_unique_bytes); 1490 } 1491 1492 err = zio_wait(zio); 1493 ASSERT3U(err, ==, 0); 1494 1495 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); 1496 1497 if (ds->ds_phys->ds_snapnames_zapobj) { 1498 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1499 ASSERT(err == 0); 1500 } 1501 1502 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1503 /* Erase the link in the dataset */ 1504 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1505 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1506 /* 1507 * dsl_dir_sync_destroy() called us, they'll destroy 1508 * the dataset. 1509 */ 1510 } else { 1511 /* remove from snapshot namespace */ 1512 dsl_dataset_t *ds_head; 1513 VERIFY(0 == dsl_dataset_open_obj(dp, 1514 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, 1515 DS_MODE_NONE, FTAG, &ds_head)); 1516 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1517 #ifdef ZFS_DEBUG 1518 { 1519 uint64_t val; 1520 err = zap_lookup(mos, 1521 ds_head->ds_phys->ds_snapnames_zapobj, 1522 ds->ds_snapname, 8, 1, &val); 1523 ASSERT3U(err, ==, 0); 1524 ASSERT3U(val, ==, obj); 1525 } 1526 #endif 1527 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1528 ds->ds_snapname, tx); 1529 ASSERT(err == 0); 1530 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1531 } 1532 1533 if (ds_prev && ds->ds_prev != ds_prev) 1534 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1535 1536 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1537 spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx, 1538 cr, "dataset = %llu", ds->ds_object); 1539 1540 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 1541 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1542 1543 } 1544 1545 static int 1546 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1547 { 1548 uint64_t asize; 1549 1550 if (!dmu_tx_is_syncing(tx)) 1551 return (0); 1552 1553 /* 1554 * If there's an fs-only reservation, any blocks that might become 1555 * owned by the snapshot dataset must be accommodated by space 1556 * outside of the reservation. 1557 */ 1558 asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1559 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE)) 1560 return (ENOSPC); 1561 1562 /* 1563 * Propogate any reserved space for this snapshot to other 1564 * snapshot checks in this sync group. 1565 */ 1566 if (asize > 0) 1567 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1568 1569 return (0); 1570 } 1571 1572 /* ARGSUSED */ 1573 int 1574 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1575 { 1576 dsl_dataset_t *ds = arg1; 1577 const char *snapname = arg2; 1578 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1579 int err; 1580 uint64_t value; 1581 1582 /* 1583 * We don't allow multiple snapshots of the same txg. If there 1584 * is already one, try again. 1585 */ 1586 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1587 return (EAGAIN); 1588 1589 /* 1590 * Check for conflicting name snapshot name. 1591 */ 1592 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1593 snapname, 8, 1, &value); 1594 if (err == 0) 1595 return (EEXIST); 1596 if (err != ENOENT) 1597 return (err); 1598 1599 /* 1600 * Check that the dataset's name is not too long. Name consists 1601 * of the dataset's length + 1 for the @-sign + snapshot name's length 1602 */ 1603 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 1604 return (ENAMETOOLONG); 1605 1606 err = dsl_dataset_snapshot_reserve_space(ds, tx); 1607 if (err) 1608 return (err); 1609 1610 ds->ds_trysnap_txg = tx->tx_txg; 1611 return (0); 1612 } 1613 1614 void 1615 dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1616 { 1617 dsl_dataset_t *ds = arg1; 1618 const char *snapname = arg2; 1619 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1620 dmu_buf_t *dbuf; 1621 dsl_dataset_phys_t *dsphys; 1622 uint64_t dsobj; 1623 objset_t *mos = dp->dp_meta_objset; 1624 int err; 1625 1626 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1627 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1628 1629 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1630 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1631 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1632 dmu_buf_will_dirty(dbuf, tx); 1633 dsphys = dbuf->db_data; 1634 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1635 dsphys->ds_fsid_guid = unique_create(); 1636 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1637 sizeof (dsphys->ds_guid)); 1638 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1639 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1640 dsphys->ds_next_snap_obj = ds->ds_object; 1641 dsphys->ds_num_children = 1; 1642 dsphys->ds_creation_time = gethrestime_sec(); 1643 dsphys->ds_creation_txg = tx->tx_txg; 1644 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1645 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1646 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1647 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1648 dsphys->ds_flags = ds->ds_phys->ds_flags; 1649 dsphys->ds_bp = ds->ds_phys->ds_bp; 1650 dmu_buf_rele(dbuf, FTAG); 1651 1652 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1653 if (ds->ds_prev) { 1654 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1655 ds->ds_object || 1656 ds->ds_prev->ds_phys->ds_num_children > 1); 1657 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1658 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1659 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1660 ds->ds_prev->ds_phys->ds_creation_txg); 1661 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1662 } 1663 } 1664 1665 /* 1666 * If we have a reference-reservation on this dataset, we will 1667 * need to increase the amount of refreservation being charged 1668 * since our unique space is going to zero. 1669 */ 1670 if (ds->ds_reserved) { 1671 int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1672 dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx); 1673 } 1674 1675 bplist_close(&ds->ds_deadlist); 1676 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1677 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); 1678 ds->ds_phys->ds_prev_snap_obj = dsobj; 1679 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; 1680 ds->ds_phys->ds_unique_bytes = 0; 1681 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1682 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1683 ds->ds_phys->ds_deadlist_obj = 1684 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1685 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1686 ds->ds_phys->ds_deadlist_obj)); 1687 1688 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1689 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1690 snapname, 8, 1, &dsobj, tx); 1691 ASSERT(err == 0); 1692 1693 if (ds->ds_prev) 1694 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1695 VERIFY(0 == dsl_dataset_open_obj(dp, 1696 ds->ds_phys->ds_prev_snap_obj, snapname, 1697 DS_MODE_NONE, ds, &ds->ds_prev)); 1698 1699 spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, 1700 "dataset = %llu", dsobj); 1701 } 1702 1703 void 1704 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1705 { 1706 ASSERT(dmu_tx_is_syncing(tx)); 1707 ASSERT(ds->ds_user_ptr != NULL); 1708 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1709 1710 /* 1711 * in case we had to change ds_fsid_guid when we opened it, 1712 * sync it out now. 1713 */ 1714 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1715 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1716 1717 dsl_dir_dirty(ds->ds_dir, tx); 1718 dmu_objset_sync(ds->ds_user_ptr, zio, tx); 1719 } 1720 1721 void 1722 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1723 { 1724 uint64_t refd, avail, uobjs, aobjs; 1725 1726 dsl_dir_stats(ds->ds_dir, nv); 1727 1728 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1729 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1730 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1731 1732 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1733 ds->ds_phys->ds_creation_time); 1734 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1735 ds->ds_phys->ds_creation_txg); 1736 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1737 ds->ds_quota); 1738 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1739 ds->ds_reserved); 1740 1741 if (ds->ds_phys->ds_next_snap_obj) { 1742 /* 1743 * This is a snapshot; override the dd's space used with 1744 * our unique space and compression ratio. 1745 */ 1746 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1747 ds->ds_phys->ds_unique_bytes); 1748 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 1749 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1750 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1751 ds->ds_phys->ds_compressed_bytes)); 1752 } 1753 } 1754 1755 void 1756 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1757 { 1758 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1759 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1760 stat->dds_guid = ds->ds_phys->ds_guid; 1761 if (ds->ds_phys->ds_next_snap_obj) { 1762 stat->dds_is_snapshot = B_TRUE; 1763 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1764 } 1765 1766 /* clone origin is really a dsl_dir thing... */ 1767 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 1768 if (ds->ds_dir->dd_phys->dd_origin_obj) { 1769 dsl_dataset_t *ods; 1770 1771 VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, 1772 ds->ds_dir->dd_phys->dd_origin_obj, 1773 NULL, DS_MODE_NONE, FTAG, &ods)); 1774 dsl_dataset_name(ods, stat->dds_origin); 1775 dsl_dataset_close(ods, DS_MODE_NONE, FTAG); 1776 } 1777 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 1778 } 1779 1780 uint64_t 1781 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1782 { 1783 return (ds->ds_fsid_guid); 1784 } 1785 1786 void 1787 dsl_dataset_space(dsl_dataset_t *ds, 1788 uint64_t *refdbytesp, uint64_t *availbytesp, 1789 uint64_t *usedobjsp, uint64_t *availobjsp) 1790 { 1791 *refdbytesp = ds->ds_phys->ds_used_bytes; 1792 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1793 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 1794 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 1795 if (ds->ds_quota != 0) { 1796 /* 1797 * Adjust available bytes according to refquota 1798 */ 1799 if (*refdbytesp < ds->ds_quota) 1800 *availbytesp = MIN(*availbytesp, 1801 ds->ds_quota - *refdbytesp); 1802 else 1803 *availbytesp = 0; 1804 } 1805 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1806 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1807 } 1808 1809 boolean_t 1810 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 1811 { 1812 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1813 1814 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 1815 dsl_pool_sync_context(dp)); 1816 if (ds->ds_prev == NULL) 1817 return (B_FALSE); 1818 if (ds->ds_phys->ds_bp.blk_birth > 1819 ds->ds_prev->ds_phys->ds_creation_txg) 1820 return (B_TRUE); 1821 return (B_FALSE); 1822 } 1823 1824 /* ARGSUSED */ 1825 static int 1826 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 1827 { 1828 dsl_dataset_t *ds = arg1; 1829 char *newsnapname = arg2; 1830 dsl_dir_t *dd = ds->ds_dir; 1831 objset_t *mos = dd->dd_pool->dp_meta_objset; 1832 dsl_dataset_t *hds; 1833 uint64_t val; 1834 int err; 1835 1836 err = dsl_dataset_open_obj(dd->dd_pool, 1837 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); 1838 if (err) 1839 return (err); 1840 1841 /* new name better not be in use */ 1842 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, 1843 newsnapname, 8, 1, &val); 1844 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1845 1846 if (err == 0) 1847 err = EEXIST; 1848 else if (err == ENOENT) 1849 err = 0; 1850 1851 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1852 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 1853 err = ENAMETOOLONG; 1854 1855 return (err); 1856 } 1857 1858 static void 1859 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 1860 cred_t *cr, dmu_tx_t *tx) 1861 { 1862 dsl_dataset_t *ds = arg1; 1863 const char *newsnapname = arg2; 1864 dsl_dir_t *dd = ds->ds_dir; 1865 objset_t *mos = dd->dd_pool->dp_meta_objset; 1866 dsl_dataset_t *hds; 1867 int err; 1868 1869 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 1870 1871 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1872 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); 1873 1874 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1875 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, 1876 ds->ds_snapname, tx); 1877 ASSERT3U(err, ==, 0); 1878 mutex_enter(&ds->ds_lock); 1879 (void) strcpy(ds->ds_snapname, newsnapname); 1880 mutex_exit(&ds->ds_lock); 1881 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 1882 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 1883 ASSERT3U(err, ==, 0); 1884 1885 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 1886 cr, "dataset = %llu", ds->ds_object); 1887 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1888 } 1889 1890 struct renamesnaparg { 1891 dsl_sync_task_group_t *dstg; 1892 char failed[MAXPATHLEN]; 1893 char *oldsnap; 1894 char *newsnap; 1895 }; 1896 1897 static int 1898 dsl_snapshot_rename_one(char *name, void *arg) 1899 { 1900 struct renamesnaparg *ra = arg; 1901 dsl_dataset_t *ds = NULL; 1902 char *cp; 1903 int err; 1904 1905 cp = name + strlen(name); 1906 *cp = '@'; 1907 (void) strcpy(cp + 1, ra->oldsnap); 1908 1909 /* 1910 * For recursive snapshot renames the parent won't be changing 1911 * so we just pass name for both the to/from argument. 1912 */ 1913 if (err = zfs_secpolicy_rename_perms(name, name, CRED())) { 1914 (void) strcpy(ra->failed, name); 1915 return (err); 1916 } 1917 1918 err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD, 1919 ra->dstg, &ds); 1920 if (err == ENOENT) { 1921 *cp = '\0'; 1922 return (0); 1923 } 1924 if (err) { 1925 (void) strcpy(ra->failed, name); 1926 *cp = '\0'; 1927 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1928 return (err); 1929 } 1930 1931 #ifdef _KERNEL 1932 /* for all filesystems undergoing rename, we'll need to unmount it */ 1933 (void) zfs_unmount_snap(name, NULL); 1934 #endif 1935 1936 *cp = '\0'; 1937 1938 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 1939 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 1940 1941 return (0); 1942 } 1943 1944 static int 1945 dsl_recursive_rename(char *oldname, const char *newname) 1946 { 1947 int err; 1948 struct renamesnaparg *ra; 1949 dsl_sync_task_t *dst; 1950 spa_t *spa; 1951 char *cp, *fsname = spa_strdup(oldname); 1952 int len = strlen(oldname); 1953 1954 /* truncate the snapshot name to get the fsname */ 1955 cp = strchr(fsname, '@'); 1956 *cp = '\0'; 1957 1958 err = spa_open(fsname, &spa, FTAG); 1959 if (err) { 1960 kmem_free(fsname, len + 1); 1961 return (err); 1962 } 1963 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 1964 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 1965 1966 ra->oldsnap = strchr(oldname, '@') + 1; 1967 ra->newsnap = strchr(newname, '@') + 1; 1968 *ra->failed = '\0'; 1969 1970 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 1971 DS_FIND_CHILDREN); 1972 kmem_free(fsname, len + 1); 1973 1974 if (err == 0) { 1975 err = dsl_sync_task_group_wait(ra->dstg); 1976 } 1977 1978 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 1979 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 1980 dsl_dataset_t *ds = dst->dst_arg1; 1981 if (dst->dst_err) { 1982 dsl_dir_name(ds->ds_dir, ra->failed); 1983 (void) strcat(ra->failed, "@"); 1984 (void) strcat(ra->failed, ra->newsnap); 1985 } 1986 dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); 1987 } 1988 1989 if (err) 1990 (void) strcpy(oldname, ra->failed); 1991 1992 dsl_sync_task_group_destroy(ra->dstg); 1993 kmem_free(ra, sizeof (struct renamesnaparg)); 1994 spa_close(spa, FTAG); 1995 return (err); 1996 } 1997 1998 static int 1999 dsl_valid_rename(char *oldname, void *arg) 2000 { 2001 int delta = *(int *)arg; 2002 2003 if (strlen(oldname) + delta >= MAXNAMELEN) 2004 return (ENAMETOOLONG); 2005 2006 return (0); 2007 } 2008 2009 #pragma weak dmu_objset_rename = dsl_dataset_rename 2010 int 2011 dsl_dataset_rename(char *oldname, const char *newname, 2012 boolean_t recursive) 2013 { 2014 dsl_dir_t *dd; 2015 dsl_dataset_t *ds; 2016 const char *tail; 2017 int err; 2018 2019 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2020 if (err) 2021 return (err); 2022 if (tail == NULL) { 2023 int delta = strlen(newname) - strlen(oldname); 2024 2025 /* if we're growing, validate child size lengths */ 2026 if (delta > 0) 2027 err = dmu_objset_find(oldname, dsl_valid_rename, 2028 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 2029 2030 if (!err) 2031 err = dsl_dir_rename(dd, newname); 2032 dsl_dir_close(dd, FTAG); 2033 return (err); 2034 } 2035 if (tail[0] != '@') { 2036 /* the name ended in a nonexistant component */ 2037 dsl_dir_close(dd, FTAG); 2038 return (ENOENT); 2039 } 2040 2041 dsl_dir_close(dd, FTAG); 2042 2043 /* new name must be snapshot in same filesystem */ 2044 tail = strchr(newname, '@'); 2045 if (tail == NULL) 2046 return (EINVAL); 2047 tail++; 2048 if (strncmp(oldname, newname, tail - newname) != 0) 2049 return (EXDEV); 2050 2051 if (recursive) { 2052 err = dsl_recursive_rename(oldname, newname); 2053 } else { 2054 err = dsl_dataset_open(oldname, 2055 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); 2056 if (err) 2057 return (err); 2058 2059 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2060 dsl_dataset_snapshot_rename_check, 2061 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2062 2063 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2064 } 2065 2066 return (err); 2067 } 2068 2069 struct promotearg { 2070 uint64_t used, comp, uncomp, unique; 2071 uint64_t newnext_obj, snapnames_obj; 2072 }; 2073 2074 /* ARGSUSED */ 2075 static int 2076 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 2077 { 2078 dsl_dataset_t *hds = arg1; 2079 struct promotearg *pa = arg2; 2080 dsl_dir_t *dd = hds->ds_dir; 2081 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2082 dsl_dir_t *odd = NULL; 2083 dsl_dataset_t *ds = NULL; 2084 dsl_dataset_t *origin_ds = NULL; 2085 dsl_dataset_t *newnext_ds = NULL; 2086 int err; 2087 char *name = NULL; 2088 uint64_t itor = 0; 2089 blkptr_t bp; 2090 2091 bzero(pa, sizeof (*pa)); 2092 2093 /* Check that it is a clone */ 2094 if (dd->dd_phys->dd_origin_obj == 0) 2095 return (EINVAL); 2096 2097 /* Since this is so expensive, don't do the preliminary check */ 2098 if (!dmu_tx_is_syncing(tx)) 2099 return (0); 2100 2101 if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, 2102 NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)) 2103 goto out; 2104 odd = origin_ds->ds_dir; 2105 2106 { 2107 dsl_dataset_t *phds; 2108 if (err = dsl_dataset_open_obj(dd->dd_pool, 2109 odd->dd_phys->dd_head_dataset_obj, 2110 NULL, DS_MODE_NONE, FTAG, &phds)) 2111 goto out; 2112 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; 2113 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); 2114 } 2115 2116 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 2117 err = EXDEV; 2118 goto out; 2119 } 2120 2121 /* find origin's new next ds */ 2122 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, 2123 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); 2124 while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) { 2125 dsl_dataset_t *prev; 2126 2127 if (err = dsl_dataset_open_obj(dd->dd_pool, 2128 newnext_ds->ds_phys->ds_prev_snap_obj, 2129 NULL, DS_MODE_NONE, FTAG, &prev)) 2130 goto out; 2131 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 2132 newnext_ds = prev; 2133 } 2134 pa->newnext_obj = newnext_ds->ds_object; 2135 2136 /* compute origin's new unique space */ 2137 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, 2138 &itor, &bp)) == 0) { 2139 if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg) 2140 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); 2141 } 2142 if (err != ENOENT) 2143 goto out; 2144 2145 /* Walk the snapshots that we are moving */ 2146 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2147 ds = origin_ds; 2148 /* CONSTCOND */ 2149 while (TRUE) { 2150 uint64_t val, dlused, dlcomp, dluncomp; 2151 dsl_dataset_t *prev; 2152 2153 /* Check that the snapshot name does not conflict */ 2154 dsl_dataset_name(ds, name); 2155 err = zap_lookup(dd->dd_pool->dp_meta_objset, 2156 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2157 8, 1, &val); 2158 if (err != ENOENT) { 2159 if (err == 0) 2160 err = EEXIST; 2161 goto out; 2162 } 2163 2164 /* 2165 * compute space to transfer. Each snapshot gave birth to: 2166 * (my used) - (prev's used) + (deadlist's used) 2167 */ 2168 pa->used += ds->ds_phys->ds_used_bytes; 2169 pa->comp += ds->ds_phys->ds_compressed_bytes; 2170 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; 2171 2172 /* If we reach the first snapshot, we're done. */ 2173 if (ds->ds_phys->ds_prev_snap_obj == 0) 2174 break; 2175 2176 if (err = bplist_space(&ds->ds_deadlist, 2177 &dlused, &dlcomp, &dluncomp)) 2178 goto out; 2179 if (err = dsl_dataset_open_obj(dd->dd_pool, 2180 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 2181 FTAG, &prev)) 2182 goto out; 2183 pa->used += dlused - prev->ds_phys->ds_used_bytes; 2184 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; 2185 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; 2186 2187 /* 2188 * We could be a clone of a clone. If we reach our 2189 * parent's branch point, we're done. 2190 */ 2191 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 2192 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 2193 break; 2194 } 2195 if (ds != origin_ds) 2196 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2197 ds = prev; 2198 } 2199 2200 /* Check that there is enough space here */ 2201 err = dsl_dir_transfer_possible(odd, dd, pa->used); 2202 2203 out: 2204 if (ds && ds != origin_ds) 2205 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2206 if (origin_ds) 2207 dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); 2208 if (newnext_ds) 2209 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 2210 if (name) 2211 kmem_free(name, MAXPATHLEN); 2212 return (err); 2213 } 2214 2215 static void 2216 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2217 { 2218 dsl_dataset_t *hds = arg1; 2219 struct promotearg *pa = arg2; 2220 dsl_dir_t *dd = hds->ds_dir; 2221 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2222 dsl_dir_t *odd = NULL; 2223 dsl_dataset_t *ds, *origin_ds; 2224 char *name; 2225 2226 ASSERT(dd->dd_phys->dd_origin_obj != 0); 2227 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 2228 2229 VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, 2230 NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)); 2231 /* 2232 * We need to explicitly open odd, since origin_ds's dd will be 2233 * changing. 2234 */ 2235 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2236 NULL, FTAG, &odd)); 2237 2238 /* move snapshots to this dir */ 2239 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2240 ds = origin_ds; 2241 /* CONSTCOND */ 2242 while (TRUE) { 2243 dsl_dataset_t *prev; 2244 2245 /* move snap name entry */ 2246 dsl_dataset_name(ds, name); 2247 VERIFY(0 == zap_remove(dp->dp_meta_objset, 2248 pa->snapnames_obj, ds->ds_snapname, tx)); 2249 VERIFY(0 == zap_add(dp->dp_meta_objset, 2250 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2251 8, 1, &ds->ds_object, tx)); 2252 2253 /* change containing dsl_dir */ 2254 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2255 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2256 ds->ds_phys->ds_dir_obj = dd->dd_object; 2257 ASSERT3P(ds->ds_dir, ==, odd); 2258 dsl_dir_close(ds->ds_dir, ds); 2259 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2260 NULL, ds, &ds->ds_dir)); 2261 2262 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2263 2264 if (ds->ds_phys->ds_prev_snap_obj == 0) 2265 break; 2266 2267 VERIFY(0 == dsl_dataset_open_obj(dp, 2268 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 2269 FTAG, &prev)); 2270 2271 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 2272 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 2273 break; 2274 } 2275 if (ds != origin_ds) 2276 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2277 ds = prev; 2278 } 2279 if (ds != origin_ds) 2280 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 2281 2282 /* change origin's next snap */ 2283 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2284 origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; 2285 2286 /* change origin */ 2287 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2288 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2289 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2290 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2291 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2292 2293 /* change space accounting */ 2294 dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx); 2295 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); 2296 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2297 2298 /* log history record */ 2299 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2300 cr, "dataset = %llu", ds->ds_object); 2301 2302 dsl_dir_close(odd, FTAG); 2303 dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); 2304 kmem_free(name, MAXPATHLEN); 2305 } 2306 2307 int 2308 dsl_dataset_promote(const char *name) 2309 { 2310 dsl_dataset_t *ds; 2311 int err; 2312 dmu_object_info_t doi; 2313 struct promotearg pa; 2314 2315 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 2316 if (err) 2317 return (err); 2318 2319 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 2320 ds->ds_phys->ds_snapnames_zapobj, &doi); 2321 if (err) { 2322 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2323 return (err); 2324 } 2325 2326 /* 2327 * Add in 128x the snapnames zapobj size, since we will be moving 2328 * a bunch of snapnames to the promoted ds, and dirtying their 2329 * bonus buffers. 2330 */ 2331 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2332 dsl_dataset_promote_check, 2333 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); 2334 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2335 return (err); 2336 } 2337 2338 struct cloneswaparg { 2339 dsl_dataset_t *cds; /* clone dataset */ 2340 dsl_dataset_t *ohds; /* origin's head dataset */ 2341 boolean_t force; 2342 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 2343 }; 2344 2345 /* ARGSUSED */ 2346 static int 2347 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2348 { 2349 struct cloneswaparg *csa = arg1; 2350 2351 /* they should both be heads */ 2352 if (dsl_dataset_is_snapshot(csa->cds) || 2353 dsl_dataset_is_snapshot(csa->ohds)) 2354 return (EINVAL); 2355 2356 /* the branch point should be just before them */ 2357 if (csa->cds->ds_prev != csa->ohds->ds_prev) 2358 return (EINVAL); 2359 2360 /* cds should be the clone */ 2361 if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != 2362 csa->ohds->ds_object) 2363 return (EINVAL); 2364 2365 /* the clone should be a child of the origin */ 2366 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 2367 return (EINVAL); 2368 2369 /* ohds shouldn't be modified unless 'force' */ 2370 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 2371 return (ETXTBSY); 2372 2373 /* adjust amount of any unconsumed refreservation */ 2374 csa->unused_refres_delta = 2375 (int64_t)MIN(csa->ohds->ds_reserved, 2376 csa->ohds->ds_phys->ds_unique_bytes) - 2377 (int64_t)MIN(csa->ohds->ds_reserved, 2378 csa->cds->ds_phys->ds_unique_bytes); 2379 2380 if (csa->unused_refres_delta > 0 && 2381 csa->unused_refres_delta > 2382 dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 2383 return (ENOSPC); 2384 2385 return (0); 2386 } 2387 2388 /* ARGSUSED */ 2389 static void 2390 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2391 { 2392 struct cloneswaparg *csa = arg1; 2393 dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; 2394 uint64_t itor = 0; 2395 blkptr_t bp; 2396 uint64_t unique = 0; 2397 int err; 2398 2399 ASSERT(csa->cds->ds_reserved == 0); 2400 ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); 2401 2402 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 2403 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 2404 dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); 2405 2406 if (csa->cds->ds_user_ptr != NULL) { 2407 csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); 2408 csa->cds->ds_user_ptr = NULL; 2409 } 2410 2411 if (csa->ohds->ds_user_ptr != NULL) { 2412 csa->ohds->ds_user_evict_func(csa->ohds, 2413 csa->ohds->ds_user_ptr); 2414 csa->ohds->ds_user_ptr = NULL; 2415 } 2416 2417 /* compute unique space */ 2418 while ((err = bplist_iterate(&csa->cds->ds_deadlist, 2419 &itor, &bp)) == 0) { 2420 if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg) 2421 unique += bp_get_dasize(dp->dp_spa, &bp); 2422 } 2423 VERIFY(err == ENOENT); 2424 2425 /* reset origin's unique bytes */ 2426 csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique; 2427 2428 /* swap blkptrs */ 2429 { 2430 blkptr_t tmp; 2431 tmp = csa->ohds->ds_phys->ds_bp; 2432 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 2433 csa->cds->ds_phys->ds_bp = tmp; 2434 } 2435 2436 /* set dd_*_bytes */ 2437 { 2438 int64_t dused, dcomp, duncomp; 2439 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2440 uint64_t odl_used, odl_comp, odl_uncomp; 2441 2442 VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2443 &cdl_comp, &cdl_uncomp)); 2444 VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2445 &odl_comp, &odl_uncomp)); 2446 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 2447 (csa->ohds->ds_phys->ds_used_bytes + odl_used); 2448 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 2449 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 2450 duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 2451 cdl_uncomp - 2452 (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2453 2454 dsl_dir_diduse_space(csa->ohds->ds_dir, 2455 dused, dcomp, duncomp, tx); 2456 dsl_dir_diduse_space(csa->cds->ds_dir, 2457 -dused, -dcomp, -duncomp, tx); 2458 } 2459 2460 #define SWITCH64(x, y) \ 2461 { \ 2462 uint64_t __tmp = (x); \ 2463 (x) = (y); \ 2464 (y) = __tmp; \ 2465 } 2466 2467 /* swap ds_*_bytes */ 2468 SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 2469 csa->cds->ds_phys->ds_used_bytes); 2470 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 2471 csa->cds->ds_phys->ds_compressed_bytes); 2472 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 2473 csa->cds->ds_phys->ds_uncompressed_bytes); 2474 SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 2475 csa->cds->ds_phys->ds_unique_bytes); 2476 2477 /* apply any parent delta for change in unconsumed refreservation */ 2478 dsl_dir_diduse_space(csa->ohds->ds_dir, csa->unused_refres_delta, 2479 0, 0, tx); 2480 2481 /* swap deadlists */ 2482 bplist_close(&csa->cds->ds_deadlist); 2483 bplist_close(&csa->ohds->ds_deadlist); 2484 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 2485 csa->cds->ds_phys->ds_deadlist_obj); 2486 VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 2487 csa->cds->ds_phys->ds_deadlist_obj)); 2488 VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 2489 csa->ohds->ds_phys->ds_deadlist_obj)); 2490 } 2491 2492 /* 2493 * Swap 'clone' with its origin head file system. 2494 */ 2495 int 2496 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 2497 boolean_t force) 2498 { 2499 struct cloneswaparg csa; 2500 2501 ASSERT(clone->ds_open_refcount == DS_REF_MAX); 2502 ASSERT(origin_head->ds_open_refcount == DS_REF_MAX); 2503 2504 csa.cds = clone; 2505 csa.ohds = origin_head; 2506 csa.force = force; 2507 return (dsl_sync_task_do(clone->ds_dir->dd_pool, 2508 dsl_dataset_clone_swap_check, 2509 dsl_dataset_clone_swap_sync, &csa, NULL, 9)); 2510 } 2511 2512 /* 2513 * Given a pool name and a dataset object number in that pool, 2514 * return the name of that dataset. 2515 */ 2516 int 2517 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2518 { 2519 spa_t *spa; 2520 dsl_pool_t *dp; 2521 dsl_dataset_t *ds = NULL; 2522 int error; 2523 2524 if ((error = spa_open(pname, &spa, FTAG)) != 0) 2525 return (error); 2526 dp = spa_get_dsl(spa); 2527 rw_enter(&dp->dp_config_rwlock, RW_READER); 2528 if ((error = dsl_dataset_open_obj(dp, obj, 2529 NULL, DS_MODE_NONE, FTAG, &ds)) != 0) { 2530 rw_exit(&dp->dp_config_rwlock); 2531 spa_close(spa, FTAG); 2532 return (error); 2533 } 2534 dsl_dataset_name(ds, buf); 2535 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 2536 rw_exit(&dp->dp_config_rwlock); 2537 spa_close(spa, FTAG); 2538 2539 return (0); 2540 } 2541 2542 int 2543 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2544 uint64_t asize, uint64_t inflight, uint64_t *used) 2545 { 2546 int error = 0; 2547 2548 ASSERT3S(asize, >, 0); 2549 2550 mutex_enter(&ds->ds_lock); 2551 /* 2552 * Make a space adjustment for reserved bytes. 2553 */ 2554 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2555 ASSERT3U(*used, >=, 2556 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2557 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2558 } 2559 2560 if (!check_quota || ds->ds_quota == 0) { 2561 mutex_exit(&ds->ds_lock); 2562 return (0); 2563 } 2564 /* 2565 * If they are requesting more space, and our current estimate 2566 * is over quota, they get to try again unless the actual 2567 * on-disk is over quota and there are no pending changes (which 2568 * may free up space for us). 2569 */ 2570 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 2571 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 2572 error = ERESTART; 2573 else 2574 error = EDQUOT; 2575 } 2576 mutex_exit(&ds->ds_lock); 2577 2578 return (error); 2579 } 2580 2581 /* ARGSUSED */ 2582 static int 2583 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 2584 { 2585 dsl_dataset_t *ds = arg1; 2586 uint64_t *quotap = arg2; 2587 uint64_t new_quota = *quotap; 2588 2589 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 2590 return (ENOTSUP); 2591 2592 if (new_quota == 0) 2593 return (0); 2594 2595 if (new_quota < ds->ds_phys->ds_used_bytes || 2596 new_quota < ds->ds_reserved) 2597 return (ENOSPC); 2598 2599 return (0); 2600 } 2601 2602 /* ARGSUSED */ 2603 void 2604 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2605 { 2606 dsl_dataset_t *ds = arg1; 2607 uint64_t *quotap = arg2; 2608 uint64_t new_quota = *quotap; 2609 2610 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2611 2612 mutex_enter(&ds->ds_lock); 2613 ds->ds_quota = new_quota; 2614 mutex_exit(&ds->ds_lock); 2615 2616 dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 2617 2618 spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 2619 tx, cr, "%lld dataset = %llu ", 2620 (longlong_t)new_quota, ds->ds_dir->dd_phys->dd_head_dataset_obj); 2621 } 2622 2623 int 2624 dsl_dataset_set_quota(const char *dsname, uint64_t quota) 2625 { 2626 dsl_dataset_t *ds; 2627 int err; 2628 2629 err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); 2630 if (err) 2631 return (err); 2632 2633 if (quota != ds->ds_quota) { 2634 /* 2635 * If someone removes a file, then tries to set the quota, we 2636 * want to make sure the file freeing takes effect. 2637 */ 2638 txg_wait_open(ds->ds_dir->dd_pool, 0); 2639 2640 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2641 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 2642 ds, "a, 0); 2643 } 2644 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2645 return (err); 2646 } 2647 2648 static int 2649 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 2650 { 2651 dsl_dataset_t *ds = arg1; 2652 uint64_t *reservationp = arg2; 2653 uint64_t new_reservation = *reservationp; 2654 int64_t delta; 2655 uint64_t unique; 2656 2657 if (new_reservation > INT64_MAX) 2658 return (EOVERFLOW); 2659 2660 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 2661 SPA_VERSION_REFRESERVATION) 2662 return (ENOTSUP); 2663 2664 if (dsl_dataset_is_snapshot(ds)) 2665 return (EINVAL); 2666 2667 /* 2668 * If we are doing the preliminary check in open context, the 2669 * space estimates may be inaccurate. 2670 */ 2671 if (!dmu_tx_is_syncing(tx)) 2672 return (0); 2673 2674 mutex_enter(&ds->ds_lock); 2675 unique = dsl_dataset_unique(ds); 2676 delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved); 2677 mutex_exit(&ds->ds_lock); 2678 2679 if (delta > 0 && 2680 delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 2681 return (ENOSPC); 2682 if (delta > 0 && ds->ds_quota > 0 && 2683 new_reservation > ds->ds_quota) 2684 return (ENOSPC); 2685 2686 return (0); 2687 } 2688 2689 /* ARGSUSED */ 2690 static void 2691 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 2692 dmu_tx_t *tx) 2693 { 2694 dsl_dataset_t *ds = arg1; 2695 uint64_t *reservationp = arg2; 2696 uint64_t new_reservation = *reservationp; 2697 uint64_t unique; 2698 int64_t delta; 2699 2700 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2701 2702 mutex_enter(&ds->ds_lock); 2703 unique = dsl_dataset_unique(ds); 2704 delta = MAX(0, (int64_t)(new_reservation - unique)) - 2705 MAX(0, (int64_t)(ds->ds_reserved - unique)); 2706 ds->ds_reserved = new_reservation; 2707 mutex_exit(&ds->ds_lock); 2708 2709 dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation", 2710 new_reservation, cr, tx); 2711 2712 dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx); 2713 2714 spa_history_internal_log(LOG_DS_REFRESERV, 2715 ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 2716 (longlong_t)new_reservation, 2717 ds->ds_dir->dd_phys->dd_head_dataset_obj); 2718 } 2719 2720 int 2721 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 2722 { 2723 dsl_dataset_t *ds; 2724 int err; 2725 2726 err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); 2727 if (err) 2728 return (err); 2729 2730 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2731 dsl_dataset_set_reservation_check, 2732 dsl_dataset_set_reservation_sync, ds, &reservation, 0); 2733 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 2734 return (err); 2735 } 2736