/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/* Portions Copyright 2010 Robert Milkowski */

#include <sys/cred.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_deleg.h>
#include <sys/dnode.h>
#include <sys/dbuf.h>
#include <sys/zvol.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/dmu_impl.h>
#include <sys/zfs_ioctl.h>
#include <sys/sunddi.h>
#include <sys/sa.h>

spa_t *
dmu_objset_spa(objset_t *os)
{
	return (os->os_spa);
}

zilog_t *
dmu_objset_zil(objset_t *os)
{
	return (os->os_zil);
}

dsl_pool_t *
dmu_objset_pool(objset_t *os)
{
	dsl_dataset_t *ds;

	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
		return (ds->ds_dir->dd_pool);
	else
		return (spa_get_dsl(os->os_spa));
}

dsl_dataset_t *
dmu_objset_ds(objset_t *os)
{
	return (os->os_dsl_dataset);
}

dmu_objset_type_t
dmu_objset_type(objset_t *os)
{
	return (os->os_phys->os_type);
}

void
dmu_objset_name(objset_t *os, char *buf)
{
	dsl_dataset_name(os->os_dsl_dataset, buf);
}

uint64_t
dmu_objset_id(objset_t *os)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;

	return (ds ? ds->ds_object : 0);
}

uint64_t
dmu_objset_syncprop(objset_t *os)
{
	return (os->os_sync);
}

uint64_t
dmu_objset_logbias(objset_t *os)
{
	return (os->os_logbias);
}

static void
checksum_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance should have been done by now.
	 */
	ASSERT(newval != ZIO_CHECKSUM_INHERIT);

	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
}

static void
compression_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval != ZIO_COMPRESS_INHERIT);

	os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
}

static void
copies_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval > 0);
	ASSERT(newval <= spa_max_replication(os->os_spa));

	os->os_copies = newval;
}
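/*
 * Note: the *_changed_cb callbacks above and below are wired up via
 * dsl_prop_register() in dmu_objset_open_impl(); dsl_prop_register()
 * invokes the callback once with the current value before returning,
 * and again whenever the property is later changed.
 */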
static void
dedup_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;
	spa_t *spa = os->os_spa;
	enum zio_checksum checksum;

	/*
	 * Inheritance should have been done by now.
	 */
	ASSERT(newval != ZIO_CHECKSUM_INHERIT);

	checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);

	os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
	os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
}

static void
primary_cache_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
	    newval == ZFS_CACHE_METADATA);

	os->os_primary_cache = newval;
}

static void
secondary_cache_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
	    newval == ZFS_CACHE_METADATA);

	os->os_secondary_cache = newval;
}

static void
sync_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
	    newval == ZFS_SYNC_DISABLED);

	os->os_sync = newval;
	if (os->os_zil)
		zil_set_sync(os->os_zil, newval);
}

static void
logbias_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
	    newval == ZFS_LOGBIAS_THROUGHPUT);
	os->os_logbias = newval;
	if (os->os_zil)
		zil_set_logbias(os->os_zil, newval);
}

void
dmu_objset_byteswap(void *buf, size_t size)
{
	objset_phys_t *osp = buf;

	ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
	dnode_byteswap(&osp->os_meta_dnode);
	byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
	osp->os_type = BSWAP_64(osp->os_type);
	osp->os_flags = BSWAP_64(osp->os_flags);
	if (size == sizeof (objset_phys_t)) {
		dnode_byteswap(&osp->os_userused_dnode);
		dnode_byteswap(&osp->os_groupused_dnode);
	}
}
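/*
 * Pools older than SPA_VERSION_USERSPACE use the smaller
 * OBJSET_OLD_PHYS_SIZE layout, which lacks the userused/groupused
 * dnodes; only the full-size objset_phys_t has them, which is why the
 * byteswap above (and dmu_objset_open_impl() below) must treat the two
 * sizes differently.
 */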
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		uint32_t aflags = ARC_WAIT;
		zbookmark_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_L2CACHE;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		/*
		 * XXX when bprewrite scrub can change the bp,
		 * and this is called from dmu_objset_open_ds_os, the bp
		 * could change, and we'll need a lock.
		 */
		err = dsl_read_nolock(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = EIO;
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off). Snapshots don't need to know about
	 * checksum/compression/copies.
	 */
	if (ds) {
		err = dsl_prop_register(ds, "primarycache",
		    primary_cache_changed_cb, os);
		if (err == 0)
			err = dsl_prop_register(ds, "secondarycache",
			    secondary_cache_changed_cb, os);
		if (!dsl_dataset_is_snapshot(ds)) {
			if (err == 0)
				err = dsl_prop_register(ds, "checksum",
				    checksum_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "compression",
				    compression_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "copies",
				    copies_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "dedup",
				    dedup_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "logbias",
				    logbias_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "sync",
				    sync_changed_cb, os);
		}
		if (err) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf) == 1);
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else if (ds == NULL) {
		/* It's the meta-objset. */
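		/*
		 * The MOS has no DSL dataset and hence no properties to
		 * inherit, so sensible defaults are hard-wired here; in
		 * particular os_copies is pinned at spa_max_replication()
		 * because this metadata describes the entire pool.
		 */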
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_LZJB;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = 0;
		os->os_logbias = 0;
		os->os_sync = 0;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
	}

	os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	os->os_meta_dnode = dnode_special_open(os,
	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		os->os_userused_dnode = dnode_special_open(os,
		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
		os->os_groupused_dnode = dnode_special_open(os,
		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
	}

	/*
	 * We should be the only thread trying to do this because we
	 * have ds_opening_lock
	 */
	if (ds) {
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_objset == NULL);
		ds->ds_objset = os;
		mutex_exit(&ds->ds_lock);
	}

	*osp = os;
	return (0);
}

int
dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
{
	int err = 0;

	mutex_enter(&ds->ds_opening_lock);
	*osp = ds->ds_objset;
	if (*osp == NULL) {
		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
		    ds, &ds->ds_phys->ds_bp, osp);
	}
	mutex_exit(&ds->ds_opening_lock);
	return (err);
}

/* called from zpl */
int
dmu_objset_hold(const char *name, void *tag, objset_t **osp)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_hold(name, tag, &ds);
	if (err)
		return (err);

	err = dmu_objset_from_ds(ds, osp);
	if (err)
		dsl_dataset_rele(ds, tag);

	return (err);
}

/* called from zpl */
int
dmu_objset_own(const char *name, dmu_objset_type_t type,
    boolean_t readonly, void *tag, objset_t **osp)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_own(name, B_FALSE, tag, &ds);
	if (err)
		return (err);

	err = dmu_objset_from_ds(ds, osp);
	if (err) {
		dsl_dataset_disown(ds, tag);
	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
		dmu_objset_disown(*osp, tag);
		return (EINVAL);
	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
		dmu_objset_disown(*osp, tag);
		return (EROFS);
	}
	return (err);
}

void
dmu_objset_rele(objset_t *os, void *tag)
{
	dsl_dataset_rele(os->os_dsl_dataset, tag);
}

void
dmu_objset_disown(objset_t *os, void *tag)
{
	dsl_dataset_disown(os->os_dsl_dataset, tag);
}
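/*
 * Illustrative (hypothetical) caller of the hold/rele pair above:
 *
 *	objset_t *os;
 *	int err = dmu_objset_hold("tank/fs", FTAG, &os);
 *	if (err == 0) {
 *		... short-term, read-only inspection of os ...
 *		dmu_objset_rele(os, FTAG);
 *	}
 *
 * dmu_objset_own()/dmu_objset_disown() are the long-lived counterpart
 * (e.g. for mounting), and additionally enforce the expected objset
 * type and read-only semantics for snapshots.
 */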
int
dmu_objset_evict_dbufs(objset_t *os)
{
	dnode_t *dn;

	mutex_enter(&os->os_lock);

	/* process the mdn last, since the other dnodes have holds on it */
	list_remove(&os->os_dnodes, os->os_meta_dnode);
	list_insert_tail(&os->os_dnodes, os->os_meta_dnode);

	/*
	 * Find the first dnode with holds. We have to do this dance
	 * because dnode_add_ref() only works if you already have a
	 * hold. If there are no holds then it has no dbufs so OK to
	 * skip.
	 */
	for (dn = list_head(&os->os_dnodes);
	    dn && !dnode_add_ref(dn, FTAG);
	    dn = list_next(&os->os_dnodes, dn))
		continue;

	while (dn) {
		dnode_t *next_dn = dn;

		do {
			next_dn = list_next(&os->os_dnodes, next_dn);
		} while (next_dn && !dnode_add_ref(next_dn, FTAG));

		mutex_exit(&os->os_lock);
		dnode_evict_dbufs(dn);
		dnode_rele(dn, FTAG);
		mutex_enter(&os->os_lock);
		dn = next_dn;
	}
	mutex_exit(&os->os_lock);
	return (list_head(&os->os_dnodes) != os->os_meta_dnode);
}

void
dmu_objset_evict(objset_t *os)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;

	for (int t = 0; t < TXG_SIZE; t++)
		ASSERT(!dmu_objset_is_dirty(os, t));

	if (ds) {
		if (!dsl_dataset_is_snapshot(ds)) {
			VERIFY(0 == dsl_prop_unregister(ds, "checksum",
			    checksum_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "compression",
			    compression_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "copies",
			    copies_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "dedup",
			    dedup_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "logbias",
			    logbias_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "sync",
			    sync_changed_cb, os));
		}
		VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
		    primary_cache_changed_cb, os));
		VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
		    secondary_cache_changed_cb, os));
	}

	if (os->os_sa)
		sa_tear_down(os);

	/*
	 * We should need only a single pass over the dnode list, since
	 * nothing can be added to the list at this point.
	 */
	(void) dmu_objset_evict_dbufs(os);

	dnode_special_close(os->os_meta_dnode);
	if (os->os_userused_dnode) {
		dnode_special_close(os->os_userused_dnode);
		dnode_special_close(os->os_groupused_dnode);
	}
	zil_free(os->os_zil);

	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);

	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1);
	mutex_destroy(&os->os_lock);
	mutex_destroy(&os->os_obj_lock);
	mutex_destroy(&os->os_user_ptr_lock);
	kmem_free(os, sizeof (objset_t));
}

timestruc_t
dmu_objset_snap_cmtime(objset_t *os)
{
	return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
}
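/*
 * Objset creation overview: dmu_objset_create() (below) dispatches a
 * DSL sync task whose check/sync functions are dmu_objset_create_check()
 * and dmu_objset_create_sync(); the latter calls dsl_dataset_create_sync()
 * and then dmu_objset_create_impl() to lay down the empty objset.
 */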
/* called from dsl for meta-objset */
objset_t *
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    dmu_objset_type_t type, dmu_tx_t *tx)
{
	objset_t *os;
	dnode_t *mdn;

	ASSERT(dmu_tx_is_syncing(tx));
	if (ds)
		mutex_enter(&ds->ds_opening_lock);
	VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &os));
	if (ds)
		mutex_exit(&ds->ds_opening_lock);
	mdn = os->os_meta_dnode;

	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);

	/*
	 * We don't want to have to increase the meta-dnode's nlevels
	 * later, because then we could do it in quiescing context while
	 * we are also accessing it in open context.
	 *
	 * This precaution is not necessary for the MOS (ds == NULL),
	 * because the MOS is only updated in syncing context.
	 * This is most fortunate: the MOS is the only objset that
	 * needs to be synced multiple times as spa_sync() iterates
	 * to convergence, so minimizing its dn_nlevels matters.
	 */
	if (ds != NULL) {
		int levels = 1;

		/*
		 * Determine the number of levels necessary for the meta-dnode
		 * to contain DN_MAX_OBJECT dnodes.
		 */
		while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
		    DN_MAX_OBJECT * sizeof (dnode_phys_t))
			levels++;

		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
		    mdn->dn_nlevels = levels;
	}
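	/*
	 * For reference (illustrative math, assuming the usual constants:
	 * 16K meta-dnode data blocks, 16K indirect blocks holding 128
	 * 128-byte blkptrs each, 3 blkptrs in the meta-dnode, 512-byte
	 * dnode_phys_t, DN_MAX_OBJECT = 2^48): the capacity at a given
	 * level count is 3 << (14 + (levels - 1) * 7) bytes, and the
	 * loop above must cover 2^48 * 512 = 2^57 bytes, so it settles
	 * on 7 levels.
	 */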
	ASSERT(type != DMU_OST_NONE);
	ASSERT(type != DMU_OST_ANY);
	ASSERT(type < DMU_OST_NUMTYPES);
	os->os_phys->os_type = type;
	if (dmu_objset_userused_enabled(os)) {
		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
		os->os_flags = os->os_phys->os_flags;
	}

	dsl_dataset_dirty(ds, tx);

	return (os);
}

struct oscarg {
	void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
	void *userarg;
	dsl_dataset_t *clone_origin;
	const char *lastname;
	dmu_objset_type_t type;
	uint64_t flags;
	cred_t *cr;
};

/*ARGSUSED*/
static int
dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	struct oscarg *oa = arg2;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	int err;
	uint64_t ddobj;

	err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
	    oa->lastname, sizeof (uint64_t), 1, &ddobj);
	if (err != ENOENT)
		return (err ? err : EEXIST);

	if (oa->clone_origin != NULL) {
		/* You can't clone across pools. */
		if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool)
			return (EXDEV);

		/* You can only clone snapshots, not the head datasets. */
		if (!dsl_dataset_is_snapshot(oa->clone_origin))
			return (EINVAL);
	}

	return (0);
}

static void
dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	struct oscarg *oa = arg2;
	uint64_t dsobj;

	ASSERT(dmu_tx_is_syncing(tx));

	dsobj = dsl_dataset_create_sync(dd, oa->lastname,
	    oa->clone_origin, oa->flags, oa->cr, tx);

	if (oa->clone_origin == NULL) {
		dsl_dataset_t *ds;
		blkptr_t *bp;
		objset_t *os;

		VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj,
		    FTAG, &ds));
		bp = dsl_dataset_get_blkptr(ds);
		ASSERT(BP_IS_HOLE(bp));

		os = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
		    ds, bp, oa->type, tx);

		if (oa->userfunc)
			oa->userfunc(os, oa->userarg, oa->cr, tx);
		dsl_dataset_rele(ds, FTAG);
	}

	spa_history_log_internal(LOG_DS_CREATE, dd->dd_pool->dp_spa,
	    tx, "dataset = %llu", dsobj);
}

int
dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
{
	dsl_dir_t *pdd;
	const char *tail;
	int err = 0;
	struct oscarg oa = { 0 };

	ASSERT(strchr(name, '@') == NULL);
	err = dsl_dir_open(name, FTAG, &pdd, &tail);
	if (err)
		return (err);
	if (tail == NULL) {
		dsl_dir_close(pdd, FTAG);
		return (EEXIST);
	}

	oa.userfunc = func;
	oa.userarg = arg;
	oa.lastname = tail;
	oa.type = type;
	oa.flags = flags;
	oa.cr = CRED();

	err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
	    dmu_objset_create_sync, pdd, &oa, 5);
	dsl_dir_close(pdd, FTAG);
	return (err);
}

int
dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags)
{
	dsl_dir_t *pdd;
	const char *tail;
	int err = 0;
	struct oscarg oa = { 0 };

	ASSERT(strchr(name, '@') == NULL);
	err = dsl_dir_open(name, FTAG, &pdd, &tail);
	if (err)
		return (err);
	if (tail == NULL) {
		dsl_dir_close(pdd, FTAG);
		return (EEXIST);
	}

	oa.lastname = tail;
	oa.clone_origin = clone_origin;
	oa.flags = flags;
	oa.cr = CRED();

	err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
	    dmu_objset_create_sync, pdd, &oa, 5);
	dsl_dir_close(pdd, FTAG);
	return (err);
}

int
dmu_objset_destroy(const char *name, boolean_t defer)
{
	dsl_dataset_t *ds;
	int error;

	/*
	 * dsl_dataset_destroy() can free any claimed-but-unplayed
	 * intent log, but if there is an active log, it has blocks that
	 * are allocated, but may not yet be reflected in the on-disk
	 * structure. Only the ZIL knows how to free them, so we have
	 * to call into it here.
	 */
	error = dsl_dataset_own(name, B_TRUE, FTAG, &ds);
	if (error == 0) {
		objset_t *os;
		if (dmu_objset_from_ds(ds, &os) == 0)
			zil_destroy(dmu_objset_zil(os), B_FALSE);
		error = dsl_dataset_destroy(ds, FTAG, defer);
		/* dsl_dataset_destroy() closes the ds. */
	}

	return (error);
}
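/*
 * Snapshot machinery: dmu_objset_snapshot() (below) builds one
 * dsl_sync_task_group and adds a snapshot_check/snapshot_sync task per
 * dataset, so a recursive snapshot of many filesystems commits
 * atomically in a single txg.
 */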
struct snaparg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	char failed[MAXPATHLEN];
	boolean_t recursive;
	nvlist_t *props;
};

static int
snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	struct snaparg *sn = arg2;

	/* The props have already been checked by zfs_check_userprops(). */

	return (dsl_dataset_snapshot_check(os->os_dsl_dataset,
	    sn->snapname, tx));
}

static void
snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	dsl_dataset_t *ds = os->os_dsl_dataset;
	struct snaparg *sn = arg2;

	dsl_dataset_snapshot_sync(ds, sn->snapname, tx);

	if (sn->props) {
		dsl_props_arg_t pa;
		pa.pa_props = sn->props;
		pa.pa_source = ZPROP_SRC_LOCAL;
		dsl_props_set_sync(ds->ds_prev, &pa, tx);
	}
}

static int
dmu_objset_snapshot_one(const char *name, void *arg)
{
	struct snaparg *sn = arg;
	objset_t *os;
	int err;
	char *cp;

	/*
	 * If the objset starts with a '%', then ignore it unless it was
	 * explicitly named (ie, not recursive). These hidden datasets
	 * are always inconsistent, and by not opening them here, we can
	 * avoid a race with dsl_dir_destroy_check().
	 */
	cp = strrchr(name, '/');
	if (cp && cp[1] == '%' && sn->recursive)
		return (0);

	(void) strcpy(sn->failed, name);

	/*
	 * Check permissions if we are doing a recursive snapshot. The
	 * permission checks for the starting dataset have already been
	 * performed in zfs_secpolicy_snapshot()
	 */
	if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED())))
		return (err);

	err = dmu_objset_hold(name, sn, &os);
	if (err != 0)
		return (err);

	/*
	 * If the objset is in an inconsistent state (eg, in the process
	 * of being destroyed), don't snapshot it. As with %hidden
	 * datasets, we return EBUSY if this name was explicitly
	 * requested (ie, not recursive), and otherwise ignore it.
	 */
	if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
		dmu_objset_rele(os, sn);
		return (sn->recursive ? 0 : EBUSY);
	}

	/*
	 * NB: we need to wait for all in-flight changes to get to disk,
	 * so that we snapshot those changes. zil_suspend does this as
	 * a side effect.
	 */
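	/*
	 * The matching zil_resume() happens in dmu_objset_snapshot()
	 * once the sync task group has completed, whether or not the
	 * snapshot itself succeeded.
	 */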
	err = zil_suspend(dmu_objset_zil(os));
	if (err == 0) {
		dsl_sync_task_create(sn->dstg, snapshot_check,
		    snapshot_sync, os, sn, 3);
	} else {
		dmu_objset_rele(os, sn);
	}

	return (err);
}

int
dmu_objset_snapshot(char *fsname, char *snapname,
    nvlist_t *props, boolean_t recursive)
{
	dsl_sync_task_t *dst;
	struct snaparg sn;
	spa_t *spa;
	int err;

	(void) strcpy(sn.failed, fsname);

	err = spa_open(fsname, &spa, FTAG);
	if (err)
		return (err);

	sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	sn.snapname = snapname;
	sn.props = props;
	sn.recursive = recursive;

	if (recursive) {
		err = dmu_objset_find(fsname,
		    dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN);
	} else {
		err = dmu_objset_snapshot_one(fsname, &sn);
	}

	if (err == 0)
		err = dsl_sync_task_group_wait(sn.dstg);

	for (dst = list_head(&sn.dstg->dstg_tasks); dst;
	    dst = list_next(&sn.dstg->dstg_tasks, dst)) {
		objset_t *os = dst->dst_arg1;
		dsl_dataset_t *ds = os->os_dsl_dataset;
		if (dst->dst_err)
			dsl_dataset_name(ds, sn.failed);
		zil_resume(dmu_objset_zil(os));
		dmu_objset_rele(os, &sn);
	}

	if (err)
		(void) strcpy(fsname, sn.failed);
	dsl_sync_task_group_destroy(sn.dstg);
	spa_close(spa, FTAG);
	return (err);
}

static void
dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
	dnode_t *dn;

	while (dn = list_head(list)) {
		ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
		ASSERT(dn->dn_dbuf->db_data_pending);
		/*
		 * Initialize dn_zio outside dnode_sync() because the
		 * meta-dnode needs to set it outside dnode_sync().
		 */
		dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
		ASSERT(dn->dn_zio);

		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
		list_remove(list, dn);

		if (newlist) {
			(void) dnode_add_ref(dn, newlist);
			list_insert_tail(newlist, dn);
		}

		dnode_sync(dn, tx);
	}
}
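/*
 * The two callbacks below are handed to arc_write() in dmu_objset_sync():
 * the ready callback fixes up the root blkptr's fill count once the
 * child writes have determined it, and the done callback performs the
 * block-born/block-kill dataset accounting.
 */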
/* ARGSUSED */
static void
dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	blkptr_t *bp = zio->io_bp;
	objset_t *os = arg;
	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;

	ASSERT(bp == os->os_rootbp);
	ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET);
	ASSERT(BP_GET_LEVEL(bp) == 0);

	/*
	 * Update rootbp fill count: it should be the number of objects
	 * allocated in the object set (not counting the "special"
	 * objects that are stored in the objset_phys_t -- the meta
	 * dnode and user/group accounting objects).
	 */
	bp->blk_fill = 0;
	for (int i = 0; i < dnp->dn_nblkptr; i++)
		bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
}

/* ARGSUSED */
static void
dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	blkptr_t *bp = zio->io_bp;
	blkptr_t *bp_orig = &zio->io_bp_orig;
	objset_t *os = arg;

	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
		ASSERT(BP_EQUAL(bp, bp_orig));
	} else {
		dsl_dataset_t *ds = os->os_dsl_dataset;
		dmu_tx_t *tx = os->os_synctx;

		(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
		dsl_dataset_block_born(ds, bp, tx);
	}
}
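/*
 * dmu_objset_sync() below proceeds roughly as follows: release the
 * phys buf from the ARC, issue the root-block arc_write(), sync the
 * special dnodes, sync the per-txg dirty and free dnode lists, kick
 * off the meta-dnode's level-0 dirty-record zios, then sync the ZIL
 * and fire the root write with zio_nowait() (the caller waits on pio).
 */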
/* called from dsl */
void
dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	zio_prop_t zp;
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS. If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
	VERIFY3U(0, ==, arc_release_bp(os->os_phys_buf, &os->os_phys_buf,
	    os->os_rootbp, os->os_spa, &zb));

	dmu_write_policy(os, NULL, 0, 0, &zp);

	zio = arc_write(pio, os->os_spa, tx->tx_txg,
	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp,
	    dmu_objset_write_ready, dmu_objset_write_done, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	os->os_meta_dnode->dn_zio = zio;
	dnode_sync(os->os_meta_dnode, tx);

	os->os_phys->os_flags = os->os_flags;

	if (os->os_userused_dnode &&
	    os->os_userused_dnode->dn_type != DMU_OT_NONE) {
		os->os_userused_dnode->dn_zio = zio;
		dnode_sync(os->os_userused_dnode, tx);
		os->os_groupused_dnode->dn_zio = zio;
		dnode_sync(os->os_groupused_dnode, tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	list = &os->os_meta_dnode->dn_dirty_records[txgoff];
	while (dr = list_head(list)) {
		ASSERT(dr->dr_dbuf->db_level == 0);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}

boolean_t
dmu_objset_is_dirty(objset_t *os, uint64_t txg)
{
	return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
	    !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
}

objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];

void
dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
{
	used_cbs[ost] = cb;
}

boolean_t
dmu_objset_userused_enabled(objset_t *os)
{
	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
	    used_cbs[os->os_phys->os_type] &&
	    os->os_userused_dnode);
}

static void
do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
    uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
{
	if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
		int64_t delta = DNODE_SIZE + used;
		if (subtract)
			delta = -delta;
		VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
		    user, delta, tx));
		VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
		    group, delta, tx));
	}
}
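/*
 * Accounting sketch: each object charges DNODE_SIZE plus its used
 * bytes against its owner's uid and gid entries in the ZAPs above.
 * The loop below applies the update in two halves: subtract the old
 * (pre-sync) charge if DN_ID_OLD_EXIST, then add the new charge if
 * DN_ID_NEW_EXIST, so even a net-zero change dirties the ZAP.
 */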
void
dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
{
	dnode_t *dn;
	list_t *list = &os->os_synced_dnodes;

	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));

	while (dn = list_head(list)) {
		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
		    dn->dn_phys->dn_flags &
		    DNODE_FLAG_USERUSED_ACCOUNTED);

		/* Allocate the user/groupused objects if necessary. */
		if (os->os_userused_dnode->dn_type == DMU_OT_NONE) {
			VERIFY(0 == zap_create_claim(os,
			    DMU_USERUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
			VERIFY(0 == zap_create_claim(os,
			    DMU_GROUPUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
		}

		/*
		 * We intentionally modify the zap object even if the
		 * net delta is zero. Otherwise
		 * the block of the zap obj could be shared between
		 * datasets but need to be different between them after
		 * a bprewrite.
		 */

		mutex_enter(&dn->dn_mtx);
		ASSERT(dn->dn_id_flags);
		if (dn->dn_id_flags & DN_ID_OLD_EXIST) {
			do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
			    dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
		}
		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
			do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
			    dn->dn_phys->dn_flags, dn->dn_newuid,
			    dn->dn_newgid, B_FALSE, tx);
		}

		dn->dn_oldused = 0;
		dn->dn_oldflags = 0;
		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
			dn->dn_olduid = dn->dn_newuid;
			dn->dn_oldgid = dn->dn_newgid;
			dn->dn_id_flags |= DN_ID_OLD_EXIST;
			if (dn->dn_bonuslen == 0)
				dn->dn_id_flags |= DN_ID_CHKED_SPILL;
			else
				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		}
		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
		mutex_exit(&dn->dn_mtx);

		list_remove(list, dn);
		dnode_rele(dn, list);
	}
}

/*
 * Returns a pointer to data to find uid/gid from
 *
 * If a dirty record for the transaction group that is syncing can't
 * be found then NULL is returned. In the NULL case it is assumed
 * the uid/gid aren't changing.
 */
static void *
dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
	dbuf_dirty_record_t *dr, **drp;
	void *data;

	if (db->db_dirtycnt == 0)
		return (db->db.db_data);  /* Nothing is changing */

	for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
		if (dr->dr_txg == tx->tx_txg)
			break;

	if (dr == NULL)
		data = NULL;
	else if (dr->dr_dbuf->db_dnode->dn_bonuslen == 0 &&
	    dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
		data = dr->dt.dl.dr_data->b_data;
	else
		data = dr->dt.dl.dr_data;
	return (data);
}
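/*
 * dmu_objset_userquota_get_ids() is called with before == B_TRUE to
 * capture the old uid/gid from the on-disk bonus or spill block
 * (setting DN_ID_OLD_EXIST on success), and with before == B_FALSE to
 * derive the new ids from the pending dirty data (setting
 * DN_ID_NEW_EXIST).
 */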
void
dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
{
	objset_t *os = dn->dn_objset;
	void *data = NULL;
	dmu_buf_impl_t *db = NULL;
	uint64_t *user, *group;
	int flags = dn->dn_id_flags;
	int error;
	boolean_t have_spill = B_FALSE;

	if (!dmu_objset_userused_enabled(dn->dn_objset))
		return;

	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
	    DN_ID_CHKED_SPILL)))
		return;

	if (before && dn->dn_bonuslen != 0)
		data = DN_BONUS(dn->dn_phys);
	else if (!before && dn->dn_bonuslen != 0) {
		if (dn->dn_bonus) {
			db = dn->dn_bonus;
			mutex_enter(&db->db_mtx);
			data = dmu_objset_userquota_find_data(db, tx);
		} else {
			data = DN_BONUS(dn->dn_phys);
		}
	} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
		int rf = 0;

		if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
			rf |= DB_RF_HAVESTRUCT;
		error = dmu_spill_hold_by_dnode(dn, rf,
		    FTAG, (dmu_buf_t **)&db);
		ASSERT(error == 0);
		mutex_enter(&db->db_mtx);
		data = (before) ? db->db.db_data :
		    dmu_objset_userquota_find_data(db, tx);
		have_spill = B_TRUE;
	} else {
		mutex_enter(&dn->dn_mtx);
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		mutex_exit(&dn->dn_mtx);
		return;
	}

	if (before) {
		ASSERT(data);
		user = &dn->dn_olduid;
		group = &dn->dn_oldgid;
	} else if (data) {
		user = &dn->dn_newuid;
		group = &dn->dn_newgid;
	}

	/*
	 * Must always call the callback in case the object
	 * type has changed and that type isn't an object type to track
	 */
	error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
	    user, group);

	/*
	 * Preserve existing uid/gid when the callback can't determine
	 * what the new uid/gid are and the callback returned EEXIST.
	 * The EEXIST error tells us to just use the existing uid/gid.
	 * If we don't know what the old values are then just assign
	 * them to 0, since that is a new file being created.
	 */
	if (!before && data == NULL && error == EEXIST) {
		if (flags & DN_ID_OLD_EXIST) {
			dn->dn_newuid = dn->dn_olduid;
			dn->dn_newgid = dn->dn_oldgid;
		} else {
			dn->dn_newuid = 0;
			dn->dn_newgid = 0;
		}
		error = 0;
	}

	if (db)
		mutex_exit(&db->db_mtx);

	mutex_enter(&dn->dn_mtx);
	if (error == 0 && before)
		dn->dn_id_flags |= DN_ID_OLD_EXIST;
	if (error == 0 && !before)
		dn->dn_id_flags |= DN_ID_NEW_EXIST;

	if (have_spill) {
		dn->dn_id_flags |= DN_ID_CHKED_SPILL;
	} else {
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
	}
	mutex_exit(&dn->dn_mtx);
	if (have_spill)
		dmu_buf_rele((dmu_buf_t *)db, FTAG);
}

boolean_t
dmu_objset_userspace_present(objset_t *os)
{
	return (os->os_phys->os_flags &
	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
}

int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os))
		return (ENOTSUP);
	if (dmu_objset_is_snapshot(os))
		return (EINVAL);

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and now accounted. If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (EINTR);

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr) {
			dmu_tx_abort(tx);
			continue;
		}
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}

void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
	    usedobjsp, availobjsp);
}

uint64_t
dmu_objset_fsid_guid(objset_t *os)
{
	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
}

void
dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
{
	stat->dds_type = os->os_phys->os_type;
	if (os->os_dsl_dataset)
		dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
}

void
dmu_objset_stats(objset_t *os, nvlist_t *nv)
{
	ASSERT(os->os_dsl_dataset ||
	    os->os_phys->os_type == DMU_OST_META);

	if (os->os_dsl_dataset != NULL)
		dsl_dataset_stats(os->os_dsl_dataset, nv);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
	    os->os_phys->os_type);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
	    dmu_objset_userspace_present(os));
}

int
dmu_objset_is_snapshot(objset_t *os)
{
	if (os->os_dsl_dataset != NULL)
		return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
	else
		return (B_FALSE);
}
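/*
 * zap_lookup_norm() below performs a normalization-aware lookup
 * (MT_FIRST), so on case-insensitive (or otherwise normalizing)
 * datasets it can match a differently-cased snapshot name; the actual
 * match is copied back through 'real', and 'conflict' reports whether
 * the match was ambiguous.
 */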
int
dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
    boolean_t *conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	uint64_t ignored;

	if (ds->ds_phys->ds_snapnames_zapobj == 0)
		return (ENOENT);

	return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST,
	    real, maxlen, conflict));
}

int
dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	if (ds->ds_phys->ds_snapnames_zapobj == 0)
		return (ENOENT);

	zap_cursor_init_serialized(&cursor,
	    ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (ENOENT);
	}

	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (ENAMETOOLONG);
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	if (case_conflict)
		*case_conflict = attr.za_normalization_conflict;
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}

int
dmu_dir_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp)
{
	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	/* there is no next dir on a snapshot! */
	if (os->os_dsl_dataset->ds_object !=
	    dd->dd_phys->dd_head_dataset_obj)
		return (ENOENT);

	zap_cursor_init_serialized(&cursor,
	    dd->dd_pool->dp_meta_objset,
	    dd->dd_phys->dd_child_dir_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (ENOENT);
	}

	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (ENAMETOOLONG);
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}

struct findarg {
	int (*func)(const char *, void *);
	void *arg;
};

/* ARGSUSED */
static int
findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
{
	struct findarg *fa = arg;
	return (fa->func(dsname, fa->arg));
}
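/*
 * Example caller (from this file): dmu_objset_snapshot() uses
 * dmu_objset_find(fsname, dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN)
 * to visit the starting filesystem and all of its descendants.
 */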
/*
 * Find all objsets under name, and for each, call 'func(child_name, arg)'.
 * Perhaps change all callers to use dmu_objset_find_spa()?
 */
int
dmu_objset_find(char *name, int func(const char *, void *), void *arg,
    int flags)
{
	struct findarg fa;
	fa.func = func;
	fa.arg = arg;
	return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags));
}

/*
 * Find all objsets under name, call func on each
 */
int
dmu_objset_find_spa(spa_t *spa, const char *name,
    int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	char *child;
	uint64_t thisobj;
	int err;

	if (name == NULL)
		name = spa_name(spa);
	err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
	if (err)
		return (err);

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_close(dd, FTAG);
		return (0);
	}

	thisobj = dd->dd_phys->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
	dp = dd->dd_pool;

	/*
	 * Iterate over all children.
	 */
	if (flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT(attr->za_integer_length == sizeof (uint64_t));
			ASSERT(attr->za_num_integers == 1);

			child = kmem_asprintf("%s/%s", name, attr->za_name);
			err = dmu_objset_find_spa(spa, child, func, arg, flags);
			strfree(child);
			if (err)
				break;
		}
		zap_cursor_fini(&zc);

		if (err) {
			dsl_dir_close(dd, FTAG);
			kmem_free(attr, sizeof (zap_attribute_t));
			return (err);
		}
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (flags & DS_FIND_SNAPSHOTS) {
		if (!dsl_pool_sync_context(dp))
			rw_enter(&dp->dp_config_rwlock, RW_READER);
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
		if (!dsl_pool_sync_context(dp))
			rw_exit(&dp->dp_config_rwlock);

		if (err == 0) {
			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT(attr->za_integer_length ==
				    sizeof (uint64_t));
				ASSERT(attr->za_num_integers == 1);

				child = kmem_asprintf("%s@%s",
				    name, attr->za_name);
				err = func(spa, attr->za_first_integer,
				    child, arg);
				strfree(child);
				if (err)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_close(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));

	if (err)
		return (err);
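	/*
	 * Note that children and snapshots have been visited before we
	 * get here, i.e. the traversal is post-order; callbacks such as
	 * a recursive destroy rely on seeing descendants first, since a
	 * parent cannot be destroyed before its children.
	 */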
	/*
	 * Apply to self if appropriate.
	 */
	err = func(spa, thisobj, name, arg);
	return (err);
}

/* ARGSUSED */
int
dmu_objset_prefetch(const char *name, void *arg)
{
	dsl_dataset_t *ds;

	if (dsl_dataset_hold(name, FTAG, &ds))
		return (0);

	if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) {
		mutex_enter(&ds->ds_opening_lock);
		if (ds->ds_objset == NULL) {
			uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
			zbookmark_t zb;

			SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
			    ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

			(void) dsl_read_nolock(NULL, dsl_dataset_get_spa(ds),
			    &ds->ds_phys->ds_bp, NULL, NULL,
			    ZIO_PRIORITY_ASYNC_READ,
			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
			    &aflags, &zb);
		}
		mutex_exit(&ds->ds_opening_lock);
	}

	dsl_dataset_rele(ds, FTAG);
	return (0);
}

void
dmu_objset_set_user(objset_t *os, void *user_ptr)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	os->os_user_ptr = user_ptr;
}

void *
dmu_objset_get_user(objset_t *os)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	return (os->os_user_ptr);
}