/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/* Portions Copyright 2010 Robert Milkowski */

#include <sys/cred.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_deleg.h>
#include <sys/dnode.h>
#include <sys/dbuf.h>
#include <sys/zvol.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/dmu_impl.h>
#include <sys/zfs_ioctl.h>
#include <sys/sunddi.h>
#include <sys/sa.h>

spa_t *
dmu_objset_spa(objset_t *os)
{
	return (os->os_spa);
}

zilog_t *
dmu_objset_zil(objset_t *os)
{
	return (os->os_zil);
}

dsl_pool_t *
dmu_objset_pool(objset_t *os)
{
	dsl_dataset_t *ds;

	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
		return (ds->ds_dir->dd_pool);
	else
		return (spa_get_dsl(os->os_spa));
}

dsl_dataset_t *
dmu_objset_ds(objset_t *os)
{
	return (os->os_dsl_dataset);
}

dmu_objset_type_t
dmu_objset_type(objset_t *os)
{
	return (os->os_phys->os_type);
}

void
dmu_objset_name(objset_t *os, char *buf)
{
	dsl_dataset_name(os->os_dsl_dataset, buf);
}

uint64_t
dmu_objset_id(objset_t *os)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;

	return (ds ? ds->ds_object : 0);
}

uint64_t
dmu_objset_syncprop(objset_t *os)
{
	return (os->os_sync);
}

uint64_t
dmu_objset_logbias(objset_t *os)
{
	return (os->os_logbias);
}
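
/*
 * Dataset property callbacks.  dmu_objset_open_impl() registers these
 * via dsl_prop_register(), which invokes each callback once before
 * returning (seeding the cached value in the objset) and again
 * whenever the property changes thereafter.
 */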
static void
checksum_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance should have been done by now.
	 */
	ASSERT(newval != ZIO_CHECKSUM_INHERIT);

	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
}

static void
compression_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval != ZIO_COMPRESS_INHERIT);

	os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
}

static void
copies_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval > 0);
	ASSERT(newval <= spa_max_replication(os->os_spa));

	os->os_copies = newval;
}

static void
dedup_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;
	spa_t *spa = os->os_spa;
	enum zio_checksum checksum;

	/*
	 * Inheritance should have been done by now.
	 */
	ASSERT(newval != ZIO_CHECKSUM_INHERIT);

	checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);

	os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
	os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
}

static void
primary_cache_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
	    newval == ZFS_CACHE_METADATA);

	os->os_primary_cache = newval;
}

static void
secondary_cache_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
	    newval == ZFS_CACHE_METADATA);

	os->os_secondary_cache = newval;
}

static void
sync_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
	    newval == ZFS_SYNC_DISABLED);

	os->os_sync = newval;
	if (os->os_zil)
		zil_set_sync(os->os_zil, newval);
}

static void
logbias_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
	    newval == ZFS_LOGBIAS_THROUGHPUT);
	os->os_logbias = newval;
	if (os->os_zil)
		zil_set_logbias(os->os_zil, newval);
}

void
dmu_objset_byteswap(void *buf, size_t size)
{
	objset_phys_t *osp = buf;

	ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
	dnode_byteswap(&osp->os_meta_dnode);
	byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
	osp->os_type = BSWAP_64(osp->os_type);
	osp->os_flags = BSWAP_64(osp->os_flags);
	if (size == sizeof (objset_phys_t)) {
		dnode_byteswap(&osp->os_userused_dnode);
		dnode_byteswap(&osp->os_groupused_dnode);
	}
}
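
/*
 * Instantiate an objset_t for a dataset, or for the MOS when ds is
 * NULL.  Reads the objset_phys_t from the root block pointer if one
 * exists (otherwise starts from a zeroed buffer of the appropriate
 * version's size), registers the property callbacks against the
 * dataset, and opens the special meta/userused/groupused dnodes.
 * Callers with a dataset must hold ds_opening_lock.
 */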
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		uint32_t aflags = ARC_WAIT;
		zbookmark_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_L2CACHE;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		/*
		 * NB: once bprewrite scrub can change the bp, and this
		 * is called from dmu_objset_open_ds_os, the bp could
		 * change underneath us, and we'll need a lock.
		 */
		err = arc_read_nolock(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = EIO;
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know about
	 * checksum/compression/copies.
	 */
	if (ds) {
		err = dsl_prop_register(ds, "primarycache",
		    primary_cache_changed_cb, os);
		if (err == 0)
			err = dsl_prop_register(ds, "secondarycache",
			    secondary_cache_changed_cb, os);
		if (!dsl_dataset_is_snapshot(ds)) {
			if (err == 0)
				err = dsl_prop_register(ds, "checksum",
				    checksum_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "compression",
				    compression_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "copies",
				    copies_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "dedup",
				    dedup_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "logbias",
				    logbias_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "sync",
				    sync_changed_cb, os);
		}
		if (err) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf) == 1);
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else if (ds == NULL) {
		/* It's the meta-objset. */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_LZJB;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = 0;
		os->os_logbias = 0;
		os->os_sync = 0;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
	}

	os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	os->os_meta_dnode = dnode_special_open(os,
	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		os->os_userused_dnode = dnode_special_open(os,
		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
		os->os_groupused_dnode = dnode_special_open(os,
		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
	}

	/*
	 * We should be the only thread trying to do this because we
	 * have ds_opening_lock.
	 */
	if (ds) {
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_objset == NULL);
		ds->ds_objset = os;
		mutex_exit(&ds->ds_lock);
	}

	*osp = os;
	return (0);
}
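
/*
 * Return the objset for an already-held dataset, instantiating and
 * caching it in ds->ds_objset on first use.  The caller's dataset
 * hold, not this function, is what keeps the objset alive.
 */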
int
dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
{
	int err = 0;

	mutex_enter(&ds->ds_opening_lock);
	*osp = ds->ds_objset;
	if (*osp == NULL) {
		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
		    ds, &ds->ds_phys->ds_bp, osp);
	}
	mutex_exit(&ds->ds_opening_lock);
	return (err);
}

/* called from zpl */
int
dmu_objset_hold(const char *name, void *tag, objset_t **osp)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_hold(name, tag, &ds);
	if (err)
		return (err);

	err = dmu_objset_from_ds(ds, osp);
	if (err)
		dsl_dataset_rele(ds, tag);

	return (err);
}

/* called from zpl */
int
dmu_objset_own(const char *name, dmu_objset_type_t type,
    boolean_t readonly, void *tag, objset_t **osp)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_own(name, B_FALSE, tag, &ds);
	if (err)
		return (err);

	err = dmu_objset_from_ds(ds, osp);
	if (err) {
		dsl_dataset_disown(ds, tag);
	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
		dmu_objset_disown(*osp, tag);
		return (EINVAL);
	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
		dmu_objset_disown(*osp, tag);
		return (EROFS);
	}
	return (err);
}

void
dmu_objset_rele(objset_t *os, void *tag)
{
	dsl_dataset_rele(os->os_dsl_dataset, tag);
}

void
dmu_objset_disown(objset_t *os, void *tag)
{
	dsl_dataset_disown(os->os_dsl_dataset, tag);
}
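
/*
 * Evict the dbufs of every dnode in this objset.  Returns nonzero if
 * dnodes other than the meta-dnode remain on the list afterwards,
 * i.e. if somebody still has a hold that kept a dnode alive.
 */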
int
dmu_objset_evict_dbufs(objset_t *os)
{
	dnode_t *dn;

	mutex_enter(&os->os_lock);

	/* process the mdn last, since the other dnodes have holds on it */
	list_remove(&os->os_dnodes, os->os_meta_dnode);
	list_insert_tail(&os->os_dnodes, os->os_meta_dnode);

	/*
	 * Find the first dnode with holds.  We have to do this dance
	 * because dnode_add_ref() only works if you already have a
	 * hold.  If there are no holds then it has no dbufs, so it is
	 * OK to skip.
	 */
	for (dn = list_head(&os->os_dnodes);
	    dn && !dnode_add_ref(dn, FTAG);
	    dn = list_next(&os->os_dnodes, dn))
		continue;

	while (dn) {
		dnode_t *next_dn = dn;

		do {
			next_dn = list_next(&os->os_dnodes, next_dn);
		} while (next_dn && !dnode_add_ref(next_dn, FTAG));

		mutex_exit(&os->os_lock);
		dnode_evict_dbufs(dn);
		dnode_rele(dn, FTAG);
		mutex_enter(&os->os_lock);
		dn = next_dn;
	}
	mutex_exit(&os->os_lock);
	return (list_head(&os->os_dnodes) != os->os_meta_dnode);
}

void
dmu_objset_evict(objset_t *os)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;

	for (int t = 0; t < TXG_SIZE; t++)
		ASSERT(!dmu_objset_is_dirty(os, t));

	if (ds) {
		if (!dsl_dataset_is_snapshot(ds)) {
			VERIFY(0 == dsl_prop_unregister(ds, "checksum",
			    checksum_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "compression",
			    compression_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "copies",
			    copies_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "dedup",
			    dedup_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "logbias",
			    logbias_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "sync",
			    sync_changed_cb, os));
		}
		VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
		    primary_cache_changed_cb, os));
		VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
		    secondary_cache_changed_cb, os));
	}

	if (os->os_sa)
		sa_tear_down(os);

	/*
	 * We should need only a single pass over the dnode list, since
	 * nothing can be added to the list at this point.
	 */
	(void) dmu_objset_evict_dbufs(os);

	dnode_special_close(os->os_meta_dnode);
	if (os->os_userused_dnode) {
		dnode_special_close(os->os_userused_dnode);
		dnode_special_close(os->os_groupused_dnode);
	}
	zil_free(os->os_zil);

	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);

	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1);
	mutex_destroy(&os->os_lock);
	mutex_destroy(&os->os_obj_lock);
	mutex_destroy(&os->os_user_ptr_lock);
	kmem_free(os, sizeof (objset_t));
}

timestruc_t
dmu_objset_snap_cmtime(objset_t *os)
{
	return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
}

/* called from dsl for meta-objset */
objset_t *
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    dmu_objset_type_t type, dmu_tx_t *tx)
{
	objset_t *os;
	dnode_t *mdn;

	ASSERT(dmu_tx_is_syncing(tx));
	if (ds)
		mutex_enter(&ds->ds_opening_lock);
	VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &os));
	if (ds)
		mutex_exit(&ds->ds_opening_lock);
	mdn = os->os_meta_dnode;

	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);

	/*
	 * We don't want to have to increase the meta-dnode's nlevels
	 * later, because then we could do it in quiescing context while
	 * we are also accessing it in open context.
	 *
	 * This precaution is not necessary for the MOS (ds == NULL),
	 * because the MOS is only updated in syncing context.
	 * This is most fortunate: the MOS is the only objset that
	 * needs to be synced multiple times as spa_sync() iterates
	 * to convergence, so minimizing its dn_nlevels matters.
	 */
	if (ds != NULL) {
		int levels = 1;

		/*
		 * Determine the number of levels necessary for the
		 * meta-dnode to contain DN_MAX_OBJECT dnodes.
		 */
		while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
		    DN_MAX_OBJECT * sizeof (dnode_phys_t))
			levels++;
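		/*
		 * Worked example (illustrative, assuming the usual
		 * constants: 512-byte dnodes, 16K meta-dnode data
		 * blocks holding 32 dnodes each, 128K indirect blocks
		 * holding 1024 128-byte block pointers each, and three
		 * block pointers in the meta-dnode): level 1 reaches
		 * 3 << 14 bytes, and each additional level multiplies
		 * that by 1024, so covering DN_MAX_OBJECT (2^48)
		 * dnodes -- 2^57 bytes -- settles at levels == 6.
		 */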

		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
		    mdn->dn_nlevels = levels;
	}

	ASSERT(type != DMU_OST_NONE);
	ASSERT(type != DMU_OST_ANY);
	ASSERT(type < DMU_OST_NUMTYPES);
	os->os_phys->os_type = type;
	if (dmu_objset_userused_enabled(os)) {
		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
		os->os_flags = os->os_phys->os_flags;
	}

	dsl_dataset_dirty(ds, tx);

	return (os);
}

struct oscarg {
	void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
	void *userarg;
	dsl_dataset_t *clone_origin;
	const char *lastname;
	dmu_objset_type_t type;
	uint64_t flags;
};
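
/*
 * Check phase of the create/clone sync task: fail with EEXIST if the
 * child name is already taken, and for clones require an origin that
 * is a snapshot in the same pool.
 */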
/*ARGSUSED*/
static int
dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	struct oscarg *oa = arg2;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	int err;
	uint64_t ddobj;

	err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
	    oa->lastname, sizeof (uint64_t), 1, &ddobj);
	if (err != ENOENT)
		return (err ? err : EEXIST);

	if (oa->clone_origin != NULL) {
		/* You can't clone across pools. */
		if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool)
			return (EXDEV);

		/* You can only clone snapshots, not the head datasets. */
		if (!dsl_dataset_is_snapshot(oa->clone_origin))
			return (EINVAL);
	}

	return (0);
}

static void
dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	struct oscarg *oa = arg2;
	uint64_t dsobj;

	ASSERT(dmu_tx_is_syncing(tx));

	dsobj = dsl_dataset_create_sync(dd, oa->lastname,
	    oa->clone_origin, oa->flags, cr, tx);

	if (oa->clone_origin == NULL) {
		dsl_dataset_t *ds;
		blkptr_t *bp;
		objset_t *os;

		VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj,
		    FTAG, &ds));
		bp = dsl_dataset_get_blkptr(ds);
		ASSERT(BP_IS_HOLE(bp));

		os = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
		    ds, bp, oa->type, tx);

		if (oa->userfunc)
			oa->userfunc(os, oa->userarg, cr, tx);
		dsl_dataset_rele(ds, FTAG);
	}

	spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa,
	    tx, cr, "dataset = %llu", dsobj);
}

int
dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
{
	dsl_dir_t *pdd;
	const char *tail;
	int err = 0;
	struct oscarg oa = { 0 };

	ASSERT(strchr(name, '@') == NULL);
	err = dsl_dir_open(name, FTAG, &pdd, &tail);
	if (err)
		return (err);
	if (tail == NULL) {
		dsl_dir_close(pdd, FTAG);
		return (EEXIST);
	}

	oa.userfunc = func;
	oa.userarg = arg;
	oa.lastname = tail;
	oa.type = type;
	oa.flags = flags;

	err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
	    dmu_objset_create_sync, pdd, &oa, 5);
	dsl_dir_close(pdd, FTAG);
	return (err);
}

int
dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags)
{
	dsl_dir_t *pdd;
	const char *tail;
	int err = 0;
	struct oscarg oa = { 0 };

	ASSERT(strchr(name, '@') == NULL);
	err = dsl_dir_open(name, FTAG, &pdd, &tail);
	if (err)
		return (err);
	if (tail == NULL) {
		dsl_dir_close(pdd, FTAG);
		return (EEXIST);
	}

	oa.lastname = tail;
	oa.clone_origin = clone_origin;
	oa.flags = flags;

	err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
	    dmu_objset_create_sync, pdd, &oa, 5);
	dsl_dir_close(pdd, FTAG);
	return (err);
}

int
dmu_objset_destroy(const char *name, boolean_t defer)
{
	dsl_dataset_t *ds;
	int error;

	/*
	 * dsl_dataset_destroy() can free any claimed-but-unplayed
	 * intent log, but if there is an active log, it has blocks that
	 * are allocated, but may not yet be reflected in the on-disk
	 * structure.  Only the ZIL knows how to free them, so we have
	 * to call into it here.
	 */
	error = dsl_dataset_own(name, B_TRUE, FTAG, &ds);
	if (error == 0) {
		objset_t *os;
		if (dmu_objset_from_ds(ds, &os) == 0)
			zil_destroy(dmu_objset_zil(os), B_FALSE);
		error = dsl_dataset_destroy(ds, FTAG, defer);
		/* dsl_dataset_destroy() closes the ds. */
	}

	return (error);
}

struct snaparg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	char failed[MAXPATHLEN];
	boolean_t recursive;
	nvlist_t *props;
};
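
/*
 * Check/sync callbacks for the snapshot sync task group.  arg1 is the
 * objset being snapshotted and arg2 the shared struct snaparg; any
 * user properties are applied to the snapshot just created, which is
 * now the dataset's ds_prev.
 */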
static int
snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	struct snaparg *sn = arg2;

	/* The props have already been checked by zfs_check_userprops(). */

	return (dsl_dataset_snapshot_check(os->os_dsl_dataset,
	    sn->snapname, tx));
}

static void
snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	dsl_dataset_t *ds = os->os_dsl_dataset;
	struct snaparg *sn = arg2;

	dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx);

	if (sn->props) {
		dsl_props_arg_t pa;
		pa.pa_props = sn->props;
		pa.pa_source = ZPROP_SRC_LOCAL;
		dsl_props_set_sync(ds->ds_prev, &pa, cr, tx);
	}
}

static int
dmu_objset_snapshot_one(const char *name, void *arg)
{
	struct snaparg *sn = arg;
	objset_t *os;
	int err;
	char *cp;

	/*
	 * If the objset starts with a '%', then ignore it unless it was
	 * explicitly named (ie, not recursive).  These hidden datasets
	 * are always inconsistent, and by not opening them here, we can
	 * avoid a race with dsl_dir_destroy_check().
	 */
	cp = strrchr(name, '/');
	if (cp && cp[1] == '%' && sn->recursive)
		return (0);

	(void) strcpy(sn->failed, name);

	/*
	 * Check permissions if we are doing a recursive snapshot.  The
	 * permission checks for the starting dataset have already been
	 * performed in zfs_secpolicy_snapshot().
	 */
	if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED())))
		return (err);

	err = dmu_objset_hold(name, sn, &os);
	if (err != 0)
		return (err);

	/*
	 * If the objset is in an inconsistent state (eg, in the process
	 * of being destroyed), don't snapshot it.  As with %hidden
	 * datasets, we return EBUSY if this name was explicitly
	 * requested (ie, not recursive), and otherwise ignore it.
	 */
	if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
		dmu_objset_rele(os, sn);
		return (sn->recursive ? 0 : EBUSY);
	}

	/*
	 * NB: we need to wait for all in-flight changes to get to disk,
	 * so that we snapshot those changes.  zil_suspend does this as
	 * a side effect.
	 */
	err = zil_suspend(dmu_objset_zil(os));
	if (err == 0) {
		dsl_sync_task_create(sn->dstg, snapshot_check,
		    snapshot_sync, os, sn, 3);
	} else {
		dmu_objset_rele(os, sn);
	}

	return (err);
}

int
dmu_objset_snapshot(char *fsname, char *snapname,
    nvlist_t *props, boolean_t recursive)
{
	dsl_sync_task_t *dst;
	struct snaparg sn;
	spa_t *spa;
	int err;

	(void) strcpy(sn.failed, fsname);

	err = spa_open(fsname, &spa, FTAG);
	if (err)
		return (err);

	sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	sn.snapname = snapname;
	sn.props = props;
	sn.recursive = recursive;

	if (recursive) {
		err = dmu_objset_find(fsname,
		    dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN);
	} else {
		err = dmu_objset_snapshot_one(fsname, &sn);
	}

	if (err == 0)
		err = dsl_sync_task_group_wait(sn.dstg);

	for (dst = list_head(&sn.dstg->dstg_tasks); dst;
	    dst = list_next(&sn.dstg->dstg_tasks, dst)) {
		objset_t *os = dst->dst_arg1;
		dsl_dataset_t *ds = os->os_dsl_dataset;
		if (dst->dst_err)
			dsl_dataset_name(ds, sn.failed);
		zil_resume(dmu_objset_zil(os));
		dmu_objset_rele(os, &sn);
	}

	if (err)
		(void) strcpy(fsname, sn.failed);
	dsl_sync_task_group_destroy(sn.dstg);
	spa_close(spa, FTAG);
	return (err);
}

static void
dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
	dnode_t *dn;

	while (dn = list_head(list)) {
		ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
		ASSERT(dn->dn_dbuf->db_data_pending);
		/*
		 * Initialize dn_zio outside dnode_sync() because the
		 * meta-dnode needs to set it outside dnode_sync().
		 */
		dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
		ASSERT(dn->dn_zio);

		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
		list_remove(list, dn);

		if (newlist) {
			(void) dnode_add_ref(dn, newlist);
			list_insert_tail(newlist, dn);
		}

		dnode_sync(dn, tx);
	}
}

/* ARGSUSED */
static void
dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	blkptr_t *bp = zio->io_bp;
	objset_t *os = arg;
	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;

	ASSERT(bp == os->os_rootbp);
	ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET);
	ASSERT(BP_GET_LEVEL(bp) == 0);

	/*
	 * Update rootbp fill count: it should be the number of objects
	 * allocated in the object set (not counting the "special"
	 * objects that are stored in the objset_phys_t -- the meta
	 * dnode and user/group accounting objects).
	 */
	bp->blk_fill = 0;
	for (int i = 0; i < dnp->dn_nblkptr; i++)
		bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
}

/* ARGSUSED */
static void
dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	blkptr_t *bp = zio->io_bp;
	blkptr_t *bp_orig = &zio->io_bp_orig;
	objset_t *os = arg;

	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
		ASSERT(BP_EQUAL(bp, bp_orig));
	} else {
		dsl_dataset_t *ds = os->os_dsl_dataset;
		dmu_tx_t *tx = os->os_synctx;

		(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
		dsl_dataset_block_born(ds, bp, tx);
	}
}

/* called from dsl */
void
dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	zio_prop_t zp;
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	arc_release(os->os_phys_buf, &os->os_phys_buf);

	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

	dmu_write_policy(os, NULL, 0, 0, &zp);

	zio = arc_write(pio, os->os_spa, tx->tx_txg,
	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp,
	    dmu_objset_write_ready, dmu_objset_write_done, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	os->os_meta_dnode->dn_zio = zio;
	dnode_sync(os->os_meta_dnode, tx);

	os->os_phys->os_flags = os->os_flags;

	if (os->os_userused_dnode &&
	    os->os_userused_dnode->dn_type != DMU_OT_NONE) {
		os->os_userused_dnode->dn_zio = zio;
		dnode_sync(os->os_userused_dnode, tx);
		os->os_groupused_dnode->dn_zio = zio;
		dnode_sync(os->os_groupused_dnode, tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	list = &os->os_meta_dnode->dn_dirty_records[txgoff];
	while (dr = list_head(list)) {
		ASSERT(dr->dr_dbuf->db_level == 0);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}

boolean_t
dmu_objset_is_dirty(objset_t *os, uint64_t txg)
{
	return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
	    !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
}

static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];

void
dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
{
	used_cbs[ost] = cb;
}

boolean_t
dmu_objset_userused_enabled(objset_t *os)
{
	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
	    used_cbs[os->os_phys->os_type] &&
	    os->os_userused_dnode);
}

static void
do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
    uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
{
	if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
		int64_t delta = DNODE_SIZE + used;
		if (subtract)
			delta = -delta;
		VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
		    user, delta, tx));
		VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
		    group, delta, tx));
	}
}

void
dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
{
	dnode_t *dn;
	list_t *list = &os->os_synced_dnodes;

	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));

	while (dn = list_head(list)) {
		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
		    dn->dn_phys->dn_flags &
		    DNODE_FLAG_USERUSED_ACCOUNTED);

		/* Allocate the user/groupused objects if necessary. */
		if (os->os_userused_dnode->dn_type == DMU_OT_NONE) {
			VERIFY(0 == zap_create_claim(os,
			    DMU_USERUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
			VERIFY(0 == zap_create_claim(os,
			    DMU_GROUPUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
		}

		/*
		 * We intentionally modify the zap object even if the
		 * net delta is zero.  Otherwise the block of the zap
		 * object could be shared between datasets but need to
		 * be different between them after a bprewrite.
		 */

		/*
		 * The mutex is needed here for interlock with
		 * dnode_allocate.
		 */
		mutex_enter(&dn->dn_mtx);
		ASSERT(dn->dn_id_flags);
		if (dn->dn_id_flags & DN_ID_OLD_EXIST) {
			do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
			    dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
		}
		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
			do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
			    dn->dn_phys->dn_flags, dn->dn_newuid,
			    dn->dn_newgid, B_FALSE, tx);
		}

		dn->dn_oldused = 0;
		dn->dn_oldflags = 0;
		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
			dn->dn_olduid = dn->dn_newuid;
			dn->dn_oldgid = dn->dn_newgid;
			dn->dn_id_flags |= DN_ID_OLD_EXIST;
			if (dn->dn_bonuslen == 0)
				dn->dn_id_flags |= DN_ID_CHKED_SPILL;
			else
				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		}
		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST|DN_ID_SYNC);
		mutex_exit(&dn->dn_mtx);

		list_remove(list, dn);
		dnode_rele(dn, list);
	}
}

/*
 * Returns a pointer to data to find uid/gid from.
 *
 * If a dirty record for the transaction group that is syncing can't
 * be found then NULL is returned.  In the NULL case it is assumed
 * the uid/gid aren't changing.
 */
static void *
dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
	dbuf_dirty_record_t *dr, **drp;
	void *data;

	if (db->db_dirtycnt == 0)
		return (db->db.db_data);  /* Nothing is changing */

	for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
		if (dr->dr_txg == tx->tx_txg)
			break;

	if (dr == NULL)
		data = NULL;
	else if (dr->dr_dbuf->db_dnode->dn_bonuslen == 0 &&
	    dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
		data = dr->dt.dl.dr_data->b_data;
	else
		data = dr->dt.dl.dr_data;
	return (data);
}

void
dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
{
	objset_t *os = dn->dn_objset;
	void *data = NULL;
	dmu_buf_impl_t *db = NULL;
	uint64_t *user, *group;
	int flags = dn->dn_id_flags;
	int error;
	boolean_t have_spill = B_FALSE;

	if (!dmu_objset_userused_enabled(dn->dn_objset))
		return;

	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
	    DN_ID_CHKED_SPILL)))
		return;

	if (before && dn->dn_bonuslen != 0)
		data = DN_BONUS(dn->dn_phys);
	else if (!before && dn->dn_bonuslen != 0) {
		if (dn->dn_bonus) {
			db = dn->dn_bonus;
			mutex_enter(&db->db_mtx);
			data = dmu_objset_userquota_find_data(db, tx);
		} else {
			data = DN_BONUS(dn->dn_phys);
		}
	} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
		int rf = 0;

		if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
			rf |= DB_RF_HAVESTRUCT;
		error = dmu_spill_hold_by_dnode(dn, rf,
		    FTAG, (dmu_buf_t **)&db);
		ASSERT(error == 0);
		mutex_enter(&db->db_mtx);
		data = (before) ? db->db.db_data :
		    dmu_objset_userquota_find_data(db, tx);
		have_spill = B_TRUE;
	} else {
		mutex_enter(&dn->dn_mtx);
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		mutex_exit(&dn->dn_mtx);
		return;
	}

	if (before) {
		ASSERT(data);
		user = &dn->dn_olduid;
		group = &dn->dn_oldgid;
	} else if (data) {
		user = &dn->dn_newuid;
		group = &dn->dn_newgid;
	}

	/*
	 * Must always call the callback in case the object
	 * type has changed and that type isn't an object type to track.
	 */
	error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
	    user, group);

	/*
	 * Preserve existing uid/gid when the callback can't determine
	 * what the new uid/gid are and the callback returned EEXIST.
	 * The EEXIST error tells us to just use the existing uid/gid.
	 * If we don't know what the old values are then just assign
	 * them to 0, since that is a new file being created.
	 */
	if (!before && data == NULL && error == EEXIST) {
		if (flags & DN_ID_OLD_EXIST) {
			dn->dn_newuid = dn->dn_olduid;
			dn->dn_newgid = dn->dn_oldgid;
		} else {
			dn->dn_newuid = 0;
			dn->dn_newgid = 0;
		}
		error = 0;
	}

	if (db)
		mutex_exit(&db->db_mtx);

	mutex_enter(&dn->dn_mtx);
	if (error == 0 && before)
		dn->dn_id_flags |= DN_ID_OLD_EXIST;
	if (error == 0 && !before)
		dn->dn_id_flags |= DN_ID_NEW_EXIST;

	if (have_spill) {
		dn->dn_id_flags |= DN_ID_CHKED_SPILL;
	} else {
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
	}
	mutex_exit(&dn->dn_mtx);
	if (have_spill)
		dmu_buf_rele((dmu_buf_t *)db, FTAG);
}

boolean_t
dmu_objset_userspace_present(objset_t *os)
{
	return (os->os_phys->os_flags &
	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
}

int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os))
		return (ENOTSUP);
	if (dmu_objset_is_snapshot(os))
		return (EINVAL);

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and then accounted.  If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (EINTR);

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr) {
			dmu_tx_abort(tx);
			continue;
		}
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}

void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
	    usedobjsp, availobjsp);
}

uint64_t
dmu_objset_fsid_guid(objset_t *os)
{
	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
}

void
dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
{
	stat->dds_type = os->os_phys->os_type;
	if (os->os_dsl_dataset)
		dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
}

void
dmu_objset_stats(objset_t *os, nvlist_t *nv)
{
	ASSERT(os->os_dsl_dataset ||
	    os->os_phys->os_type == DMU_OST_META);

	if (os->os_dsl_dataset != NULL)
		dsl_dataset_stats(os->os_dsl_dataset, nv);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
	    os->os_phys->os_type);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
	    dmu_objset_userspace_present(os));
}

int
dmu_objset_is_snapshot(objset_t *os)
{
	if (os->os_dsl_dataset != NULL)
		return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
	else
		return (B_FALSE);
}
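
/*
 * Look up 'name' in the dataset's snapshot-name ZAP using the
 * normalization rules in effect, returning the exact on-disk spelling
 * in 'real' and, via 'conflict', whether the name collides with
 * another name under those rules.
 */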
int
dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
    boolean_t *conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	uint64_t ignored;

	if (ds->ds_phys->ds_snapnames_zapobj == 0)
		return (ENOENT);

	return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST,
	    real, maxlen, conflict));
}

int
dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	if (ds->ds_phys->ds_snapnames_zapobj == 0)
		return (ENOENT);

	zap_cursor_init_serialized(&cursor,
	    ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (ENOENT);
	}

	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (ENAMETOOLONG);
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	if (case_conflict)
		*case_conflict = attr.za_normalization_conflict;
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}

int
dmu_dir_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp)
{
	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	/* there is no next dir on a snapshot! */
	if (os->os_dsl_dataset->ds_object !=
	    dd->dd_phys->dd_head_dataset_obj)
		return (ENOENT);

	zap_cursor_init_serialized(&cursor,
	    dd->dd_pool->dp_meta_objset,
	    dd->dd_phys->dd_child_dir_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (ENOENT);
	}

	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (ENAMETOOLONG);
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}

struct findarg {
	int (*func)(const char *, void *);
	void *arg;
};

/* ARGSUSED */
static int
findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
{
	struct findarg *fa = arg;
	return (fa->func(dsname, fa->arg));
}

/*
 * Find all objsets under name, and for each, call 'func(child_name, arg)'.
 * Perhaps change all callers to use dmu_objset_find_spa()?
 */
int
dmu_objset_find(char *name, int func(const char *, void *), void *arg,
    int flags)
{
	struct findarg fa;
	fa.func = func;
	fa.arg = arg;
	return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags));
}

/*
 * Find all objsets under name, call func on each
 */
int
dmu_objset_find_spa(spa_t *spa, const char *name,
    int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	char *child;
	uint64_t thisobj;
	int err;

	if (name == NULL)
		name = spa_name(spa);
	err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
	if (err)
		return (err);

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_close(dd, FTAG);
		return (0);
	}

	thisobj = dd->dd_phys->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
	dp = dd->dd_pool;

	/*
	 * Iterate over all children.
	 */
	if (flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT(attr->za_integer_length == sizeof (uint64_t));
			ASSERT(attr->za_num_integers == 1);

			child = kmem_asprintf("%s/%s", name, attr->za_name);
			err = dmu_objset_find_spa(spa, child, func, arg, flags);
			strfree(child);
			if (err)
				break;
		}
		zap_cursor_fini(&zc);

		if (err) {
			dsl_dir_close(dd, FTAG);
			kmem_free(attr, sizeof (zap_attribute_t));
			return (err);
		}
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (flags & DS_FIND_SNAPSHOTS) {
		if (!dsl_pool_sync_context(dp))
			rw_enter(&dp->dp_config_rwlock, RW_READER);
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
		if (!dsl_pool_sync_context(dp))
			rw_exit(&dp->dp_config_rwlock);

		if (err == 0) {
			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT(attr->za_integer_length ==
				    sizeof (uint64_t));
				ASSERT(attr->za_num_integers == 1);

				child = kmem_asprintf("%s@%s",
				    name, attr->za_name);
				err = func(spa, attr->za_first_integer,
				    child, arg);
				strfree(child);
				if (err)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_close(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));

	if (err)
		return (err);

	/*
	 * Apply to self if appropriate.
	 */
	err = func(spa, thisobj, name, arg);
	return (err);
}
1629 */ 1630 err = func(spa, thisobj, name, arg); 1631 return (err); 1632 } 1633 1634 /* ARGSUSED */ 1635 int 1636 dmu_objset_prefetch(const char *name, void *arg) 1637 { 1638 dsl_dataset_t *ds; 1639 1640 if (dsl_dataset_hold(name, FTAG, &ds)) 1641 return (0); 1642 1643 if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { 1644 mutex_enter(&ds->ds_opening_lock); 1645 if (ds->ds_objset == NULL) { 1646 uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; 1647 zbookmark_t zb; 1648 1649 SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT, 1650 ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 1651 1652 (void) arc_read_nolock(NULL, dsl_dataset_get_spa(ds), 1653 &ds->ds_phys->ds_bp, NULL, NULL, 1654 ZIO_PRIORITY_ASYNC_READ, 1655 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, 1656 &aflags, &zb); 1657 } 1658 mutex_exit(&ds->ds_opening_lock); 1659 } 1660 1661 dsl_dataset_rele(ds, FTAG); 1662 return (0); 1663 } 1664 1665 void 1666 dmu_objset_set_user(objset_t *os, void *user_ptr) 1667 { 1668 ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); 1669 os->os_user_ptr = user_ptr; 1670 } 1671 1672 void * 1673 dmu_objset_get_user(objset_t *os) 1674 { 1675 ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); 1676 return (os->os_user_ptr); 1677 } 1678