/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
 */

/* Portions Copyright 2010 Robert Milkowski */

#include <sys/cred.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_deleg.h>
#include <sys/dnode.h>
#include <sys/dbuf.h>
#include <sys/zvol.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/dmu_impl.h>
#include <sys/zfs_ioctl.h>
#include <sys/sa.h>
#include <sys/zfs_onexit.h>
#include <sys/dsl_destroy.h>
#include <sys/vdev.h>

/*
 * Needed to close a window in dnode_move() that allows the objset to be freed
 * before it can be safely accessed.
 */
krwlock_t os_lock;

/*
 * Tunable to override the maximum number of threads for the parallelization
 * of dmu_objset_find_dp, needed to speed up the import of pools with many
 * datasets.
 * Default is 4 times the number of leaf vdevs.
 */
int dmu_find_threads = 0;

static void dmu_objset_find_dp_cb(void *arg);

void
dmu_objset_init(void)
{
	rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
}

void
dmu_objset_fini(void)
{
	rw_destroy(&os_lock);
}

spa_t *
dmu_objset_spa(objset_t *os)
{
	return (os->os_spa);
}

zilog_t *
dmu_objset_zil(objset_t *os)
{
	return (os->os_zil);
}

dsl_pool_t *
dmu_objset_pool(objset_t *os)
{
	dsl_dataset_t *ds;

	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
		return (ds->ds_dir->dd_pool);
	else
		return (spa_get_dsl(os->os_spa));
}

dsl_dataset_t *
dmu_objset_ds(objset_t *os)
{
	return (os->os_dsl_dataset);
}

dmu_objset_type_t
dmu_objset_type(objset_t *os)
{
	return (os->os_phys->os_type);
}

void
dmu_objset_name(objset_t *os, char *buf)
{
	dsl_dataset_name(os->os_dsl_dataset, buf);
}

uint64_t
dmu_objset_id(objset_t *os)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;

	return (ds ? ds->ds_object : 0);
}

zfs_sync_type_t
dmu_objset_syncprop(objset_t *os)
{
	return (os->os_sync);
}

zfs_logbias_op_t
dmu_objset_logbias(objset_t *os)
{
	return (os->os_logbias);
}

static void
checksum_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance should have been done by now.
	 */
	ASSERT(newval != ZIO_CHECKSUM_INHERIT);

	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
}

static void
compression_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval != ZIO_COMPRESS_INHERIT);

	os->os_compress = zio_compress_select(os->os_spa, newval,
	    ZIO_COMPRESS_ON);
}

static void
copies_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval > 0);
	ASSERT(newval <= spa_max_replication(os->os_spa));

	os->os_copies = newval;
}

static void
dedup_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;
	spa_t *spa = os->os_spa;
	enum zio_checksum checksum;

	/*
	 * Inheritance should have been done by now.
	 */
	ASSERT(newval != ZIO_CHECKSUM_INHERIT);

	checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);

	os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
	os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
}

static void
primary_cache_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
	    newval == ZFS_CACHE_METADATA);

	os->os_primary_cache = newval;
}

static void
secondary_cache_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
	    newval == ZFS_CACHE_METADATA);

	os->os_secondary_cache = newval;
}

static void
sync_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
	    newval == ZFS_SYNC_DISABLED);

	os->os_sync = newval;
	if (os->os_zil)
		zil_set_sync(os->os_zil, newval);
}

static void
redundant_metadata_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
	    newval == ZFS_REDUNDANT_METADATA_MOST);

	os->os_redundant_metadata = newval;
}

static void
logbias_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
	    newval == ZFS_LOGBIAS_THROUGHPUT);
	os->os_logbias = newval;
	if (os->os_zil)
		zil_set_logbias(os->os_zil, newval);
}

static void
recordsize_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	os->os_recordsize = newval;
}

void
dmu_objset_byteswap(void *buf, size_t size)
{
	objset_phys_t *osp = buf;

	ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
	dnode_byteswap(&osp->os_meta_dnode);
	byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
	osp->os_type = BSWAP_64(osp->os_type);
	osp->os_flags = BSWAP_64(osp->os_flags);
	if (size == sizeof (objset_phys_t)) {
		dnode_byteswap(&osp->os_userused_dnode);
		dnode_byteswap(&osp->os_groupused_dnode);
	}
}

int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		arc_flags_t aflags = ARC_FLAG_WAIT;
		zbookmark_phys_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_FLAG_L2CACHE;
		if (DMU_OS_IS_L2COMPRESSIBLE(os))
			aflags |= ARC_FLAG_L2COMPRESS;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err != 0) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = SET_ERROR(EIO);
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know about
	 * checksum/compression/copies.
	 */
	if (ds != NULL) {
		boolean_t needlock = B_FALSE;

		/*
		 * Note: it's valid to open the objset if the dataset is
		 * long-held, in which case the pool_config lock will not
		 * be held.
		 */
		if (!dsl_pool_config_held(dmu_objset_pool(os))) {
			needlock = B_TRUE;
			dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
		}
		err = dsl_prop_register(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os);
		if (err == 0) {
			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
			    secondary_cache_changed_cb, os);
		}
		if (!ds->ds_is_snapshot) {
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
				    checksum_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    compression_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COPIES),
				    copies_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_DEDUP),
				    dedup_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
				    logbias_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_SYNC),
				    sync_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(
				    ZFS_PROP_REDUNDANT_METADATA),
				    redundant_metadata_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
				    recordsize_changed_cb, os);
			}
		}
		if (needlock)
			dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
		if (err != 0) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf));
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else {
		/* It's the meta-objset. */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_ON;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = B_FALSE;
		os->os_logbias = ZFS_LOGBIAS_LATENCY;
		os->os_sync = ZFS_SYNC_STANDARD;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
	}

	if (ds == NULL || !ds->ds_is_snapshot)
		os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	dnode_special_open(os, &os->os_phys->os_meta_dnode,
	    DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		dnode_special_open(os, &os->os_phys->os_userused_dnode,
		    DMU_USERUSED_OBJECT, &os->os_userused_dnode);
		dnode_special_open(os, &os->os_phys->os_groupused_dnode,
		    DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
	}

	*osp = os;
	return (0);
}

int
dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
{
	int err = 0;

	/*
	 * We shouldn't be doing anything with dsl_dataset_t's unless the
	 * pool_config lock is held, or the dataset is long-held.
	 */
	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) ||
	    dsl_dataset_long_held(ds));

	mutex_enter(&ds->ds_opening_lock);
	if (ds->ds_objset == NULL) {
		objset_t *os;
		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
		    ds, dsl_dataset_get_blkptr(ds), &os);

		if (err == 0) {
			mutex_enter(&ds->ds_lock);
			ASSERT(ds->ds_objset == NULL);
			ds->ds_objset = os;
			mutex_exit(&ds->ds_lock);
		}
	}
	*osp = ds->ds_objset;
	mutex_exit(&ds->ds_opening_lock);
	return (err);
}

/*
 * Holds the pool while the objset is held.  Therefore only one objset
 * can be held at a time.
 */
int
dmu_objset_hold(const char *name, void *tag, objset_t **osp)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int err;

	err = dsl_pool_hold(name, tag, &dp);
	if (err != 0)
		return (err);
	err = dsl_dataset_hold(dp, name, tag, &ds);
	if (err != 0) {
		dsl_pool_rele(dp, tag);
		return (err);
	}

	err = dmu_objset_from_ds(ds, osp);
	if (err != 0) {
		dsl_dataset_rele(ds, tag);
		dsl_pool_rele(dp, tag);
	}

	return (err);
}
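
/*
 * Illustrative sketch (not part of the original source): the intended
 * pairing of the consumer entry points.  dmu_objset_hold()/dmu_objset_rele()
 * bracket short read-only accesses, while dmu_objset_own()/dmu_objset_disown()
 * (below) are for long-lived owners such as mounted filesystems.  The guard
 * macro, the function, and the dataset name are hypothetical.
 */
#ifdef _DMU_OBJSET_EXAMPLE
static int
example_read_objset_type(const char *name, dmu_objset_type_t *typep)
{
	objset_t *os;
	int err;

	/* Take a short-term hold; pairs with dmu_objset_rele(). */
	err = dmu_objset_hold(name, FTAG, &os);
	if (err != 0)
		return (err);
	*typep = dmu_objset_type(os);
	dmu_objset_rele(os, FTAG);
	return (0);
}
#endif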

static int
dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
    boolean_t readonly, void *tag, objset_t **osp)
{
	int err;

	err = dmu_objset_from_ds(ds, osp);
	if (err != 0) {
		dsl_dataset_disown(ds, tag);
	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
		dsl_dataset_disown(ds, tag);
		return (SET_ERROR(EINVAL));
	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
		dsl_dataset_disown(ds, tag);
		return (SET_ERROR(EROFS));
	}
	return (err);
}

/*
 * dsl_pool must not be held when this is called.
 * Upon successful return, there will be a longhold on the dataset,
 * and the dsl_pool will not be held.
 */
int
dmu_objset_own(const char *name, dmu_objset_type_t type,
    boolean_t readonly, void *tag, objset_t **osp)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int err;

	err = dsl_pool_hold(name, FTAG, &dp);
	if (err != 0)
		return (err);
	err = dsl_dataset_own(dp, name, tag, &ds);
	if (err != 0) {
		dsl_pool_rele(dp, FTAG);
		return (err);
	}
	err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
	dsl_pool_rele(dp, FTAG);

	return (err);
}

int
dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
    boolean_t readonly, void *tag, objset_t **osp)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_own_obj(dp, obj, tag, &ds);
	if (err != 0)
		return (err);

	return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
}

void
dmu_objset_rele(objset_t *os, void *tag)
{
	dsl_pool_t *dp = dmu_objset_pool(os);
	dsl_dataset_rele(os->os_dsl_dataset, tag);
	dsl_pool_rele(dp, tag);
}

/*
 * When we are called, os MUST refer to an objset associated with a dataset
 * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
 * == tag.  We will then release and reacquire ownership of the dataset while
 * holding the pool config_rwlock, so that no intervening namespace or
 * ownership changes can occur.
 *
 * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
 * release the hold on its dataset and acquire a new one on the dataset of the
 * same name so that it can be partially torn down and reconstructed.
 */
void
dmu_objset_refresh_ownership(objset_t *os, void *tag)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds, *newds;
	char name[ZFS_MAX_DATASET_NAME_LEN];

	ds = os->os_dsl_dataset;
	VERIFY3P(ds, !=, NULL);
	VERIFY3P(ds->ds_owner, ==, tag);
	VERIFY(dsl_dataset_long_held(ds));

	dsl_dataset_name(ds, name);
	dp = dmu_objset_pool(os);
	dsl_pool_config_enter(dp, FTAG);
	dmu_objset_disown(os, tag);
	VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
	VERIFY3P(newds, ==, os->os_dsl_dataset);
	dsl_pool_config_exit(dp, FTAG);
}

void
dmu_objset_disown(objset_t *os, void *tag)
{
	dsl_dataset_disown(os->os_dsl_dataset, tag);
}

void
dmu_objset_evict_dbufs(objset_t *os)
{
	dnode_t dn_marker;
	dnode_t *dn;

	mutex_enter(&os->os_lock);
	dn = list_head(&os->os_dnodes);
	while (dn != NULL) {
		/*
		 * Skip dnodes without holds.  We have to do this dance
		 * because dnode_add_ref() only works if there is already a
		 * hold.  If the dnode has no holds, then it has no dbufs.
		 */
		if (dnode_add_ref(dn, FTAG)) {
			list_insert_after(&os->os_dnodes, dn, &dn_marker);
			mutex_exit(&os->os_lock);

			dnode_evict_dbufs(dn);
			dnode_rele(dn, FTAG);

			mutex_enter(&os->os_lock);
			dn = list_next(&os->os_dnodes, &dn_marker);
			list_remove(&os->os_dnodes, &dn_marker);
		} else {
			dn = list_next(&os->os_dnodes, dn);
		}
	}
	mutex_exit(&os->os_lock);

	if (DMU_USERUSED_DNODE(os) != NULL) {
		dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
		dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
	}
	dnode_evict_dbufs(DMU_META_DNODE(os));
}
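
/*
 * Illustrative sketch (not part of the original source) of the marker trick
 * used by dmu_objset_evict_dbufs() above: to drop a list lock while
 * processing an element, insert a stack-allocated marker node after it,
 * release the lock, do the work, then re-take the lock and resume from the
 * marker.  The real code additionally takes a hold on the element before
 * dropping the lock; the "example_node" type, list, and callback here are
 * hypothetical.
 */
#ifdef _DMU_OBJSET_EXAMPLE
typedef struct example_node {
	list_node_t en_link;
	int en_value;
} example_node_t;

static void
example_walk_list(list_t *l, kmutex_t *lock, void (*work)(example_node_t *))
{
	example_node_t marker, *en;

	mutex_enter(lock);
	for (en = list_head(l); en != NULL; ) {
		/* Park the marker so we can find our place again. */
		list_insert_after(l, en, &marker);
		mutex_exit(lock);

		work(en);		/* runs without the lock held */

		mutex_enter(lock);
		en = list_next(l, &marker);
		list_remove(l, &marker);
	}
	mutex_exit(lock);
}
#endif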

/*
 * Objset eviction processing is split into two pieces.
 * The first marks the objset as evicting, evicts any dbufs that
 * have a refcount of zero, and then queues up the objset for the
 * second phase of eviction.  Once os->os_dnodes has been cleared by
 * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
 * The second phase closes the special dnodes, dequeues the objset from
 * the list of those undergoing eviction, and finally frees the objset.
 *
 * NOTE: Due to asynchronous eviction processing (invocation of
 * dnode_buf_pageout()), it is possible for the meta dnode for the
 * objset to have no holds even though os->os_dnodes is not empty.
 */
void
dmu_objset_evict(objset_t *os)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;

	for (int t = 0; t < TXG_SIZE; t++)
		ASSERT(!dmu_objset_is_dirty(os, t));

	if (ds) {
		if (!ds->ds_is_snapshot) {
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
			    checksum_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
			    compression_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_COPIES),
			    copies_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_DEDUP),
			    dedup_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
			    logbias_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_SYNC),
			    sync_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
			    redundant_metadata_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
			    recordsize_changed_cb, os));
		}
		VERIFY0(dsl_prop_unregister(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os));
		VERIFY0(dsl_prop_unregister(ds,
		    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
		    secondary_cache_changed_cb, os));
	}

	if (os->os_sa)
		sa_tear_down(os);

	dmu_objset_evict_dbufs(os);

	mutex_enter(&os->os_lock);
	spa_evicting_os_register(os->os_spa, os);
	if (list_is_empty(&os->os_dnodes)) {
		mutex_exit(&os->os_lock);
		dmu_objset_evict_done(os);
	} else {
		mutex_exit(&os->os_lock);
	}
}

void
dmu_objset_evict_done(objset_t *os)
{
	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);

	dnode_special_close(&os->os_meta_dnode);
	if (DMU_USERUSED_DNODE(os)) {
		dnode_special_close(&os->os_userused_dnode);
		dnode_special_close(&os->os_groupused_dnode);
	}
	zil_free(os->os_zil);

	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));

	/*
	 * This is a barrier to prevent the objset from going away in
	 * dnode_move() until we can safely ensure that the objset is still in
	 * use.  We consider the objset valid before the barrier and invalid
	 * after the barrier.
	 */
	rw_enter(&os_lock, RW_READER);
	rw_exit(&os_lock);

	mutex_destroy(&os->os_lock);
	mutex_destroy(&os->os_obj_lock);
	mutex_destroy(&os->os_user_ptr_lock);
	spa_evicting_os_deregister(os->os_spa, os);
	kmem_free(os, sizeof (objset_t));
}

timestruc_t
dmu_objset_snap_cmtime(objset_t *os)
{
	return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
}

/* called from dsl for meta-objset */
objset_t *
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    dmu_objset_type_t type, dmu_tx_t *tx)
{
	objset_t *os;
	dnode_t *mdn;

	ASSERT(dmu_tx_is_syncing(tx));

	if (ds != NULL)
		VERIFY0(dmu_objset_from_ds(ds, &os));
	else
		VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os));

	mdn = DMU_META_DNODE(os);

	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);

	/*
	 * We don't want to have to increase the meta-dnode's nlevels
	 * later, because then we could do it in quiescing context while
	 * we are also accessing it in open context.
	 *
	 * This precaution is not necessary for the MOS (ds == NULL),
	 * because the MOS is only updated in syncing context.
	 * This is most fortunate: the MOS is the only objset that
	 * needs to be synced multiple times as spa_sync() iterates
	 * to convergence, so minimizing its dn_nlevels matters.
	 */
	if (ds != NULL) {
		int levels = 1;

		/*
		 * Determine the number of levels necessary for the meta-dnode
		 * to contain DN_MAX_OBJECT dnodes.
		 */
		while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
		    DN_MAX_OBJECT * sizeof (dnode_phys_t))
			levels++;

		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
		    mdn->dn_nlevels = levels;
	}

	ASSERT(type != DMU_OST_NONE);
	ASSERT(type != DMU_OST_ANY);
	ASSERT(type < DMU_OST_NUMTYPES);
	os->os_phys->os_type = type;
	if (dmu_objset_userused_enabled(os)) {
		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
		os->os_flags = os->os_phys->os_flags;
	}

	dsl_dataset_dirty(ds, tx);

	return (os);
}
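
/*
 * Worked instance of the nlevels computation above (illustrative, assuming
 * typical constants: 16K meta-dnode blocks so dn_datablkshift == 14, 128K
 * indirects so dn_indblkshift == 17, 128-byte block pointers so
 * SPA_BLKPTRSHIFT == 7, dn_nblkptr == 3, and 512-byte dnode_phys_t):
 *
 *	bytes addressable at level L = 3 << (14 + (L - 1) * (17 - 7))
 *	bytes needed = DN_MAX_OBJECT * 512 = 2^48 * 2^9 = 2^57
 *
 *	L = 5:	3 * 2^54 ~= 2^55.6  <  2^57	(too small)
 *	L = 6:	3 * 2^64 ~= 2^65.6  >= 2^57	(sufficient)
 *
 * so with these values the loop settles on 6 levels for the meta-dnode.
 */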

typedef struct dmu_objset_create_arg {
	const char *doca_name;
	cred_t *doca_cred;
	void (*doca_userfunc)(objset_t *os, void *arg,
	    cred_t *cr, dmu_tx_t *tx);
	void *doca_userarg;
	dmu_objset_type_t doca_type;
	uint64_t doca_flags;
} dmu_objset_create_arg_t;

/*ARGSUSED*/
static int
dmu_objset_create_check(void *arg, dmu_tx_t *tx)
{
	dmu_objset_create_arg_t *doca = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *pdd;
	const char *tail;
	int error;

	if (strchr(doca->doca_name, '@') != NULL)
		return (SET_ERROR(EINVAL));

	if (strlen(doca->doca_name) >= ZFS_MAX_DATASET_NAME_LEN)
		return (SET_ERROR(ENAMETOOLONG));

	error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
	if (error != 0)
		return (error);
	if (tail == NULL) {
		dsl_dir_rele(pdd, FTAG);
		return (SET_ERROR(EEXIST));
	}
	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
	    doca->doca_cred);
	dsl_dir_rele(pdd, FTAG);

	return (error);
}

static void
dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
{
	dmu_objset_create_arg_t *doca = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *pdd;
	const char *tail;
	dsl_dataset_t *ds;
	uint64_t obj;
	blkptr_t *bp;
	objset_t *os;

	VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));

	obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
	    doca->doca_cred, tx);

	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
	bp = dsl_dataset_get_blkptr(ds);
	os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
	    ds, bp, doca->doca_type, tx);

	if (doca->doca_userfunc != NULL) {
		doca->doca_userfunc(os, doca->doca_userarg,
		    doca->doca_cred, tx);
	}

	spa_history_log_internal_ds(ds, "create", tx, "");
	dsl_dataset_rele(ds, FTAG);
	dsl_dir_rele(pdd, FTAG);
}

int
dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
{
	dmu_objset_create_arg_t doca;

	doca.doca_name = name;
	doca.doca_cred = CRED();
	doca.doca_flags = flags;
	doca.doca_userfunc = func;
	doca.doca_userarg = arg;
	doca.doca_type = type;

	return (dsl_sync_task(name,
	    dmu_objset_create_check, dmu_objset_create_sync, &doca,
	    5, ZFS_SPACE_CHECK_NORMAL));
}
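
/*
 * Illustrative sketch (not part of the original source): how a consumer
 * would drive dmu_objset_create().  The check func runs in open context and
 * may fail; the sync func plus the user callback run once in syncing
 * context.  "example_setup_cb", its use of dmu_object_alloc(), and the guard
 * macro are hypothetical.
 */
#ifdef _DMU_OBJSET_EXAMPLE
/*ARGSUSED*/
static void
example_setup_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
{
	/* Runs in syncing context; allocate the new objset's first object. */
	(void) dmu_object_alloc(os, DMU_OT_PLAIN_FILE_CONTENTS, 0,
	    DMU_OT_NONE, 0, tx);
}

static int
example_create_fs(const char *name)
{
	return (dmu_objset_create(name, DMU_OST_ZFS, 0,
	    example_setup_cb, NULL));
}
#endif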

typedef struct dmu_objset_clone_arg {
	const char *doca_clone;
	const char *doca_origin;
	cred_t *doca_cred;
} dmu_objset_clone_arg_t;

/*ARGSUSED*/
static int
dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
{
	dmu_objset_clone_arg_t *doca = arg;
	dsl_dir_t *pdd;
	const char *tail;
	int error;
	dsl_dataset_t *origin;
	dsl_pool_t *dp = dmu_tx_pool(tx);

	if (strchr(doca->doca_clone, '@') != NULL)
		return (SET_ERROR(EINVAL));

	if (strlen(doca->doca_clone) >= ZFS_MAX_DATASET_NAME_LEN)
		return (SET_ERROR(ENAMETOOLONG));

	error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
	if (error != 0)
		return (error);
	if (tail == NULL) {
		dsl_dir_rele(pdd, FTAG);
		return (SET_ERROR(EEXIST));
	}

	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
	    doca->doca_cred);
	if (error != 0) {
		dsl_dir_rele(pdd, FTAG);
		return (SET_ERROR(EDQUOT));
	}
	dsl_dir_rele(pdd, FTAG);

	error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
	if (error != 0)
		return (error);

	/* You can only clone snapshots, not the head datasets. */
	if (!origin->ds_is_snapshot) {
		dsl_dataset_rele(origin, FTAG);
		return (SET_ERROR(EINVAL));
	}
	dsl_dataset_rele(origin, FTAG);

	return (0);
}

static void
dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
{
	dmu_objset_clone_arg_t *doca = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *pdd;
	const char *tail;
	dsl_dataset_t *origin, *ds;
	uint64_t obj;
	char namebuf[ZFS_MAX_DATASET_NAME_LEN];

	VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
	VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));

	obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
	    doca->doca_cred, tx);

	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
	dsl_dataset_name(origin, namebuf);
	spa_history_log_internal_ds(ds, "clone", tx,
	    "origin=%s (%llu)", namebuf, origin->ds_object);
	dsl_dataset_rele(ds, FTAG);
	dsl_dataset_rele(origin, FTAG);
	dsl_dir_rele(pdd, FTAG);
}

int
dmu_objset_clone(const char *clone, const char *origin)
{
	dmu_objset_clone_arg_t doca;

	doca.doca_clone = clone;
	doca.doca_origin = origin;
	doca.doca_cred = CRED();

	return (dsl_sync_task(clone,
	    dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
	    5, ZFS_SPACE_CHECK_NORMAL));
}

int
dmu_objset_snapshot_one(const char *fsname, const char *snapname)
{
	int err;
	char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
	nvlist_t *snaps = fnvlist_alloc();

	fnvlist_add_boolean(snaps, longsnap);
	strfree(longsnap);
	err = dsl_dataset_snapshot(snaps, NULL, NULL);
	fnvlist_free(snaps);
	return (err);
}
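
/*
 * Illustrative sketch (not part of the original source): snapshotting a
 * filesystem and then cloning from that snapshot with the two entry points
 * above.  The dataset names and the guard macro are hypothetical.
 */
#ifdef _DMU_OBJSET_EXAMPLE
static int
example_snap_and_clone(void)
{
	int err;

	/* Creates "tank/fs@backup" ... */
	err = dmu_objset_snapshot_one("tank/fs", "backup");
	if (err != 0)
		return (err);
	/* ... and a clone that shares its blocks with the snapshot. */
	return (dmu_objset_clone("tank/fs-clone", "tank/fs@backup"));
}
#endif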

static void
dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
	dnode_t *dn;

	while (dn = list_head(list)) {
		ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
		ASSERT(dn->dn_dbuf->db_data_pending);
		/*
		 * Initialize dn_zio outside dnode_sync() because the
		 * meta-dnode needs to set it outside dnode_sync().
		 */
		dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
		ASSERT(dn->dn_zio);

		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
		list_remove(list, dn);

		if (newlist) {
			(void) dnode_add_ref(dn, newlist);
			list_insert_tail(newlist, dn);
		}

		dnode_sync(dn, tx);
	}
}

/* ARGSUSED */
static void
dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	blkptr_t *bp = zio->io_bp;
	objset_t *os = arg;
	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;

	ASSERT(!BP_IS_EMBEDDED(bp));
	ASSERT3P(bp, ==, os->os_rootbp);
	ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
	ASSERT0(BP_GET_LEVEL(bp));

	/*
	 * Update rootbp fill count: it should be the number of objects
	 * allocated in the object set (not counting the "special"
	 * objects that are stored in the objset_phys_t -- the meta
	 * dnode and user/group accounting objects).
	 */
	bp->blk_fill = 0;
	for (int i = 0; i < dnp->dn_nblkptr; i++)
		bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
}

/* ARGSUSED */
static void
dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	blkptr_t *bp = zio->io_bp;
	blkptr_t *bp_orig = &zio->io_bp_orig;
	objset_t *os = arg;

	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
		ASSERT(BP_EQUAL(bp, bp_orig));
	} else {
		dsl_dataset_t *ds = os->os_dsl_dataset;
		dmu_tx_t *tx = os->os_synctx;

		(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
		dsl_dataset_block_born(ds, bp, tx);
	}
}

/* called from dsl */
void
dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_phys_t zb;
	zio_prop_t zp;
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
	arc_release(os->os_phys_buf, &os->os_phys_buf);

	dmu_write_policy(os, NULL, 0, 0, &zp);

	zio = arc_write(pio, os->os_spa, tx->tx_txg,
	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
	    DMU_OS_IS_L2COMPRESSIBLE(os), &zp, dmu_objset_write_ready,
	    NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE,
	    ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	DMU_META_DNODE(os)->dn_zio = zio;
	dnode_sync(DMU_META_DNODE(os), tx);

	os->os_phys->os_flags = os->os_flags;

	if (DMU_USERUSED_DNODE(os) &&
	    DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
		DMU_USERUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_USERUSED_DNODE(os), tx);
		DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
	while (dr = list_head(list)) {
		ASSERT0(dr->dr_dbuf->db_level);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}

boolean_t
dmu_objset_is_dirty(objset_t *os, uint64_t txg)
{
	return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
	    !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
}

static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];

void
dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
{
	used_cbs[ost] = cb;
}

boolean_t
dmu_objset_userused_enabled(objset_t *os)
{
	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
	    used_cbs[os->os_phys->os_type] != NULL &&
	    DMU_USERUSED_DNODE(os) != NULL);
}

static void
do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
    uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
{
	if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
		int64_t delta = DNODE_SIZE + used;
		if (subtract)
			delta = -delta;
		VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
		    user, delta, tx));
		VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
		    group, delta, tx));
	}
}

void
dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
{
	dnode_t *dn;
	list_t *list = &os->os_synced_dnodes;

	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));

	while (dn = list_head(list)) {
		int flags;
		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
		    dn->dn_phys->dn_flags &
		    DNODE_FLAG_USERUSED_ACCOUNTED);

		/* Allocate the user/groupused objects if necessary. */
		if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
			VERIFY(0 == zap_create_claim(os,
			    DMU_USERUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
			VERIFY(0 == zap_create_claim(os,
			    DMU_GROUPUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
		}

		/*
		 * We intentionally modify the zap object even if the
		 * net delta is zero.  Otherwise the block of the zap obj
		 * could be shared between datasets but need to be
		 * different between them after a bprewrite.
		 */

		flags = dn->dn_id_flags;
		ASSERT(flags);
		if (flags & DN_ID_OLD_EXIST) {
			do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
			    dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
		}
		if (flags & DN_ID_NEW_EXIST) {
			do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
			    dn->dn_phys->dn_flags, dn->dn_newuid,
			    dn->dn_newgid, B_FALSE, tx);
		}

		mutex_enter(&dn->dn_mtx);
		dn->dn_oldused = 0;
		dn->dn_oldflags = 0;
		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
			dn->dn_olduid = dn->dn_newuid;
			dn->dn_oldgid = dn->dn_newgid;
			dn->dn_id_flags |= DN_ID_OLD_EXIST;
			if (dn->dn_bonuslen == 0)
				dn->dn_id_flags |= DN_ID_CHKED_SPILL;
			else
				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		}
		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
		mutex_exit(&dn->dn_mtx);

		list_remove(list, dn);
		dnode_rele(dn, list);
	}
}

/*
 * Returns a pointer to data to find uid/gid from
 *
 * If a dirty record for transaction group that is syncing can't
 * be found then NULL is returned.  In the NULL case it is assumed
 * the uid/gid aren't changing.
 */
static void *
dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
	dbuf_dirty_record_t *dr, **drp;
	void *data;

	if (db->db_dirtycnt == 0)
		return (db->db.db_data);  /* Nothing is changing */

	for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
		if (dr->dr_txg == tx->tx_txg)
			break;

	if (dr == NULL) {
		data = NULL;
	} else {
		dnode_t *dn;

		DB_DNODE_ENTER(dr->dr_dbuf);
		dn = DB_DNODE(dr->dr_dbuf);

		if (dn->dn_bonuslen == 0 &&
		    dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
			data = dr->dt.dl.dr_data->b_data;
		else
			data = dr->dt.dl.dr_data;

		DB_DNODE_EXIT(dr->dr_dbuf);
	}

	return (data);
}

void
dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
{
	objset_t *os = dn->dn_objset;
	void *data = NULL;
	dmu_buf_impl_t *db = NULL;
	uint64_t *user = NULL;
	uint64_t *group = NULL;
	int flags = dn->dn_id_flags;
	int error;
	boolean_t have_spill = B_FALSE;

	if (!dmu_objset_userused_enabled(dn->dn_objset))
		return;

	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
	    DN_ID_CHKED_SPILL)))
		return;

	if (before && dn->dn_bonuslen != 0)
		data = DN_BONUS(dn->dn_phys);
	else if (!before && dn->dn_bonuslen != 0) {
		if (dn->dn_bonus) {
			db = dn->dn_bonus;
			mutex_enter(&db->db_mtx);
			data = dmu_objset_userquota_find_data(db, tx);
		} else {
			data = DN_BONUS(dn->dn_phys);
		}
	} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
		int rf = 0;

		if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
			rf |= DB_RF_HAVESTRUCT;
		error = dmu_spill_hold_by_dnode(dn,
		    rf | DB_RF_MUST_SUCCEED,
		    FTAG, (dmu_buf_t **)&db);
		ASSERT(error == 0);
		mutex_enter(&db->db_mtx);
		data = (before) ? db->db.db_data :
		    dmu_objset_userquota_find_data(db, tx);
		have_spill = B_TRUE;
	} else {
		mutex_enter(&dn->dn_mtx);
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		mutex_exit(&dn->dn_mtx);
		return;
	}

	if (before) {
		ASSERT(data);
		user = &dn->dn_olduid;
		group = &dn->dn_oldgid;
	} else if (data) {
		user = &dn->dn_newuid;
		group = &dn->dn_newgid;
	}

	/*
	 * Must always call the callback in case the object
	 * type has changed and that type isn't an object type to track.
	 */
	error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
	    user, group);

	/*
	 * Preserve existing uid/gid when the callback can't determine
	 * what the new uid/gid are and the callback returned EEXIST.
	 * The EEXIST error tells us to just use the existing uid/gid.
	 * If we don't know what the old values are then just assign
	 * them to 0, since that is a new file being created.
	 */
	if (!before && data == NULL && error == EEXIST) {
		if (flags & DN_ID_OLD_EXIST) {
			dn->dn_newuid = dn->dn_olduid;
			dn->dn_newgid = dn->dn_oldgid;
		} else {
			dn->dn_newuid = 0;
			dn->dn_newgid = 0;
		}
		error = 0;
	}

	if (db)
		mutex_exit(&db->db_mtx);

	mutex_enter(&dn->dn_mtx);
	if (error == 0 && before)
		dn->dn_id_flags |= DN_ID_OLD_EXIST;
	if (error == 0 && !before)
		dn->dn_id_flags |= DN_ID_NEW_EXIST;

	if (have_spill) {
		dn->dn_id_flags |= DN_ID_CHKED_SPILL;
	} else {
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
	}
	mutex_exit(&dn->dn_mtx);
	if (have_spill)
		dmu_buf_rele((dmu_buf_t *)db, FTAG);
}

boolean_t
dmu_objset_userspace_present(objset_t *os)
{
	return (os->os_phys->os_flags &
	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
}

int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os))
		return (SET_ERROR(ENOTSUP));
	if (dmu_objset_is_snapshot(os))
		return (SET_ERROR(EINVAL));

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and thus accounted.  If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (SET_ERROR(EINTR));

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr != 0)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr != 0) {
			dmu_tx_abort(tx);
			continue;
		}
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}

void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
	    usedobjsp, availobjsp);
}

uint64_t
dmu_objset_fsid_guid(objset_t *os)
{
	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
}

void
dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
{
	stat->dds_type = os->os_phys->os_type;
	if (os->os_dsl_dataset)
		dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
}

void
dmu_objset_stats(objset_t *os, nvlist_t *nv)
{
	ASSERT(os->os_dsl_dataset ||
	    os->os_phys->os_type == DMU_OST_META);

	if (os->os_dsl_dataset != NULL)
		dsl_dataset_stats(os->os_dsl_dataset, nv);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
	    os->os_phys->os_type);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
	    dmu_objset_userspace_present(os));
}

int
dmu_objset_is_snapshot(objset_t *os)
{
	if (os->os_dsl_dataset != NULL)
		return (os->os_dsl_dataset->ds_is_snapshot);
	else
		return (B_FALSE);
}

int
dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
    boolean_t *conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	uint64_t ignored;

	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
		return (SET_ERROR(ENOENT));

	return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
	    MT_FIRST, real, maxlen, conflict));
}

int
dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));

	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
		return (SET_ERROR(ENOENT));

	zap_cursor_init_serialized(&cursor,
	    ds->ds_dir->dd_pool->dp_meta_objset,
	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENOENT));
	}

	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENAMETOOLONG));
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	if (case_conflict)
		*case_conflict = attr.za_normalization_conflict;
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}
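
/*
 * Illustrative sketch (not part of the original source): resumable
 * iteration over a dataset's snapshots with dmu_snapshot_list_next().  The
 * serialized cursor offset lives in "cookie", so the walk can be restarted
 * across calls; the caller must hold the pool config lock, per the ASSERT
 * above.  The function name and guard macro are hypothetical.
 */
#ifdef _DMU_OBJSET_EXAMPLE
static void
example_list_snapshots(objset_t *os)
{
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t id, cookie = 0;
	boolean_t case_conflict;

	while (dmu_snapshot_list_next(os, sizeof (snapname), snapname,
	    &id, &cookie, &case_conflict) == 0) {
		/* snapname/id now describe one snapshot entry. */
		dprintf("snapshot %s (obj %llu)\n", snapname,
		    (u_longlong_t)id);
	}
}
#endif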

int
dmu_dir_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp)
{
	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	/* there is no next dir on a snapshot! */
	if (os->os_dsl_dataset->ds_object !=
	    dsl_dir_phys(dd)->dd_head_dataset_obj)
		return (SET_ERROR(ENOENT));

	zap_cursor_init_serialized(&cursor,
	    dd->dd_pool->dp_meta_objset,
	    dsl_dir_phys(dd)->dd_child_dir_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENOENT));
	}

	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENAMETOOLONG));
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}

typedef struct dmu_objset_find_ctx {
	taskq_t		*dc_tq;
	dsl_pool_t	*dc_dp;
	uint64_t	dc_ddobj;
	int		(*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
	void		*dc_arg;
	int		dc_flags;
	kmutex_t	*dc_error_lock;
	int		*dc_error;
} dmu_objset_find_ctx_t;

static void
dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
{
	dsl_pool_t *dp = dcp->dc_dp;
	dmu_objset_find_ctx_t *child_dcp;
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	uint64_t thisobj;
	int err = 0;

	/* don't process if there already was an error */
	if (*dcp->dc_error != 0)
		goto out;

	err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, NULL, FTAG, &dd);
	if (err != 0)
		goto out;

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_rele(dd, FTAG);
		goto out;
	}

	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

	/*
	 * Iterate over all children.
	 */
	if (dcp->dc_flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT3U(attr->za_integer_length, ==,
			    sizeof (uint64_t));
			ASSERT3U(attr->za_num_integers, ==, 1);

			child_dcp = kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
			*child_dcp = *dcp;
			child_dcp->dc_ddobj = attr->za_first_integer;
			if (dcp->dc_tq != NULL)
				(void) taskq_dispatch(dcp->dc_tq,
				    dmu_objset_find_dp_cb, child_dcp, TQ_SLEEP);
			else
				dmu_objset_find_dp_impl(child_dcp);
		}
		zap_cursor_fini(&zc);
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
		dsl_dataset_t *ds;
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);

		if (err == 0) {
			uint64_t snapobj;

			snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT3U(attr->za_integer_length, ==,
				    sizeof (uint64_t));
				ASSERT3U(attr->za_num_integers, ==, 1);

				err = dsl_dataset_hold_obj(dp,
				    attr->za_first_integer, FTAG, &ds);
				if (err != 0)
					break;
				err = dcp->dc_func(dp, ds, dcp->dc_arg);
				dsl_dataset_rele(ds, FTAG);
				if (err != 0)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_rele(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));

	if (err != 0)
		goto out;

	/*
	 * Apply to self.
	 */
	err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
	if (err != 0)
		goto out;
	err = dcp->dc_func(dp, ds, dcp->dc_arg);
	dsl_dataset_rele(ds, FTAG);

out:
	if (err != 0) {
		mutex_enter(dcp->dc_error_lock);
		/* only keep first error */
		if (*dcp->dc_error == 0)
			*dcp->dc_error = err;
		mutex_exit(dcp->dc_error_lock);
	}

	kmem_free(dcp, sizeof (*dcp));
}

static void
dmu_objset_find_dp_cb(void *arg)
{
	dmu_objset_find_ctx_t *dcp = arg;
	dsl_pool_t *dp = dcp->dc_dp;

	/*
	 * We need to get a pool_config_lock here, as there are several
	 * asserts of pool_config_held down the stack.  Getting a lock via
	 * dsl_pool_config_enter is risky, as it might be stalled by a
	 * pending writer.  This would deadlock, as the write lock can
	 * only be granted when our parent thread gives up the lock.
	 * The _prio interface gives us priority over a pending writer.
	 */
	dsl_pool_config_enter_prio(dp, FTAG);

	dmu_objset_find_dp_impl(dcp);

	dsl_pool_config_exit(dp, FTAG);
}

/*
 * Find objsets under and including ddobj, call func(ds) on each.
 * The order for the enumeration is completely undefined.
 * func is called with dsl_pool_config held.
 */
int
dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
{
	int error = 0;
	taskq_t *tq = NULL;
	int ntasks;
	dmu_objset_find_ctx_t *dcp;
	kmutex_t err_lock;

	mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
	dcp = kmem_alloc(sizeof (*dcp), KM_SLEEP);
	dcp->dc_tq = NULL;
	dcp->dc_dp = dp;
	dcp->dc_ddobj = ddobj;
	dcp->dc_func = func;
	dcp->dc_arg = arg;
	dcp->dc_flags = flags;
	dcp->dc_error_lock = &err_lock;
	dcp->dc_error = &error;

	if ((flags & DS_FIND_SERIALIZE) || dsl_pool_config_held_writer(dp)) {
		/*
		 * In case a write lock is held we can't make use of
		 * parallelism, as down the stack of the worker threads
		 * the lock is asserted via dsl_pool_config_held.
		 * In case of a read lock this is solved by getting a read
		 * lock in each worker thread, which isn't possible in case
		 * of a writer lock.  So we fall back to the synchronous path
		 * here.
		 * In the future it might be possible to get some magic into
		 * dsl_pool_config_held in a way that it returns true for
		 * the worker threads so that a single lock held from this
		 * thread suffices.  For now, stay single threaded.
		 */
		dmu_objset_find_dp_impl(dcp);

		return (error);
	}

	ntasks = dmu_find_threads;
	if (ntasks == 0)
		ntasks = vdev_count_leaves(dp->dp_spa) * 4;
	tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
	    INT_MAX, 0);
	if (tq == NULL) {
		kmem_free(dcp, sizeof (*dcp));
		return (SET_ERROR(ENOMEM));
	}
	dcp->dc_tq = tq;

	/* dcp will be freed by task */
	(void) taskq_dispatch(tq, dmu_objset_find_dp_cb, dcp, TQ_SLEEP);

	/*
	 * PORTING: this code relies on the property of taskq_wait to wait
	 * until no more tasks are queued and no more tasks are active.  As
	 * we always queue new tasks from within other tasks, task_wait
	 * reliably waits for the full recursion to finish, even though we
	 * enqueue new tasks after taskq_wait has been called.
	 * On platforms other than illumos, taskq_wait may not have this
	 * property.
	 */
	taskq_wait(tq);
	taskq_destroy(tq);
	mutex_destroy(&err_lock);

	return (error);
}
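
/*
 * Illustrative sketch (not part of the original source): a typical
 * dmu_objset_find_dp() callback.  Per the contract above, it runs with the
 * pool config lock held and with a hold on "ds" for the duration of the
 * call; a nonzero return aborts the traversal with that error.  The counter
 * callback and the guard macro are hypothetical.
 */
#ifdef _DMU_OBJSET_EXAMPLE
/*ARGSUSED*/
static int
example_count_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
	uint64_t *countp = arg;

	(*countp)++;
	return (0);
}

static int
example_count_datasets(dsl_pool_t *dp, uint64_t *countp)
{
	*countp = 0;
	return (dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
	    example_count_cb, countp, DS_FIND_CHILDREN));
}
#endif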

/*
 * Find all objsets under name, and for each, call 'func(child_name, arg)'.
 * The dp_config_rwlock must not be held when this is called, and it
 * will not be held when the callback is called.
 * Therefore this function should only be used when the pool is not changing
 * (e.g. in syncing context), or the callback can deal with the possible races.
 */
static int
dmu_objset_find_impl(spa_t *spa, const char *name,
    int func(const char *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp = spa_get_dsl(spa);
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	char *child;
	uint64_t thisobj;
	int err;

	dsl_pool_config_enter(dp, FTAG);

	err = dsl_dir_hold(dp, name, FTAG, &dd, NULL);
	if (err != 0) {
		dsl_pool_config_exit(dp, FTAG);
		return (err);
	}

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_rele(dd, FTAG);
		dsl_pool_config_exit(dp, FTAG);
		return (0);
	}

	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

	/*
	 * Iterate over all children.
	 */
	if (flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT3U(attr->za_integer_length, ==,
			    sizeof (uint64_t));
			ASSERT3U(attr->za_num_integers, ==, 1);

			child = kmem_asprintf("%s/%s", name, attr->za_name);
			dsl_pool_config_exit(dp, FTAG);
			err = dmu_objset_find_impl(spa, child,
			    func, arg, flags);
			dsl_pool_config_enter(dp, FTAG);
			strfree(child);
			if (err != 0)
				break;
		}
		zap_cursor_fini(&zc);

		if (err != 0) {
			dsl_dir_rele(dd, FTAG);
			dsl_pool_config_exit(dp, FTAG);
			kmem_free(attr, sizeof (zap_attribute_t));
			return (err);
		}
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (flags & DS_FIND_SNAPSHOTS) {
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);

		if (err == 0) {
			uint64_t snapobj;

			snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT3U(attr->za_integer_length, ==,
				    sizeof (uint64_t));
				ASSERT3U(attr->za_num_integers, ==, 1);

				child = kmem_asprintf("%s@%s",
				    name, attr->za_name);
				dsl_pool_config_exit(dp, FTAG);
				err = func(child, arg);
				dsl_pool_config_enter(dp, FTAG);
				strfree(child);
				if (err != 0)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_rele(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));
	dsl_pool_config_exit(dp, FTAG);

	if (err != 0)
		return (err);

	/* Apply to self. */
	return (func(name, arg));
}

/*
 * See comment above dmu_objset_find_impl().
 */
int
dmu_objset_find(char *name, int func(const char *, void *), void *arg,
    int flags)
{
	spa_t *spa;
	int error;

	error = spa_open(name, &spa, FTAG);
	if (error != 0)
		return (error);
	error = dmu_objset_find_impl(spa, name, func, arg, flags);
	spa_close(spa, FTAG);
	return (error);
}

void
dmu_objset_set_user(objset_t *os, void *user_ptr)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	os->os_user_ptr = user_ptr;
}

void *
dmu_objset_get_user(objset_t *os)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	return (os->os_user_ptr);
}

/*
 * Determine name of filesystem, given name of snapshot.
 * buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes
 */
int
dmu_fsname(const char *snapname, char *buf)
{
	char *atp = strchr(snapname, '@');
	if (atp == NULL)
		return (SET_ERROR(EINVAL));
	if (atp - snapname >= ZFS_MAX_DATASET_NAME_LEN)
		return (SET_ERROR(ENAMETOOLONG));
	(void) strlcpy(buf, snapname, atp - snapname + 1);
	return (0);
}
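
/*
 * Illustrative sketch (not part of the original source): dmu_fsname()
 * copies everything before the '@', so a ZFS_MAX_DATASET_NAME_LEN buffer
 * always suffices; e.g. "tank/fs@backup" yields "tank/fs".  The function
 * and guard macro are hypothetical.
 */
#ifdef _DMU_OBJSET_EXAMPLE
static void
example_fsname(void)
{
	char fsname[ZFS_MAX_DATASET_NAME_LEN];

	VERIFY0(dmu_fsname("tank/fs@backup", fsname));
	/* fsname now holds "tank/fs" */
}
#endif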