/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
 */

/* Portions Copyright 2010 Robert Milkowski */

#include <sys/cred.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_deleg.h>
#include <sys/dnode.h>
#include <sys/dbuf.h>
#include <sys/zvol.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/dmu_impl.h>
#include <sys/zfs_ioctl.h>
#include <sys/sa.h>
#include <sys/zfs_onexit.h>
#include <sys/dsl_destroy.h>
#include <sys/vdev.h>

/*
 * Needed to close a window in dnode_move() that allows the objset to be freed
 * before it can be safely accessed.
 */
krwlock_t os_lock;

/*
 * Tunable to override the maximum number of threads for the parallelization
 * of dmu_objset_find_dp(), needed to speed up the import of pools with many
 * datasets.
 * Default is 4 times the number of leaf vdevs.
 */
int dmu_find_threads = 0;
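/*
 * Illustrative only (not part of the original source): on illumos a
 * tunable like this can typically be set at boot time from /etc/system;
 * the value below is a made-up example.
 *
 *	set zfs:dmu_find_threads = 16
 */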
static void dmu_objset_find_dp_cb(void *arg);

void
dmu_objset_init(void)
{
	rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
}

void
dmu_objset_fini(void)
{
	rw_destroy(&os_lock);
}

spa_t *
dmu_objset_spa(objset_t *os)
{
	return (os->os_spa);
}

zilog_t *
dmu_objset_zil(objset_t *os)
{
	return (os->os_zil);
}

dsl_pool_t *
dmu_objset_pool(objset_t *os)
{
	dsl_dataset_t *ds;

	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
		return (ds->ds_dir->dd_pool);
	else
		return (spa_get_dsl(os->os_spa));
}

dsl_dataset_t *
dmu_objset_ds(objset_t *os)
{
	return (os->os_dsl_dataset);
}

dmu_objset_type_t
dmu_objset_type(objset_t *os)
{
	return (os->os_phys->os_type);
}

void
dmu_objset_name(objset_t *os, char *buf)
{
	dsl_dataset_name(os->os_dsl_dataset, buf);
}

uint64_t
dmu_objset_id(objset_t *os)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;

	return (ds ? ds->ds_object : 0);
}

zfs_sync_type_t
dmu_objset_syncprop(objset_t *os)
{
	return (os->os_sync);
}

zfs_logbias_op_t
dmu_objset_logbias(objset_t *os)
{
	return (os->os_logbias);
}

static void
checksum_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance should have been done by now.
	 */
	ASSERT(newval != ZIO_CHECKSUM_INHERIT);

	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
}

static void
compression_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval != ZIO_COMPRESS_INHERIT);

	os->os_compress = zio_compress_select(os->os_spa, newval,
	    ZIO_COMPRESS_ON);
}

static void
copies_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval > 0);
	ASSERT(newval <= spa_max_replication(os->os_spa));

	os->os_copies = newval;
}

static void
dedup_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;
	spa_t *spa = os->os_spa;
	enum zio_checksum checksum;

	/*
	 * Inheritance should have been done by now.
	 */
	ASSERT(newval != ZIO_CHECKSUM_INHERIT);

	checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);

	os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
	os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
}

static void
primary_cache_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
	    newval == ZFS_CACHE_METADATA);

	os->os_primary_cache = newval;
}

static void
secondary_cache_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
	    newval == ZFS_CACHE_METADATA);

	os->os_secondary_cache = newval;
}

static void
sync_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
	    newval == ZFS_SYNC_DISABLED);

	os->os_sync = newval;
	if (os->os_zil)
		zil_set_sync(os->os_zil, newval);
}

static void
redundant_metadata_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/*
	 * Inheritance and range checking should have been done by now.
	 */
	ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
	    newval == ZFS_REDUNDANT_METADATA_MOST);

	os->os_redundant_metadata = newval;
}

static void
logbias_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
	    newval == ZFS_LOGBIAS_THROUGHPUT);
	os->os_logbias = newval;
	if (os->os_zil)
		zil_set_logbias(os->os_zil, newval);
}

static void
recordsize_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	os->os_recordsize = newval;
}

void
dmu_objset_byteswap(void *buf, size_t size)
{
	objset_phys_t *osp = buf;

	ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
	dnode_byteswap(&osp->os_meta_dnode);
	byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
	osp->os_type = BSWAP_64(osp->os_type);
	osp->os_flags = BSWAP_64(osp->os_flags);
	if (size == sizeof (objset_phys_t)) {
		dnode_byteswap(&osp->os_userused_dnode);
		dnode_byteswap(&osp->os_groupused_dnode);
	}
}

int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		arc_flags_t aflags = ARC_FLAG_WAIT;
		zbookmark_phys_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_FLAG_L2CACHE;
		if (DMU_OS_IS_L2COMPRESSIBLE(os))
			aflags |= ARC_FLAG_L2COMPRESS;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err != 0) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = SET_ERROR(EIO);
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off). Snapshots don't need to know about
	 * checksum/compression/copies.
	 */
	if (ds != NULL) {
		boolean_t needlock = B_FALSE;

		/*
		 * Note: it's valid to open the objset if the dataset is
		 * long-held, in which case the pool_config lock will not
		 * be held.
		 */
		if (!dsl_pool_config_held(dmu_objset_pool(os))) {
			needlock = B_TRUE;
			dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
		}
		err = dsl_prop_register(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os);
		if (err == 0) {
			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
			    secondary_cache_changed_cb, os);
		}
		if (!ds->ds_is_snapshot) {
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
				    checksum_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    compression_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COPIES),
				    copies_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_DEDUP),
				    dedup_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
				    logbias_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_SYNC),
				    sync_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(
				    ZFS_PROP_REDUNDANT_METADATA),
				    redundant_metadata_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
				    recordsize_changed_cb, os);
			}
		}
		if (needlock)
			dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
		if (err != 0) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf));
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else {
		/* It's the meta-objset. */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_ON;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = B_FALSE;
		os->os_logbias = ZFS_LOGBIAS_LATENCY;
		os->os_sync = ZFS_SYNC_STANDARD;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
	}

	if (ds == NULL || !ds->ds_is_snapshot)
		os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	dnode_special_open(os, &os->os_phys->os_meta_dnode,
	    DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		dnode_special_open(os, &os->os_phys->os_userused_dnode,
		    DMU_USERUSED_OBJECT, &os->os_userused_dnode);
		dnode_special_open(os, &os->os_phys->os_groupused_dnode,
		    DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
	}

	*osp = os;
	return (0);
}

int
dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
{
	int err = 0;

	/*
	 * We shouldn't be doing anything with dsl_dataset_t's unless the
	 * pool_config lock is held, or the dataset is long-held.
	 */
	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) ||
	    dsl_dataset_long_held(ds));

	mutex_enter(&ds->ds_opening_lock);
	if (ds->ds_objset == NULL) {
		objset_t *os;
		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
		    ds, dsl_dataset_get_blkptr(ds), &os);

		if (err == 0) {
			mutex_enter(&ds->ds_lock);
			ASSERT(ds->ds_objset == NULL);
			ds->ds_objset = os;
			mutex_exit(&ds->ds_lock);
		}
	}
	*osp = ds->ds_objset;
	mutex_exit(&ds->ds_opening_lock);
	return (err);
}

/*
 * Holds the pool while the objset is held. Therefore only one objset
 * can be held at a time.
 */
int
dmu_objset_hold(const char *name, void *tag, objset_t **osp)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int err;

	err = dsl_pool_hold(name, tag, &dp);
	if (err != 0)
		return (err);
	err = dsl_dataset_hold(dp, name, tag, &ds);
	if (err != 0) {
		dsl_pool_rele(dp, tag);
		return (err);
	}

	err = dmu_objset_from_ds(ds, osp);
	if (err != 0) {
		dsl_dataset_rele(ds, tag);
		dsl_pool_rele(dp, tag);
	}

	return (err);
}

static int
dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
    boolean_t readonly, void *tag, objset_t **osp)
{
	int err;

	err = dmu_objset_from_ds(ds, osp);
	if (err != 0) {
		dsl_dataset_disown(ds, tag);
	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
		dsl_dataset_disown(ds, tag);
		return (SET_ERROR(EINVAL));
	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
		dsl_dataset_disown(ds, tag);
		return (SET_ERROR(EROFS));
	}
	return (err);
}

/*
 * dsl_pool must not be held when this is called.
 * Upon successful return, there will be a longhold on the dataset,
 * and the dsl_pool will not be held.
 */
int
dmu_objset_own(const char *name, dmu_objset_type_t type,
    boolean_t readonly, void *tag, objset_t **osp)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int err;

	err = dsl_pool_hold(name, FTAG, &dp);
	if (err != 0)
		return (err);
	err = dsl_dataset_own(dp, name, tag, &ds);
	if (err != 0) {
		dsl_pool_rele(dp, FTAG);
		return (err);
	}
	err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
	dsl_pool_rele(dp, FTAG);

	return (err);
}

int
dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
    boolean_t readonly, void *tag, objset_t **osp)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_own_obj(dp, obj, tag, &ds);
	if (err != 0)
		return (err);

	return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
}

void
dmu_objset_rele(objset_t *os, void *tag)
{
	dsl_pool_t *dp = dmu_objset_pool(os);
	dsl_dataset_rele(os->os_dsl_dataset, tag);
	dsl_pool_rele(dp, tag);
}
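/*
 * Usage sketch (illustrative only, not part of the original source):
 * a short-lived reader pairs dmu_objset_hold() with dmu_objset_rele(),
 * while a consumer that needs exclusive, long-term access (e.g. a
 * mounted filesystem) pairs dmu_objset_own() with dmu_objset_disown().
 *
 *	objset_t *os;
 *	int err;
 *
 *	err = dmu_objset_hold("tank/fs", FTAG, &os);
 *	if (err == 0) {
 *		uint64_t id = dmu_objset_id(os);
 *		dmu_objset_rele(os, FTAG);
 *	}
 *
 *	err = dmu_objset_own("tank/fs", DMU_OST_ZFS, B_FALSE, FTAG, &os);
 *	if (err == 0)
 *		dmu_objset_disown(os, FTAG);
 */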
/*
 * When we are called, os MUST refer to an objset associated with a dataset
 * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
 * == tag. We will then release and reacquire ownership of the dataset while
 * holding the pool config_rwlock so that no intervening namespace or
 * ownership changes can occur.
 *
 * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
 * release the hold on its dataset and acquire a new one on the dataset of the
 * same name so that it can be partially torn down and reconstructed.
 */
void
dmu_objset_refresh_ownership(objset_t *os, void *tag)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds, *newds;
	char name[MAXNAMELEN];

	ds = os->os_dsl_dataset;
	VERIFY3P(ds, !=, NULL);
	VERIFY3P(ds->ds_owner, ==, tag);
	VERIFY(dsl_dataset_long_held(ds));

	dsl_dataset_name(ds, name);
	dp = dmu_objset_pool(os);
	dsl_pool_config_enter(dp, FTAG);
	dmu_objset_disown(os, tag);
	VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
	VERIFY3P(newds, ==, os->os_dsl_dataset);
	dsl_pool_config_exit(dp, FTAG);
}

void
dmu_objset_disown(objset_t *os, void *tag)
{
	dsl_dataset_disown(os->os_dsl_dataset, tag);
}

void
dmu_objset_evict_dbufs(objset_t *os)
{
	dnode_t dn_marker;
	dnode_t *dn;

	mutex_enter(&os->os_lock);
	dn = list_head(&os->os_dnodes);
	while (dn != NULL) {
		/*
		 * Skip dnodes without holds. We have to do this dance
		 * because dnode_add_ref() only works if there is already a
		 * hold. If the dnode has no holds, then it has no dbufs.
		 */
		if (dnode_add_ref(dn, FTAG)) {
			list_insert_after(&os->os_dnodes, dn, &dn_marker);
			mutex_exit(&os->os_lock);

			dnode_evict_dbufs(dn);
			dnode_rele(dn, FTAG);

			mutex_enter(&os->os_lock);
			dn = list_next(&os->os_dnodes, &dn_marker);
			list_remove(&os->os_dnodes, &dn_marker);
		} else {
			dn = list_next(&os->os_dnodes, dn);
		}
	}
	mutex_exit(&os->os_lock);

	if (DMU_USERUSED_DNODE(os) != NULL) {
		dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
		dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
	}
	dnode_evict_dbufs(DMU_META_DNODE(os));
}

/*
 * Objset eviction processing is split into two pieces.
 * The first marks the objset as evicting, evicts any dbufs that
 * have a refcount of zero, and then queues up the objset for the
 * second phase of eviction. Once os->os_dnodes has been cleared by
 * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
 * The second phase closes the special dnodes, dequeues the objset from
 * the list of those undergoing eviction, and finally frees the objset.
 *
 * NOTE: Due to asynchronous eviction processing (invocation of
 * dnode_buf_pageout()), it is possible for the meta dnode for the
 * objset to have no holds even though os->os_dnodes is not empty.
 */
void
dmu_objset_evict(objset_t *os)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;

	for (int t = 0; t < TXG_SIZE; t++)
		ASSERT(!dmu_objset_is_dirty(os, t));

	if (ds) {
		if (!ds->ds_is_snapshot) {
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
			    checksum_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
			    compression_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_COPIES),
			    copies_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_DEDUP),
			    dedup_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
			    logbias_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_SYNC),
			    sync_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
			    redundant_metadata_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
			    recordsize_changed_cb, os));
		}
		VERIFY0(dsl_prop_unregister(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os));
		VERIFY0(dsl_prop_unregister(ds,
		    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
		    secondary_cache_changed_cb, os));
	}

	if (os->os_sa)
		sa_tear_down(os);

	os->os_evicting = B_TRUE;
	dmu_objset_evict_dbufs(os);

	mutex_enter(&os->os_lock);
	spa_evicting_os_register(os->os_spa, os);
	if (list_is_empty(&os->os_dnodes)) {
		mutex_exit(&os->os_lock);
		dmu_objset_evict_done(os);
	} else {
		mutex_exit(&os->os_lock);
	}
}

void
dmu_objset_evict_done(objset_t *os)
{
	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);

	dnode_special_close(&os->os_meta_dnode);
	if (DMU_USERUSED_DNODE(os)) {
		dnode_special_close(&os->os_userused_dnode);
		dnode_special_close(&os->os_groupused_dnode);
	}
	zil_free(os->os_zil);

	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));

	/*
	 * This is a barrier to prevent the objset from going away in
	 * dnode_move() until we can safely ensure that the objset is still in
	 * use. We consider the objset valid before the barrier and invalid
	 * after the barrier.
	 */
	rw_enter(&os_lock, RW_READER);
	rw_exit(&os_lock);

	mutex_destroy(&os->os_lock);
	mutex_destroy(&os->os_obj_lock);
	mutex_destroy(&os->os_user_ptr_lock);
	spa_evicting_os_deregister(os->os_spa, os);
	kmem_free(os, sizeof (objset_t));
}

timestruc_t
dmu_objset_snap_cmtime(objset_t *os)
{
	return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
}

/* called from dsl for meta-objset */
objset_t *
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    dmu_objset_type_t type, dmu_tx_t *tx)
{
	objset_t *os;
	dnode_t *mdn;

	ASSERT(dmu_tx_is_syncing(tx));

	if (ds != NULL)
		VERIFY0(dmu_objset_from_ds(ds, &os));
	else
		VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os));

	mdn = DMU_META_DNODE(os);

	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);

	/*
	 * We don't want to have to increase the meta-dnode's nlevels
	 * later, because then we could do it in quiescing context while
	 * we are also accessing it in open context.
	 *
	 * This precaution is not necessary for the MOS (ds == NULL),
	 * because the MOS is only updated in syncing context.
	 * This is most fortunate: the MOS is the only objset that
	 * needs to be synced multiple times as spa_sync() iterates
	 * to convergence, so minimizing its dn_nlevels matters.
	 */
	if (ds != NULL) {
		int levels = 1;

		/*
		 * Determine the number of levels necessary for the meta-dnode
		 * to contain DN_MAX_OBJECT dnodes.
		 */
		while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
		    DN_MAX_OBJECT * sizeof (dnode_phys_t))
			levels++;

		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
		    mdn->dn_nlevels = levels;
	}

	ASSERT(type != DMU_OST_NONE);
	ASSERT(type != DMU_OST_ANY);
	ASSERT(type < DMU_OST_NUMTYPES);
	os->os_phys->os_type = type;
	if (dmu_objset_userused_enabled(os)) {
		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
		os->os_flags = os->os_phys->os_flags;
	}

	dsl_dataset_dirty(ds, tx);

	return (os);
}

typedef struct dmu_objset_create_arg {
	const char *doca_name;
	cred_t *doca_cred;
	void (*doca_userfunc)(objset_t *os, void *arg,
	    cred_t *cr, dmu_tx_t *tx);
	void *doca_userarg;
	dmu_objset_type_t doca_type;
	uint64_t doca_flags;
} dmu_objset_create_arg_t;

/*ARGSUSED*/
static int
dmu_objset_create_check(void *arg, dmu_tx_t *tx)
{
	dmu_objset_create_arg_t *doca = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *pdd;
	const char *tail;
	int error;

	if (strchr(doca->doca_name, '@') != NULL)
		return (SET_ERROR(EINVAL));

	error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
	if (error != 0)
		return (error);
	if (tail == NULL) {
		dsl_dir_rele(pdd, FTAG);
		return (SET_ERROR(EEXIST));
	}
	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
	    doca->doca_cred);
	dsl_dir_rele(pdd, FTAG);

	return (error);
}

static void
dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
{
	dmu_objset_create_arg_t *doca = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *pdd;
	const char *tail;
	dsl_dataset_t *ds;
	uint64_t obj;
	blkptr_t *bp;
	objset_t *os;

	VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));

	obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
	    doca->doca_cred, tx);

	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
	bp = dsl_dataset_get_blkptr(ds);
	os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
	    ds, bp, doca->doca_type, tx);

	if (doca->doca_userfunc != NULL) {
		doca->doca_userfunc(os, doca->doca_userarg,
		    doca->doca_cred, tx);
	}

	spa_history_log_internal_ds(ds, "create", tx, "");
	dsl_dataset_rele(ds, FTAG);
	dsl_dir_rele(pdd, FTAG);
}

int
dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
{
	dmu_objset_create_arg_t doca;

	doca.doca_name = name;
	doca.doca_cred = CRED();
	doca.doca_flags = flags;
	doca.doca_userfunc = func;
	doca.doca_userarg = arg;
	doca.doca_type = type;

	return (dsl_sync_task(name,
	    dmu_objset_create_check, dmu_objset_create_sync, &doca,
	    5, ZFS_SPACE_CHECK_NORMAL));
}
typedef struct dmu_objset_clone_arg {
	const char *doca_clone;
	const char *doca_origin;
	cred_t *doca_cred;
} dmu_objset_clone_arg_t;

/*ARGSUSED*/
static int
dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
{
	dmu_objset_clone_arg_t *doca = arg;
	dsl_dir_t *pdd;
	const char *tail;
	int error;
	dsl_dataset_t *origin;
	dsl_pool_t *dp = dmu_tx_pool(tx);

	if (strchr(doca->doca_clone, '@') != NULL)
		return (SET_ERROR(EINVAL));

	error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
	if (error != 0)
		return (error);
	if (tail == NULL) {
		dsl_dir_rele(pdd, FTAG);
		return (SET_ERROR(EEXIST));
	}

	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
	    doca->doca_cred);
	if (error != 0) {
		dsl_dir_rele(pdd, FTAG);
		return (SET_ERROR(EDQUOT));
	}
	dsl_dir_rele(pdd, FTAG);

	error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
	if (error != 0)
		return (error);

	/* You can only clone snapshots, not the head datasets. */
	if (!origin->ds_is_snapshot) {
		dsl_dataset_rele(origin, FTAG);
		return (SET_ERROR(EINVAL));
	}
	dsl_dataset_rele(origin, FTAG);

	return (0);
}

static void
dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
{
	dmu_objset_clone_arg_t *doca = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *pdd;
	const char *tail;
	dsl_dataset_t *origin, *ds;
	uint64_t obj;
	char namebuf[MAXNAMELEN];

	VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
	VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));

	obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
	    doca->doca_cred, tx);

	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
	dsl_dataset_name(origin, namebuf);
	spa_history_log_internal_ds(ds, "clone", tx,
	    "origin=%s (%llu)", namebuf, origin->ds_object);
	dsl_dataset_rele(ds, FTAG);
	dsl_dataset_rele(origin, FTAG);
	dsl_dir_rele(pdd, FTAG);
}

int
dmu_objset_clone(const char *clone, const char *origin)
{
	dmu_objset_clone_arg_t doca;

	doca.doca_clone = clone;
	doca.doca_origin = origin;
	doca.doca_cred = CRED();

	return (dsl_sync_task(clone,
	    dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
	    5, ZFS_SPACE_CHECK_NORMAL));
}

int
dmu_objset_snapshot_one(const char *fsname, const char *snapname)
{
	int err;
	char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
	nvlist_t *snaps = fnvlist_alloc();

	fnvlist_add_boolean(snaps, longsnap);
	strfree(longsnap);
	err = dsl_dataset_snapshot(snaps, NULL, NULL);
	fnvlist_free(snaps);
	return (err);
}
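/*
 * Usage sketch (illustrative only, not part of the original source):
 * snapshot a filesystem and then clone that snapshot; both calls are
 * sync tasks, so each either completes or fails atomically.
 *
 *	int err;
 *
 *	err = dmu_objset_snapshot_one("tank/fs", "snap1");
 *	if (err == 0)
 *		err = dmu_objset_clone("tank/fs-clone", "tank/fs@snap1");
 */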
static void
dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
	dnode_t *dn;

	while (dn = list_head(list)) {
		ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
		ASSERT(dn->dn_dbuf->db_data_pending);
		/*
		 * Initialize dn_zio outside dnode_sync() because the
		 * meta-dnode needs to set it outside dnode_sync().
		 */
		dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
		ASSERT(dn->dn_zio);

		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
		list_remove(list, dn);

		if (newlist) {
			(void) dnode_add_ref(dn, newlist);
			list_insert_tail(newlist, dn);
		}

		dnode_sync(dn, tx);
	}
}

/* ARGSUSED */
static void
dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	blkptr_t *bp = zio->io_bp;
	objset_t *os = arg;
	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;

	ASSERT(!BP_IS_EMBEDDED(bp));
	ASSERT3P(bp, ==, os->os_rootbp);
	ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
	ASSERT0(BP_GET_LEVEL(bp));

	/*
	 * Update rootbp fill count: it should be the number of objects
	 * allocated in the object set (not counting the "special"
	 * objects that are stored in the objset_phys_t -- the meta
	 * dnode and user/group accounting objects).
	 */
	bp->blk_fill = 0;
	for (int i = 0; i < dnp->dn_nblkptr; i++)
		bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
}

/* ARGSUSED */
static void
dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	blkptr_t *bp = zio->io_bp;
	blkptr_t *bp_orig = &zio->io_bp_orig;
	objset_t *os = arg;

	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
		ASSERT(BP_EQUAL(bp, bp_orig));
	} else {
		dsl_dataset_t *ds = os->os_dsl_dataset;
		dmu_tx_t *tx = os->os_synctx;

		(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
		dsl_dataset_block_born(ds, bp, tx);
	}
}

/* called from dsl */
void
dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_phys_t zb;
	zio_prop_t zp;
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS. If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
	arc_release(os->os_phys_buf, &os->os_phys_buf);

	dmu_write_policy(os, NULL, 0, 0, &zp);

	zio = arc_write(pio, os->os_spa, tx->tx_txg,
	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
	    DMU_OS_IS_L2COMPRESSIBLE(os), &zp, dmu_objset_write_ready,
	    NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE,
	    ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	DMU_META_DNODE(os)->dn_zio = zio;
	dnode_sync(DMU_META_DNODE(os), tx);

	os->os_phys->os_flags = os->os_flags;

	if (DMU_USERUSED_DNODE(os) &&
	    DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
		DMU_USERUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_USERUSED_DNODE(os), tx);
		DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
	while (dr = list_head(list)) {
		ASSERT0(dr->dr_dbuf->db_level);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}

boolean_t
dmu_objset_is_dirty(objset_t *os, uint64_t txg)
{
	return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
	    !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
}

static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];

void
dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
{
	used_cbs[ost] = cb;
}
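/*
 * Usage sketch (illustrative only, not part of the original source):
 * a DMU client registers the callback that maps an object's bonus data
 * to its owning uid/gid, enabling per-user/group space accounting for
 * that objset type; e.g. the ZPL registers its callback roughly as:
 *
 *	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
 */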
boolean_t
dmu_objset_userused_enabled(objset_t *os)
{
	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
	    used_cbs[os->os_phys->os_type] != NULL &&
	    DMU_USERUSED_DNODE(os) != NULL);
}

static void
do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
    uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
{
	if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
		int64_t delta = DNODE_SIZE + used;
		if (subtract)
			delta = -delta;
		VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
		    user, delta, tx));
		VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
		    group, delta, tx));
	}
}

void
dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
{
	dnode_t *dn;
	list_t *list = &os->os_synced_dnodes;

	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));

	while (dn = list_head(list)) {
		int flags;
		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
		    dn->dn_phys->dn_flags &
		    DNODE_FLAG_USERUSED_ACCOUNTED);

		/* Allocate the user/groupused objects if necessary. */
		if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
			VERIFY(0 == zap_create_claim(os,
			    DMU_USERUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
			VERIFY(0 == zap_create_claim(os,
			    DMU_GROUPUSED_OBJECT,
			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
		}

		/*
		 * We intentionally modify the zap object even if the
		 * net delta is zero. Otherwise
		 * the block of the zap obj could be shared between
		 * datasets but need to be different between them after
		 * a bprewrite.
		 */

		flags = dn->dn_id_flags;
		ASSERT(flags);
		if (flags & DN_ID_OLD_EXIST) {
			do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
			    dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
		}
		if (flags & DN_ID_NEW_EXIST) {
			do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
			    dn->dn_phys->dn_flags, dn->dn_newuid,
			    dn->dn_newgid, B_FALSE, tx);
		}

		mutex_enter(&dn->dn_mtx);
		dn->dn_oldused = 0;
		dn->dn_oldflags = 0;
		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
			dn->dn_olduid = dn->dn_newuid;
			dn->dn_oldgid = dn->dn_newgid;
			dn->dn_id_flags |= DN_ID_OLD_EXIST;
			if (dn->dn_bonuslen == 0)
				dn->dn_id_flags |= DN_ID_CHKED_SPILL;
			else
				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		}
		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
		mutex_exit(&dn->dn_mtx);

		list_remove(list, dn);
		dnode_rele(dn, list);
	}
}

/*
 * Returns a pointer to the data from which the uid/gid can be determined.
 *
 * If a dirty record for the transaction group that is syncing can't
 * be found, then NULL is returned. In the NULL case it is assumed
 * the uid/gid aren't changing.
 */
static void *
dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
	dbuf_dirty_record_t *dr, **drp;
	void *data;

	if (db->db_dirtycnt == 0)
		return (db->db.db_data);  /* Nothing is changing */

	for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
		if (dr->dr_txg == tx->tx_txg)
			break;

	if (dr == NULL) {
		data = NULL;
	} else {
		dnode_t *dn;

		DB_DNODE_ENTER(dr->dr_dbuf);
		dn = DB_DNODE(dr->dr_dbuf);

		if (dn->dn_bonuslen == 0 &&
		    dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
			data = dr->dt.dl.dr_data->b_data;
		else
			data = dr->dt.dl.dr_data;

		DB_DNODE_EXIT(dr->dr_dbuf);
	}

	return (data);
}

void
dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
{
	objset_t *os = dn->dn_objset;
	void *data = NULL;
	dmu_buf_impl_t *db = NULL;
	uint64_t *user = NULL;
	uint64_t *group = NULL;
	int flags = dn->dn_id_flags;
	int error;
	boolean_t have_spill = B_FALSE;

	if (!dmu_objset_userused_enabled(dn->dn_objset))
		return;

	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
	    DN_ID_CHKED_SPILL)))
		return;

	if (before && dn->dn_bonuslen != 0)
		data = DN_BONUS(dn->dn_phys);
	else if (!before && dn->dn_bonuslen != 0) {
		if (dn->dn_bonus) {
			db = dn->dn_bonus;
			mutex_enter(&db->db_mtx);
			data = dmu_objset_userquota_find_data(db, tx);
		} else {
			data = DN_BONUS(dn->dn_phys);
		}
	} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
		int rf = 0;

		if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
			rf |= DB_RF_HAVESTRUCT;
		error = dmu_spill_hold_by_dnode(dn,
		    rf | DB_RF_MUST_SUCCEED,
		    FTAG, (dmu_buf_t **)&db);
		ASSERT(error == 0);
		mutex_enter(&db->db_mtx);
		data = (before) ? db->db.db_data :
		    dmu_objset_userquota_find_data(db, tx);
		have_spill = B_TRUE;
	} else {
		mutex_enter(&dn->dn_mtx);
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
		mutex_exit(&dn->dn_mtx);
		return;
	}

	if (before) {
		ASSERT(data);
		user = &dn->dn_olduid;
		group = &dn->dn_oldgid;
	} else if (data) {
		user = &dn->dn_newuid;
		group = &dn->dn_newgid;
	}

	/*
	 * Must always call the callback in case the object
	 * type has changed and that type isn't an object type to track.
	 */
	error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
	    user, group);

	/*
	 * Preserve existing uid/gid when the callback can't determine
	 * what the new uid/gid are and the callback returned EEXIST.
	 * The EEXIST error tells us to just use the existing uid/gid.
	 * If we don't know what the old values are then just assign
	 * them to 0, since that is a new file being created.
	 */
	if (!before && data == NULL && error == EEXIST) {
		if (flags & DN_ID_OLD_EXIST) {
			dn->dn_newuid = dn->dn_olduid;
			dn->dn_newgid = dn->dn_oldgid;
		} else {
			dn->dn_newuid = 0;
			dn->dn_newgid = 0;
		}
		error = 0;
	}

	if (db)
		mutex_exit(&db->db_mtx);

	mutex_enter(&dn->dn_mtx);
	if (error == 0 && before)
		dn->dn_id_flags |= DN_ID_OLD_EXIST;
	if (error == 0 && !before)
		dn->dn_id_flags |= DN_ID_NEW_EXIST;

	if (have_spill) {
		dn->dn_id_flags |= DN_ID_CHKED_SPILL;
	} else {
		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
	}
	mutex_exit(&dn->dn_mtx);
	if (have_spill)
		dmu_buf_rele((dmu_buf_t *)db, FTAG);
}

boolean_t
dmu_objset_userspace_present(objset_t *os)
{
	return (os->os_phys->os_flags &
	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
}

int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os))
		return (SET_ERROR(ENOTSUP));
	if (dmu_objset_is_snapshot(os))
		return (SET_ERROR(EINVAL));

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and now accounted. If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (SET_ERROR(EINTR));

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr != 0)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr != 0) {
			dmu_tx_abort(tx);
			continue;
		}
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}

void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
	    usedobjsp, availobjsp);
}

uint64_t
dmu_objset_fsid_guid(objset_t *os)
{
	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
}

void
dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
{
	stat->dds_type = os->os_phys->os_type;
	if (os->os_dsl_dataset)
		dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
}

void
dmu_objset_stats(objset_t *os, nvlist_t *nv)
{
	ASSERT(os->os_dsl_dataset ||
	    os->os_phys->os_type == DMU_OST_META);

	if (os->os_dsl_dataset != NULL)
		dsl_dataset_stats(os->os_dsl_dataset, nv);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
	    os->os_phys->os_type);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
	    dmu_objset_userspace_present(os));
}

int
dmu_objset_is_snapshot(objset_t *os)
{
	if (os->os_dsl_dataset != NULL)
		return (os->os_dsl_dataset->ds_is_snapshot);
	else
		return (B_FALSE);
}

int
dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
    boolean_t *conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	uint64_t ignored;

	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
		return (SET_ERROR(ENOENT));

	return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
	    MT_FIRST, real, maxlen, conflict));
}

int
dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));

	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
		return (SET_ERROR(ENOENT));

	zap_cursor_init_serialized(&cursor,
	    ds->ds_dir->dd_pool->dp_meta_objset,
	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENOENT));
	}

	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENAMETOOLONG));
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	if (case_conflict)
		*case_conflict = attr.za_normalization_conflict;
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}
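/*
 * Usage sketch (illustrative only, not part of the original source):
 * the serialized cursor in *offp lets a caller resume iteration across
 * calls; starting from 0 walks all snapshot names of the objset.
 *
 *	char snapname[MAXNAMELEN];
 *	uint64_t off = 0;
 *
 *	while (dmu_snapshot_list_next(os, sizeof (snapname), snapname,
 *	    NULL, &off, NULL) == 0) {
 *		// consume snapname
 *	}
 */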
int
dmu_dir_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp)
{
	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	/* there is no next dir on a snapshot! */
	if (os->os_dsl_dataset->ds_object !=
	    dsl_dir_phys(dd)->dd_head_dataset_obj)
		return (SET_ERROR(ENOENT));

	zap_cursor_init_serialized(&cursor,
	    dd->dd_pool->dp_meta_objset,
	    dsl_dir_phys(dd)->dd_child_dir_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENOENT));
	}

	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENAMETOOLONG));
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}

typedef struct dmu_objset_find_ctx {
	taskq_t		*dc_tq;
	dsl_pool_t	*dc_dp;
	uint64_t	dc_ddobj;
	int		(*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
	void		*dc_arg;
	int		dc_flags;
	kmutex_t	*dc_error_lock;
	int		*dc_error;
} dmu_objset_find_ctx_t;

static void
dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
{
	dsl_pool_t *dp = dcp->dc_dp;
	dmu_objset_find_ctx_t *child_dcp;
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	uint64_t thisobj;
	int err = 0;

	/* don't process if there already was an error */
	if (*dcp->dc_error != 0)
		goto out;

	err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, NULL, FTAG, &dd);
	if (err != 0)
		goto out;

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_rele(dd, FTAG);
		goto out;
	}

	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

	/*
	 * Iterate over all children.
	 */
	if (dcp->dc_flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT3U(attr->za_integer_length, ==,
			    sizeof (uint64_t));
			ASSERT3U(attr->za_num_integers, ==, 1);

			child_dcp = kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
			*child_dcp = *dcp;
			child_dcp->dc_ddobj = attr->za_first_integer;
			if (dcp->dc_tq != NULL)
				(void) taskq_dispatch(dcp->dc_tq,
				    dmu_objset_find_dp_cb, child_dcp,
				    TQ_SLEEP);
			else
				dmu_objset_find_dp_impl(child_dcp);
		}
		zap_cursor_fini(&zc);
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
		dsl_dataset_t *ds;
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);

		if (err == 0) {
			uint64_t snapobj;

			snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT3U(attr->za_integer_length, ==,
				    sizeof (uint64_t));
				ASSERT3U(attr->za_num_integers, ==, 1);

				err = dsl_dataset_hold_obj(dp,
				    attr->za_first_integer, FTAG, &ds);
				if (err != 0)
					break;
				err = dcp->dc_func(dp, ds, dcp->dc_arg);
				dsl_dataset_rele(ds, FTAG);
				if (err != 0)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_rele(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));

	if (err != 0)
		goto out;

	/*
	 * Apply to self.
	 */
	err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
	if (err != 0)
		goto out;
	err = dcp->dc_func(dp, ds, dcp->dc_arg);
	dsl_dataset_rele(ds, FTAG);

out:
	if (err != 0) {
		mutex_enter(dcp->dc_error_lock);
		/* only keep first error */
		if (*dcp->dc_error == 0)
			*dcp->dc_error = err;
		mutex_exit(dcp->dc_error_lock);
	}

	kmem_free(dcp, sizeof (*dcp));
}

static void
dmu_objset_find_dp_cb(void *arg)
{
	dmu_objset_find_ctx_t *dcp = arg;
	dsl_pool_t *dp = dcp->dc_dp;

	/*
	 * We need to get a pool_config_lock here, as there are several
	 * asserts of pool_config_held down the stack. Getting a lock via
	 * dsl_pool_config_enter is risky, as it might be stalled by a
	 * pending writer. This would deadlock, as the write lock can
	 * only be granted when our parent thread gives up the lock.
	 * The _prio interface gives us priority over a pending writer.
	 */
	dsl_pool_config_enter_prio(dp, FTAG);

	dmu_objset_find_dp_impl(dcp);

	dsl_pool_config_exit(dp, FTAG);
}

/*
 * Find objsets under and including ddobj, call func(ds) on each.
 * The order for the enumeration is completely undefined.
 * func is called with dsl_pool_config held.
 */
int
dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
{
	int error = 0;
	taskq_t *tq = NULL;
	int ntasks;
	dmu_objset_find_ctx_t *dcp;
	kmutex_t err_lock;

	mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
	dcp = kmem_alloc(sizeof (*dcp), KM_SLEEP);
	dcp->dc_tq = NULL;
	dcp->dc_dp = dp;
	dcp->dc_ddobj = ddobj;
	dcp->dc_func = func;
	dcp->dc_arg = arg;
	dcp->dc_flags = flags;
	dcp->dc_error_lock = &err_lock;
	dcp->dc_error = &error;

	if ((flags & DS_FIND_SERIALIZE) || dsl_pool_config_held_writer(dp)) {
		/*
		 * In case a write lock is held we can't make use of
		 * parallelism, as down the stack of the worker threads
		 * the lock is asserted via dsl_pool_config_held.
		 * In case of a read lock this is solved by getting a read
		 * lock in each worker thread, which isn't possible in case
		 * of a writer lock. So we fall back to the synchronous path
		 * here.
		 * In the future it might be possible to get some magic into
		 * dsl_pool_config_held in a way that it returns true for
		 * the worker threads so that a single lock held from this
		 * thread suffices. For now, stay single threaded.
		 */
		dmu_objset_find_dp_impl(dcp);
		mutex_destroy(&err_lock);

		return (error);
	}

	ntasks = dmu_find_threads;
	if (ntasks == 0)
		ntasks = vdev_count_leaves(dp->dp_spa) * 4;
	tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
	    INT_MAX, 0);
	if (tq == NULL) {
		kmem_free(dcp, sizeof (*dcp));
		mutex_destroy(&err_lock);
		return (SET_ERROR(ENOMEM));
	}
	dcp->dc_tq = tq;

	/* dcp will be freed by task */
	(void) taskq_dispatch(tq, dmu_objset_find_dp_cb, dcp, TQ_SLEEP);

	/*
	 * PORTING: this code relies on the property of taskq_wait to wait
	 * until no more tasks are queued and no more tasks are active. As
	 * we always queue new tasks from within other tasks, taskq_wait
	 * reliably waits for the full recursion to finish, even though we
	 * enqueue new tasks after taskq_wait has been called.
	 * On platforms other than illumos, taskq_wait may not have this
	 * property.
	 */
	taskq_wait(tq);
	taskq_destroy(tq);
	mutex_destroy(&err_lock);

	return (error);
}
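/*
 * Usage sketch (illustrative only, not part of the original source):
 * a callback suitable for dmu_objset_find_dp(); per the contract above
 * it runs with the pool config lock held and with a hold on 'ds' for
 * the duration of the call.
 *
 *	static int
 *	example_count_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 *	{
 *		uint64_t *countp = arg;
 *
 *		(*countp)++;
 *		return (0);
 *	}
 *
 *	uint64_t count = 0;
 *	(void) dmu_objset_find_dp(dp, ddobj, example_count_cb, &count,
 *	    DS_FIND_CHILDREN);
 */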
/*
 * Find all objsets under name, and for each, call 'func(child_name, arg)'.
 * The dp_config_rwlock must not be held when this is called, and it
 * will not be held when the callback is called.
 * Therefore this function should only be used when the pool is not changing
 * (e.g. in syncing context), or the callback can deal with the possible races.
 */
static int
dmu_objset_find_impl(spa_t *spa, const char *name,
    int func(const char *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp = spa_get_dsl(spa);
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	char *child;
	uint64_t thisobj;
	int err;

	dsl_pool_config_enter(dp, FTAG);

	err = dsl_dir_hold(dp, name, FTAG, &dd, NULL);
	if (err != 0) {
		dsl_pool_config_exit(dp, FTAG);
		return (err);
	}

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_rele(dd, FTAG);
		dsl_pool_config_exit(dp, FTAG);
		return (0);
	}

	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

	/*
	 * Iterate over all children.
	 */
	if (flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT3U(attr->za_integer_length, ==,
			    sizeof (uint64_t));
			ASSERT3U(attr->za_num_integers, ==, 1);

			child = kmem_asprintf("%s/%s", name, attr->za_name);
			dsl_pool_config_exit(dp, FTAG);
			err = dmu_objset_find_impl(spa, child,
			    func, arg, flags);
			dsl_pool_config_enter(dp, FTAG);
			strfree(child);
			if (err != 0)
				break;
		}
		zap_cursor_fini(&zc);

		if (err != 0) {
			dsl_dir_rele(dd, FTAG);
			dsl_pool_config_exit(dp, FTAG);
			kmem_free(attr, sizeof (zap_attribute_t));
			return (err);
		}
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (flags & DS_FIND_SNAPSHOTS) {
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);

		if (err == 0) {
			uint64_t snapobj;

			snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT3U(attr->za_integer_length, ==,
				    sizeof (uint64_t));
				ASSERT3U(attr->za_num_integers, ==, 1);

				child = kmem_asprintf("%s@%s",
				    name, attr->za_name);
				dsl_pool_config_exit(dp, FTAG);
				err = func(child, arg);
				dsl_pool_config_enter(dp, FTAG);
				strfree(child);
				if (err != 0)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_rele(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));
	dsl_pool_config_exit(dp, FTAG);

	if (err != 0)
		return (err);

	/* Apply to self. */
	return (func(name, arg));
}

/*
 * See comment above dmu_objset_find_impl().
 */
int
dmu_objset_find(char *name, int func(const char *, void *), void *arg,
    int flags)
{
	spa_t *spa;
	int error;

	error = spa_open(name, &spa, FTAG);
	if (error != 0)
		return (error);
	error = dmu_objset_find_impl(spa, name, func, arg, flags);
	spa_close(spa, FTAG);
	return (error);
}

void
dmu_objset_set_user(objset_t *os, void *user_ptr)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	os->os_user_ptr = user_ptr;
}

void *
dmu_objset_get_user(objset_t *os)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	return (os->os_user_ptr);
}

/*
 * Determine name of filesystem, given name of snapshot.
 * buf must be at least MAXNAMELEN bytes
 */
int
dmu_fsname(const char *snapname, char *buf)
{
	char *atp = strchr(snapname, '@');
	if (atp == NULL)
		return (SET_ERROR(EINVAL));
	if (atp - snapname >= MAXNAMELEN)
		return (SET_ERROR(ENAMETOOLONG));
	(void) strlcpy(buf, snapname, atp - snapname + 1);
	return (0);
}
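/*
 * Usage sketch (illustrative only, not part of the original source):
 * dmu_fsname() copies only the filesystem portion of a snapshot name,
 * so "tank/fs@snap1" yields "tank/fs".
 *
 *	char fsname[MAXNAMELEN];
 *
 *	if (dmu_fsname("tank/fs@snap1", fsname) == 0) {
 *		// fsname now contains "tank/fs"
 *	}
 */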