/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/cred.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_deleg.h>
#include <sys/dnode.h>
#include <sys/dbuf.h>
#include <sys/zvol.h>
#include <sys/dmu_tx.h>
#include <sys/zio_checksum.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/dmu_impl.h>
#include <sys/zfs_ioctl.h>

spa_t *
dmu_objset_spa(objset_t *os)
{
        return (os->os->os_spa);
}

zilog_t *
dmu_objset_zil(objset_t *os)
{
        return (os->os->os_zil);
}

dsl_pool_t *
dmu_objset_pool(objset_t *os)
{
        dsl_dataset_t *ds;

        if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir)
                return (ds->ds_dir->dd_pool);
        else
                return (spa_get_dsl(os->os->os_spa));
}

dsl_dataset_t *
dmu_objset_ds(objset_t *os)
{
        return (os->os->os_dsl_dataset);
}

dmu_objset_type_t
dmu_objset_type(objset_t *os)
{
        return (os->os->os_phys->os_type);
}

void
dmu_objset_name(objset_t *os, char *buf)
{
        dsl_dataset_name(os->os->os_dsl_dataset, buf);
}

uint64_t
dmu_objset_id(objset_t *os)
{
        dsl_dataset_t *ds = os->os->os_dsl_dataset;

        return (ds ? ds->ds_object : 0);
}
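
/*
 * Property-change callbacks.  Each of these is registered against the
 * owning dataset in dmu_objset_open_impl() below, and is invoked with
 * the effective (already-inherited) value both once at registration
 * time and again whenever the property subsequently changes.
 */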

static void
checksum_changed_cb(void *arg, uint64_t newval)
{
        objset_impl_t *osi = arg;

        /*
         * Inheritance should have been done by now.
         */
        ASSERT(newval != ZIO_CHECKSUM_INHERIT);

        osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
}

static void
compression_changed_cb(void *arg, uint64_t newval)
{
        objset_impl_t *osi = arg;

        /*
         * Inheritance and range checking should have been done by now.
         */
        ASSERT(newval != ZIO_COMPRESS_INHERIT);

        osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
}

static void
copies_changed_cb(void *arg, uint64_t newval)
{
        objset_impl_t *osi = arg;

        /*
         * Inheritance and range checking should have been done by now.
         */
        ASSERT(newval > 0);
        ASSERT(newval <= spa_max_replication(osi->os_spa));

        osi->os_copies = newval;
}

static void
primary_cache_changed_cb(void *arg, uint64_t newval)
{
        objset_impl_t *osi = arg;

        /*
         * Inheritance and range checking should have been done by now.
         */
        ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
            newval == ZFS_CACHE_METADATA);

        osi->os_primary_cache = newval;
}

static void
secondary_cache_changed_cb(void *arg, uint64_t newval)
{
        objset_impl_t *osi = arg;

        /*
         * Inheritance and range checking should have been done by now.
         */
        ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
            newval == ZFS_CACHE_METADATA);

        osi->os_secondary_cache = newval;
}

void
dmu_objset_byteswap(void *buf, size_t size)
{
        objset_phys_t *osp = buf;

        ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
        dnode_byteswap(&osp->os_meta_dnode);
        byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
        osp->os_type = BSWAP_64(osp->os_type);
        osp->os_flags = BSWAP_64(osp->os_flags);
        if (size == sizeof (objset_phys_t)) {
                dnode_byteswap(&osp->os_userused_dnode);
                dnode_byteswap(&osp->os_groupused_dnode);
        }
}
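
/*
 * Instantiate the in-core state for a dataset's objset: read the
 * objset_phys_t pointed to by *bp through the ARC (or allocate a
 * zeroed one if bp is a hole), open the special dnodes, and register
 * the property callbacks above.  The caller must hold ds_opening_lock,
 * as asserted below.
 */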

int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_impl_t **osip)
{
        objset_impl_t *osi;
        int i, err;

        ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

        osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
        osi->os.os = osi;
        osi->os_dsl_dataset = ds;
        osi->os_spa = spa;
        osi->os_rootbp = bp;
        if (!BP_IS_HOLE(osi->os_rootbp)) {
                uint32_t aflags = ARC_WAIT;
                zbookmark_t zb;
                zb.zb_objset = ds ? ds->ds_object : 0;
                zb.zb_object = 0;
                zb.zb_level = -1;
                zb.zb_blkid = 0;
                if (DMU_OS_IS_L2CACHEABLE(osi))
                        aflags |= ARC_L2CACHE;

                dprintf_bp(osi->os_rootbp, "reading %s", "");
                /*
                 * NB: when bprewrite scrub can change the bp,
                 * and this is called from dmu_objset_open_ds_os, the bp
                 * could change, and we'll need a lock.
                 */
                err = arc_read_nolock(NULL, spa, osi->os_rootbp,
                    arc_getbuf_func, &osi->os_phys_buf,
                    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
                if (err) {
                        kmem_free(osi, sizeof (objset_impl_t));
                        /* convert checksum errors into IO errors */
                        if (err == ECKSUM)
                                err = EIO;
                        return (err);
                }

                /* Increase the blocksize if we are permitted. */
                if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
                    arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) {
                        arc_buf_t *buf = arc_buf_alloc(spa,
                            sizeof (objset_phys_t), &osi->os_phys_buf,
                            ARC_BUFC_METADATA);
                        bzero(buf->b_data, sizeof (objset_phys_t));
                        bcopy(osi->os_phys_buf->b_data, buf->b_data,
                            arc_buf_size(osi->os_phys_buf));
                        (void) arc_buf_remove_ref(osi->os_phys_buf,
                            &osi->os_phys_buf);
                        osi->os_phys_buf = buf;
                }

                osi->os_phys = osi->os_phys_buf->b_data;
                osi->os_flags = osi->os_phys->os_flags;
        } else {
                int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
                    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
                osi->os_phys_buf = arc_buf_alloc(spa, size,
                    &osi->os_phys_buf, ARC_BUFC_METADATA);
                osi->os_phys = osi->os_phys_buf->b_data;
                bzero(osi->os_phys, size);
        }

        /*
         * Note: the changed_cb will be called once before the register
         * func returns, thus changing the checksum/compression from the
         * default (fletcher2/off).  Snapshots don't need to know about
         * checksum/compression/copies.
         */
        if (ds) {
                err = dsl_prop_register(ds, "primarycache",
                    primary_cache_changed_cb, osi);
                if (err == 0)
                        err = dsl_prop_register(ds, "secondarycache",
                            secondary_cache_changed_cb, osi);
                if (!dsl_dataset_is_snapshot(ds)) {
                        if (err == 0)
                                err = dsl_prop_register(ds, "checksum",
                                    checksum_changed_cb, osi);
                        if (err == 0)
                                err = dsl_prop_register(ds, "compression",
                                    compression_changed_cb, osi);
                        if (err == 0)
                                err = dsl_prop_register(ds, "copies",
                                    copies_changed_cb, osi);
                }
                if (err) {
                        VERIFY(arc_buf_remove_ref(osi->os_phys_buf,
                            &osi->os_phys_buf) == 1);
                        kmem_free(osi, sizeof (objset_impl_t));
                        return (err);
                }
        } else {
                /* It's the meta-objset. */
                osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
                osi->os_compress = ZIO_COMPRESS_LZJB;
                osi->os_copies = spa_max_replication(spa);
                osi->os_primary_cache = ZFS_CACHE_ALL;
                osi->os_secondary_cache = ZFS_CACHE_ALL;
        }

        osi->os_zil_header = osi->os_phys->os_zil_header;
        osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header);

        for (i = 0; i < TXG_SIZE; i++) {
                list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t),
                    offsetof(dnode_t, dn_dirty_link[i]));
                list_create(&osi->os_free_dnodes[i], sizeof (dnode_t),
                    offsetof(dnode_t, dn_dirty_link[i]));
        }
        list_create(&osi->os_dnodes, sizeof (dnode_t),
            offsetof(dnode_t, dn_link));
        list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
            offsetof(dmu_buf_impl_t, db_link));

        mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

        osi->os_meta_dnode = dnode_special_open(osi,
            &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
        if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) {
                osi->os_userused_dnode = dnode_special_open(osi,
                    &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
                osi->os_groupused_dnode = dnode_special_open(osi,
                    &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
        }

        /*
         * We should be the only thread trying to do this because we
         * hold ds_opening_lock.
         */
        if (ds) {
                VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi,
                    dmu_objset_evict));
        }

        *osip = osi;
        return (0);
}

static int
dmu_objset_open_ds_os(dsl_dataset_t *ds, objset_t *os, dmu_objset_type_t type)
{
        objset_impl_t *osi;

        mutex_enter(&ds->ds_opening_lock);
        osi = dsl_dataset_get_user_ptr(ds);
        if (osi == NULL) {
                int err;

                err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
                    ds, &ds->ds_phys->ds_bp, &osi);
                if (err) {
                        mutex_exit(&ds->ds_opening_lock);
                        return (err);
                }
        }
        mutex_exit(&ds->ds_opening_lock);

        os->os = osi;
        os->os_mode = DS_MODE_NOHOLD;

        if (type != DMU_OST_ANY && type != os->os->os_phys->os_type)
                return (EINVAL);
        return (0);
}

int
dmu_objset_open_ds(dsl_dataset_t *ds, dmu_objset_type_t type, objset_t **osp)
{
        objset_t *os;
        int err;

        os = kmem_alloc(sizeof (objset_t), KM_SLEEP);
        err = dmu_objset_open_ds_os(ds, os, type);
        if (err)
                kmem_free(os, sizeof (objset_t));
        else
                *osp = os;
        return (err);
}

/* called from zpl */
int
dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
    objset_t **osp)
{
        objset_t *os;
        dsl_dataset_t *ds;
        int err;

        ASSERT(DS_MODE_TYPE(mode) == DS_MODE_USER ||
            DS_MODE_TYPE(mode) == DS_MODE_OWNER);

        os = kmem_alloc(sizeof (objset_t), KM_SLEEP);
        if (DS_MODE_TYPE(mode) == DS_MODE_USER)
                err = dsl_dataset_hold(name, os, &ds);
        else
                err = dsl_dataset_own(name, mode, os, &ds);
        if (err) {
                kmem_free(os, sizeof (objset_t));
                return (err);
        }

        err = dmu_objset_open_ds_os(ds, os, type);
        if (err) {
                if (DS_MODE_TYPE(mode) == DS_MODE_USER)
                        dsl_dataset_rele(ds, os);
                else
                        dsl_dataset_disown(ds, os);
                kmem_free(os, sizeof (objset_t));
        } else {
                os->os_mode = mode;
                *osp = os;
        }
        return (err);
}
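
/*
 * Example (illustrative only; error handling elided): a typical
 * consumer holds and releases an objset like this:
 *
 *      objset_t *os;
 *      if (dmu_objset_open("tank/fs", DMU_OST_ANY, DS_MODE_USER,
 *          &os) == 0) {
 *              ... use dmu_objset_zil(os), dmu_objset_pool(os), etc. ...
 *              dmu_objset_close(os);
 *      }
 */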

void
dmu_objset_close(objset_t *os)
{
        ASSERT(DS_MODE_TYPE(os->os_mode) == DS_MODE_USER ||
            DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER ||
            DS_MODE_TYPE(os->os_mode) == DS_MODE_NOHOLD);

        if (DS_MODE_TYPE(os->os_mode) == DS_MODE_USER)
                dsl_dataset_rele(os->os->os_dsl_dataset, os);
        else if (DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER)
                dsl_dataset_disown(os->os->os_dsl_dataset, os);
        kmem_free(os, sizeof (objset_t));
}

int
dmu_objset_evict_dbufs(objset_t *os)
{
        objset_impl_t *osi = os->os;
        dnode_t *dn;

        mutex_enter(&osi->os_lock);

        /* process the mdn last, since the other dnodes have holds on it */
        list_remove(&osi->os_dnodes, osi->os_meta_dnode);
        list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode);

        /*
         * Find the first dnode with holds.  We have to do this dance
         * because dnode_add_ref() only works if you already have a
         * hold.  If a dnode has no holds then it has no dbufs, so it's
         * OK to skip.
         */
        for (dn = list_head(&osi->os_dnodes);
            dn && !dnode_add_ref(dn, FTAG);
            dn = list_next(&osi->os_dnodes, dn))
                continue;

        while (dn) {
                dnode_t *next_dn = dn;

                do {
                        next_dn = list_next(&osi->os_dnodes, next_dn);
                } while (next_dn && !dnode_add_ref(next_dn, FTAG));

                mutex_exit(&osi->os_lock);
                dnode_evict_dbufs(dn);
                dnode_rele(dn, FTAG);
                mutex_enter(&osi->os_lock);
                dn = next_dn;
        }
        mutex_exit(&osi->os_lock);
        return (list_head(&osi->os_dnodes) != osi->os_meta_dnode);
}
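
/*
 * Eviction callback, registered via dsl_dataset_set_user_ptr() in
 * dmu_objset_open_impl().  It tears down everything that function set
 * up: property callbacks, cached dbufs, the special dnodes, the ZIL,
 * and finally the objset_impl_t itself.
 */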

void
dmu_objset_evict(dsl_dataset_t *ds, void *arg)
{
        objset_impl_t *osi = arg;
        objset_t os;
        int i;

        for (i = 0; i < TXG_SIZE; i++) {
                ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL);
                ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL);
        }

        if (ds) {
                if (!dsl_dataset_is_snapshot(ds)) {
                        VERIFY(0 == dsl_prop_unregister(ds, "checksum",
                            checksum_changed_cb, osi));
                        VERIFY(0 == dsl_prop_unregister(ds, "compression",
                            compression_changed_cb, osi));
                        VERIFY(0 == dsl_prop_unregister(ds, "copies",
                            copies_changed_cb, osi));
                }
                VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
                    primary_cache_changed_cb, osi));
                VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
                    secondary_cache_changed_cb, osi));
        }

        /*
         * We should need only a single pass over the dnode list, since
         * nothing can be added to the list at this point.
         */
        os.os = osi;
        (void) dmu_objset_evict_dbufs(&os);

        dnode_special_close(osi->os_meta_dnode);
        if (osi->os_userused_dnode) {
                dnode_special_close(osi->os_userused_dnode);
                dnode_special_close(osi->os_groupused_dnode);
        }
        zil_free(osi->os_zil);

        ASSERT3P(list_head(&osi->os_dnodes), ==, NULL);

        VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
        mutex_destroy(&osi->os_lock);
        mutex_destroy(&osi->os_obj_lock);
        mutex_destroy(&osi->os_user_ptr_lock);
        kmem_free(osi, sizeof (objset_impl_t));
}

/* called from dsl for meta-objset */
objset_impl_t *
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    dmu_objset_type_t type, dmu_tx_t *tx)
{
        objset_impl_t *osi;
        dnode_t *mdn;

        ASSERT(dmu_tx_is_syncing(tx));
        if (ds)
                mutex_enter(&ds->ds_opening_lock);
        VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &osi));
        if (ds)
                mutex_exit(&ds->ds_opening_lock);
        mdn = osi->os_meta_dnode;

        dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
            DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);

        /*
         * We don't want to have to increase the meta-dnode's nlevels
         * later, because then we could do it in quiescing context while
         * we are also accessing it in open context.
         *
         * This precaution is not necessary for the MOS (ds == NULL),
         * because the MOS is only updated in syncing context.
         * This is most fortunate: the MOS is the only objset that
         * needs to be synced multiple times as spa_sync() iterates
         * to convergence, so minimizing its dn_nlevels matters.
         */
        if (ds != NULL) {
                int levels = 1;

                /*
                 * Determine the number of levels necessary for the meta-dnode
                 * to contain DN_MAX_OBJECT dnodes.
                 */
                while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
                    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
                    DN_MAX_OBJECT * sizeof (dnode_phys_t))
                        levels++;

                mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
                    mdn->dn_nlevels = levels;
        }

        ASSERT(type != DMU_OST_NONE);
        ASSERT(type != DMU_OST_ANY);
        ASSERT(type < DMU_OST_NUMTYPES);
        osi->os_phys->os_type = type;
        if (dmu_objset_userused_enabled(osi)) {
                osi->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
                osi->os_flags = osi->os_phys->os_flags;
        }

        dsl_dataset_dirty(ds, tx);

        return (osi);
}
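
/*
 * Objset creation is implemented as a dsl_sync_task pair: the _check
 * func validates the request and the _sync func performs it in
 * syncing context.  struct oscarg carries the arguments into both.
 */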

struct oscarg {
        void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
        void *userarg;
        dsl_dataset_t *clone_parent;
        const char *lastname;
        dmu_objset_type_t type;
        uint64_t flags;
};

/*ARGSUSED*/
static int
dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
        dsl_dir_t *dd = arg1;
        struct oscarg *oa = arg2;
        objset_t *mos = dd->dd_pool->dp_meta_objset;
        int err;
        uint64_t ddobj;

        err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
            oa->lastname, sizeof (uint64_t), 1, &ddobj);
        if (err != ENOENT)
                return (err ? err : EEXIST);

        if (oa->clone_parent != NULL) {
                /*
                 * You can't clone across pools.
                 */
                if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool)
                        return (EXDEV);

                /*
                 * You can only clone snapshots, not the head datasets.
                 */
                if (oa->clone_parent->ds_phys->ds_num_children == 0)
                        return (EINVAL);
        }

        return (0);
}

static void
dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
        dsl_dir_t *dd = arg1;
        struct oscarg *oa = arg2;
        dsl_dataset_t *ds;
        blkptr_t *bp;
        uint64_t dsobj;

        ASSERT(dmu_tx_is_syncing(tx));

        dsobj = dsl_dataset_create_sync(dd, oa->lastname,
            oa->clone_parent, oa->flags, cr, tx);

        VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, FTAG, &ds));
        bp = dsl_dataset_get_blkptr(ds);
        if (BP_IS_HOLE(bp)) {
                objset_impl_t *osi;

                /* This is an empty dmu_objset; not a clone. */
                osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
                    ds, bp, oa->type, tx);

                if (oa->userfunc)
                        oa->userfunc(&osi->os, oa->userarg, cr, tx);
        }

        spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa,
            tx, cr, "dataset = %llu", dsobj);

        dsl_dataset_rele(ds, FTAG);
}

int
dmu_objset_create(const char *name, dmu_objset_type_t type,
    objset_t *clone_parent, uint64_t flags,
    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
{
        dsl_dir_t *pdd;
        const char *tail;
        int err = 0;
        struct oscarg oa = { 0 };

        ASSERT(strchr(name, '@') == NULL);
        err = dsl_dir_open(name, FTAG, &pdd, &tail);
        if (err)
                return (err);
        if (tail == NULL) {
                dsl_dir_close(pdd, FTAG);
                return (EEXIST);
        }

        dprintf("name=%s\n", name);

        oa.userfunc = func;
        oa.userarg = arg;
        oa.lastname = tail;
        oa.type = type;
        oa.flags = flags;

        if (clone_parent != NULL) {
                /*
                 * You can't clone to a different type.
                 */
                if (clone_parent->os->os_phys->os_type != type) {
                        dsl_dir_close(pdd, FTAG);
                        return (EINVAL);
                }
                oa.clone_parent = clone_parent->os->os_dsl_dataset;
        }
        err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
            dmu_objset_create_sync, pdd, &oa, 5);
        dsl_dir_close(pdd, FTAG);
        return (err);
}
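
/*
 * Example (illustrative only): creating an empty ZFS filesystem
 * objset, with no clone origin and no create-time callback:
 *
 *      err = dmu_objset_create("tank/new", DMU_OST_ZFS, NULL, 0,
 *          NULL, NULL);
 */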

int
dmu_objset_destroy(const char *name)
{
        objset_t *os;
        int error;

        /*
         * If it looks like we'll be able to destroy it, and there's
         * an unplayed replay log sitting around, destroy the log.
         * It would be nicer to do this in dsl_dataset_destroy_sync(),
         * but the replay log objset is modified in open context.
         */
        error = dmu_objset_open(name, DMU_OST_ANY,
            DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os);
        if (error == 0) {
                dsl_dataset_t *ds = os->os->os_dsl_dataset;
                zil_destroy(dmu_objset_zil(os), B_FALSE);

                error = dsl_dataset_destroy(ds, os);
                /*
                 * dsl_dataset_destroy() closes the ds.
                 */
                kmem_free(os, sizeof (objset_t));
        }

        return (error);
}

/*
 * This will close the objset.
 */
int
dmu_objset_rollback(objset_t *os)
{
        int err;
        dsl_dataset_t *ds;

        ds = os->os->os_dsl_dataset;

        if (!dsl_dataset_tryown(ds, TRUE, os)) {
                dmu_objset_close(os);
                return (EBUSY);
        }

        err = dsl_dataset_rollback(ds, os->os->os_phys->os_type);

        /*
         * NB: we close the objset manually because the rollback
         * actually implicitly called dmu_objset_evict(), thus freeing
         * the objset_impl_t.
         */
        dsl_dataset_disown(ds, os);
        kmem_free(os, sizeof (objset_t));
        return (err);
}
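
/*
 * Snapshot machinery.  All snapshots in a single request are added to
 * one dsl_sync_task group so they can be committed together, and
 * sn->failed records the name of the dataset currently being processed
 * so the caller can report which one failed.
 */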

struct snaparg {
        dsl_sync_task_group_t *dstg;
        char *snapname;
        char failed[MAXPATHLEN];
        boolean_t checkperms;
        nvlist_t *props;
};

static int
snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
        objset_t *os = arg1;
        struct snaparg *sn = arg2;

        /* The props have already been checked by zfs_check_userprops(). */

        return (dsl_dataset_snapshot_check(os->os->os_dsl_dataset,
            sn->snapname, tx));
}

static void
snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
        objset_t *os = arg1;
        dsl_dataset_t *ds = os->os->os_dsl_dataset;
        struct snaparg *sn = arg2;

        dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx);

        if (sn->props)
                dsl_props_set_sync(ds->ds_prev, sn->props, cr, tx);
}

static int
dmu_objset_snapshot_one(char *name, void *arg)
{
        struct snaparg *sn = arg;
        objset_t *os;
        int err;

        (void) strcpy(sn->failed, name);

        /*
         * Check permissions only when requested.  This only applies when
         * doing a recursive snapshot.  The permission checks for the
         * starting dataset have already been performed in
         * zfs_secpolicy_snapshot().
         */
        if (sn->checkperms == B_TRUE &&
            (err = zfs_secpolicy_snapshot_perms(name, CRED())))
                return (err);

        err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os);
        if (err != 0)
                return (err);

        /* If the objset is in an inconsistent state, return busy */
        if (os->os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
                dmu_objset_close(os);
                return (EBUSY);
        }

        /*
         * NB: we need to wait for all in-flight changes to get to disk,
         * so that we snapshot those changes.  zil_suspend does this as
         * a side effect.
         */
        err = zil_suspend(dmu_objset_zil(os));
        if (err == 0) {
                dsl_sync_task_create(sn->dstg, snapshot_check,
                    snapshot_sync, os, sn, 3);
        } else {
                dmu_objset_close(os);
        }

        return (err);
}

int
dmu_objset_snapshot(char *fsname, char *snapname,
    nvlist_t *props, boolean_t recursive)
{
        dsl_sync_task_t *dst;
        struct snaparg sn;
        spa_t *spa;
        int err;

        (void) strcpy(sn.failed, fsname);

        err = spa_open(fsname, &spa, FTAG);
        if (err)
                return (err);

        sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
        sn.snapname = snapname;
        sn.props = props;

        if (recursive) {
                sn.checkperms = B_TRUE;
                err = dmu_objset_find(fsname,
                    dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN);
        } else {
                sn.checkperms = B_FALSE;
                err = dmu_objset_snapshot_one(fsname, &sn);
        }

        if (err == 0)
                err = dsl_sync_task_group_wait(sn.dstg);

        for (dst = list_head(&sn.dstg->dstg_tasks); dst;
            dst = list_next(&sn.dstg->dstg_tasks, dst)) {
                objset_t *os = dst->dst_arg1;
                dsl_dataset_t *ds = os->os->os_dsl_dataset;
                if (dst->dst_err)
                        dsl_dataset_name(ds, sn.failed);
                zil_resume(dmu_objset_zil(os));
                dmu_objset_close(os);
        }

        if (err)
                (void) strcpy(fsname, sn.failed);
        dsl_sync_task_group_destroy(sn.dstg);
        spa_close(spa, FTAG);
        return (err);
}
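
/*
 * Syncing machinery.  dmu_objset_sync() below writes out one objset
 * for one txg: it syncs every dirty dnode and then rewrites the
 * objset's root block via arc_write(), with ready() as the ready
 * callback that fixes up the root bp.
 */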

static void
dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
        dnode_t *dn;

        while (dn = list_head(list)) {
                ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
                ASSERT(dn->dn_dbuf->db_data_pending);
                /*
                 * Initialize dn_zio outside dnode_sync() because the
                 * meta-dnode needs to set it outside dnode_sync().
                 */
                dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
                ASSERT(dn->dn_zio);

                ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
                list_remove(list, dn);

                if (newlist) {
                        (void) dnode_add_ref(dn, newlist);
                        list_insert_tail(newlist, dn);
                }

                dnode_sync(dn, tx);
        }
}

/* ARGSUSED */
static void
ready(zio_t *zio, arc_buf_t *abuf, void *arg)
{
        blkptr_t *bp = zio->io_bp;
        blkptr_t *bp_orig = &zio->io_bp_orig;
        objset_impl_t *os = arg;
        dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;

        ASSERT(bp == os->os_rootbp);
        ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET);
        ASSERT(BP_GET_LEVEL(bp) == 0);

        /*
         * Update rootbp fill count: it should be the number of objects
         * allocated in the object set (not counting the "special"
         * objects that are stored in the objset_phys_t -- the meta
         * dnode and user/group accounting objects).
         */
        bp->blk_fill = 0;
        for (int i = 0; i < dnp->dn_nblkptr; i++)
                bp->blk_fill += dnp->dn_blkptr[i].blk_fill;

        if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
                ASSERT(DVA_EQUAL(BP_IDENTITY(bp), BP_IDENTITY(bp_orig)));
        } else {
                if (zio->io_bp_orig.blk_birth == os->os_synctx->tx_txg)
                        (void) dsl_dataset_block_kill(os->os_dsl_dataset,
                            &zio->io_bp_orig, zio, os->os_synctx);
                dsl_dataset_block_born(os->os_dsl_dataset, bp, os->os_synctx);
        }
}
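
/*
 * Note on I/O ordering: the arc_write() zio created in
 * dmu_objset_sync() is used as dn_zio for the special dnodes, making
 * it the parent of the dnode-level writes issued beneath it, so the
 * root block is not finalized until the objset's other writes have
 * settled.
 */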

/* called from dsl */
void
dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
{
        int txgoff;
        zbookmark_t zb;
        writeprops_t wp = { 0 };
        zio_t *zio;
        list_t *list;
        list_t *newlist = NULL;
        dbuf_dirty_record_t *dr;

        dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

        ASSERT(dmu_tx_is_syncing(tx));
        /* XXX the write_done callback should really give us the tx... */
        os->os_synctx = tx;

        if (os->os_dsl_dataset == NULL) {
                /*
                 * This is the MOS.  If we have upgraded,
                 * spa_max_replication() could change, so reset
                 * os_copies here.
                 */
                os->os_copies = spa_max_replication(os->os_spa);
        }

        /*
         * Create the root block IO
         */
        zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
        zb.zb_object = 0;
        zb.zb_level = -1;       /* for block ordering; it's level 0 on disk */
        zb.zb_blkid = 0;

        wp.wp_type = DMU_OT_OBJSET;
        wp.wp_level = 0;        /* on-disk BP level; see above */
        wp.wp_copies = os->os_copies;
        wp.wp_oschecksum = os->os_checksum;
        wp.wp_oscompress = os->os_compress;

        if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) {
                (void) dsl_dataset_block_kill(os->os_dsl_dataset,
                    os->os_rootbp, pio, tx);
        }

        arc_release(os->os_phys_buf, &os->os_phys_buf);

        zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os),
            tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
            ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

        /*
         * Sync special dnodes - the parent IO for the sync is the root block
         */
        os->os_meta_dnode->dn_zio = zio;
        dnode_sync(os->os_meta_dnode, tx);

        os->os_phys->os_flags = os->os_flags;

        if (os->os_userused_dnode &&
            os->os_userused_dnode->dn_type != DMU_OT_NONE) {
                os->os_userused_dnode->dn_zio = zio;
                dnode_sync(os->os_userused_dnode, tx);
                os->os_groupused_dnode->dn_zio = zio;
                dnode_sync(os->os_groupused_dnode, tx);
        }

        txgoff = tx->tx_txg & TXG_MASK;

        if (dmu_objset_userused_enabled(os)) {
                newlist = &os->os_synced_dnodes;
                /*
                 * We must create the list here because it uses the
                 * dn_dirty_link[] of this txg.
                 */
                list_create(newlist, sizeof (dnode_t),
                    offsetof(dnode_t, dn_dirty_link[txgoff]));
        }

        dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
        dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

        list = &os->os_meta_dnode->dn_dirty_records[txgoff];
        while (dr = list_head(list)) {
                ASSERT(dr->dr_dbuf->db_level == 0);
                list_remove(list, dr);
                if (dr->dr_zio)
                        zio_nowait(dr->dr_zio);
        }
        /*
         * Free intent log blocks up to this tx.
         */
        zil_sync(os->os_zil, tx);
        os->os_phys->os_zil_header = os->os_zil_header;
        zio_nowait(zio);
}

static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];

void
dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
{
        used_cbs[ost] = cb;
}

boolean_t
dmu_objset_userused_enabled(objset_impl_t *os)
{
        return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
            used_cbs[os->os_phys->os_type] &&
            os->os_userused_dnode);
}
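
/*
 * Per-object space accounting.  Once the dnodes for a txg have been
 * synced (and gathered on os_synced_dnodes), this walks that list and
 * passes each object's old and new bonus data and space usage to the
 * callback registered for the objset type via
 * dmu_objset_register_type().
 */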

void
dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx)
{
        dnode_t *dn;
        list_t *list = &os->os_synced_dnodes;
        static const char zerobuf[DN_MAX_BONUSLEN] = {0};

        ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));

        while (dn = list_head(list)) {
                dmu_object_type_t bonustype;

                ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
                ASSERT(dn->dn_oldphys);
                ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
                    dn->dn_phys->dn_flags &
                    DNODE_FLAG_USERUSED_ACCOUNTED);

                /* Allocate the user/groupused objects if necessary. */
                if (os->os_userused_dnode->dn_type == DMU_OT_NONE) {
                        VERIFY(0 == zap_create_claim(&os->os,
                            DMU_USERUSED_OBJECT,
                            DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
                        VERIFY(0 == zap_create_claim(&os->os,
                            DMU_GROUPUSED_OBJECT,
                            DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
                }

                /*
                 * If the object was not previously
                 * accounted, pretend that it was free.
                 */
                if (!(dn->dn_oldphys->dn_flags &
                    DNODE_FLAG_USERUSED_ACCOUNTED)) {
                        bzero(dn->dn_oldphys, sizeof (dnode_phys_t));
                }

                /*
                 * If the object was freed, use the previous bonustype.
                 */
                bonustype = dn->dn_phys->dn_bonustype ?
                    dn->dn_phys->dn_bonustype : dn->dn_oldphys->dn_bonustype;
                ASSERT(dn->dn_phys->dn_type != 0 ||
                    (bcmp(DN_BONUS(dn->dn_phys), zerobuf,
                    DN_MAX_BONUSLEN) == 0 &&
                    DN_USED_BYTES(dn->dn_phys) == 0));
                ASSERT(dn->dn_oldphys->dn_type != 0 ||
                    (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf,
                    DN_MAX_BONUSLEN) == 0 &&
                    DN_USED_BYTES(dn->dn_oldphys) == 0));
                used_cbs[os->os_phys->os_type](&os->os, bonustype,
                    DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys),
                    DN_USED_BYTES(dn->dn_oldphys),
                    DN_USED_BYTES(dn->dn_phys), tx);

                /*
                 * The mutex is needed here for interlock with dnode_allocate.
                 */
                mutex_enter(&dn->dn_mtx);
                zio_buf_free(dn->dn_oldphys, sizeof (dnode_phys_t));
                dn->dn_oldphys = NULL;
                mutex_exit(&dn->dn_mtx);

                list_remove(list, dn);
                dnode_rele(dn, list);
        }
}

boolean_t
dmu_objset_userspace_present(objset_t *os)
{
        return (os->os->os_phys->os_flags &
            OBJSET_FLAG_USERACCOUNTING_COMPLETE);
}

int
dmu_objset_userspace_upgrade(objset_t *os)
{
        uint64_t obj;
        int err = 0;

        if (dmu_objset_userspace_present(os))
                return (0);
        if (!dmu_objset_userused_enabled(os->os))
                return (ENOTSUP);
        if (dmu_objset_is_snapshot(os))
                return (EINVAL);

        /*
         * We simply need to mark every object dirty, so that it will be
         * synced out and thus accounted.  If this is called
         * concurrently, or if we already did some work before crashing,
         * that's fine, since we track each object's accounted state
         * independently.
         */

        for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
                dmu_tx_t *tx;
                dmu_buf_t *db;
                int objerr;

                if (issig(JUSTLOOKING) && issig(FORREAL))
                        return (EINTR);

                objerr = dmu_bonus_hold(os, obj, FTAG, &db);
                if (objerr)
                        continue;
                tx = dmu_tx_create(os);
                dmu_tx_hold_bonus(tx, obj);
                objerr = dmu_tx_assign(tx, TXG_WAIT);
                if (objerr) {
                        dmu_tx_abort(tx);
                        continue;
                }
                dmu_buf_will_dirty(db, tx);
                dmu_buf_rele(db, FTAG);
                dmu_tx_commit(tx);
        }

        os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
        txg_wait_synced(dmu_objset_pool(os), 0);
        return (0);
}

void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
        dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp,
            usedobjsp, availobjsp);
}

uint64_t
dmu_objset_fsid_guid(objset_t *os)
{
        return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset));
}

void
dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
{
        stat->dds_type = os->os->os_phys->os_type;
        if (os->os->os_dsl_dataset)
                dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat);
}

void
dmu_objset_stats(objset_t *os, nvlist_t *nv)
{
        ASSERT(os->os->os_dsl_dataset ||
            os->os->os_phys->os_type == DMU_OST_META);

        if (os->os->os_dsl_dataset != NULL)
                dsl_dataset_stats(os->os->os_dsl_dataset, nv);

        dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
            os->os->os_phys->os_type);
        dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
            dmu_objset_userspace_present(os));
}

int
dmu_objset_is_snapshot(objset_t *os)
{
        if (os->os->os_dsl_dataset != NULL)
                return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset));
        else
                return (B_FALSE);
}

int
dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
    boolean_t *conflict)
{
        dsl_dataset_t *ds = os->os->os_dsl_dataset;
        uint64_t ignored;

        if (ds->ds_phys->ds_snapnames_zapobj == 0)
                return (ENOENT);

        return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
            ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST,
            real, maxlen, conflict));
}
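
/*
 * Example (illustrative only): walking all snapshot names with the
 * cursor-style interface below; *offp carries the serialized cursor
 * position between calls, starting from 0:
 *
 *      uint64_t off = 0;
 *      char buf[MAXNAMELEN];
 *
 *      while (dmu_snapshot_list_next(os, sizeof (buf), buf,
 *          NULL, &off, NULL) == 0) {
 *              ... buf now holds the next snapshot's name ...
 *      }
 */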

int
dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
{
        dsl_dataset_t *ds = os->os->os_dsl_dataset;
        zap_cursor_t cursor;
        zap_attribute_t attr;

        if (ds->ds_phys->ds_snapnames_zapobj == 0)
                return (ENOENT);

        zap_cursor_init_serialized(&cursor,
            ds->ds_dir->dd_pool->dp_meta_objset,
            ds->ds_phys->ds_snapnames_zapobj, *offp);

        if (zap_cursor_retrieve(&cursor, &attr) != 0) {
                zap_cursor_fini(&cursor);
                return (ENOENT);
        }

        if (strlen(attr.za_name) + 1 > namelen) {
                zap_cursor_fini(&cursor);
                return (ENAMETOOLONG);
        }

        (void) strcpy(name, attr.za_name);
        if (idp)
                *idp = attr.za_first_integer;
        if (case_conflict)
                *case_conflict = attr.za_normalization_conflict;
        zap_cursor_advance(&cursor);
        *offp = zap_cursor_serialize(&cursor);
        zap_cursor_fini(&cursor);

        return (0);
}

int
dmu_dir_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp)
{
        dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir;
        zap_cursor_t cursor;
        zap_attribute_t attr;

        /* there is no next dir on a snapshot! */
        if (os->os->os_dsl_dataset->ds_object !=
            dd->dd_phys->dd_head_dataset_obj)
                return (ENOENT);

        zap_cursor_init_serialized(&cursor,
            dd->dd_pool->dp_meta_objset,
            dd->dd_phys->dd_child_dir_zapobj, *offp);

        if (zap_cursor_retrieve(&cursor, &attr) != 0) {
                zap_cursor_fini(&cursor);
                return (ENOENT);
        }

        if (strlen(attr.za_name) + 1 > namelen) {
                zap_cursor_fini(&cursor);
                return (ENAMETOOLONG);
        }

        (void) strcpy(name, attr.za_name);
        if (idp)
                *idp = attr.za_first_integer;
        zap_cursor_advance(&cursor);
        *offp = zap_cursor_serialize(&cursor);
        zap_cursor_fini(&cursor);

        return (0);
}

struct findarg {
        int (*func)(char *, void *);
        void *arg;
};

/* ARGSUSED */
static int
findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
{
        struct findarg *fa = arg;
        return (fa->func((char *)dsname, fa->arg));
}

/*
 * Find all objsets under name, and for each, call 'func(child_name, arg)'.
 * Perhaps change all callers to use dmu_objset_find_spa()?
 */
int
dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags)
{
        struct findarg fa;
        fa.func = func;
        fa.arg = arg;
        return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags));
}
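
/*
 * The recursion below visits child directories first (when
 * DS_FIND_CHILDREN is set), then snapshots (when DS_FIND_SNAPSHOTS is
 * set), and applies func to the dataset itself last, so children are
 * always processed before their parent.
 */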

/*
 * Find all objsets under name, call func on each
 */
int
dmu_objset_find_spa(spa_t *spa, const char *name,
    int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
{
        dsl_dir_t *dd;
        dsl_pool_t *dp;
        dsl_dataset_t *ds;
        zap_cursor_t zc;
        zap_attribute_t *attr;
        char *child;
        uint64_t thisobj;
        int err;

        if (name == NULL)
                name = spa_name(spa);
        err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
        if (err)
                return (err);

        /* Don't visit hidden ($MOS & $ORIGIN) objsets. */
        if (dd->dd_myname[0] == '$') {
                dsl_dir_close(dd, FTAG);
                return (0);
        }

        thisobj = dd->dd_phys->dd_head_dataset_obj;
        attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
        dp = dd->dd_pool;

        /*
         * Iterate over all children.
         */
        if (flags & DS_FIND_CHILDREN) {
                for (zap_cursor_init(&zc, dp->dp_meta_objset,
                    dd->dd_phys->dd_child_dir_zapobj);
                    zap_cursor_retrieve(&zc, attr) == 0;
                    (void) zap_cursor_advance(&zc)) {
                        ASSERT(attr->za_integer_length == sizeof (uint64_t));
                        ASSERT(attr->za_num_integers == 1);

                        child = kmem_alloc(MAXPATHLEN, KM_SLEEP);
                        (void) strcpy(child, name);
                        (void) strcat(child, "/");
                        (void) strcat(child, attr->za_name);
                        err = dmu_objset_find_spa(spa, child, func, arg, flags);
                        kmem_free(child, MAXPATHLEN);
                        if (err)
                                break;
                }
                zap_cursor_fini(&zc);

                if (err) {
                        dsl_dir_close(dd, FTAG);
                        kmem_free(attr, sizeof (zap_attribute_t));
                        return (err);
                }
        }

        /*
         * Iterate over all snapshots.
         */
        if (flags & DS_FIND_SNAPSHOTS) {
                if (!dsl_pool_sync_context(dp))
                        rw_enter(&dp->dp_config_rwlock, RW_READER);
                err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
                if (!dsl_pool_sync_context(dp))
                        rw_exit(&dp->dp_config_rwlock);

                if (err == 0) {
                        uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
                        dsl_dataset_rele(ds, FTAG);

                        for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
                            zap_cursor_retrieve(&zc, attr) == 0;
                            (void) zap_cursor_advance(&zc)) {
                                ASSERT(attr->za_integer_length ==
                                    sizeof (uint64_t));
                                ASSERT(attr->za_num_integers == 1);

                                child = kmem_alloc(MAXPATHLEN, KM_SLEEP);
                                (void) strcpy(child, name);
                                (void) strcat(child, "@");
                                (void) strcat(child, attr->za_name);
                                err = func(spa, attr->za_first_integer,
                                    child, arg);
                                kmem_free(child, MAXPATHLEN);
                                if (err)
                                        break;
                        }
                        zap_cursor_fini(&zc);
                }
        }

        dsl_dir_close(dd, FTAG);
        kmem_free(attr, sizeof (zap_attribute_t));

        if (err)
                return (err);

        /*
         * Apply to self if appropriate.
         */
        err = func(spa, thisobj, name, arg);
        return (err);
}

/* ARGSUSED */
int
dmu_objset_prefetch(char *name, void *arg)
{
        dsl_dataset_t *ds;

        if (dsl_dataset_hold(name, FTAG, &ds))
                return (0);

        if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) {
                mutex_enter(&ds->ds_opening_lock);
                if (!dsl_dataset_get_user_ptr(ds)) {
                        uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
                        zbookmark_t zb;

                        zb.zb_objset = ds->ds_object;
                        zb.zb_object = 0;
                        zb.zb_level = -1;
                        zb.zb_blkid = 0;

                        (void) arc_read_nolock(NULL, dsl_dataset_get_spa(ds),
                            &ds->ds_phys->ds_bp, NULL, NULL,
                            ZIO_PRIORITY_ASYNC_READ,
                            ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
                            &aflags, &zb);
                }
                mutex_exit(&ds->ds_opening_lock);
        }

        dsl_dataset_rele(ds, FTAG);
        return (0);
}

void
dmu_objset_set_user(objset_t *os, void *user_ptr)
{
        ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock));
        os->os->os_user_ptr = user_ptr;
}

void *
dmu_objset_get_user(objset_t *os)
{
        ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock));
        return (os->os->os_user_ptr);
}