1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/cred.h> 27 #include <sys/zfs_context.h> 28 #include <sys/dmu_objset.h> 29 #include <sys/dsl_dir.h> 30 #include <sys/dsl_dataset.h> 31 #include <sys/dsl_prop.h> 32 #include <sys/dsl_pool.h> 33 #include <sys/dsl_synctask.h> 34 #include <sys/dsl_deleg.h> 35 #include <sys/dnode.h> 36 #include <sys/dbuf.h> 37 #include <sys/zvol.h> 38 #include <sys/dmu_tx.h> 39 #include <sys/zio_checksum.h> 40 #include <sys/zap.h> 41 #include <sys/zil.h> 42 #include <sys/dmu_impl.h> 43 #include <sys/zfs_ioctl.h> 44 45 spa_t * 46 dmu_objset_spa(objset_t *os) 47 { 48 return (os->os->os_spa); 49 } 50 51 zilog_t * 52 dmu_objset_zil(objset_t *os) 53 { 54 return (os->os->os_zil); 55 } 56 57 dsl_pool_t * 58 dmu_objset_pool(objset_t *os) 59 { 60 dsl_dataset_t *ds; 61 62 if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) 63 return (ds->ds_dir->dd_pool); 64 else 65 return (spa_get_dsl(os->os->os_spa)); 66 } 67 68 dsl_dataset_t * 69 dmu_objset_ds(objset_t *os) 70 { 71 return (os->os->os_dsl_dataset); 72 } 73 74 dmu_objset_type_t 75 dmu_objset_type(objset_t *os) 76 { 77 return (os->os->os_phys->os_type); 78 } 79 80 void 81 dmu_objset_name(objset_t *os, char *buf) 82 { 83 dsl_dataset_name(os->os->os_dsl_dataset, buf); 84 } 85 86 uint64_t 87 dmu_objset_id(objset_t *os) 88 { 89 dsl_dataset_t *ds = os->os->os_dsl_dataset; 90 91 return (ds ? ds->ds_object : 0); 92 } 93 94 static void 95 checksum_changed_cb(void *arg, uint64_t newval) 96 { 97 objset_impl_t *osi = arg; 98 99 /* 100 * Inheritance should have been done by now. 101 */ 102 ASSERT(newval != ZIO_CHECKSUM_INHERIT); 103 104 osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 105 } 106 107 static void 108 compression_changed_cb(void *arg, uint64_t newval) 109 { 110 objset_impl_t *osi = arg; 111 112 /* 113 * Inheritance and range checking should have been done by now. 114 */ 115 ASSERT(newval != ZIO_COMPRESS_INHERIT); 116 117 osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); 118 } 119 120 static void 121 copies_changed_cb(void *arg, uint64_t newval) 122 { 123 objset_impl_t *osi = arg; 124 125 /* 126 * Inheritance and range checking should have been done by now. 127 */ 128 ASSERT(newval > 0); 129 ASSERT(newval <= spa_max_replication(osi->os_spa)); 130 131 osi->os_copies = newval; 132 } 133 134 static void 135 primary_cache_changed_cb(void *arg, uint64_t newval) 136 { 137 objset_impl_t *osi = arg; 138 139 /* 140 * Inheritance and range checking should have been done by now. 141 */ 142 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || 143 newval == ZFS_CACHE_METADATA); 144 145 osi->os_primary_cache = newval; 146 } 147 148 static void 149 secondary_cache_changed_cb(void *arg, uint64_t newval) 150 { 151 objset_impl_t *osi = arg; 152 153 /* 154 * Inheritance and range checking should have been done by now. 155 */ 156 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || 157 newval == ZFS_CACHE_METADATA); 158 159 osi->os_secondary_cache = newval; 160 } 161 162 void 163 dmu_objset_byteswap(void *buf, size_t size) 164 { 165 objset_phys_t *osp = buf; 166 167 ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t)); 168 dnode_byteswap(&osp->os_meta_dnode); 169 byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 170 osp->os_type = BSWAP_64(osp->os_type); 171 osp->os_flags = BSWAP_64(osp->os_flags); 172 if (size == sizeof (objset_phys_t)) { 173 dnode_byteswap(&osp->os_userused_dnode); 174 dnode_byteswap(&osp->os_groupused_dnode); 175 } 176 } 177 178 int 179 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 180 objset_impl_t **osip) 181 { 182 objset_impl_t *osi; 183 int i, err; 184 185 ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); 186 187 osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); 188 osi->os.os = osi; 189 osi->os_dsl_dataset = ds; 190 osi->os_spa = spa; 191 osi->os_rootbp = bp; 192 if (!BP_IS_HOLE(osi->os_rootbp)) { 193 uint32_t aflags = ARC_WAIT; 194 zbookmark_t zb; 195 zb.zb_objset = ds ? ds->ds_object : 0; 196 zb.zb_object = 0; 197 zb.zb_level = -1; 198 zb.zb_blkid = 0; 199 if (DMU_OS_IS_L2CACHEABLE(osi)) 200 aflags |= ARC_L2CACHE; 201 202 dprintf_bp(osi->os_rootbp, "reading %s", ""); 203 /* 204 * NB: when bprewrite scrub can change the bp, 205 * and this is called from dmu_objset_open_ds_os, the bp 206 * could change, and we'll need a lock. 207 */ 208 err = arc_read_nolock(NULL, spa, osi->os_rootbp, 209 arc_getbuf_func, &osi->os_phys_buf, 210 ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); 211 if (err) { 212 kmem_free(osi, sizeof (objset_impl_t)); 213 /* convert checksum errors into IO errors */ 214 if (err == ECKSUM) 215 err = EIO; 216 return (err); 217 } 218 219 /* Increase the blocksize if we are permitted. */ 220 if (spa_version(spa) >= SPA_VERSION_USERSPACE && 221 arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) { 222 arc_buf_t *buf = arc_buf_alloc(spa, 223 sizeof (objset_phys_t), &osi->os_phys_buf, 224 ARC_BUFC_METADATA); 225 bzero(buf->b_data, sizeof (objset_phys_t)); 226 bcopy(osi->os_phys_buf->b_data, buf->b_data, 227 arc_buf_size(osi->os_phys_buf)); 228 (void) arc_buf_remove_ref(osi->os_phys_buf, 229 &osi->os_phys_buf); 230 osi->os_phys_buf = buf; 231 } 232 233 osi->os_phys = osi->os_phys_buf->b_data; 234 osi->os_flags = osi->os_phys->os_flags; 235 } else { 236 int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? 237 sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; 238 osi->os_phys_buf = arc_buf_alloc(spa, size, 239 &osi->os_phys_buf, ARC_BUFC_METADATA); 240 osi->os_phys = osi->os_phys_buf->b_data; 241 bzero(osi->os_phys, size); 242 } 243 244 /* 245 * Note: the changed_cb will be called once before the register 246 * func returns, thus changing the checksum/compression from the 247 * default (fletcher2/off). Snapshots don't need to know about 248 * checksum/compression/copies. 249 */ 250 if (ds) { 251 err = dsl_prop_register(ds, "primarycache", 252 primary_cache_changed_cb, osi); 253 if (err == 0) 254 err = dsl_prop_register(ds, "secondarycache", 255 secondary_cache_changed_cb, osi); 256 if (!dsl_dataset_is_snapshot(ds)) { 257 if (err == 0) 258 err = dsl_prop_register(ds, "checksum", 259 checksum_changed_cb, osi); 260 if (err == 0) 261 err = dsl_prop_register(ds, "compression", 262 compression_changed_cb, osi); 263 if (err == 0) 264 err = dsl_prop_register(ds, "copies", 265 copies_changed_cb, osi); 266 } 267 if (err) { 268 VERIFY(arc_buf_remove_ref(osi->os_phys_buf, 269 &osi->os_phys_buf) == 1); 270 kmem_free(osi, sizeof (objset_impl_t)); 271 return (err); 272 } 273 } else if (ds == NULL) { 274 /* It's the meta-objset. */ 275 osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 276 osi->os_compress = ZIO_COMPRESS_LZJB; 277 osi->os_copies = spa_max_replication(spa); 278 osi->os_primary_cache = ZFS_CACHE_ALL; 279 osi->os_secondary_cache = ZFS_CACHE_ALL; 280 } 281 282 osi->os_zil_header = osi->os_phys->os_zil_header; 283 osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header); 284 285 for (i = 0; i < TXG_SIZE; i++) { 286 list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), 287 offsetof(dnode_t, dn_dirty_link[i])); 288 list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), 289 offsetof(dnode_t, dn_dirty_link[i])); 290 } 291 list_create(&osi->os_dnodes, sizeof (dnode_t), 292 offsetof(dnode_t, dn_link)); 293 list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 294 offsetof(dmu_buf_impl_t, db_link)); 295 296 mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); 297 mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); 298 mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); 299 300 osi->os_meta_dnode = dnode_special_open(osi, 301 &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); 302 if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) { 303 osi->os_userused_dnode = dnode_special_open(osi, 304 &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT); 305 osi->os_groupused_dnode = dnode_special_open(osi, 306 &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT); 307 } 308 309 /* 310 * We should be the only thread trying to do this because we 311 * have ds_opening_lock 312 */ 313 if (ds) { 314 VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi, 315 dmu_objset_evict)); 316 } 317 318 *osip = osi; 319 return (0); 320 } 321 322 static int 323 dmu_objset_open_ds_os(dsl_dataset_t *ds, objset_t *os, dmu_objset_type_t type) 324 { 325 objset_impl_t *osi; 326 327 mutex_enter(&ds->ds_opening_lock); 328 osi = dsl_dataset_get_user_ptr(ds); 329 if (osi == NULL) { 330 int err; 331 332 err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), 333 ds, &ds->ds_phys->ds_bp, &osi); 334 if (err) { 335 mutex_exit(&ds->ds_opening_lock); 336 return (err); 337 } 338 } 339 mutex_exit(&ds->ds_opening_lock); 340 341 os->os = osi; 342 os->os_mode = DS_MODE_NOHOLD; 343 344 if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) 345 return (EINVAL); 346 return (0); 347 } 348 349 int 350 dmu_objset_open_ds(dsl_dataset_t *ds, dmu_objset_type_t type, objset_t **osp) 351 { 352 objset_t *os; 353 int err; 354 355 os = kmem_alloc(sizeof (objset_t), KM_SLEEP); 356 err = dmu_objset_open_ds_os(ds, os, type); 357 if (err) 358 kmem_free(os, sizeof (objset_t)); 359 else 360 *osp = os; 361 return (err); 362 } 363 364 /* called from zpl */ 365 int 366 dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, 367 objset_t **osp) 368 { 369 objset_t *os; 370 dsl_dataset_t *ds; 371 int err; 372 373 ASSERT(DS_MODE_TYPE(mode) == DS_MODE_USER || 374 DS_MODE_TYPE(mode) == DS_MODE_OWNER); 375 376 os = kmem_alloc(sizeof (objset_t), KM_SLEEP); 377 if (DS_MODE_TYPE(mode) == DS_MODE_USER) 378 err = dsl_dataset_hold(name, os, &ds); 379 else 380 err = dsl_dataset_own(name, mode, os, &ds); 381 if (err) { 382 kmem_free(os, sizeof (objset_t)); 383 return (err); 384 } 385 386 err = dmu_objset_open_ds_os(ds, os, type); 387 if (err) { 388 if (DS_MODE_TYPE(mode) == DS_MODE_USER) 389 dsl_dataset_rele(ds, os); 390 else 391 dsl_dataset_disown(ds, os); 392 kmem_free(os, sizeof (objset_t)); 393 } else { 394 os->os_mode = mode; 395 *osp = os; 396 } 397 return (err); 398 } 399 400 void 401 dmu_objset_close(objset_t *os) 402 { 403 ASSERT(DS_MODE_TYPE(os->os_mode) == DS_MODE_USER || 404 DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER || 405 DS_MODE_TYPE(os->os_mode) == DS_MODE_NOHOLD); 406 407 if (DS_MODE_TYPE(os->os_mode) == DS_MODE_USER) 408 dsl_dataset_rele(os->os->os_dsl_dataset, os); 409 else if (DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER) 410 dsl_dataset_disown(os->os->os_dsl_dataset, os); 411 kmem_free(os, sizeof (objset_t)); 412 } 413 414 int 415 dmu_objset_evict_dbufs(objset_t *os) 416 { 417 objset_impl_t *osi = os->os; 418 dnode_t *dn; 419 420 mutex_enter(&osi->os_lock); 421 422 /* process the mdn last, since the other dnodes have holds on it */ 423 list_remove(&osi->os_dnodes, osi->os_meta_dnode); 424 list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode); 425 426 /* 427 * Find the first dnode with holds. We have to do this dance 428 * because dnode_add_ref() only works if you already have a 429 * hold. If there are no holds then it has no dbufs so OK to 430 * skip. 431 */ 432 for (dn = list_head(&osi->os_dnodes); 433 dn && !dnode_add_ref(dn, FTAG); 434 dn = list_next(&osi->os_dnodes, dn)) 435 continue; 436 437 while (dn) { 438 dnode_t *next_dn = dn; 439 440 do { 441 next_dn = list_next(&osi->os_dnodes, next_dn); 442 } while (next_dn && !dnode_add_ref(next_dn, FTAG)); 443 444 mutex_exit(&osi->os_lock); 445 dnode_evict_dbufs(dn); 446 dnode_rele(dn, FTAG); 447 mutex_enter(&osi->os_lock); 448 dn = next_dn; 449 } 450 mutex_exit(&osi->os_lock); 451 return (list_head(&osi->os_dnodes) != osi->os_meta_dnode); 452 } 453 454 void 455 dmu_objset_evict(dsl_dataset_t *ds, void *arg) 456 { 457 objset_impl_t *osi = arg; 458 objset_t os; 459 int i; 460 461 for (i = 0; i < TXG_SIZE; i++) { 462 ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); 463 ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); 464 } 465 466 if (ds) { 467 if (!dsl_dataset_is_snapshot(ds)) { 468 VERIFY(0 == dsl_prop_unregister(ds, "checksum", 469 checksum_changed_cb, osi)); 470 VERIFY(0 == dsl_prop_unregister(ds, "compression", 471 compression_changed_cb, osi)); 472 VERIFY(0 == dsl_prop_unregister(ds, "copies", 473 copies_changed_cb, osi)); 474 } 475 VERIFY(0 == dsl_prop_unregister(ds, "primarycache", 476 primary_cache_changed_cb, osi)); 477 VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", 478 secondary_cache_changed_cb, osi)); 479 } 480 481 /* 482 * We should need only a single pass over the dnode list, since 483 * nothing can be added to the list at this point. 484 */ 485 os.os = osi; 486 (void) dmu_objset_evict_dbufs(&os); 487 488 dnode_special_close(osi->os_meta_dnode); 489 if (osi->os_userused_dnode) { 490 dnode_special_close(osi->os_userused_dnode); 491 dnode_special_close(osi->os_groupused_dnode); 492 } 493 zil_free(osi->os_zil); 494 495 ASSERT3P(list_head(&osi->os_dnodes), ==, NULL); 496 497 VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); 498 mutex_destroy(&osi->os_lock); 499 mutex_destroy(&osi->os_obj_lock); 500 mutex_destroy(&osi->os_user_ptr_lock); 501 kmem_free(osi, sizeof (objset_impl_t)); 502 } 503 504 /* called from dsl for meta-objset */ 505 objset_impl_t * 506 dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 507 dmu_objset_type_t type, dmu_tx_t *tx) 508 { 509 objset_impl_t *osi; 510 dnode_t *mdn; 511 512 ASSERT(dmu_tx_is_syncing(tx)); 513 if (ds) 514 mutex_enter(&ds->ds_opening_lock); 515 VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &osi)); 516 if (ds) 517 mutex_exit(&ds->ds_opening_lock); 518 mdn = osi->os_meta_dnode; 519 520 dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 521 DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 522 523 /* 524 * We don't want to have to increase the meta-dnode's nlevels 525 * later, because then we could do it in quescing context while 526 * we are also accessing it in open context. 527 * 528 * This precaution is not necessary for the MOS (ds == NULL), 529 * because the MOS is only updated in syncing context. 530 * This is most fortunate: the MOS is the only objset that 531 * needs to be synced multiple times as spa_sync() iterates 532 * to convergence, so minimizing its dn_nlevels matters. 533 */ 534 if (ds != NULL) { 535 int levels = 1; 536 537 /* 538 * Determine the number of levels necessary for the meta-dnode 539 * to contain DN_MAX_OBJECT dnodes. 540 */ 541 while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + 542 (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < 543 DN_MAX_OBJECT * sizeof (dnode_phys_t)) 544 levels++; 545 546 mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 547 mdn->dn_nlevels = levels; 548 } 549 550 ASSERT(type != DMU_OST_NONE); 551 ASSERT(type != DMU_OST_ANY); 552 ASSERT(type < DMU_OST_NUMTYPES); 553 osi->os_phys->os_type = type; 554 if (dmu_objset_userused_enabled(osi)) { 555 osi->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; 556 osi->os_flags = osi->os_phys->os_flags; 557 } 558 559 dsl_dataset_dirty(ds, tx); 560 561 return (osi); 562 } 563 564 struct oscarg { 565 void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); 566 void *userarg; 567 dsl_dataset_t *clone_origin; 568 const char *lastname; 569 dmu_objset_type_t type; 570 uint64_t flags; 571 }; 572 573 /*ARGSUSED*/ 574 static int 575 dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) 576 { 577 dsl_dir_t *dd = arg1; 578 struct oscarg *oa = arg2; 579 objset_t *mos = dd->dd_pool->dp_meta_objset; 580 int err; 581 uint64_t ddobj; 582 583 err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 584 oa->lastname, sizeof (uint64_t), 1, &ddobj); 585 if (err != ENOENT) 586 return (err ? err : EEXIST); 587 588 if (oa->clone_origin != NULL) { 589 /* You can't clone across pools. */ 590 if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool) 591 return (EXDEV); 592 593 /* You can only clone snapshots, not the head datasets. */ 594 if (!dsl_dataset_is_snapshot(oa->clone_origin)) 595 return (EINVAL); 596 } 597 598 return (0); 599 } 600 601 static void 602 dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 603 { 604 dsl_dir_t *dd = arg1; 605 struct oscarg *oa = arg2; 606 uint64_t dsobj; 607 608 ASSERT(dmu_tx_is_syncing(tx)); 609 610 dsobj = dsl_dataset_create_sync(dd, oa->lastname, 611 oa->clone_origin, oa->flags, cr, tx); 612 613 if (oa->clone_origin == NULL) { 614 dsl_dataset_t *ds; 615 blkptr_t *bp; 616 objset_impl_t *osi; 617 618 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, 619 FTAG, &ds)); 620 bp = dsl_dataset_get_blkptr(ds); 621 ASSERT(BP_IS_HOLE(bp)); 622 623 osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), 624 ds, bp, oa->type, tx); 625 626 if (oa->userfunc) 627 oa->userfunc(&osi->os, oa->userarg, cr, tx); 628 dsl_dataset_rele(ds, FTAG); 629 } 630 631 spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa, 632 tx, cr, "dataset = %llu", dsobj); 633 } 634 635 int 636 dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, 637 void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) 638 { 639 dsl_dir_t *pdd; 640 const char *tail; 641 int err = 0; 642 struct oscarg oa = { 0 }; 643 644 ASSERT(strchr(name, '@') == NULL); 645 err = dsl_dir_open(name, FTAG, &pdd, &tail); 646 if (err) 647 return (err); 648 if (tail == NULL) { 649 dsl_dir_close(pdd, FTAG); 650 return (EEXIST); 651 } 652 653 oa.userfunc = func; 654 oa.userarg = arg; 655 oa.lastname = tail; 656 oa.type = type; 657 oa.flags = flags; 658 659 err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, 660 dmu_objset_create_sync, pdd, &oa, 5); 661 dsl_dir_close(pdd, FTAG); 662 return (err); 663 } 664 665 int 666 dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags) 667 { 668 dsl_dir_t *pdd; 669 const char *tail; 670 int err = 0; 671 struct oscarg oa = { 0 }; 672 673 ASSERT(strchr(name, '@') == NULL); 674 err = dsl_dir_open(name, FTAG, &pdd, &tail); 675 if (err) 676 return (err); 677 if (tail == NULL) { 678 dsl_dir_close(pdd, FTAG); 679 return (EEXIST); 680 } 681 682 oa.lastname = tail; 683 oa.clone_origin = clone_origin; 684 oa.flags = flags; 685 686 err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, 687 dmu_objset_create_sync, pdd, &oa, 5); 688 dsl_dir_close(pdd, FTAG); 689 return (err); 690 } 691 692 int 693 dmu_objset_destroy(const char *name, boolean_t defer) 694 { 695 objset_t *os; 696 int error; 697 698 /* 699 * dsl_dataset_destroy() can free any claimed-but-unplayed 700 * intent log, but if there is an active log, it has blocks that 701 * are allocated, but may not yet be reflected in the on-disk 702 * structure. Only the ZIL knows how to free them, so we have 703 * to call into it here. 704 */ 705 error = dmu_objset_open(name, DMU_OST_ANY, 706 DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os); 707 if (error == 0) { 708 dsl_dataset_t *ds = os->os->os_dsl_dataset; 709 zil_destroy(dmu_objset_zil(os), B_FALSE); 710 711 error = dsl_dataset_destroy(ds, os, defer); 712 /* dsl_dataset_destroy() closes the ds. */ 713 kmem_free(os, sizeof (objset_t)); 714 } 715 716 return (error); 717 } 718 719 struct snaparg { 720 dsl_sync_task_group_t *dstg; 721 char *snapname; 722 char failed[MAXPATHLEN]; 723 boolean_t checkperms; 724 nvlist_t *props; 725 }; 726 727 static int 728 snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 729 { 730 objset_t *os = arg1; 731 struct snaparg *sn = arg2; 732 733 /* The props have already been checked by zfs_check_userprops(). */ 734 735 return (dsl_dataset_snapshot_check(os->os->os_dsl_dataset, 736 sn->snapname, tx)); 737 } 738 739 static void 740 snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 741 { 742 objset_t *os = arg1; 743 dsl_dataset_t *ds = os->os->os_dsl_dataset; 744 struct snaparg *sn = arg2; 745 746 dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx); 747 748 if (sn->props) 749 dsl_props_set_sync(ds->ds_prev, sn->props, cr, tx); 750 } 751 752 static int 753 dmu_objset_snapshot_one(char *name, void *arg) 754 { 755 struct snaparg *sn = arg; 756 objset_t *os; 757 int err; 758 759 (void) strcpy(sn->failed, name); 760 761 /* 762 * Check permissions only when requested. This only applies when 763 * doing a recursive snapshot. The permission checks for the starting 764 * dataset have already been performed in zfs_secpolicy_snapshot() 765 */ 766 if (sn->checkperms == B_TRUE && 767 (err = zfs_secpolicy_snapshot_perms(name, CRED()))) 768 return (err); 769 770 err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os); 771 if (err != 0) 772 return (err); 773 774 /* If the objset is in an inconsistent state, return busy */ 775 if (os->os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { 776 dmu_objset_close(os); 777 return (EBUSY); 778 } 779 780 /* 781 * NB: we need to wait for all in-flight changes to get to disk, 782 * so that we snapshot those changes. zil_suspend does this as 783 * a side effect. 784 */ 785 err = zil_suspend(dmu_objset_zil(os)); 786 if (err == 0) { 787 dsl_sync_task_create(sn->dstg, snapshot_check, 788 snapshot_sync, os, sn, 3); 789 } else { 790 dmu_objset_close(os); 791 } 792 793 return (err); 794 } 795 796 int 797 dmu_objset_snapshot(char *fsname, char *snapname, 798 nvlist_t *props, boolean_t recursive) 799 { 800 dsl_sync_task_t *dst; 801 struct snaparg sn; 802 spa_t *spa; 803 int err; 804 805 (void) strcpy(sn.failed, fsname); 806 807 err = spa_open(fsname, &spa, FTAG); 808 if (err) 809 return (err); 810 811 sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 812 sn.snapname = snapname; 813 sn.props = props; 814 815 if (recursive) { 816 sn.checkperms = B_TRUE; 817 err = dmu_objset_find(fsname, 818 dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); 819 } else { 820 sn.checkperms = B_FALSE; 821 err = dmu_objset_snapshot_one(fsname, &sn); 822 } 823 824 if (err == 0) 825 err = dsl_sync_task_group_wait(sn.dstg); 826 827 for (dst = list_head(&sn.dstg->dstg_tasks); dst; 828 dst = list_next(&sn.dstg->dstg_tasks, dst)) { 829 objset_t *os = dst->dst_arg1; 830 dsl_dataset_t *ds = os->os->os_dsl_dataset; 831 if (dst->dst_err) 832 dsl_dataset_name(ds, sn.failed); 833 zil_resume(dmu_objset_zil(os)); 834 dmu_objset_close(os); 835 } 836 837 if (err) 838 (void) strcpy(fsname, sn.failed); 839 dsl_sync_task_group_destroy(sn.dstg); 840 spa_close(spa, FTAG); 841 return (err); 842 } 843 844 static void 845 dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) 846 { 847 dnode_t *dn; 848 849 while (dn = list_head(list)) { 850 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 851 ASSERT(dn->dn_dbuf->db_data_pending); 852 /* 853 * Initialize dn_zio outside dnode_sync() because the 854 * meta-dnode needs to set it ouside dnode_sync(). 855 */ 856 dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio; 857 ASSERT(dn->dn_zio); 858 859 ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS); 860 list_remove(list, dn); 861 862 if (newlist) { 863 (void) dnode_add_ref(dn, newlist); 864 list_insert_tail(newlist, dn); 865 } 866 867 dnode_sync(dn, tx); 868 } 869 } 870 871 /* ARGSUSED */ 872 static void 873 ready(zio_t *zio, arc_buf_t *abuf, void *arg) 874 { 875 blkptr_t *bp = zio->io_bp; 876 blkptr_t *bp_orig = &zio->io_bp_orig; 877 objset_impl_t *os = arg; 878 dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; 879 880 ASSERT(bp == os->os_rootbp); 881 ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET); 882 ASSERT(BP_GET_LEVEL(bp) == 0); 883 884 /* 885 * Update rootbp fill count: it should be the number of objects 886 * allocated in the object set (not counting the "special" 887 * objects that are stored in the objset_phys_t -- the meta 888 * dnode and user/group accounting objects). 889 */ 890 bp->blk_fill = 0; 891 for (int i = 0; i < dnp->dn_nblkptr; i++) 892 bp->blk_fill += dnp->dn_blkptr[i].blk_fill; 893 894 if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { 895 ASSERT(DVA_EQUAL(BP_IDENTITY(bp), BP_IDENTITY(bp_orig))); 896 } else { 897 if (zio->io_bp_orig.blk_birth == os->os_synctx->tx_txg) 898 (void) dsl_dataset_block_kill(os->os_dsl_dataset, 899 &zio->io_bp_orig, zio, os->os_synctx); 900 dsl_dataset_block_born(os->os_dsl_dataset, bp, os->os_synctx); 901 } 902 } 903 904 /* called from dsl */ 905 void 906 dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) 907 { 908 int txgoff; 909 zbookmark_t zb; 910 writeprops_t wp = { 0 }; 911 zio_t *zio; 912 list_t *list; 913 list_t *newlist = NULL; 914 dbuf_dirty_record_t *dr; 915 916 dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 917 918 ASSERT(dmu_tx_is_syncing(tx)); 919 /* XXX the write_done callback should really give us the tx... */ 920 os->os_synctx = tx; 921 922 if (os->os_dsl_dataset == NULL) { 923 /* 924 * This is the MOS. If we have upgraded, 925 * spa_max_replication() could change, so reset 926 * os_copies here. 927 */ 928 os->os_copies = spa_max_replication(os->os_spa); 929 } 930 931 /* 932 * Create the root block IO 933 */ 934 zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; 935 zb.zb_object = 0; 936 zb.zb_level = -1; /* for block ordering; it's level 0 on disk */ 937 zb.zb_blkid = 0; 938 939 wp.wp_type = DMU_OT_OBJSET; 940 wp.wp_level = 0; /* on-disk BP level; see above */ 941 wp.wp_copies = os->os_copies; 942 wp.wp_oschecksum = os->os_checksum; 943 wp.wp_oscompress = os->os_compress; 944 945 if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) { 946 (void) dsl_dataset_block_kill(os->os_dsl_dataset, 947 os->os_rootbp, pio, tx); 948 } 949 950 arc_release(os->os_phys_buf, &os->os_phys_buf); 951 952 zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os), 953 tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os, 954 ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); 955 956 /* 957 * Sync special dnodes - the parent IO for the sync is the root block 958 */ 959 os->os_meta_dnode->dn_zio = zio; 960 dnode_sync(os->os_meta_dnode, tx); 961 962 os->os_phys->os_flags = os->os_flags; 963 964 if (os->os_userused_dnode && 965 os->os_userused_dnode->dn_type != DMU_OT_NONE) { 966 os->os_userused_dnode->dn_zio = zio; 967 dnode_sync(os->os_userused_dnode, tx); 968 os->os_groupused_dnode->dn_zio = zio; 969 dnode_sync(os->os_groupused_dnode, tx); 970 } 971 972 txgoff = tx->tx_txg & TXG_MASK; 973 974 if (dmu_objset_userused_enabled(os)) { 975 newlist = &os->os_synced_dnodes; 976 /* 977 * We must create the list here because it uses the 978 * dn_dirty_link[] of this txg. 979 */ 980 list_create(newlist, sizeof (dnode_t), 981 offsetof(dnode_t, dn_dirty_link[txgoff])); 982 } 983 984 dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx); 985 dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); 986 987 list = &os->os_meta_dnode->dn_dirty_records[txgoff]; 988 while (dr = list_head(list)) { 989 ASSERT(dr->dr_dbuf->db_level == 0); 990 list_remove(list, dr); 991 if (dr->dr_zio) 992 zio_nowait(dr->dr_zio); 993 } 994 /* 995 * Free intent log blocks up to this tx. 996 */ 997 zil_sync(os->os_zil, tx); 998 os->os_phys->os_zil_header = os->os_zil_header; 999 zio_nowait(zio); 1000 } 1001 1002 static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES]; 1003 1004 void 1005 dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb) 1006 { 1007 used_cbs[ost] = cb; 1008 } 1009 1010 boolean_t 1011 dmu_objset_userused_enabled(objset_impl_t *os) 1012 { 1013 return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE && 1014 used_cbs[os->os_phys->os_type] && 1015 os->os_userused_dnode); 1016 } 1017 1018 void 1019 dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx) 1020 { 1021 dnode_t *dn; 1022 list_t *list = &os->os_synced_dnodes; 1023 static const char zerobuf[DN_MAX_BONUSLEN] = {0}; 1024 1025 ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os)); 1026 1027 while (dn = list_head(list)) { 1028 dmu_object_type_t bonustype; 1029 1030 ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object)); 1031 ASSERT(dn->dn_oldphys); 1032 ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE || 1033 dn->dn_phys->dn_flags & 1034 DNODE_FLAG_USERUSED_ACCOUNTED); 1035 1036 /* Allocate the user/groupused objects if necessary. */ 1037 if (os->os_userused_dnode->dn_type == DMU_OT_NONE) { 1038 VERIFY(0 == zap_create_claim(&os->os, 1039 DMU_USERUSED_OBJECT, 1040 DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); 1041 VERIFY(0 == zap_create_claim(&os->os, 1042 DMU_GROUPUSED_OBJECT, 1043 DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); 1044 } 1045 1046 /* 1047 * If the object was not previously 1048 * accounted, pretend that it was free. 1049 */ 1050 if (!(dn->dn_oldphys->dn_flags & 1051 DNODE_FLAG_USERUSED_ACCOUNTED)) { 1052 bzero(dn->dn_oldphys, sizeof (dnode_phys_t)); 1053 } 1054 1055 /* 1056 * If the object was freed, use the previous bonustype. 1057 */ 1058 bonustype = dn->dn_phys->dn_bonustype ? 1059 dn->dn_phys->dn_bonustype : dn->dn_oldphys->dn_bonustype; 1060 ASSERT(dn->dn_phys->dn_type != 0 || 1061 (bcmp(DN_BONUS(dn->dn_phys), zerobuf, 1062 DN_MAX_BONUSLEN) == 0 && 1063 DN_USED_BYTES(dn->dn_phys) == 0)); 1064 ASSERT(dn->dn_oldphys->dn_type != 0 || 1065 (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf, 1066 DN_MAX_BONUSLEN) == 0 && 1067 DN_USED_BYTES(dn->dn_oldphys) == 0)); 1068 used_cbs[os->os_phys->os_type](&os->os, bonustype, 1069 DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys), 1070 DN_USED_BYTES(dn->dn_oldphys), 1071 DN_USED_BYTES(dn->dn_phys), tx); 1072 1073 /* 1074 * The mutex is needed here for interlock with dnode_allocate. 1075 */ 1076 mutex_enter(&dn->dn_mtx); 1077 zio_buf_free(dn->dn_oldphys, sizeof (dnode_phys_t)); 1078 dn->dn_oldphys = NULL; 1079 mutex_exit(&dn->dn_mtx); 1080 1081 list_remove(list, dn); 1082 dnode_rele(dn, list); 1083 } 1084 } 1085 1086 boolean_t 1087 dmu_objset_userspace_present(objset_t *os) 1088 { 1089 return (os->os->os_phys->os_flags & 1090 OBJSET_FLAG_USERACCOUNTING_COMPLETE); 1091 } 1092 1093 int 1094 dmu_objset_userspace_upgrade(objset_t *os) 1095 { 1096 uint64_t obj; 1097 int err = 0; 1098 1099 if (dmu_objset_userspace_present(os)) 1100 return (0); 1101 if (!dmu_objset_userused_enabled(os->os)) 1102 return (ENOTSUP); 1103 if (dmu_objset_is_snapshot(os)) 1104 return (EINVAL); 1105 1106 /* 1107 * We simply need to mark every object dirty, so that it will be 1108 * synced out and now accounted. If this is called 1109 * concurrently, or if we already did some work before crashing, 1110 * that's fine, since we track each object's accounted state 1111 * independently. 1112 */ 1113 1114 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { 1115 dmu_tx_t *tx; 1116 dmu_buf_t *db; 1117 int objerr; 1118 1119 if (issig(JUSTLOOKING) && issig(FORREAL)) 1120 return (EINTR); 1121 1122 objerr = dmu_bonus_hold(os, obj, FTAG, &db); 1123 if (objerr) 1124 continue; 1125 tx = dmu_tx_create(os); 1126 dmu_tx_hold_bonus(tx, obj); 1127 objerr = dmu_tx_assign(tx, TXG_WAIT); 1128 if (objerr) { 1129 dmu_tx_abort(tx); 1130 continue; 1131 } 1132 dmu_buf_will_dirty(db, tx); 1133 dmu_buf_rele(db, FTAG); 1134 dmu_tx_commit(tx); 1135 } 1136 1137 os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; 1138 txg_wait_synced(dmu_objset_pool(os), 0); 1139 return (0); 1140 } 1141 1142 void 1143 dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, 1144 uint64_t *usedobjsp, uint64_t *availobjsp) 1145 { 1146 dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp, 1147 usedobjsp, availobjsp); 1148 } 1149 1150 uint64_t 1151 dmu_objset_fsid_guid(objset_t *os) 1152 { 1153 return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset)); 1154 } 1155 1156 void 1157 dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat) 1158 { 1159 stat->dds_type = os->os->os_phys->os_type; 1160 if (os->os->os_dsl_dataset) 1161 dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat); 1162 } 1163 1164 void 1165 dmu_objset_stats(objset_t *os, nvlist_t *nv) 1166 { 1167 ASSERT(os->os->os_dsl_dataset || 1168 os->os->os_phys->os_type == DMU_OST_META); 1169 1170 if (os->os->os_dsl_dataset != NULL) 1171 dsl_dataset_stats(os->os->os_dsl_dataset, nv); 1172 1173 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE, 1174 os->os->os_phys->os_type); 1175 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING, 1176 dmu_objset_userspace_present(os)); 1177 } 1178 1179 int 1180 dmu_objset_is_snapshot(objset_t *os) 1181 { 1182 if (os->os->os_dsl_dataset != NULL) 1183 return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); 1184 else 1185 return (B_FALSE); 1186 } 1187 1188 int 1189 dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen, 1190 boolean_t *conflict) 1191 { 1192 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1193 uint64_t ignored; 1194 1195 if (ds->ds_phys->ds_snapnames_zapobj == 0) 1196 return (ENOENT); 1197 1198 return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset, 1199 ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST, 1200 real, maxlen, conflict)); 1201 } 1202 1203 int 1204 dmu_snapshot_list_next(objset_t *os, int namelen, char *name, 1205 uint64_t *idp, uint64_t *offp, boolean_t *case_conflict) 1206 { 1207 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1208 zap_cursor_t cursor; 1209 zap_attribute_t attr; 1210 1211 if (ds->ds_phys->ds_snapnames_zapobj == 0) 1212 return (ENOENT); 1213 1214 zap_cursor_init_serialized(&cursor, 1215 ds->ds_dir->dd_pool->dp_meta_objset, 1216 ds->ds_phys->ds_snapnames_zapobj, *offp); 1217 1218 if (zap_cursor_retrieve(&cursor, &attr) != 0) { 1219 zap_cursor_fini(&cursor); 1220 return (ENOENT); 1221 } 1222 1223 if (strlen(attr.za_name) + 1 > namelen) { 1224 zap_cursor_fini(&cursor); 1225 return (ENAMETOOLONG); 1226 } 1227 1228 (void) strcpy(name, attr.za_name); 1229 if (idp) 1230 *idp = attr.za_first_integer; 1231 if (case_conflict) 1232 *case_conflict = attr.za_normalization_conflict; 1233 zap_cursor_advance(&cursor); 1234 *offp = zap_cursor_serialize(&cursor); 1235 zap_cursor_fini(&cursor); 1236 1237 return (0); 1238 } 1239 1240 int 1241 dmu_dir_list_next(objset_t *os, int namelen, char *name, 1242 uint64_t *idp, uint64_t *offp) 1243 { 1244 dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir; 1245 zap_cursor_t cursor; 1246 zap_attribute_t attr; 1247 1248 /* there is no next dir on a snapshot! */ 1249 if (os->os->os_dsl_dataset->ds_object != 1250 dd->dd_phys->dd_head_dataset_obj) 1251 return (ENOENT); 1252 1253 zap_cursor_init_serialized(&cursor, 1254 dd->dd_pool->dp_meta_objset, 1255 dd->dd_phys->dd_child_dir_zapobj, *offp); 1256 1257 if (zap_cursor_retrieve(&cursor, &attr) != 0) { 1258 zap_cursor_fini(&cursor); 1259 return (ENOENT); 1260 } 1261 1262 if (strlen(attr.za_name) + 1 > namelen) { 1263 zap_cursor_fini(&cursor); 1264 return (ENAMETOOLONG); 1265 } 1266 1267 (void) strcpy(name, attr.za_name); 1268 if (idp) 1269 *idp = attr.za_first_integer; 1270 zap_cursor_advance(&cursor); 1271 *offp = zap_cursor_serialize(&cursor); 1272 zap_cursor_fini(&cursor); 1273 1274 return (0); 1275 } 1276 1277 struct findarg { 1278 int (*func)(char *, void *); 1279 void *arg; 1280 }; 1281 1282 /* ARGSUSED */ 1283 static int 1284 findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) 1285 { 1286 struct findarg *fa = arg; 1287 return (fa->func((char *)dsname, fa->arg)); 1288 } 1289 1290 /* 1291 * Find all objsets under name, and for each, call 'func(child_name, arg)'. 1292 * Perhaps change all callers to use dmu_objset_find_spa()? 1293 */ 1294 int 1295 dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) 1296 { 1297 struct findarg fa; 1298 fa.func = func; 1299 fa.arg = arg; 1300 return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags)); 1301 } 1302 1303 /* 1304 * Find all objsets under name, call func on each 1305 */ 1306 int 1307 dmu_objset_find_spa(spa_t *spa, const char *name, 1308 int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags) 1309 { 1310 dsl_dir_t *dd; 1311 dsl_pool_t *dp; 1312 dsl_dataset_t *ds; 1313 zap_cursor_t zc; 1314 zap_attribute_t *attr; 1315 char *child; 1316 uint64_t thisobj; 1317 int err; 1318 1319 if (name == NULL) 1320 name = spa_name(spa); 1321 err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL); 1322 if (err) 1323 return (err); 1324 1325 /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ 1326 if (dd->dd_myname[0] == '$') { 1327 dsl_dir_close(dd, FTAG); 1328 return (0); 1329 } 1330 1331 thisobj = dd->dd_phys->dd_head_dataset_obj; 1332 attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 1333 dp = dd->dd_pool; 1334 1335 /* 1336 * Iterate over all children. 1337 */ 1338 if (flags & DS_FIND_CHILDREN) { 1339 for (zap_cursor_init(&zc, dp->dp_meta_objset, 1340 dd->dd_phys->dd_child_dir_zapobj); 1341 zap_cursor_retrieve(&zc, attr) == 0; 1342 (void) zap_cursor_advance(&zc)) { 1343 ASSERT(attr->za_integer_length == sizeof (uint64_t)); 1344 ASSERT(attr->za_num_integers == 1); 1345 1346 child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1347 (void) strcpy(child, name); 1348 (void) strcat(child, "/"); 1349 (void) strcat(child, attr->za_name); 1350 err = dmu_objset_find_spa(spa, child, func, arg, flags); 1351 kmem_free(child, MAXPATHLEN); 1352 if (err) 1353 break; 1354 } 1355 zap_cursor_fini(&zc); 1356 1357 if (err) { 1358 dsl_dir_close(dd, FTAG); 1359 kmem_free(attr, sizeof (zap_attribute_t)); 1360 return (err); 1361 } 1362 } 1363 1364 /* 1365 * Iterate over all snapshots. 1366 */ 1367 if (flags & DS_FIND_SNAPSHOTS) { 1368 if (!dsl_pool_sync_context(dp)) 1369 rw_enter(&dp->dp_config_rwlock, RW_READER); 1370 err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); 1371 if (!dsl_pool_sync_context(dp)) 1372 rw_exit(&dp->dp_config_rwlock); 1373 1374 if (err == 0) { 1375 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 1376 dsl_dataset_rele(ds, FTAG); 1377 1378 for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); 1379 zap_cursor_retrieve(&zc, attr) == 0; 1380 (void) zap_cursor_advance(&zc)) { 1381 ASSERT(attr->za_integer_length == 1382 sizeof (uint64_t)); 1383 ASSERT(attr->za_num_integers == 1); 1384 1385 child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1386 (void) strcpy(child, name); 1387 (void) strcat(child, "@"); 1388 (void) strcat(child, attr->za_name); 1389 err = func(spa, attr->za_first_integer, 1390 child, arg); 1391 kmem_free(child, MAXPATHLEN); 1392 if (err) 1393 break; 1394 } 1395 zap_cursor_fini(&zc); 1396 } 1397 } 1398 1399 dsl_dir_close(dd, FTAG); 1400 kmem_free(attr, sizeof (zap_attribute_t)); 1401 1402 if (err) 1403 return (err); 1404 1405 /* 1406 * Apply to self if appropriate. 1407 */ 1408 err = func(spa, thisobj, name, arg); 1409 return (err); 1410 } 1411 1412 /* ARGSUSED */ 1413 int 1414 dmu_objset_prefetch(char *name, void *arg) 1415 { 1416 dsl_dataset_t *ds; 1417 1418 if (dsl_dataset_hold(name, FTAG, &ds)) 1419 return (0); 1420 1421 if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { 1422 mutex_enter(&ds->ds_opening_lock); 1423 if (!dsl_dataset_get_user_ptr(ds)) { 1424 uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; 1425 zbookmark_t zb; 1426 1427 zb.zb_objset = ds->ds_object; 1428 zb.zb_object = 0; 1429 zb.zb_level = -1; 1430 zb.zb_blkid = 0; 1431 1432 (void) arc_read_nolock(NULL, dsl_dataset_get_spa(ds), 1433 &ds->ds_phys->ds_bp, NULL, NULL, 1434 ZIO_PRIORITY_ASYNC_READ, 1435 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, 1436 &aflags, &zb); 1437 } 1438 mutex_exit(&ds->ds_opening_lock); 1439 } 1440 1441 dsl_dataset_rele(ds, FTAG); 1442 return (0); 1443 } 1444 1445 void 1446 dmu_objset_set_user(objset_t *os, void *user_ptr) 1447 { 1448 ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock)); 1449 os->os->os_user_ptr = user_ptr; 1450 } 1451 1452 void * 1453 dmu_objset_get_user(objset_t *os) 1454 { 1455 ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock)); 1456 return (os->os->os_user_ptr); 1457 } 1458