1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* Portions Copyright 2010 Robert Milkowski */ 26 27 #include <sys/cred.h> 28 #include <sys/zfs_context.h> 29 #include <sys/dmu_objset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_dataset.h> 32 #include <sys/dsl_prop.h> 33 #include <sys/dsl_pool.h> 34 #include <sys/dsl_synctask.h> 35 #include <sys/dsl_deleg.h> 36 #include <sys/dnode.h> 37 #include <sys/dbuf.h> 38 #include <sys/zvol.h> 39 #include <sys/dmu_tx.h> 40 #include <sys/zap.h> 41 #include <sys/zil.h> 42 #include <sys/dmu_impl.h> 43 #include <sys/zfs_ioctl.h> 44 #include <sys/sunddi.h> 45 #include <sys/sa.h> 46 47 spa_t * 48 dmu_objset_spa(objset_t *os) 49 { 50 return (os->os_spa); 51 } 52 53 zilog_t * 54 dmu_objset_zil(objset_t *os) 55 { 56 return (os->os_zil); 57 } 58 59 dsl_pool_t * 60 dmu_objset_pool(objset_t *os) 61 { 62 dsl_dataset_t *ds; 63 64 if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir) 65 return (ds->ds_dir->dd_pool); 66 else 67 return (spa_get_dsl(os->os_spa)); 68 } 69 70 dsl_dataset_t * 71 dmu_objset_ds(objset_t *os) 72 { 73 return (os->os_dsl_dataset); 74 } 75 76 dmu_objset_type_t 77 dmu_objset_type(objset_t *os) 78 { 79 return (os->os_phys->os_type); 80 } 81 82 void 83 dmu_objset_name(objset_t *os, char *buf) 84 { 85 dsl_dataset_name(os->os_dsl_dataset, buf); 86 } 87 88 uint64_t 89 dmu_objset_id(objset_t *os) 90 { 91 dsl_dataset_t *ds = os->os_dsl_dataset; 92 93 return (ds ? ds->ds_object : 0); 94 } 95 96 uint64_t 97 dmu_objset_syncprop(objset_t *os) 98 { 99 return (os->os_sync); 100 } 101 102 uint64_t 103 dmu_objset_logbias(objset_t *os) 104 { 105 return (os->os_logbias); 106 } 107 108 static void 109 checksum_changed_cb(void *arg, uint64_t newval) 110 { 111 objset_t *os = arg; 112 113 /* 114 * Inheritance should have been done by now. 115 */ 116 ASSERT(newval != ZIO_CHECKSUM_INHERIT); 117 118 os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 119 } 120 121 static void 122 compression_changed_cb(void *arg, uint64_t newval) 123 { 124 objset_t *os = arg; 125 126 /* 127 * Inheritance and range checking should have been done by now. 128 */ 129 ASSERT(newval != ZIO_COMPRESS_INHERIT); 130 131 os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); 132 } 133 134 static void 135 copies_changed_cb(void *arg, uint64_t newval) 136 { 137 objset_t *os = arg; 138 139 /* 140 * Inheritance and range checking should have been done by now. 141 */ 142 ASSERT(newval > 0); 143 ASSERT(newval <= spa_max_replication(os->os_spa)); 144 145 os->os_copies = newval; 146 } 147 148 static void 149 dedup_changed_cb(void *arg, uint64_t newval) 150 { 151 objset_t *os = arg; 152 spa_t *spa = os->os_spa; 153 enum zio_checksum checksum; 154 155 /* 156 * Inheritance should have been done by now. 157 */ 158 ASSERT(newval != ZIO_CHECKSUM_INHERIT); 159 160 checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF); 161 162 os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK; 163 os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY); 164 } 165 166 static void 167 primary_cache_changed_cb(void *arg, uint64_t newval) 168 { 169 objset_t *os = arg; 170 171 /* 172 * Inheritance and range checking should have been done by now. 173 */ 174 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || 175 newval == ZFS_CACHE_METADATA); 176 177 os->os_primary_cache = newval; 178 } 179 180 static void 181 secondary_cache_changed_cb(void *arg, uint64_t newval) 182 { 183 objset_t *os = arg; 184 185 /* 186 * Inheritance and range checking should have been done by now. 187 */ 188 ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || 189 newval == ZFS_CACHE_METADATA); 190 191 os->os_secondary_cache = newval; 192 } 193 194 static void 195 sync_changed_cb(void *arg, uint64_t newval) 196 { 197 objset_t *os = arg; 198 199 /* 200 * Inheritance and range checking should have been done by now. 201 */ 202 ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS || 203 newval == ZFS_SYNC_DISABLED); 204 205 os->os_sync = newval; 206 if (os->os_zil) 207 zil_set_sync(os->os_zil, newval); 208 } 209 210 static void 211 logbias_changed_cb(void *arg, uint64_t newval) 212 { 213 objset_t *os = arg; 214 215 ASSERT(newval == ZFS_LOGBIAS_LATENCY || 216 newval == ZFS_LOGBIAS_THROUGHPUT); 217 os->os_logbias = newval; 218 if (os->os_zil) 219 zil_set_logbias(os->os_zil, newval); 220 } 221 222 void 223 dmu_objset_byteswap(void *buf, size_t size) 224 { 225 objset_phys_t *osp = buf; 226 227 ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t)); 228 dnode_byteswap(&osp->os_meta_dnode); 229 byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 230 osp->os_type = BSWAP_64(osp->os_type); 231 osp->os_flags = BSWAP_64(osp->os_flags); 232 if (size == sizeof (objset_phys_t)) { 233 dnode_byteswap(&osp->os_userused_dnode); 234 dnode_byteswap(&osp->os_groupused_dnode); 235 } 236 } 237 238 int 239 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 240 objset_t **osp) 241 { 242 objset_t *os; 243 int i, err; 244 245 ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); 246 247 os = kmem_zalloc(sizeof (objset_t), KM_SLEEP); 248 os->os_dsl_dataset = ds; 249 os->os_spa = spa; 250 os->os_rootbp = bp; 251 if (!BP_IS_HOLE(os->os_rootbp)) { 252 uint32_t aflags = ARC_WAIT; 253 zbookmark_t zb; 254 SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, 255 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 256 257 if (DMU_OS_IS_L2CACHEABLE(os)) 258 aflags |= ARC_L2CACHE; 259 260 dprintf_bp(os->os_rootbp, "reading %s", ""); 261 /* 262 * XXX when bprewrite scrub can change the bp, 263 * and this is called from dmu_objset_open_ds_os, the bp 264 * could change, and we'll need a lock. 265 */ 266 err = dsl_read_nolock(NULL, spa, os->os_rootbp, 267 arc_getbuf_func, &os->os_phys_buf, 268 ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); 269 if (err) { 270 kmem_free(os, sizeof (objset_t)); 271 /* convert checksum errors into IO errors */ 272 if (err == ECKSUM) 273 err = EIO; 274 return (err); 275 } 276 277 /* Increase the blocksize if we are permitted. */ 278 if (spa_version(spa) >= SPA_VERSION_USERSPACE && 279 arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { 280 arc_buf_t *buf = arc_buf_alloc(spa, 281 sizeof (objset_phys_t), &os->os_phys_buf, 282 ARC_BUFC_METADATA); 283 bzero(buf->b_data, sizeof (objset_phys_t)); 284 bcopy(os->os_phys_buf->b_data, buf->b_data, 285 arc_buf_size(os->os_phys_buf)); 286 (void) arc_buf_remove_ref(os->os_phys_buf, 287 &os->os_phys_buf); 288 os->os_phys_buf = buf; 289 } 290 291 os->os_phys = os->os_phys_buf->b_data; 292 os->os_flags = os->os_phys->os_flags; 293 } else { 294 int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? 295 sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; 296 os->os_phys_buf = arc_buf_alloc(spa, size, 297 &os->os_phys_buf, ARC_BUFC_METADATA); 298 os->os_phys = os->os_phys_buf->b_data; 299 bzero(os->os_phys, size); 300 } 301 302 /* 303 * Note: the changed_cb will be called once before the register 304 * func returns, thus changing the checksum/compression from the 305 * default (fletcher2/off). Snapshots don't need to know about 306 * checksum/compression/copies. 307 */ 308 if (ds) { 309 err = dsl_prop_register(ds, "primarycache", 310 primary_cache_changed_cb, os); 311 if (err == 0) 312 err = dsl_prop_register(ds, "secondarycache", 313 secondary_cache_changed_cb, os); 314 if (!dsl_dataset_is_snapshot(ds)) { 315 if (err == 0) 316 err = dsl_prop_register(ds, "checksum", 317 checksum_changed_cb, os); 318 if (err == 0) 319 err = dsl_prop_register(ds, "compression", 320 compression_changed_cb, os); 321 if (err == 0) 322 err = dsl_prop_register(ds, "copies", 323 copies_changed_cb, os); 324 if (err == 0) 325 err = dsl_prop_register(ds, "dedup", 326 dedup_changed_cb, os); 327 if (err == 0) 328 err = dsl_prop_register(ds, "logbias", 329 logbias_changed_cb, os); 330 if (err == 0) 331 err = dsl_prop_register(ds, "sync", 332 sync_changed_cb, os); 333 } 334 if (err) { 335 VERIFY(arc_buf_remove_ref(os->os_phys_buf, 336 &os->os_phys_buf) == 1); 337 kmem_free(os, sizeof (objset_t)); 338 return (err); 339 } 340 } else if (ds == NULL) { 341 /* It's the meta-objset. */ 342 os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 343 os->os_compress = ZIO_COMPRESS_LZJB; 344 os->os_copies = spa_max_replication(spa); 345 os->os_dedup_checksum = ZIO_CHECKSUM_OFF; 346 os->os_dedup_verify = 0; 347 os->os_logbias = 0; 348 os->os_sync = 0; 349 os->os_primary_cache = ZFS_CACHE_ALL; 350 os->os_secondary_cache = ZFS_CACHE_ALL; 351 } 352 353 os->os_zil_header = os->os_phys->os_zil_header; 354 os->os_zil = zil_alloc(os, &os->os_zil_header); 355 356 for (i = 0; i < TXG_SIZE; i++) { 357 list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), 358 offsetof(dnode_t, dn_dirty_link[i])); 359 list_create(&os->os_free_dnodes[i], sizeof (dnode_t), 360 offsetof(dnode_t, dn_dirty_link[i])); 361 } 362 list_create(&os->os_dnodes, sizeof (dnode_t), 363 offsetof(dnode_t, dn_link)); 364 list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 365 offsetof(dmu_buf_impl_t, db_link)); 366 367 mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); 368 mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); 369 mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); 370 371 os->os_meta_dnode = dnode_special_open(os, 372 &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); 373 if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { 374 os->os_userused_dnode = dnode_special_open(os, 375 &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT); 376 os->os_groupused_dnode = dnode_special_open(os, 377 &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT); 378 } 379 380 /* 381 * We should be the only thread trying to do this because we 382 * have ds_opening_lock 383 */ 384 if (ds) { 385 mutex_enter(&ds->ds_lock); 386 ASSERT(ds->ds_objset == NULL); 387 ds->ds_objset = os; 388 mutex_exit(&ds->ds_lock); 389 } 390 391 *osp = os; 392 return (0); 393 } 394 395 int 396 dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) 397 { 398 int err = 0; 399 400 mutex_enter(&ds->ds_opening_lock); 401 *osp = ds->ds_objset; 402 if (*osp == NULL) { 403 err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), 404 ds, &ds->ds_phys->ds_bp, osp); 405 } 406 mutex_exit(&ds->ds_opening_lock); 407 return (err); 408 } 409 410 /* called from zpl */ 411 int 412 dmu_objset_hold(const char *name, void *tag, objset_t **osp) 413 { 414 dsl_dataset_t *ds; 415 int err; 416 417 err = dsl_dataset_hold(name, tag, &ds); 418 if (err) 419 return (err); 420 421 err = dmu_objset_from_ds(ds, osp); 422 if (err) 423 dsl_dataset_rele(ds, tag); 424 425 return (err); 426 } 427 428 /* called from zpl */ 429 int 430 dmu_objset_own(const char *name, dmu_objset_type_t type, 431 boolean_t readonly, void *tag, objset_t **osp) 432 { 433 dsl_dataset_t *ds; 434 int err; 435 436 err = dsl_dataset_own(name, B_FALSE, tag, &ds); 437 if (err) 438 return (err); 439 440 err = dmu_objset_from_ds(ds, osp); 441 if (err) { 442 dsl_dataset_disown(ds, tag); 443 } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { 444 dmu_objset_disown(*osp, tag); 445 return (EINVAL); 446 } else if (!readonly && dsl_dataset_is_snapshot(ds)) { 447 dmu_objset_disown(*osp, tag); 448 return (EROFS); 449 } 450 return (err); 451 } 452 453 void 454 dmu_objset_rele(objset_t *os, void *tag) 455 { 456 dsl_dataset_rele(os->os_dsl_dataset, tag); 457 } 458 459 void 460 dmu_objset_disown(objset_t *os, void *tag) 461 { 462 dsl_dataset_disown(os->os_dsl_dataset, tag); 463 } 464 465 int 466 dmu_objset_evict_dbufs(objset_t *os) 467 { 468 dnode_t *dn; 469 470 mutex_enter(&os->os_lock); 471 472 /* process the mdn last, since the other dnodes have holds on it */ 473 list_remove(&os->os_dnodes, os->os_meta_dnode); 474 list_insert_tail(&os->os_dnodes, os->os_meta_dnode); 475 476 /* 477 * Find the first dnode with holds. We have to do this dance 478 * because dnode_add_ref() only works if you already have a 479 * hold. If there are no holds then it has no dbufs so OK to 480 * skip. 481 */ 482 for (dn = list_head(&os->os_dnodes); 483 dn && !dnode_add_ref(dn, FTAG); 484 dn = list_next(&os->os_dnodes, dn)) 485 continue; 486 487 while (dn) { 488 dnode_t *next_dn = dn; 489 490 do { 491 next_dn = list_next(&os->os_dnodes, next_dn); 492 } while (next_dn && !dnode_add_ref(next_dn, FTAG)); 493 494 mutex_exit(&os->os_lock); 495 dnode_evict_dbufs(dn); 496 dnode_rele(dn, FTAG); 497 mutex_enter(&os->os_lock); 498 dn = next_dn; 499 } 500 mutex_exit(&os->os_lock); 501 return (list_head(&os->os_dnodes) != os->os_meta_dnode); 502 } 503 504 void 505 dmu_objset_evict(objset_t *os) 506 { 507 dsl_dataset_t *ds = os->os_dsl_dataset; 508 509 for (int t = 0; t < TXG_SIZE; t++) 510 ASSERT(!dmu_objset_is_dirty(os, t)); 511 512 if (ds) { 513 if (!dsl_dataset_is_snapshot(ds)) { 514 VERIFY(0 == dsl_prop_unregister(ds, "checksum", 515 checksum_changed_cb, os)); 516 VERIFY(0 == dsl_prop_unregister(ds, "compression", 517 compression_changed_cb, os)); 518 VERIFY(0 == dsl_prop_unregister(ds, "copies", 519 copies_changed_cb, os)); 520 VERIFY(0 == dsl_prop_unregister(ds, "dedup", 521 dedup_changed_cb, os)); 522 VERIFY(0 == dsl_prop_unregister(ds, "logbias", 523 logbias_changed_cb, os)); 524 VERIFY(0 == dsl_prop_unregister(ds, "sync", 525 sync_changed_cb, os)); 526 } 527 VERIFY(0 == dsl_prop_unregister(ds, "primarycache", 528 primary_cache_changed_cb, os)); 529 VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", 530 secondary_cache_changed_cb, os)); 531 } 532 533 if (os->os_sa) 534 sa_tear_down(os); 535 536 /* 537 * We should need only a single pass over the dnode list, since 538 * nothing can be added to the list at this point. 539 */ 540 (void) dmu_objset_evict_dbufs(os); 541 542 dnode_special_close(os->os_meta_dnode); 543 if (os->os_userused_dnode) { 544 dnode_special_close(os->os_userused_dnode); 545 dnode_special_close(os->os_groupused_dnode); 546 } 547 zil_free(os->os_zil); 548 549 ASSERT3P(list_head(&os->os_dnodes), ==, NULL); 550 551 VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); 552 mutex_destroy(&os->os_lock); 553 mutex_destroy(&os->os_obj_lock); 554 mutex_destroy(&os->os_user_ptr_lock); 555 kmem_free(os, sizeof (objset_t)); 556 } 557 558 timestruc_t 559 dmu_objset_snap_cmtime(objset_t *os) 560 { 561 return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir)); 562 } 563 564 /* called from dsl for meta-objset */ 565 objset_t * 566 dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 567 dmu_objset_type_t type, dmu_tx_t *tx) 568 { 569 objset_t *os; 570 dnode_t *mdn; 571 572 ASSERT(dmu_tx_is_syncing(tx)); 573 if (ds) 574 mutex_enter(&ds->ds_opening_lock); 575 VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &os)); 576 if (ds) 577 mutex_exit(&ds->ds_opening_lock); 578 mdn = os->os_meta_dnode; 579 580 dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 581 DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 582 583 /* 584 * We don't want to have to increase the meta-dnode's nlevels 585 * later, because then we could do it in quescing context while 586 * we are also accessing it in open context. 587 * 588 * This precaution is not necessary for the MOS (ds == NULL), 589 * because the MOS is only updated in syncing context. 590 * This is most fortunate: the MOS is the only objset that 591 * needs to be synced multiple times as spa_sync() iterates 592 * to convergence, so minimizing its dn_nlevels matters. 593 */ 594 if (ds != NULL) { 595 int levels = 1; 596 597 /* 598 * Determine the number of levels necessary for the meta-dnode 599 * to contain DN_MAX_OBJECT dnodes. 600 */ 601 while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + 602 (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < 603 DN_MAX_OBJECT * sizeof (dnode_phys_t)) 604 levels++; 605 606 mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 607 mdn->dn_nlevels = levels; 608 } 609 610 ASSERT(type != DMU_OST_NONE); 611 ASSERT(type != DMU_OST_ANY); 612 ASSERT(type < DMU_OST_NUMTYPES); 613 os->os_phys->os_type = type; 614 if (dmu_objset_userused_enabled(os)) { 615 os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; 616 os->os_flags = os->os_phys->os_flags; 617 } 618 619 dsl_dataset_dirty(ds, tx); 620 621 return (os); 622 } 623 624 struct oscarg { 625 void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); 626 void *userarg; 627 dsl_dataset_t *clone_origin; 628 const char *lastname; 629 dmu_objset_type_t type; 630 uint64_t flags; 631 cred_t *cr; 632 }; 633 634 /*ARGSUSED*/ 635 static int 636 dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) 637 { 638 dsl_dir_t *dd = arg1; 639 struct oscarg *oa = arg2; 640 objset_t *mos = dd->dd_pool->dp_meta_objset; 641 int err; 642 uint64_t ddobj; 643 644 err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 645 oa->lastname, sizeof (uint64_t), 1, &ddobj); 646 if (err != ENOENT) 647 return (err ? err : EEXIST); 648 649 if (oa->clone_origin != NULL) { 650 /* You can't clone across pools. */ 651 if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool) 652 return (EXDEV); 653 654 /* You can only clone snapshots, not the head datasets. */ 655 if (!dsl_dataset_is_snapshot(oa->clone_origin)) 656 return (EINVAL); 657 } 658 659 return (0); 660 } 661 662 static void 663 dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) 664 { 665 dsl_dir_t *dd = arg1; 666 struct oscarg *oa = arg2; 667 uint64_t dsobj; 668 669 ASSERT(dmu_tx_is_syncing(tx)); 670 671 dsobj = dsl_dataset_create_sync(dd, oa->lastname, 672 oa->clone_origin, oa->flags, oa->cr, tx); 673 674 if (oa->clone_origin == NULL) { 675 dsl_dataset_t *ds; 676 blkptr_t *bp; 677 objset_t *os; 678 679 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, 680 FTAG, &ds)); 681 bp = dsl_dataset_get_blkptr(ds); 682 ASSERT(BP_IS_HOLE(bp)); 683 684 os = dmu_objset_create_impl(dsl_dataset_get_spa(ds), 685 ds, bp, oa->type, tx); 686 687 if (oa->userfunc) 688 oa->userfunc(os, oa->userarg, oa->cr, tx); 689 dsl_dataset_rele(ds, FTAG); 690 } 691 692 spa_history_log_internal(LOG_DS_CREATE, dd->dd_pool->dp_spa, 693 tx, "dataset = %llu", dsobj); 694 } 695 696 int 697 dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, 698 void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) 699 { 700 dsl_dir_t *pdd; 701 const char *tail; 702 int err = 0; 703 struct oscarg oa = { 0 }; 704 705 ASSERT(strchr(name, '@') == NULL); 706 err = dsl_dir_open(name, FTAG, &pdd, &tail); 707 if (err) 708 return (err); 709 if (tail == NULL) { 710 dsl_dir_close(pdd, FTAG); 711 return (EEXIST); 712 } 713 714 oa.userfunc = func; 715 oa.userarg = arg; 716 oa.lastname = tail; 717 oa.type = type; 718 oa.flags = flags; 719 oa.cr = CRED(); 720 721 err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, 722 dmu_objset_create_sync, pdd, &oa, 5); 723 dsl_dir_close(pdd, FTAG); 724 return (err); 725 } 726 727 int 728 dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags) 729 { 730 dsl_dir_t *pdd; 731 const char *tail; 732 int err = 0; 733 struct oscarg oa = { 0 }; 734 735 ASSERT(strchr(name, '@') == NULL); 736 err = dsl_dir_open(name, FTAG, &pdd, &tail); 737 if (err) 738 return (err); 739 if (tail == NULL) { 740 dsl_dir_close(pdd, FTAG); 741 return (EEXIST); 742 } 743 744 oa.lastname = tail; 745 oa.clone_origin = clone_origin; 746 oa.flags = flags; 747 oa.cr = CRED(); 748 749 err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, 750 dmu_objset_create_sync, pdd, &oa, 5); 751 dsl_dir_close(pdd, FTAG); 752 return (err); 753 } 754 755 int 756 dmu_objset_destroy(const char *name, boolean_t defer) 757 { 758 dsl_dataset_t *ds; 759 int error; 760 761 /* 762 * dsl_dataset_destroy() can free any claimed-but-unplayed 763 * intent log, but if there is an active log, it has blocks that 764 * are allocated, but may not yet be reflected in the on-disk 765 * structure. Only the ZIL knows how to free them, so we have 766 * to call into it here. 767 */ 768 error = dsl_dataset_own(name, B_TRUE, FTAG, &ds); 769 if (error == 0) { 770 objset_t *os; 771 if (dmu_objset_from_ds(ds, &os) == 0) 772 zil_destroy(dmu_objset_zil(os), B_FALSE); 773 error = dsl_dataset_destroy(ds, FTAG, defer); 774 /* dsl_dataset_destroy() closes the ds. */ 775 } 776 777 return (error); 778 } 779 780 struct snaparg { 781 dsl_sync_task_group_t *dstg; 782 char *snapname; 783 char failed[MAXPATHLEN]; 784 boolean_t recursive; 785 nvlist_t *props; 786 }; 787 788 static int 789 snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 790 { 791 objset_t *os = arg1; 792 struct snaparg *sn = arg2; 793 794 /* The props have already been checked by zfs_check_userprops(). */ 795 796 return (dsl_dataset_snapshot_check(os->os_dsl_dataset, 797 sn->snapname, tx)); 798 } 799 800 static void 801 snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) 802 { 803 objset_t *os = arg1; 804 dsl_dataset_t *ds = os->os_dsl_dataset; 805 struct snaparg *sn = arg2; 806 807 dsl_dataset_snapshot_sync(ds, sn->snapname, tx); 808 809 if (sn->props) { 810 dsl_props_arg_t pa; 811 pa.pa_props = sn->props; 812 pa.pa_source = ZPROP_SRC_LOCAL; 813 dsl_props_set_sync(ds->ds_prev, &pa, tx); 814 } 815 } 816 817 static int 818 dmu_objset_snapshot_one(const char *name, void *arg) 819 { 820 struct snaparg *sn = arg; 821 objset_t *os; 822 int err; 823 char *cp; 824 825 /* 826 * If the objset starts with a '%', then ignore it unless it was 827 * explicitly named (ie, not recursive). These hidden datasets 828 * are always inconsistent, and by not opening them here, we can 829 * avoid a race with dsl_dir_destroy_check(). 830 */ 831 cp = strrchr(name, '/'); 832 if (cp && cp[1] == '%' && sn->recursive) 833 return (0); 834 835 (void) strcpy(sn->failed, name); 836 837 /* 838 * Check permissions if we are doing a recursive snapshot. The 839 * permission checks for the starting dataset have already been 840 * performed in zfs_secpolicy_snapshot() 841 */ 842 if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) 843 return (err); 844 845 err = dmu_objset_hold(name, sn, &os); 846 if (err != 0) 847 return (err); 848 849 /* 850 * If the objset is in an inconsistent state (eg, in the process 851 * of being destroyed), don't snapshot it. As with %hidden 852 * datasets, we return EBUSY if this name was explicitly 853 * requested (ie, not recursive), and otherwise ignore it. 854 */ 855 if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { 856 dmu_objset_rele(os, sn); 857 return (sn->recursive ? 0 : EBUSY); 858 } 859 860 /* 861 * NB: we need to wait for all in-flight changes to get to disk, 862 * so that we snapshot those changes. zil_suspend does this as 863 * a side effect. 864 */ 865 err = zil_suspend(dmu_objset_zil(os)); 866 if (err == 0) { 867 dsl_sync_task_create(sn->dstg, snapshot_check, 868 snapshot_sync, os, sn, 3); 869 } else { 870 dmu_objset_rele(os, sn); 871 } 872 873 return (err); 874 } 875 876 int 877 dmu_objset_snapshot(char *fsname, char *snapname, 878 nvlist_t *props, boolean_t recursive) 879 { 880 dsl_sync_task_t *dst; 881 struct snaparg sn; 882 spa_t *spa; 883 int err; 884 885 (void) strcpy(sn.failed, fsname); 886 887 err = spa_open(fsname, &spa, FTAG); 888 if (err) 889 return (err); 890 891 sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 892 sn.snapname = snapname; 893 sn.props = props; 894 sn.recursive = recursive; 895 896 if (recursive) { 897 err = dmu_objset_find(fsname, 898 dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); 899 } else { 900 err = dmu_objset_snapshot_one(fsname, &sn); 901 } 902 903 if (err == 0) 904 err = dsl_sync_task_group_wait(sn.dstg); 905 906 for (dst = list_head(&sn.dstg->dstg_tasks); dst; 907 dst = list_next(&sn.dstg->dstg_tasks, dst)) { 908 objset_t *os = dst->dst_arg1; 909 dsl_dataset_t *ds = os->os_dsl_dataset; 910 if (dst->dst_err) 911 dsl_dataset_name(ds, sn.failed); 912 zil_resume(dmu_objset_zil(os)); 913 dmu_objset_rele(os, &sn); 914 } 915 916 if (err) 917 (void) strcpy(fsname, sn.failed); 918 dsl_sync_task_group_destroy(sn.dstg); 919 spa_close(spa, FTAG); 920 return (err); 921 } 922 923 static void 924 dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) 925 { 926 dnode_t *dn; 927 928 while (dn = list_head(list)) { 929 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 930 ASSERT(dn->dn_dbuf->db_data_pending); 931 /* 932 * Initialize dn_zio outside dnode_sync() because the 933 * meta-dnode needs to set it ouside dnode_sync(). 934 */ 935 dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio; 936 ASSERT(dn->dn_zio); 937 938 ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS); 939 list_remove(list, dn); 940 941 if (newlist) { 942 (void) dnode_add_ref(dn, newlist); 943 list_insert_tail(newlist, dn); 944 } 945 946 dnode_sync(dn, tx); 947 } 948 } 949 950 /* ARGSUSED */ 951 static void 952 dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) 953 { 954 blkptr_t *bp = zio->io_bp; 955 objset_t *os = arg; 956 dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; 957 958 ASSERT(bp == os->os_rootbp); 959 ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET); 960 ASSERT(BP_GET_LEVEL(bp) == 0); 961 962 /* 963 * Update rootbp fill count: it should be the number of objects 964 * allocated in the object set (not counting the "special" 965 * objects that are stored in the objset_phys_t -- the meta 966 * dnode and user/group accounting objects). 967 */ 968 bp->blk_fill = 0; 969 for (int i = 0; i < dnp->dn_nblkptr; i++) 970 bp->blk_fill += dnp->dn_blkptr[i].blk_fill; 971 } 972 973 /* ARGSUSED */ 974 static void 975 dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg) 976 { 977 blkptr_t *bp = zio->io_bp; 978 blkptr_t *bp_orig = &zio->io_bp_orig; 979 objset_t *os = arg; 980 981 if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { 982 ASSERT(BP_EQUAL(bp, bp_orig)); 983 } else { 984 dsl_dataset_t *ds = os->os_dsl_dataset; 985 dmu_tx_t *tx = os->os_synctx; 986 987 (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE); 988 dsl_dataset_block_born(ds, bp, tx); 989 } 990 } 991 992 /* called from dsl */ 993 void 994 dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) 995 { 996 int txgoff; 997 zbookmark_t zb; 998 zio_prop_t zp; 999 zio_t *zio; 1000 list_t *list; 1001 list_t *newlist = NULL; 1002 dbuf_dirty_record_t *dr; 1003 1004 dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 1005 1006 ASSERT(dmu_tx_is_syncing(tx)); 1007 /* XXX the write_done callback should really give us the tx... */ 1008 os->os_synctx = tx; 1009 1010 if (os->os_dsl_dataset == NULL) { 1011 /* 1012 * This is the MOS. If we have upgraded, 1013 * spa_max_replication() could change, so reset 1014 * os_copies here. 1015 */ 1016 os->os_copies = spa_max_replication(os->os_spa); 1017 } 1018 1019 /* 1020 * Create the root block IO 1021 */ 1022 SET_BOOKMARK(&zb, os->os_dsl_dataset ? 1023 os->os_dsl_dataset->ds_object : DMU_META_OBJSET, 1024 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 1025 VERIFY3U(0, ==, arc_release_bp(os->os_phys_buf, &os->os_phys_buf, 1026 os->os_rootbp, os->os_spa, &zb)); 1027 1028 dmu_write_policy(os, NULL, 0, 0, &zp); 1029 1030 zio = arc_write(pio, os->os_spa, tx->tx_txg, 1031 os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp, 1032 dmu_objset_write_ready, dmu_objset_write_done, os, 1033 ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); 1034 1035 /* 1036 * Sync special dnodes - the parent IO for the sync is the root block 1037 */ 1038 os->os_meta_dnode->dn_zio = zio; 1039 dnode_sync(os->os_meta_dnode, tx); 1040 1041 os->os_phys->os_flags = os->os_flags; 1042 1043 if (os->os_userused_dnode && 1044 os->os_userused_dnode->dn_type != DMU_OT_NONE) { 1045 os->os_userused_dnode->dn_zio = zio; 1046 dnode_sync(os->os_userused_dnode, tx); 1047 os->os_groupused_dnode->dn_zio = zio; 1048 dnode_sync(os->os_groupused_dnode, tx); 1049 } 1050 1051 txgoff = tx->tx_txg & TXG_MASK; 1052 1053 if (dmu_objset_userused_enabled(os)) { 1054 newlist = &os->os_synced_dnodes; 1055 /* 1056 * We must create the list here because it uses the 1057 * dn_dirty_link[] of this txg. 1058 */ 1059 list_create(newlist, sizeof (dnode_t), 1060 offsetof(dnode_t, dn_dirty_link[txgoff])); 1061 } 1062 1063 dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx); 1064 dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); 1065 1066 list = &os->os_meta_dnode->dn_dirty_records[txgoff]; 1067 while (dr = list_head(list)) { 1068 ASSERT(dr->dr_dbuf->db_level == 0); 1069 list_remove(list, dr); 1070 if (dr->dr_zio) 1071 zio_nowait(dr->dr_zio); 1072 } 1073 /* 1074 * Free intent log blocks up to this tx. 1075 */ 1076 zil_sync(os->os_zil, tx); 1077 os->os_phys->os_zil_header = os->os_zil_header; 1078 zio_nowait(zio); 1079 } 1080 1081 boolean_t 1082 dmu_objset_is_dirty(objset_t *os, uint64_t txg) 1083 { 1084 return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) || 1085 !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK])); 1086 } 1087 1088 objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES]; 1089 1090 void 1091 dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb) 1092 { 1093 used_cbs[ost] = cb; 1094 } 1095 1096 boolean_t 1097 dmu_objset_userused_enabled(objset_t *os) 1098 { 1099 return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE && 1100 used_cbs[os->os_phys->os_type] && 1101 os->os_userused_dnode); 1102 } 1103 1104 static void 1105 do_userquota_update(objset_t *os, uint64_t used, uint64_t flags, 1106 uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx) 1107 { 1108 if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) { 1109 int64_t delta = DNODE_SIZE + used; 1110 if (subtract) 1111 delta = -delta; 1112 VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT, 1113 user, delta, tx)); 1114 VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT, 1115 group, delta, tx)); 1116 } 1117 } 1118 1119 void 1120 dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) 1121 { 1122 dnode_t *dn; 1123 list_t *list = &os->os_synced_dnodes; 1124 1125 ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os)); 1126 1127 while (dn = list_head(list)) { 1128 ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object)); 1129 ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE || 1130 dn->dn_phys->dn_flags & 1131 DNODE_FLAG_USERUSED_ACCOUNTED); 1132 1133 /* Allocate the user/groupused objects if necessary. */ 1134 if (os->os_userused_dnode->dn_type == DMU_OT_NONE) { 1135 VERIFY(0 == zap_create_claim(os, 1136 DMU_USERUSED_OBJECT, 1137 DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); 1138 VERIFY(0 == zap_create_claim(os, 1139 DMU_GROUPUSED_OBJECT, 1140 DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); 1141 } 1142 1143 /* 1144 * We intentionally modify the zap object even if the 1145 * net delta is zero. Otherwise 1146 * the block of the zap obj could be shared between 1147 * datasets but need to be different between them after 1148 * a bprewrite. 1149 */ 1150 1151 /* 1152 * The mutex is needed here for interlock with dnode_allocate. 1153 */ 1154 mutex_enter(&dn->dn_mtx); 1155 ASSERT(dn->dn_id_flags); 1156 if (dn->dn_id_flags & DN_ID_OLD_EXIST) { 1157 do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags, 1158 dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx); 1159 } 1160 if (dn->dn_id_flags & DN_ID_NEW_EXIST) { 1161 do_userquota_update(os, DN_USED_BYTES(dn->dn_phys), 1162 dn->dn_phys->dn_flags, dn->dn_newuid, 1163 dn->dn_newgid, B_FALSE, tx); 1164 } 1165 1166 dn->dn_oldused = 0; 1167 dn->dn_oldflags = 0; 1168 if (dn->dn_id_flags & DN_ID_NEW_EXIST) { 1169 dn->dn_olduid = dn->dn_newuid; 1170 dn->dn_oldgid = dn->dn_newgid; 1171 dn->dn_id_flags |= DN_ID_OLD_EXIST; 1172 if (dn->dn_bonuslen == 0) 1173 dn->dn_id_flags |= DN_ID_CHKED_SPILL; 1174 else 1175 dn->dn_id_flags |= DN_ID_CHKED_BONUS; 1176 } 1177 dn->dn_id_flags &= ~(DN_ID_NEW_EXIST|DN_ID_SYNC); 1178 mutex_exit(&dn->dn_mtx); 1179 1180 list_remove(list, dn); 1181 dnode_rele(dn, list); 1182 } 1183 } 1184 1185 /* 1186 * Returns a pointer to data to find uid/gid from 1187 * 1188 * If a dirty record for transaction group that is syncing can't 1189 * be found then NULL is returned. In the NULL case it is assumed 1190 * the uid/gid aren't changing. 1191 */ 1192 static void * 1193 dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx) 1194 { 1195 dbuf_dirty_record_t *dr, **drp; 1196 void *data; 1197 1198 if (db->db_dirtycnt == 0) 1199 return (db->db.db_data); /* Nothing is changing */ 1200 1201 for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) 1202 if (dr->dr_txg == tx->tx_txg) 1203 break; 1204 1205 if (dr == NULL) 1206 data = NULL; 1207 else if (dr->dr_dbuf->db_dnode->dn_bonuslen == 0 && 1208 dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID) 1209 data = dr->dt.dl.dr_data->b_data; 1210 else 1211 data = dr->dt.dl.dr_data; 1212 return (data); 1213 } 1214 1215 void 1216 dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx) 1217 { 1218 objset_t *os = dn->dn_objset; 1219 void *data = NULL; 1220 dmu_buf_impl_t *db = NULL; 1221 uint64_t *user, *group; 1222 int flags = dn->dn_id_flags; 1223 int error; 1224 boolean_t have_spill = B_FALSE; 1225 1226 if (!dmu_objset_userused_enabled(dn->dn_objset)) 1227 return; 1228 1229 if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST| 1230 DN_ID_CHKED_SPILL))) 1231 return; 1232 1233 if (before && dn->dn_bonuslen != 0) 1234 data = DN_BONUS(dn->dn_phys); 1235 else if (!before && dn->dn_bonuslen != 0) { 1236 if (dn->dn_bonus) { 1237 db = dn->dn_bonus; 1238 mutex_enter(&db->db_mtx); 1239 data = dmu_objset_userquota_find_data(db, tx); 1240 } else { 1241 data = DN_BONUS(dn->dn_phys); 1242 } 1243 } else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) { 1244 int rf = 0; 1245 1246 if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) 1247 rf |= DB_RF_HAVESTRUCT; 1248 error = dmu_spill_hold_by_dnode(dn, rf, 1249 FTAG, (dmu_buf_t **)&db); 1250 ASSERT(error == 0); 1251 mutex_enter(&db->db_mtx); 1252 data = (before) ? db->db.db_data : 1253 dmu_objset_userquota_find_data(db, tx); 1254 have_spill = B_TRUE; 1255 } else { 1256 mutex_enter(&dn->dn_mtx); 1257 dn->dn_id_flags |= DN_ID_CHKED_BONUS; 1258 mutex_exit(&dn->dn_mtx); 1259 return; 1260 } 1261 1262 if (before) { 1263 ASSERT(data); 1264 user = &dn->dn_olduid; 1265 group = &dn->dn_oldgid; 1266 } else if (data) { 1267 user = &dn->dn_newuid; 1268 group = &dn->dn_newgid; 1269 } 1270 1271 /* 1272 * Must always call the callback in case the object 1273 * type has changed and that type isn't an object type to track 1274 */ 1275 error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data, 1276 user, group); 1277 1278 /* 1279 * Preserve existing uid/gid when the callback can't determine 1280 * what the new uid/gid are and the callback returned EEXIST. 1281 * The EEXIST error tells us to just use the existing uid/gid. 1282 * If we don't know what the old values are then just assign 1283 * them to 0, since that is a new file being created. 1284 */ 1285 if (!before && data == NULL && error == EEXIST) { 1286 if (flags & DN_ID_OLD_EXIST) { 1287 dn->dn_newuid = dn->dn_olduid; 1288 dn->dn_newgid = dn->dn_oldgid; 1289 } else { 1290 dn->dn_newuid = 0; 1291 dn->dn_newgid = 0; 1292 } 1293 error = 0; 1294 } 1295 1296 if (db) 1297 mutex_exit(&db->db_mtx); 1298 1299 mutex_enter(&dn->dn_mtx); 1300 if (error == 0 && before) 1301 dn->dn_id_flags |= DN_ID_OLD_EXIST; 1302 if (error == 0 && !before) 1303 dn->dn_id_flags |= DN_ID_NEW_EXIST; 1304 1305 if (have_spill) { 1306 dn->dn_id_flags |= DN_ID_CHKED_SPILL; 1307 } else { 1308 dn->dn_id_flags |= DN_ID_CHKED_BONUS; 1309 } 1310 mutex_exit(&dn->dn_mtx); 1311 if (have_spill) 1312 dmu_buf_rele((dmu_buf_t *)db, FTAG); 1313 } 1314 1315 boolean_t 1316 dmu_objset_userspace_present(objset_t *os) 1317 { 1318 return (os->os_phys->os_flags & 1319 OBJSET_FLAG_USERACCOUNTING_COMPLETE); 1320 } 1321 1322 int 1323 dmu_objset_userspace_upgrade(objset_t *os) 1324 { 1325 uint64_t obj; 1326 int err = 0; 1327 1328 if (dmu_objset_userspace_present(os)) 1329 return (0); 1330 if (!dmu_objset_userused_enabled(os)) 1331 return (ENOTSUP); 1332 if (dmu_objset_is_snapshot(os)) 1333 return (EINVAL); 1334 1335 /* 1336 * We simply need to mark every object dirty, so that it will be 1337 * synced out and now accounted. If this is called 1338 * concurrently, or if we already did some work before crashing, 1339 * that's fine, since we track each object's accounted state 1340 * independently. 1341 */ 1342 1343 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { 1344 dmu_tx_t *tx; 1345 dmu_buf_t *db; 1346 int objerr; 1347 1348 if (issig(JUSTLOOKING) && issig(FORREAL)) 1349 return (EINTR); 1350 1351 objerr = dmu_bonus_hold(os, obj, FTAG, &db); 1352 if (objerr) 1353 continue; 1354 tx = dmu_tx_create(os); 1355 dmu_tx_hold_bonus(tx, obj); 1356 objerr = dmu_tx_assign(tx, TXG_WAIT); 1357 if (objerr) { 1358 dmu_tx_abort(tx); 1359 continue; 1360 } 1361 dmu_buf_will_dirty(db, tx); 1362 dmu_buf_rele(db, FTAG); 1363 dmu_tx_commit(tx); 1364 } 1365 1366 os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; 1367 txg_wait_synced(dmu_objset_pool(os), 0); 1368 return (0); 1369 } 1370 1371 void 1372 dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, 1373 uint64_t *usedobjsp, uint64_t *availobjsp) 1374 { 1375 dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp, 1376 usedobjsp, availobjsp); 1377 } 1378 1379 uint64_t 1380 dmu_objset_fsid_guid(objset_t *os) 1381 { 1382 return (dsl_dataset_fsid_guid(os->os_dsl_dataset)); 1383 } 1384 1385 void 1386 dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat) 1387 { 1388 stat->dds_type = os->os_phys->os_type; 1389 if (os->os_dsl_dataset) 1390 dsl_dataset_fast_stat(os->os_dsl_dataset, stat); 1391 } 1392 1393 void 1394 dmu_objset_stats(objset_t *os, nvlist_t *nv) 1395 { 1396 ASSERT(os->os_dsl_dataset || 1397 os->os_phys->os_type == DMU_OST_META); 1398 1399 if (os->os_dsl_dataset != NULL) 1400 dsl_dataset_stats(os->os_dsl_dataset, nv); 1401 1402 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE, 1403 os->os_phys->os_type); 1404 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING, 1405 dmu_objset_userspace_present(os)); 1406 } 1407 1408 int 1409 dmu_objset_is_snapshot(objset_t *os) 1410 { 1411 if (os->os_dsl_dataset != NULL) 1412 return (dsl_dataset_is_snapshot(os->os_dsl_dataset)); 1413 else 1414 return (B_FALSE); 1415 } 1416 1417 int 1418 dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen, 1419 boolean_t *conflict) 1420 { 1421 dsl_dataset_t *ds = os->os_dsl_dataset; 1422 uint64_t ignored; 1423 1424 if (ds->ds_phys->ds_snapnames_zapobj == 0) 1425 return (ENOENT); 1426 1427 return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset, 1428 ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST, 1429 real, maxlen, conflict)); 1430 } 1431 1432 int 1433 dmu_snapshot_list_next(objset_t *os, int namelen, char *name, 1434 uint64_t *idp, uint64_t *offp, boolean_t *case_conflict) 1435 { 1436 dsl_dataset_t *ds = os->os_dsl_dataset; 1437 zap_cursor_t cursor; 1438 zap_attribute_t attr; 1439 1440 if (ds->ds_phys->ds_snapnames_zapobj == 0) 1441 return (ENOENT); 1442 1443 zap_cursor_init_serialized(&cursor, 1444 ds->ds_dir->dd_pool->dp_meta_objset, 1445 ds->ds_phys->ds_snapnames_zapobj, *offp); 1446 1447 if (zap_cursor_retrieve(&cursor, &attr) != 0) { 1448 zap_cursor_fini(&cursor); 1449 return (ENOENT); 1450 } 1451 1452 if (strlen(attr.za_name) + 1 > namelen) { 1453 zap_cursor_fini(&cursor); 1454 return (ENAMETOOLONG); 1455 } 1456 1457 (void) strcpy(name, attr.za_name); 1458 if (idp) 1459 *idp = attr.za_first_integer; 1460 if (case_conflict) 1461 *case_conflict = attr.za_normalization_conflict; 1462 zap_cursor_advance(&cursor); 1463 *offp = zap_cursor_serialize(&cursor); 1464 zap_cursor_fini(&cursor); 1465 1466 return (0); 1467 } 1468 1469 int 1470 dmu_dir_list_next(objset_t *os, int namelen, char *name, 1471 uint64_t *idp, uint64_t *offp) 1472 { 1473 dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; 1474 zap_cursor_t cursor; 1475 zap_attribute_t attr; 1476 1477 /* there is no next dir on a snapshot! */ 1478 if (os->os_dsl_dataset->ds_object != 1479 dd->dd_phys->dd_head_dataset_obj) 1480 return (ENOENT); 1481 1482 zap_cursor_init_serialized(&cursor, 1483 dd->dd_pool->dp_meta_objset, 1484 dd->dd_phys->dd_child_dir_zapobj, *offp); 1485 1486 if (zap_cursor_retrieve(&cursor, &attr) != 0) { 1487 zap_cursor_fini(&cursor); 1488 return (ENOENT); 1489 } 1490 1491 if (strlen(attr.za_name) + 1 > namelen) { 1492 zap_cursor_fini(&cursor); 1493 return (ENAMETOOLONG); 1494 } 1495 1496 (void) strcpy(name, attr.za_name); 1497 if (idp) 1498 *idp = attr.za_first_integer; 1499 zap_cursor_advance(&cursor); 1500 *offp = zap_cursor_serialize(&cursor); 1501 zap_cursor_fini(&cursor); 1502 1503 return (0); 1504 } 1505 1506 struct findarg { 1507 int (*func)(const char *, void *); 1508 void *arg; 1509 }; 1510 1511 /* ARGSUSED */ 1512 static int 1513 findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) 1514 { 1515 struct findarg *fa = arg; 1516 return (fa->func(dsname, fa->arg)); 1517 } 1518 1519 /* 1520 * Find all objsets under name, and for each, call 'func(child_name, arg)'. 1521 * Perhaps change all callers to use dmu_objset_find_spa()? 1522 */ 1523 int 1524 dmu_objset_find(char *name, int func(const char *, void *), void *arg, 1525 int flags) 1526 { 1527 struct findarg fa; 1528 fa.func = func; 1529 fa.arg = arg; 1530 return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags)); 1531 } 1532 1533 /* 1534 * Find all objsets under name, call func on each 1535 */ 1536 int 1537 dmu_objset_find_spa(spa_t *spa, const char *name, 1538 int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags) 1539 { 1540 dsl_dir_t *dd; 1541 dsl_pool_t *dp; 1542 dsl_dataset_t *ds; 1543 zap_cursor_t zc; 1544 zap_attribute_t *attr; 1545 char *child; 1546 uint64_t thisobj; 1547 int err; 1548 1549 if (name == NULL) 1550 name = spa_name(spa); 1551 err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL); 1552 if (err) 1553 return (err); 1554 1555 /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ 1556 if (dd->dd_myname[0] == '$') { 1557 dsl_dir_close(dd, FTAG); 1558 return (0); 1559 } 1560 1561 thisobj = dd->dd_phys->dd_head_dataset_obj; 1562 attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 1563 dp = dd->dd_pool; 1564 1565 /* 1566 * Iterate over all children. 1567 */ 1568 if (flags & DS_FIND_CHILDREN) { 1569 for (zap_cursor_init(&zc, dp->dp_meta_objset, 1570 dd->dd_phys->dd_child_dir_zapobj); 1571 zap_cursor_retrieve(&zc, attr) == 0; 1572 (void) zap_cursor_advance(&zc)) { 1573 ASSERT(attr->za_integer_length == sizeof (uint64_t)); 1574 ASSERT(attr->za_num_integers == 1); 1575 1576 child = kmem_asprintf("%s/%s", name, attr->za_name); 1577 err = dmu_objset_find_spa(spa, child, func, arg, flags); 1578 strfree(child); 1579 if (err) 1580 break; 1581 } 1582 zap_cursor_fini(&zc); 1583 1584 if (err) { 1585 dsl_dir_close(dd, FTAG); 1586 kmem_free(attr, sizeof (zap_attribute_t)); 1587 return (err); 1588 } 1589 } 1590 1591 /* 1592 * Iterate over all snapshots. 1593 */ 1594 if (flags & DS_FIND_SNAPSHOTS) { 1595 if (!dsl_pool_sync_context(dp)) 1596 rw_enter(&dp->dp_config_rwlock, RW_READER); 1597 err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); 1598 if (!dsl_pool_sync_context(dp)) 1599 rw_exit(&dp->dp_config_rwlock); 1600 1601 if (err == 0) { 1602 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 1603 dsl_dataset_rele(ds, FTAG); 1604 1605 for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); 1606 zap_cursor_retrieve(&zc, attr) == 0; 1607 (void) zap_cursor_advance(&zc)) { 1608 ASSERT(attr->za_integer_length == 1609 sizeof (uint64_t)); 1610 ASSERT(attr->za_num_integers == 1); 1611 1612 child = kmem_asprintf("%s@%s", 1613 name, attr->za_name); 1614 err = func(spa, attr->za_first_integer, 1615 child, arg); 1616 strfree(child); 1617 if (err) 1618 break; 1619 } 1620 zap_cursor_fini(&zc); 1621 } 1622 } 1623 1624 dsl_dir_close(dd, FTAG); 1625 kmem_free(attr, sizeof (zap_attribute_t)); 1626 1627 if (err) 1628 return (err); 1629 1630 /* 1631 * Apply to self if appropriate. 1632 */ 1633 err = func(spa, thisobj, name, arg); 1634 return (err); 1635 } 1636 1637 /* ARGSUSED */ 1638 int 1639 dmu_objset_prefetch(const char *name, void *arg) 1640 { 1641 dsl_dataset_t *ds; 1642 1643 if (dsl_dataset_hold(name, FTAG, &ds)) 1644 return (0); 1645 1646 if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { 1647 mutex_enter(&ds->ds_opening_lock); 1648 if (ds->ds_objset == NULL) { 1649 uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; 1650 zbookmark_t zb; 1651 1652 SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT, 1653 ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 1654 1655 (void) dsl_read_nolock(NULL, dsl_dataset_get_spa(ds), 1656 &ds->ds_phys->ds_bp, NULL, NULL, 1657 ZIO_PRIORITY_ASYNC_READ, 1658 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, 1659 &aflags, &zb); 1660 } 1661 mutex_exit(&ds->ds_opening_lock); 1662 } 1663 1664 dsl_dataset_rele(ds, FTAG); 1665 return (0); 1666 } 1667 1668 void 1669 dmu_objset_set_user(objset_t *os, void *user_ptr) 1670 { 1671 ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); 1672 os->os_user_ptr = user_ptr; 1673 } 1674 1675 void * 1676 dmu_objset_get_user(objset_t *os) 1677 { 1678 ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); 1679 return (os->os_user_ptr); 1680 } 1681