1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2022 The FreeBSD Foundation 5 * 6 * This software was developed by Mark Johnston under sponsorship from 7 * the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions are 11 * met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <assert.h> 32 #include <stdlib.h> 33 #include <string.h> 34 35 #include <util.h> 36 37 #include "makefs.h" 38 #include "zfs.h" 39 40 typedef struct zfs_dsl_dataset { 41 zfs_objset_t *os; /* referenced objset, may be null */ 42 dsl_dataset_phys_t *phys; /* on-disk representation */ 43 uint64_t dsid; /* DSL dataset dnode */ 44 45 struct zfs_dsl_dir *dir; /* containing parent */ 46 } zfs_dsl_dataset_t; 47 48 typedef STAILQ_HEAD(zfs_dsl_dir_list, zfs_dsl_dir) zfs_dsl_dir_list_t; 49 50 typedef struct zfs_dsl_dir { 51 char *fullname; /* full dataset name */ 52 char *name; /* basename(fullname) */ 53 dsl_dir_phys_t *phys; /* on-disk representation */ 54 nvlist_t *propsnv; /* properties saved in propszap */ 55 56 zfs_dsl_dataset_t *headds; /* principal dataset, may be null */ 57 58 uint64_t dirid; /* DSL directory dnode */ 59 zfs_zap_t *propszap; /* dataset properties */ 60 zfs_zap_t *childzap; /* child directories */ 61 62 /* DSL directory tree linkage. */ 63 struct zfs_dsl_dir *parent; 64 zfs_dsl_dir_list_t children; 65 STAILQ_ENTRY(zfs_dsl_dir) next; 66 } zfs_dsl_dir_t; 67 68 static zfs_dsl_dir_t *dsl_dir_alloc(zfs_opt_t *zfs, const char *name); 69 static zfs_dsl_dataset_t *dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir); 70 71 static int 72 nvlist_find_string(nvlist_t *nvl, const char *key, char **retp) 73 { 74 char *str; 75 int error, len; 76 77 error = nvlist_find(nvl, key, DATA_TYPE_STRING, NULL, &str, &len); 78 if (error == 0) { 79 *retp = ecalloc(1, len + 1); 80 memcpy(*retp, str, len); 81 } 82 return (error); 83 } 84 85 static int 86 nvlist_find_uint64(nvlist_t *nvl, const char *key, uint64_t *retp) 87 { 88 return (nvlist_find(nvl, key, DATA_TYPE_UINT64, NULL, retp, NULL)); 89 } 90 91 /* 92 * Return an allocated string containing the head dataset's mountpoint, 93 * including the root path prefix. 94 * 95 * If the dataset has a mountpoint property, it is returned. Otherwise we have 96 * to follow ZFS' inheritance rules. 97 */ 98 char * 99 dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) 100 { 101 zfs_dsl_dir_t *pdir; 102 char *mountpoint, *origmountpoint; 103 104 if (nvlist_find_string(dir->propsnv, "mountpoint", &mountpoint) == 0) { 105 if (strcmp(mountpoint, "none") == 0) 106 return (NULL); 107 108 /* 109 * nvlist_find_string() does not make a copy. 110 */ 111 mountpoint = estrdup(mountpoint); 112 } else { 113 /* 114 * If we don't have a mountpoint, it's inherited from one of our 115 * ancestors. Walk up the hierarchy until we find it, building 116 * up our mountpoint along the way. The mountpoint property is 117 * always set for the root dataset. 118 */ 119 for (pdir = dir->parent, mountpoint = estrdup(dir->name);;) { 120 origmountpoint = mountpoint; 121 122 if (nvlist_find_string(pdir->propsnv, "mountpoint", 123 &mountpoint) == 0) { 124 easprintf(&mountpoint, "%s%s%s", mountpoint, 125 mountpoint[strlen(mountpoint) - 1] == '/' ? 126 "" : "/", origmountpoint); 127 free(origmountpoint); 128 break; 129 } 130 131 easprintf(&mountpoint, "%s/%s", pdir->name, 132 origmountpoint); 133 free(origmountpoint); 134 pdir = pdir->parent; 135 } 136 } 137 assert(mountpoint[0] == '/'); 138 assert(strstr(mountpoint, zfs->rootpath) == mountpoint); 139 140 return (mountpoint); 141 } 142 143 int 144 dsl_dir_get_canmount(zfs_dsl_dir_t *dir, uint64_t *canmountp) 145 { 146 return (nvlist_find_uint64(dir->propsnv, "canmount", canmountp)); 147 } 148 149 /* 150 * Handle dataset properties that we know about; stash them into an nvlist to be 151 * written later to the properties ZAP object. 152 * 153 * If the set of properties we handle grows too much, we should probably explore 154 * using libzfs to manage them. 155 */ 156 static void 157 dsl_dir_set_prop(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, const char *key, 158 const char *val) 159 { 160 nvlist_t *nvl; 161 162 nvl = dir->propsnv; 163 if (val == NULL || val[0] == '\0') 164 errx(1, "missing value for property `%s'", key); 165 if (nvpair_find(nvl, key) != NULL) 166 errx(1, "property `%s' already set", key); 167 168 if (strcmp(key, "mountpoint") == 0) { 169 if (strcmp(val, "none") != 0) { 170 if (val[0] != '/') 171 errx(1, "mountpoint `%s' is not absolute", val); 172 if (strcmp(val, zfs->rootpath) != 0 && 173 strcmp(zfs->rootpath, "/") != 0 && 174 (strstr(val, zfs->rootpath) != val || 175 val[strlen(zfs->rootpath)] != '/')) { 176 errx(1, "mountpoint `%s' is not prefixed by " 177 "the root path `%s'", val, zfs->rootpath); 178 } 179 } 180 nvlist_add_string(nvl, key, val); 181 } else if (strcmp(key, "atime") == 0 || strcmp(key, "exec") == 0 || 182 strcmp(key, "setuid") == 0) { 183 if (strcmp(val, "on") == 0) 184 nvlist_add_uint64(nvl, key, 1); 185 else if (strcmp(val, "off") == 0) 186 nvlist_add_uint64(nvl, key, 0); 187 else 188 errx(1, "invalid value `%s' for %s", val, key); 189 } else if (strcmp(key, "canmount") == 0) { 190 if (strcmp(val, "noauto") == 0) 191 nvlist_add_uint64(nvl, key, 2); 192 else if (strcmp(val, "on") == 0) 193 nvlist_add_uint64(nvl, key, 1); 194 else if (strcmp(val, "off") == 0) 195 nvlist_add_uint64(nvl, key, 0); 196 else 197 errx(1, "invalid value `%s' for %s", val, key); 198 } else { 199 errx(1, "unknown property `%s'", key); 200 } 201 } 202 203 static zfs_dsl_dir_t * 204 dsl_metadir_alloc(zfs_opt_t *zfs, const char *name) 205 { 206 zfs_dsl_dir_t *dir; 207 char *path; 208 209 easprintf(&path, "%s/%s", zfs->poolname, name); 210 dir = dsl_dir_alloc(zfs, path); 211 free(path); 212 return (dir); 213 } 214 215 static void 216 dsl_origindir_init(zfs_opt_t *zfs) 217 { 218 dnode_phys_t *clones; 219 uint64_t clonesid; 220 221 zfs->origindsldir = dsl_metadir_alloc(zfs, "$ORIGIN"); 222 zfs->originds = dsl_dataset_alloc(zfs, zfs->origindsldir); 223 zfs->snapds = dsl_dataset_alloc(zfs, zfs->origindsldir); 224 225 clones = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_CLONES, &clonesid); 226 zfs->cloneszap = zap_alloc(zfs->mos, clones); 227 zfs->origindsldir->phys->dd_clones = clonesid; 228 } 229 230 void 231 dsl_init(zfs_opt_t *zfs) 232 { 233 zfs_dsl_dir_t *dir; 234 struct dataset_desc *d; 235 const char *dspropdelim; 236 237 dspropdelim = ";"; 238 239 zfs->rootdsldir = dsl_dir_alloc(zfs, NULL); 240 241 nvlist_add_uint64(zfs->rootdsldir->propsnv, "compression", 242 ZIO_COMPRESS_OFF); 243 244 zfs->rootds = dsl_dataset_alloc(zfs, zfs->rootdsldir); 245 zfs->rootdsldir->headds = zfs->rootds; 246 247 zfs->mosdsldir = dsl_metadir_alloc(zfs, "$MOS"); 248 zfs->freedsldir = dsl_metadir_alloc(zfs, "$FREE"); 249 dsl_origindir_init(zfs); 250 251 /* 252 * Go through the list of user-specified datasets and create DSL objects 253 * for them. 254 */ 255 STAILQ_FOREACH(d, &zfs->datasetdescs, next) { 256 char *dsname, *next, *params, *param, *nextparam; 257 258 params = d->params; 259 dsname = strsep(¶ms, dspropdelim); 260 261 if (strcmp(dsname, zfs->poolname) == 0) { 262 /* 263 * This is the root dataset; it's already created, so 264 * we're just setting options. 265 */ 266 dir = zfs->rootdsldir; 267 } else { 268 /* 269 * This dataset must be a child of the root dataset. 270 */ 271 if (strstr(dsname, zfs->poolname) != dsname || 272 (next = strchr(dsname, '/')) == NULL || 273 (size_t)(next - dsname) != strlen(zfs->poolname)) { 274 errx(1, "dataset `%s' must be a child of `%s'", 275 dsname, zfs->poolname); 276 } 277 dir = dsl_dir_alloc(zfs, dsname); 278 dir->headds = dsl_dataset_alloc(zfs, dir); 279 } 280 281 for (nextparam = param = params; nextparam != NULL;) { 282 char *key, *val; 283 284 param = strsep(&nextparam, dspropdelim); 285 286 key = val = param; 287 key = strsep(&val, "="); 288 dsl_dir_set_prop(zfs, dir, key, val); 289 } 290 } 291 292 /* 293 * Set the root dataset's mount point if the user didn't override the 294 * default. 295 */ 296 if (nvpair_find(zfs->rootdsldir->propsnv, "mountpoint") == NULL) { 297 nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint", 298 zfs->rootpath); 299 } 300 } 301 302 uint64_t 303 dsl_dir_id(zfs_dsl_dir_t *dir) 304 { 305 return (dir->dirid); 306 } 307 308 uint64_t 309 dsl_dir_dataset_id(zfs_dsl_dir_t *dir) 310 { 311 return (dir->headds->dsid); 312 } 313 314 static void 315 dsl_dir_foreach_post(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, 316 void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) 317 { 318 zfs_dsl_dir_t *cdsldir; 319 320 STAILQ_FOREACH(cdsldir, &dsldir->children, next) { 321 dsl_dir_foreach_post(zfs, cdsldir, cb, arg); 322 } 323 cb(zfs, dsldir, arg); 324 } 325 326 /* 327 * Used when the caller doesn't care about the order one way or another. 328 */ 329 void 330 dsl_dir_foreach(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, 331 void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) 332 { 333 dsl_dir_foreach_post(zfs, dsldir, cb, arg); 334 } 335 336 const char * 337 dsl_dir_fullname(const zfs_dsl_dir_t *dir) 338 { 339 return (dir->fullname); 340 } 341 342 /* 343 * Create a DSL directory, which is effectively an entry in the ZFS namespace. 344 * We always create a root DSL directory, whose name is the pool's name, and 345 * several metadata directories. 346 * 347 * Each directory has two ZAP objects, one pointing to child directories, and 348 * one for properties (which are inherited by children unless overridden). 349 * Directories typically reference a DSL dataset, the "head dataset", which 350 * points to an object set. 351 */ 352 static zfs_dsl_dir_t * 353 dsl_dir_alloc(zfs_opt_t *zfs, const char *name) 354 { 355 zfs_dsl_dir_list_t l, *lp; 356 zfs_dsl_dir_t *dir, *parent; 357 dnode_phys_t *dnode; 358 char *dirname, *nextdir, *origname; 359 uint64_t childid, propsid; 360 361 dir = ecalloc(1, sizeof(*dir)); 362 363 dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DIR, 364 DMU_OT_DSL_DIR, sizeof(dsl_dir_phys_t), &dir->dirid); 365 dir->phys = (dsl_dir_phys_t *)DN_BONUS(dnode); 366 367 dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_PROPS, &propsid); 368 dir->propszap = zap_alloc(zfs->mos, dnode); 369 370 dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DIR_CHILD_MAP, 371 &childid); 372 dir->childzap = zap_alloc(zfs->mos, dnode); 373 374 dir->propsnv = nvlist_create(NV_UNIQUE_NAME); 375 STAILQ_INIT(&dir->children); 376 377 dir->phys->dd_child_dir_zapobj = childid; 378 dir->phys->dd_props_zapobj = propsid; 379 380 if (name == NULL) { 381 /* 382 * This is the root DSL directory. 383 */ 384 dir->name = estrdup(zfs->poolname); 385 dir->fullname = estrdup(zfs->poolname); 386 dir->parent = NULL; 387 dir->phys->dd_parent_obj = 0; 388 389 assert(zfs->rootdsldir == NULL); 390 zfs->rootdsldir = dir; 391 return (dir); 392 } 393 394 /* 395 * Insert the new directory into the hierarchy. Currently this must be 396 * done in order, e.g., when creating pool/a/b, pool/a must already 397 * exist. 398 */ 399 STAILQ_INIT(&l); 400 STAILQ_INSERT_HEAD(&l, zfs->rootdsldir, next); 401 origname = dirname = nextdir = estrdup(name); 402 for (lp = &l;; lp = &parent->children) { 403 dirname = strsep(&nextdir, "/"); 404 if (nextdir == NULL) 405 break; 406 407 STAILQ_FOREACH(parent, lp, next) { 408 if (strcmp(parent->name, dirname) == 0) 409 break; 410 } 411 if (parent == NULL) { 412 errx(1, "no parent at `%s' for filesystem `%s'", 413 dirname, name); 414 } 415 } 416 417 dir->fullname = estrdup(name); 418 dir->name = estrdup(dirname); 419 free(origname); 420 STAILQ_INSERT_TAIL(lp, dir, next); 421 zap_add_uint64(parent->childzap, dir->name, dir->dirid); 422 423 dir->parent = parent; 424 dir->phys->dd_parent_obj = parent->dirid; 425 return (dir); 426 } 427 428 void 429 dsl_dir_size_add(zfs_dsl_dir_t *dir, uint64_t bytes) 430 { 431 dir->phys->dd_used_bytes += bytes; 432 dir->phys->dd_compressed_bytes += bytes; 433 dir->phys->dd_uncompressed_bytes += bytes; 434 } 435 436 /* 437 * Convert dataset properties into entries in the DSL directory's properties 438 * ZAP. 439 */ 440 static void 441 dsl_dir_finalize_props(zfs_dsl_dir_t *dir) 442 { 443 for (nvp_header_t *nvh = NULL; 444 (nvh = nvlist_next_nvpair(dir->propsnv, nvh)) != NULL;) { 445 nv_string_t *nvname; 446 nv_pair_data_t *nvdata; 447 char *name; 448 449 nvname = (nv_string_t *)(nvh + 1); 450 nvdata = (nv_pair_data_t *)(&nvname->nv_data[0] + 451 NV_ALIGN4(nvname->nv_size)); 452 453 name = nvstring_get(nvname); 454 switch (nvdata->nv_type) { 455 case DATA_TYPE_UINT64: { 456 uint64_t val; 457 458 memcpy(&val, &nvdata->nv_data[0], sizeof(uint64_t)); 459 zap_add_uint64(dir->propszap, name, val); 460 break; 461 } 462 case DATA_TYPE_STRING: { 463 nv_string_t *nvstr; 464 char *val; 465 466 nvstr = (nv_string_t *)&nvdata->nv_data[0]; 467 val = nvstring_get(nvstr); 468 zap_add_string(dir->propszap, name, val); 469 free(val); 470 break; 471 } 472 default: 473 assert(0); 474 } 475 free(name); 476 } 477 } 478 479 static void 480 dsl_dir_finalize(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, void *arg __unused) 481 { 482 char key[32]; 483 zfs_dsl_dir_t *cdir; 484 dnode_phys_t *snapnames; 485 zfs_dsl_dataset_t *headds; 486 zfs_objset_t *os; 487 uint64_t bytes, snapnamesid; 488 489 dsl_dir_finalize_props(dir); 490 zap_write(zfs, dir->propszap); 491 zap_write(zfs, dir->childzap); 492 493 headds = dir->headds; 494 if (headds == NULL) 495 return; 496 os = headds->os; 497 if (os == NULL) 498 return; 499 500 snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, 501 &snapnamesid); 502 zap_write(zfs, zap_alloc(zfs->mos, snapnames)); 503 504 dir->phys->dd_head_dataset_obj = headds->dsid; 505 dir->phys->dd_clone_parent_obj = zfs->snapds->dsid; 506 headds->phys->ds_prev_snap_obj = zfs->snapds->dsid; 507 headds->phys->ds_snapnames_zapobj = snapnamesid; 508 objset_root_blkptr_copy(os, &headds->phys->ds_bp); 509 510 zfs->snapds->phys->ds_num_children++; 511 snprintf(key, sizeof(key), "%jx", (uintmax_t)headds->dsid); 512 zap_add_uint64(zfs->cloneszap, key, headds->dsid); 513 514 bytes = objset_space(os); 515 headds->phys->ds_used_bytes = bytes; 516 headds->phys->ds_uncompressed_bytes = bytes; 517 headds->phys->ds_compressed_bytes = bytes; 518 519 STAILQ_FOREACH(cdir, &dir->children, next) { 520 /* 521 * The root directory needs a special case: the amount of 522 * space used for the MOS isn't known until everything else is 523 * finalized, so it can't be accounted in the MOS directory's 524 * parent until then. 525 */ 526 if (dir == zfs->rootdsldir && cdir == zfs->mosdsldir) 527 continue; 528 bytes += cdir->phys->dd_used_bytes; 529 } 530 dsl_dir_size_add(dir, bytes); 531 } 532 533 void 534 dsl_write(zfs_opt_t *zfs) 535 { 536 zfs_zap_t *snapnameszap; 537 dnode_phys_t *snapnames; 538 uint64_t snapmapid; 539 540 /* 541 * Perform accounting, starting from the leaves of the DSL directory 542 * tree. Accounting for $MOS is done later, once we've finished 543 * allocating space. 544 */ 545 dsl_dir_foreach_post(zfs, zfs->rootdsldir, dsl_dir_finalize, NULL); 546 547 snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, 548 &snapmapid); 549 snapnameszap = zap_alloc(zfs->mos, snapnames); 550 zap_add_uint64(snapnameszap, "$ORIGIN", zfs->snapds->dsid); 551 zap_write(zfs, snapnameszap); 552 553 zfs->origindsldir->phys->dd_head_dataset_obj = zfs->originds->dsid; 554 zfs->originds->phys->ds_prev_snap_obj = zfs->snapds->dsid; 555 zfs->originds->phys->ds_snapnames_zapobj = snapmapid; 556 557 zfs->snapds->phys->ds_next_snap_obj = zfs->originds->dsid; 558 assert(zfs->snapds->phys->ds_num_children > 0); 559 zfs->snapds->phys->ds_num_children++; 560 561 zap_write(zfs, zfs->cloneszap); 562 563 /* XXX-MJ dirs and datasets are leaked */ 564 } 565 566 void 567 dsl_dir_dataset_write(zfs_opt_t *zfs, zfs_objset_t *os, zfs_dsl_dir_t *dir) 568 { 569 dir->headds->os = os; 570 objset_write(zfs, os); 571 } 572 573 bool 574 dsl_dir_has_dataset(zfs_dsl_dir_t *dir) 575 { 576 return (dir->headds != NULL); 577 } 578 579 bool 580 dsl_dir_dataset_has_objset(zfs_dsl_dir_t *dir) 581 { 582 return (dsl_dir_has_dataset(dir) && dir->headds->os != NULL); 583 } 584 585 static zfs_dsl_dataset_t * 586 dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) 587 { 588 zfs_dsl_dataset_t *ds; 589 dnode_phys_t *dnode; 590 uint64_t deadlistid; 591 592 ds = ecalloc(1, sizeof(*ds)); 593 594 dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DATASET, 595 DMU_OT_DSL_DATASET, sizeof(dsl_dataset_phys_t), &ds->dsid); 596 ds->phys = (dsl_dataset_phys_t *)DN_BONUS(dnode); 597 598 dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DEADLIST, 599 DMU_OT_DEADLIST_HDR, sizeof(dsl_deadlist_phys_t), &deadlistid); 600 zap_write(zfs, zap_alloc(zfs->mos, dnode)); 601 602 ds->phys->ds_dir_obj = dir->dirid; 603 ds->phys->ds_deadlist_obj = deadlistid; 604 ds->phys->ds_creation_txg = TXG - 1; 605 if (ds != zfs->snapds) 606 ds->phys->ds_prev_snap_txg = TXG - 1; 607 ds->phys->ds_guid = ((uint64_t)random() << 32) | random(); 608 ds->dir = dir; 609 610 return (ds); 611 } 612