1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2022 The FreeBSD Foundation 5 * 6 * This software was developed by Mark Johnston under sponsorship from 7 * the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions are 11 * met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <assert.h> 32 #include <stdlib.h> 33 #include <string.h> 34 35 #include <util.h> 36 37 #include "makefs.h" 38 #include "zfs.h" 39 40 typedef struct zfs_dsl_dataset { 41 zfs_objset_t *os; /* referenced objset, may be null */ 42 dsl_dataset_phys_t *phys; /* on-disk representation */ 43 uint64_t dsid; /* DSL dataset dnode */ 44 45 struct zfs_dsl_dir *dir; /* containing parent */ 46 } zfs_dsl_dataset_t; 47 48 typedef STAILQ_HEAD(zfs_dsl_dir_list, zfs_dsl_dir) zfs_dsl_dir_list_t; 49 50 typedef struct zfs_dsl_dir { 51 char *fullname; /* full dataset name */ 52 char *name; /* basename(fullname) */ 53 dsl_dir_phys_t *phys; /* on-disk representation */ 54 nvlist_t *propsnv; /* properties saved in propszap */ 55 56 zfs_dsl_dataset_t *headds; /* principal dataset, may be null */ 57 58 uint64_t dirid; /* DSL directory dnode */ 59 zfs_zap_t *propszap; /* dataset properties */ 60 zfs_zap_t *childzap; /* child directories */ 61 62 /* DSL directory tree linkage. */ 63 struct zfs_dsl_dir *parent; 64 zfs_dsl_dir_list_t children; 65 STAILQ_ENTRY(zfs_dsl_dir) next; 66 } zfs_dsl_dir_t; 67 68 static zfs_dsl_dir_t *dsl_dir_alloc(zfs_opt_t *zfs, const char *name); 69 static zfs_dsl_dataset_t *dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir); 70 71 static int 72 nvlist_find_string(nvlist_t *nvl, const char *key, char **retp) 73 { 74 char *str; 75 int error, len; 76 77 error = nvlist_find(nvl, key, DATA_TYPE_STRING, NULL, &str, &len); 78 if (error == 0) { 79 *retp = ecalloc(1, len + 1); 80 memcpy(*retp, str, len); 81 } 82 return (error); 83 } 84 85 static int 86 nvlist_find_uint64(nvlist_t *nvl, const char *key, uint64_t *retp) 87 { 88 return (nvlist_find(nvl, key, DATA_TYPE_UINT64, NULL, retp, NULL)); 89 } 90 91 /* 92 * Return an allocated string containing the head dataset's mountpoint, 93 * including the root path prefix. 94 * 95 * If the dataset has a mountpoint property, it is returned. Otherwise we have 96 * to follow ZFS' inheritance rules. 97 */ 98 char * 99 dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) 100 { 101 zfs_dsl_dir_t *pdir; 102 char *mountpoint; 103 104 if (nvlist_find_string(dir->propsnv, "mountpoint", &mountpoint) == 0) { 105 if (strcmp(mountpoint, "none") == 0) 106 return (NULL); 107 } else { 108 /* 109 * If we don't have a mountpoint, it's inherited from one of our 110 * ancestors. Walk up the hierarchy until we find it, building 111 * up our mountpoint along the way. The mountpoint property is 112 * always set for the root dataset. 113 */ 114 for (pdir = dir->parent, mountpoint = estrdup(dir->name);; 115 pdir = pdir->parent) { 116 char *origmountpoint, *tmp; 117 118 origmountpoint = mountpoint; 119 120 if (nvlist_find_string(pdir->propsnv, "mountpoint", 121 &tmp) == 0) { 122 easprintf(&mountpoint, "%s%s%s", tmp, 123 tmp[strlen(tmp) - 1] == '/' ? "" : "/", 124 origmountpoint); 125 free(tmp); 126 free(origmountpoint); 127 break; 128 } 129 130 easprintf(&mountpoint, "%s/%s", pdir->name, 131 origmountpoint); 132 free(origmountpoint); 133 } 134 } 135 assert(mountpoint[0] == '/'); 136 assert(strstr(mountpoint, zfs->rootpath) == mountpoint); 137 138 return (mountpoint); 139 } 140 141 int 142 dsl_dir_get_canmount(zfs_dsl_dir_t *dir, uint64_t *canmountp) 143 { 144 return (nvlist_find_uint64(dir->propsnv, "canmount", canmountp)); 145 } 146 147 /* 148 * Handle dataset properties that we know about; stash them into an nvlist to be 149 * written later to the properties ZAP object. 150 * 151 * If the set of properties we handle grows too much, we should probably explore 152 * using libzfs to manage them. 153 */ 154 static void 155 dsl_dir_set_prop(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, const char *key, 156 const char *val) 157 { 158 nvlist_t *nvl; 159 160 nvl = dir->propsnv; 161 if (val == NULL || val[0] == '\0') 162 errx(1, "missing value for property `%s'", key); 163 if (nvpair_find(nvl, key) != NULL) 164 errx(1, "property `%s' already set", key); 165 166 if (strcmp(key, "mountpoint") == 0) { 167 if (strcmp(val, "none") != 0) { 168 if (val[0] != '/') 169 errx(1, "mountpoint `%s' is not absolute", val); 170 if (strcmp(val, zfs->rootpath) != 0 && 171 strcmp(zfs->rootpath, "/") != 0 && 172 (strstr(val, zfs->rootpath) != val || 173 val[strlen(zfs->rootpath)] != '/')) { 174 errx(1, "mountpoint `%s' is not prefixed by " 175 "the root path `%s'", val, zfs->rootpath); 176 } 177 } 178 nvlist_add_string(nvl, key, val); 179 } else if (strcmp(key, "atime") == 0 || strcmp(key, "exec") == 0 || 180 strcmp(key, "setuid") == 0) { 181 if (strcmp(val, "on") == 0) 182 nvlist_add_uint64(nvl, key, 1); 183 else if (strcmp(val, "off") == 0) 184 nvlist_add_uint64(nvl, key, 0); 185 else 186 errx(1, "invalid value `%s' for %s", val, key); 187 } else if (strcmp(key, "canmount") == 0) { 188 if (strcmp(val, "noauto") == 0) 189 nvlist_add_uint64(nvl, key, 2); 190 else if (strcmp(val, "on") == 0) 191 nvlist_add_uint64(nvl, key, 1); 192 else if (strcmp(val, "off") == 0) 193 nvlist_add_uint64(nvl, key, 0); 194 else 195 errx(1, "invalid value `%s' for %s", val, key); 196 } else { 197 errx(1, "unknown property `%s'", key); 198 } 199 } 200 201 static zfs_dsl_dir_t * 202 dsl_metadir_alloc(zfs_opt_t *zfs, const char *name) 203 { 204 zfs_dsl_dir_t *dir; 205 char *path; 206 207 easprintf(&path, "%s/%s", zfs->poolname, name); 208 dir = dsl_dir_alloc(zfs, path); 209 free(path); 210 return (dir); 211 } 212 213 static void 214 dsl_origindir_init(zfs_opt_t *zfs) 215 { 216 dnode_phys_t *clones; 217 uint64_t clonesid; 218 219 zfs->origindsldir = dsl_metadir_alloc(zfs, "$ORIGIN"); 220 zfs->originds = dsl_dataset_alloc(zfs, zfs->origindsldir); 221 zfs->snapds = dsl_dataset_alloc(zfs, zfs->origindsldir); 222 223 clones = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_CLONES, &clonesid); 224 zfs->cloneszap = zap_alloc(zfs->mos, clones); 225 zfs->origindsldir->phys->dd_clones = clonesid; 226 } 227 228 void 229 dsl_init(zfs_opt_t *zfs) 230 { 231 zfs_dsl_dir_t *dir; 232 struct dataset_desc *d; 233 const char *dspropdelim; 234 235 dspropdelim = ";"; 236 237 zfs->rootdsldir = dsl_dir_alloc(zfs, NULL); 238 239 nvlist_add_uint64(zfs->rootdsldir->propsnv, "compression", 240 ZIO_COMPRESS_OFF); 241 242 zfs->rootds = dsl_dataset_alloc(zfs, zfs->rootdsldir); 243 zfs->rootdsldir->headds = zfs->rootds; 244 245 zfs->mosdsldir = dsl_metadir_alloc(zfs, "$MOS"); 246 zfs->freedsldir = dsl_metadir_alloc(zfs, "$FREE"); 247 dsl_origindir_init(zfs); 248 249 /* 250 * Go through the list of user-specified datasets and create DSL objects 251 * for them. 252 */ 253 STAILQ_FOREACH(d, &zfs->datasetdescs, next) { 254 char *dsname, *next, *params, *param, *nextparam; 255 256 params = d->params; 257 dsname = strsep(¶ms, dspropdelim); 258 259 if (strcmp(dsname, zfs->poolname) == 0) { 260 /* 261 * This is the root dataset; it's already created, so 262 * we're just setting options. 263 */ 264 dir = zfs->rootdsldir; 265 } else { 266 /* 267 * This dataset must be a child of the root dataset. 268 */ 269 if (strstr(dsname, zfs->poolname) != dsname || 270 (next = strchr(dsname, '/')) == NULL || 271 (size_t)(next - dsname) != strlen(zfs->poolname)) { 272 errx(1, "dataset `%s' must be a child of `%s'", 273 dsname, zfs->poolname); 274 } 275 dir = dsl_dir_alloc(zfs, dsname); 276 dir->headds = dsl_dataset_alloc(zfs, dir); 277 } 278 279 for (nextparam = param = params; nextparam != NULL;) { 280 char *key, *val; 281 282 param = strsep(&nextparam, dspropdelim); 283 284 key = val = param; 285 key = strsep(&val, "="); 286 dsl_dir_set_prop(zfs, dir, key, val); 287 } 288 } 289 290 /* 291 * Set the root dataset's mount point if the user didn't override the 292 * default. 293 */ 294 if (nvpair_find(zfs->rootdsldir->propsnv, "mountpoint") == NULL) { 295 nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint", 296 zfs->rootpath); 297 } 298 } 299 300 uint64_t 301 dsl_dir_id(zfs_dsl_dir_t *dir) 302 { 303 return (dir->dirid); 304 } 305 306 uint64_t 307 dsl_dir_dataset_id(zfs_dsl_dir_t *dir) 308 { 309 return (dir->headds->dsid); 310 } 311 312 static void 313 dsl_dir_foreach_post(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, 314 void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) 315 { 316 zfs_dsl_dir_t *cdsldir; 317 318 STAILQ_FOREACH(cdsldir, &dsldir->children, next) { 319 dsl_dir_foreach_post(zfs, cdsldir, cb, arg); 320 } 321 cb(zfs, dsldir, arg); 322 } 323 324 /* 325 * Used when the caller doesn't care about the order one way or another. 326 */ 327 void 328 dsl_dir_foreach(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, 329 void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) 330 { 331 dsl_dir_foreach_post(zfs, dsldir, cb, arg); 332 } 333 334 const char * 335 dsl_dir_fullname(const zfs_dsl_dir_t *dir) 336 { 337 return (dir->fullname); 338 } 339 340 /* 341 * Create a DSL directory, which is effectively an entry in the ZFS namespace. 342 * We always create a root DSL directory, whose name is the pool's name, and 343 * several metadata directories. 344 * 345 * Each directory has two ZAP objects, one pointing to child directories, and 346 * one for properties (which are inherited by children unless overridden). 347 * Directories typically reference a DSL dataset, the "head dataset", which 348 * points to an object set. 349 */ 350 static zfs_dsl_dir_t * 351 dsl_dir_alloc(zfs_opt_t *zfs, const char *name) 352 { 353 zfs_dsl_dir_list_t l, *lp; 354 zfs_dsl_dir_t *dir, *parent; 355 dnode_phys_t *dnode; 356 char *dirname, *nextdir, *origname; 357 uint64_t childid, propsid; 358 359 dir = ecalloc(1, sizeof(*dir)); 360 361 dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DIR, 362 DMU_OT_DSL_DIR, sizeof(dsl_dir_phys_t), &dir->dirid); 363 dir->phys = (dsl_dir_phys_t *)DN_BONUS(dnode); 364 365 dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_PROPS, &propsid); 366 dir->propszap = zap_alloc(zfs->mos, dnode); 367 368 dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DIR_CHILD_MAP, 369 &childid); 370 dir->childzap = zap_alloc(zfs->mos, dnode); 371 372 dir->propsnv = nvlist_create(NV_UNIQUE_NAME); 373 STAILQ_INIT(&dir->children); 374 375 dir->phys->dd_child_dir_zapobj = childid; 376 dir->phys->dd_props_zapobj = propsid; 377 378 if (name == NULL) { 379 /* 380 * This is the root DSL directory. 381 */ 382 dir->name = estrdup(zfs->poolname); 383 dir->fullname = estrdup(zfs->poolname); 384 dir->parent = NULL; 385 dir->phys->dd_parent_obj = 0; 386 387 assert(zfs->rootdsldir == NULL); 388 zfs->rootdsldir = dir; 389 return (dir); 390 } 391 392 /* 393 * Insert the new directory into the hierarchy. Currently this must be 394 * done in order, e.g., when creating pool/a/b, pool/a must already 395 * exist. 396 */ 397 STAILQ_INIT(&l); 398 STAILQ_INSERT_HEAD(&l, zfs->rootdsldir, next); 399 origname = dirname = nextdir = estrdup(name); 400 for (lp = &l;; lp = &parent->children) { 401 dirname = strsep(&nextdir, "/"); 402 if (nextdir == NULL) 403 break; 404 405 STAILQ_FOREACH(parent, lp, next) { 406 if (strcmp(parent->name, dirname) == 0) 407 break; 408 } 409 if (parent == NULL) { 410 errx(1, "no parent at `%s' for filesystem `%s'", 411 dirname, name); 412 } 413 } 414 415 dir->fullname = estrdup(name); 416 dir->name = estrdup(dirname); 417 free(origname); 418 STAILQ_INSERT_TAIL(lp, dir, next); 419 zap_add_uint64(parent->childzap, dir->name, dir->dirid); 420 421 dir->parent = parent; 422 dir->phys->dd_parent_obj = parent->dirid; 423 return (dir); 424 } 425 426 void 427 dsl_dir_size_add(zfs_dsl_dir_t *dir, uint64_t bytes) 428 { 429 dir->phys->dd_used_bytes += bytes; 430 dir->phys->dd_compressed_bytes += bytes; 431 dir->phys->dd_uncompressed_bytes += bytes; 432 } 433 434 /* 435 * Convert dataset properties into entries in the DSL directory's properties 436 * ZAP. 437 */ 438 static void 439 dsl_dir_finalize_props(zfs_dsl_dir_t *dir) 440 { 441 for (nvp_header_t *nvh = NULL; 442 (nvh = nvlist_next_nvpair(dir->propsnv, nvh)) != NULL;) { 443 nv_string_t *nvname; 444 nv_pair_data_t *nvdata; 445 char *name; 446 447 nvname = (nv_string_t *)(nvh + 1); 448 nvdata = (nv_pair_data_t *)(&nvname->nv_data[0] + 449 NV_ALIGN4(nvname->nv_size)); 450 451 name = nvstring_get(nvname); 452 switch (nvdata->nv_type) { 453 case DATA_TYPE_UINT64: { 454 uint64_t val; 455 456 memcpy(&val, &nvdata->nv_data[0], sizeof(uint64_t)); 457 zap_add_uint64(dir->propszap, name, val); 458 break; 459 } 460 case DATA_TYPE_STRING: { 461 nv_string_t *nvstr; 462 char *val; 463 464 nvstr = (nv_string_t *)&nvdata->nv_data[0]; 465 val = nvstring_get(nvstr); 466 zap_add_string(dir->propszap, name, val); 467 free(val); 468 break; 469 } 470 default: 471 assert(0); 472 } 473 free(name); 474 } 475 } 476 477 static void 478 dsl_dir_finalize(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, void *arg __unused) 479 { 480 char key[32]; 481 zfs_dsl_dir_t *cdir; 482 dnode_phys_t *snapnames; 483 zfs_dsl_dataset_t *headds; 484 zfs_objset_t *os; 485 uint64_t bytes, snapnamesid; 486 487 dsl_dir_finalize_props(dir); 488 zap_write(zfs, dir->propszap); 489 zap_write(zfs, dir->childzap); 490 491 headds = dir->headds; 492 if (headds == NULL) 493 return; 494 os = headds->os; 495 if (os == NULL) 496 return; 497 498 snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, 499 &snapnamesid); 500 zap_write(zfs, zap_alloc(zfs->mos, snapnames)); 501 502 dir->phys->dd_head_dataset_obj = headds->dsid; 503 dir->phys->dd_clone_parent_obj = zfs->snapds->dsid; 504 headds->phys->ds_prev_snap_obj = zfs->snapds->dsid; 505 headds->phys->ds_snapnames_zapobj = snapnamesid; 506 objset_root_blkptr_copy(os, &headds->phys->ds_bp); 507 508 zfs->snapds->phys->ds_num_children++; 509 snprintf(key, sizeof(key), "%jx", (uintmax_t)headds->dsid); 510 zap_add_uint64(zfs->cloneszap, key, headds->dsid); 511 512 bytes = objset_space(os); 513 headds->phys->ds_used_bytes = bytes; 514 headds->phys->ds_uncompressed_bytes = bytes; 515 headds->phys->ds_compressed_bytes = bytes; 516 517 STAILQ_FOREACH(cdir, &dir->children, next) { 518 /* 519 * The root directory needs a special case: the amount of 520 * space used for the MOS isn't known until everything else is 521 * finalized, so it can't be accounted in the MOS directory's 522 * parent until then. 523 */ 524 if (dir == zfs->rootdsldir && cdir == zfs->mosdsldir) 525 continue; 526 bytes += cdir->phys->dd_used_bytes; 527 } 528 dsl_dir_size_add(dir, bytes); 529 } 530 531 void 532 dsl_write(zfs_opt_t *zfs) 533 { 534 zfs_zap_t *snapnameszap; 535 dnode_phys_t *snapnames; 536 uint64_t snapmapid; 537 538 /* 539 * Perform accounting, starting from the leaves of the DSL directory 540 * tree. Accounting for $MOS is done later, once we've finished 541 * allocating space. 542 */ 543 dsl_dir_foreach_post(zfs, zfs->rootdsldir, dsl_dir_finalize, NULL); 544 545 snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, 546 &snapmapid); 547 snapnameszap = zap_alloc(zfs->mos, snapnames); 548 zap_add_uint64(snapnameszap, "$ORIGIN", zfs->snapds->dsid); 549 zap_write(zfs, snapnameszap); 550 551 zfs->origindsldir->phys->dd_head_dataset_obj = zfs->originds->dsid; 552 zfs->originds->phys->ds_prev_snap_obj = zfs->snapds->dsid; 553 zfs->originds->phys->ds_snapnames_zapobj = snapmapid; 554 555 zfs->snapds->phys->ds_next_snap_obj = zfs->originds->dsid; 556 assert(zfs->snapds->phys->ds_num_children > 0); 557 zfs->snapds->phys->ds_num_children++; 558 559 zap_write(zfs, zfs->cloneszap); 560 561 /* XXX-MJ dirs and datasets are leaked */ 562 } 563 564 void 565 dsl_dir_dataset_write(zfs_opt_t *zfs, zfs_objset_t *os, zfs_dsl_dir_t *dir) 566 { 567 dir->headds->os = os; 568 objset_write(zfs, os); 569 } 570 571 bool 572 dsl_dir_has_dataset(zfs_dsl_dir_t *dir) 573 { 574 return (dir->headds != NULL); 575 } 576 577 bool 578 dsl_dir_dataset_has_objset(zfs_dsl_dir_t *dir) 579 { 580 return (dsl_dir_has_dataset(dir) && dir->headds->os != NULL); 581 } 582 583 static zfs_dsl_dataset_t * 584 dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) 585 { 586 zfs_dsl_dataset_t *ds; 587 dnode_phys_t *dnode; 588 uint64_t deadlistid; 589 590 ds = ecalloc(1, sizeof(*ds)); 591 592 dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DATASET, 593 DMU_OT_DSL_DATASET, sizeof(dsl_dataset_phys_t), &ds->dsid); 594 ds->phys = (dsl_dataset_phys_t *)DN_BONUS(dnode); 595 596 dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DEADLIST, 597 DMU_OT_DEADLIST_HDR, sizeof(dsl_deadlist_phys_t), &deadlistid); 598 zap_write(zfs, zap_alloc(zfs->mos, dnode)); 599 600 ds->phys->ds_dir_obj = dir->dirid; 601 ds->phys->ds_deadlist_obj = deadlistid; 602 ds->phys->ds_creation_txg = TXG - 1; 603 if (ds != zfs->snapds) 604 ds->phys->ds_prev_snap_txg = TXG - 1; 605 ds->phys->ds_guid = ((uint64_t)random() << 32) | random(); 606 ds->dir = dir; 607 608 return (ds); 609 } 610