1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2022 The FreeBSD Foundation 5 * 6 * This software was developed by Mark Johnston under sponsorship from 7 * the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions are 11 * met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <assert.h> 32 #include <string.h> 33 34 #include <util.h> 35 36 #include "makefs.h" 37 #include "zfs.h" 38 39 typedef struct zfs_dsl_dataset { 40 zfs_objset_t *os; /* referenced objset, may be null */ 41 dsl_dataset_phys_t *phys; /* on-disk representation */ 42 uint64_t dsid; /* DSL dataset dnode */ 43 44 struct zfs_dsl_dir *dir; /* containing parent */ 45 } zfs_dsl_dataset_t; 46 47 typedef STAILQ_HEAD(zfs_dsl_dir_list, zfs_dsl_dir) zfs_dsl_dir_list_t; 48 49 typedef struct zfs_dsl_dir { 50 char *fullname; /* full dataset name */ 51 char *name; /* basename(fullname) */ 52 dsl_dir_phys_t *phys; /* on-disk representation */ 53 nvlist_t *propsnv; /* properties saved in propszap */ 54 55 zfs_dsl_dataset_t *headds; /* principal dataset, may be null */ 56 57 uint64_t dirid; /* DSL directory dnode */ 58 zfs_zap_t *propszap; /* dataset properties */ 59 zfs_zap_t *childzap; /* child directories */ 60 61 /* DSL directory tree linkage. */ 62 struct zfs_dsl_dir *parent; 63 zfs_dsl_dir_list_t children; 64 STAILQ_ENTRY(zfs_dsl_dir) next; 65 } zfs_dsl_dir_t; 66 67 static zfs_dsl_dir_t *dsl_dir_alloc(zfs_opt_t *zfs, const char *name); 68 static zfs_dsl_dataset_t *dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir); 69 70 static int 71 nvlist_find_string(nvlist_t *nvl, const char *key, char **retp) 72 { 73 char *str; 74 int error, len; 75 76 error = nvlist_find(nvl, key, DATA_TYPE_STRING, NULL, &str, &len); 77 if (error == 0) { 78 *retp = ecalloc(1, len + 1); 79 memcpy(*retp, str, len); 80 } 81 return (error); 82 } 83 84 static int 85 nvlist_find_uint64(nvlist_t *nvl, const char *key, uint64_t *retp) 86 { 87 return (nvlist_find(nvl, key, DATA_TYPE_UINT64, NULL, retp, NULL)); 88 } 89 90 /* 91 * Return an allocated string containing the head dataset's mountpoint, 92 * including the root path prefix. 93 * 94 * If the dataset has a mountpoint property, it is returned. Otherwise we have 95 * to follow ZFS' inheritance rules. 96 */ 97 char * 98 dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) 99 { 100 zfs_dsl_dir_t *pdir; 101 char *mountpoint, *origmountpoint; 102 103 if (nvlist_find_string(dir->propsnv, "mountpoint", &mountpoint) == 0) { 104 if (strcmp(mountpoint, "none") == 0) 105 return (NULL); 106 107 /* 108 * nvlist_find_string() does not make a copy. 109 */ 110 mountpoint = estrdup(mountpoint); 111 } else { 112 /* 113 * If we don't have a mountpoint, it's inherited from one of our 114 * ancestors. Walk up the hierarchy until we find it, building 115 * up our mountpoint along the way. The mountpoint property is 116 * always set for the root dataset. 117 */ 118 for (pdir = dir->parent, mountpoint = estrdup(dir->name);;) { 119 origmountpoint = mountpoint; 120 121 if (nvlist_find_string(pdir->propsnv, "mountpoint", 122 &mountpoint) == 0) { 123 easprintf(&mountpoint, "%s%s%s", mountpoint, 124 mountpoint[strlen(mountpoint) - 1] == '/' ? 125 "" : "/", origmountpoint); 126 free(origmountpoint); 127 break; 128 } 129 130 easprintf(&mountpoint, "%s/%s", pdir->name, 131 origmountpoint); 132 free(origmountpoint); 133 pdir = pdir->parent; 134 } 135 } 136 assert(mountpoint[0] == '/'); 137 assert(strstr(mountpoint, zfs->rootpath) == mountpoint); 138 139 return (mountpoint); 140 } 141 142 int 143 dsl_dir_get_canmount(zfs_dsl_dir_t *dir, uint64_t *canmountp) 144 { 145 return (nvlist_find_uint64(dir->propsnv, "canmount", canmountp)); 146 } 147 148 /* 149 * Handle dataset properties that we know about; stash them into an nvlist to be 150 * written later to the properties ZAP object. 151 * 152 * If the set of properties we handle grows too much, we should probably explore 153 * using libzfs to manage them. 154 */ 155 static void 156 dsl_dir_set_prop(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, const char *key, 157 const char *val) 158 { 159 nvlist_t *nvl; 160 161 nvl = dir->propsnv; 162 if (val == NULL || val[0] == '\0') 163 errx(1, "missing value for property `%s'", key); 164 if (nvpair_find(nvl, key) != NULL) 165 errx(1, "property `%s' already set", key); 166 167 if (strcmp(key, "mountpoint") == 0) { 168 if (strcmp(val, "none") != 0) { 169 if (val[0] != '/') 170 errx(1, "mountpoint `%s' is not absolute", val); 171 if (strcmp(val, zfs->rootpath) != 0 && 172 strcmp(zfs->rootpath, "/") != 0 && 173 (strstr(val, zfs->rootpath) != val || 174 val[strlen(zfs->rootpath)] != '/')) { 175 errx(1, "mountpoint `%s' is not prefixed by " 176 "the root path `%s'", val, zfs->rootpath); 177 } 178 } 179 nvlist_add_string(nvl, key, val); 180 } else if (strcmp(key, "atime") == 0 || strcmp(key, "exec") == 0 || 181 strcmp(key, "setuid") == 0) { 182 if (strcmp(val, "on") == 0) 183 nvlist_add_uint64(nvl, key, 1); 184 else if (strcmp(val, "off") == 0) 185 nvlist_add_uint64(nvl, key, 0); 186 else 187 errx(1, "invalid value `%s' for %s", val, key); 188 } else if (strcmp(key, "canmount") == 0) { 189 if (strcmp(val, "noauto") == 0) 190 nvlist_add_uint64(nvl, key, 2); 191 else if (strcmp(val, "on") == 0) 192 nvlist_add_uint64(nvl, key, 1); 193 else if (strcmp(val, "off") == 0) 194 nvlist_add_uint64(nvl, key, 0); 195 else 196 errx(1, "invalid value `%s' for %s", val, key); 197 } else { 198 errx(1, "unknown property `%s'", key); 199 } 200 } 201 202 static zfs_dsl_dir_t * 203 dsl_metadir_alloc(zfs_opt_t *zfs, const char *name) 204 { 205 zfs_dsl_dir_t *dir; 206 char *path; 207 208 easprintf(&path, "%s/%s", zfs->poolname, name); 209 dir = dsl_dir_alloc(zfs, path); 210 free(path); 211 return (dir); 212 } 213 214 static void 215 dsl_origindir_init(zfs_opt_t *zfs) 216 { 217 dnode_phys_t *clones; 218 uint64_t clonesid; 219 220 zfs->origindsldir = dsl_metadir_alloc(zfs, "$ORIGIN"); 221 zfs->originds = dsl_dataset_alloc(zfs, zfs->origindsldir); 222 zfs->snapds = dsl_dataset_alloc(zfs, zfs->origindsldir); 223 224 clones = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_CLONES, &clonesid); 225 zfs->cloneszap = zap_alloc(zfs->mos, clones); 226 zfs->origindsldir->phys->dd_clones = clonesid; 227 } 228 229 void 230 dsl_init(zfs_opt_t *zfs) 231 { 232 zfs_dsl_dir_t *dir; 233 struct dataset_desc *d; 234 const char *dspropdelim; 235 236 dspropdelim = ";"; 237 238 zfs->rootdsldir = dsl_dir_alloc(zfs, NULL); 239 240 nvlist_add_uint64(zfs->rootdsldir->propsnv, "compression", 241 ZIO_COMPRESS_OFF); 242 243 zfs->rootds = dsl_dataset_alloc(zfs, zfs->rootdsldir); 244 zfs->rootdsldir->headds = zfs->rootds; 245 246 zfs->mosdsldir = dsl_metadir_alloc(zfs, "$MOS"); 247 zfs->freedsldir = dsl_metadir_alloc(zfs, "$FREE"); 248 dsl_origindir_init(zfs); 249 250 /* 251 * Go through the list of user-specified datasets and create DSL objects 252 * for them. 253 */ 254 STAILQ_FOREACH(d, &zfs->datasetdescs, next) { 255 char *dsname, *next, *params, *param, *nextparam; 256 257 params = d->params; 258 dsname = strsep(¶ms, dspropdelim); 259 260 if (strcmp(dsname, zfs->poolname) == 0) { 261 /* 262 * This is the root dataset; it's already created, so 263 * we're just setting options. 264 */ 265 dir = zfs->rootdsldir; 266 } else { 267 /* 268 * This dataset must be a child of the root dataset. 269 */ 270 if (strstr(dsname, zfs->poolname) != dsname || 271 (next = strchr(dsname, '/')) == NULL || 272 (size_t)(next - dsname) != strlen(zfs->poolname)) { 273 errx(1, "dataset `%s' must be a child of `%s'", 274 dsname, zfs->poolname); 275 } 276 dir = dsl_dir_alloc(zfs, dsname); 277 dir->headds = dsl_dataset_alloc(zfs, dir); 278 } 279 280 for (nextparam = param = params; nextparam != NULL;) { 281 char *key, *val; 282 283 param = strsep(&nextparam, dspropdelim); 284 285 key = val = param; 286 key = strsep(&val, "="); 287 dsl_dir_set_prop(zfs, dir, key, val); 288 } 289 } 290 291 /* 292 * Set the root dataset's mount point if the user didn't override the 293 * default. 294 */ 295 if (nvpair_find(zfs->rootdsldir->propsnv, "mountpoint") == NULL) { 296 nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint", 297 zfs->rootpath); 298 } 299 } 300 301 uint64_t 302 dsl_dir_id(zfs_dsl_dir_t *dir) 303 { 304 return (dir->dirid); 305 } 306 307 uint64_t 308 dsl_dir_dataset_id(zfs_dsl_dir_t *dir) 309 { 310 return (dir->headds->dsid); 311 } 312 313 static void 314 dsl_dir_foreach_post(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, 315 void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) 316 { 317 zfs_dsl_dir_t *cdsldir; 318 319 STAILQ_FOREACH(cdsldir, &dsldir->children, next) { 320 dsl_dir_foreach_post(zfs, cdsldir, cb, arg); 321 } 322 cb(zfs, dsldir, arg); 323 } 324 325 /* 326 * Used when the caller doesn't care about the order one way or another. 327 */ 328 void 329 dsl_dir_foreach(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, 330 void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) 331 { 332 dsl_dir_foreach_post(zfs, dsldir, cb, arg); 333 } 334 335 const char * 336 dsl_dir_fullname(const zfs_dsl_dir_t *dir) 337 { 338 return (dir->fullname); 339 } 340 341 /* 342 * Create a DSL directory, which is effectively an entry in the ZFS namespace. 343 * We always create a root DSL directory, whose name is the pool's name, and 344 * several metadata directories. 345 * 346 * Each directory has two ZAP objects, one pointing to child directories, and 347 * one for properties (which are inherited by children unless overridden). 348 * Directories typically reference a DSL dataset, the "head dataset", which 349 * points to an object set. 350 */ 351 static zfs_dsl_dir_t * 352 dsl_dir_alloc(zfs_opt_t *zfs, const char *name) 353 { 354 zfs_dsl_dir_list_t l, *lp; 355 zfs_dsl_dir_t *dir, *parent; 356 dnode_phys_t *dnode; 357 char *dirname, *nextdir, *origname; 358 uint64_t childid, propsid; 359 360 dir = ecalloc(1, sizeof(*dir)); 361 362 dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DIR, 363 DMU_OT_DSL_DIR, sizeof(dsl_dir_phys_t), &dir->dirid); 364 dir->phys = (dsl_dir_phys_t *)DN_BONUS(dnode); 365 366 dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_PROPS, &propsid); 367 dir->propszap = zap_alloc(zfs->mos, dnode); 368 369 dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DIR_CHILD_MAP, 370 &childid); 371 dir->childzap = zap_alloc(zfs->mos, dnode); 372 373 dir->propsnv = nvlist_create(NV_UNIQUE_NAME); 374 STAILQ_INIT(&dir->children); 375 376 dir->phys->dd_child_dir_zapobj = childid; 377 dir->phys->dd_props_zapobj = propsid; 378 379 if (name == NULL) { 380 /* 381 * This is the root DSL directory. 382 */ 383 dir->name = estrdup(zfs->poolname); 384 dir->fullname = estrdup(zfs->poolname); 385 dir->parent = NULL; 386 dir->phys->dd_parent_obj = 0; 387 388 assert(zfs->rootdsldir == NULL); 389 zfs->rootdsldir = dir; 390 return (dir); 391 } 392 393 /* 394 * Insert the new directory into the hierarchy. Currently this must be 395 * done in order, e.g., when creating pool/a/b, pool/a must already 396 * exist. 397 */ 398 STAILQ_INIT(&l); 399 STAILQ_INSERT_HEAD(&l, zfs->rootdsldir, next); 400 origname = dirname = nextdir = estrdup(name); 401 for (lp = &l;; lp = &parent->children) { 402 dirname = strsep(&nextdir, "/"); 403 if (nextdir == NULL) 404 break; 405 406 STAILQ_FOREACH(parent, lp, next) { 407 if (strcmp(parent->name, dirname) == 0) 408 break; 409 } 410 if (parent == NULL) { 411 errx(1, "no parent at `%s' for filesystem `%s'", 412 dirname, name); 413 } 414 } 415 416 dir->fullname = estrdup(name); 417 dir->name = estrdup(dirname); 418 free(origname); 419 STAILQ_INSERT_TAIL(lp, dir, next); 420 zap_add_uint64(parent->childzap, dir->name, dir->dirid); 421 422 dir->parent = parent; 423 dir->phys->dd_parent_obj = parent->dirid; 424 return (dir); 425 } 426 427 void 428 dsl_dir_size_set(zfs_dsl_dir_t *dir, uint64_t bytes) 429 { 430 dir->phys->dd_used_bytes = bytes; 431 dir->phys->dd_compressed_bytes = bytes; 432 dir->phys->dd_uncompressed_bytes = bytes; 433 } 434 435 /* 436 * Convert dataset properties into entries in the DSL directory's properties 437 * ZAP. 438 */ 439 static void 440 dsl_dir_finalize_props(zfs_dsl_dir_t *dir) 441 { 442 for (nvp_header_t *nvh = NULL; 443 (nvh = nvlist_next_nvpair(dir->propsnv, nvh)) != NULL;) { 444 nv_string_t *nvname; 445 nv_pair_data_t *nvdata; 446 char *name; 447 448 nvname = (nv_string_t *)(nvh + 1); 449 nvdata = (nv_pair_data_t *)(&nvname->nv_data[0] + 450 NV_ALIGN4(nvname->nv_size)); 451 452 name = nvstring_get(nvname); 453 switch (nvdata->nv_type) { 454 case DATA_TYPE_UINT64: { 455 uint64_t val; 456 457 memcpy(&val, &nvdata->nv_data[0], sizeof(uint64_t)); 458 zap_add_uint64(dir->propszap, name, val); 459 break; 460 } 461 case DATA_TYPE_STRING: { 462 nv_string_t *nvstr; 463 char *val; 464 465 nvstr = (nv_string_t *)&nvdata->nv_data[0]; 466 val = nvstring_get(nvstr); 467 zap_add_string(dir->propszap, name, val); 468 free(val); 469 break; 470 } 471 default: 472 assert(0); 473 } 474 free(name); 475 } 476 } 477 478 static void 479 dsl_dir_finalize(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, void *arg __unused) 480 { 481 char key[32]; 482 zfs_dsl_dir_t *cdir; 483 dnode_phys_t *snapnames; 484 zfs_dsl_dataset_t *headds; 485 zfs_objset_t *os; 486 uint64_t bytes, snapnamesid; 487 488 dsl_dir_finalize_props(dir); 489 zap_write(zfs, dir->propszap); 490 zap_write(zfs, dir->childzap); 491 492 headds = dir->headds; 493 if (headds == NULL) 494 return; 495 os = headds->os; 496 if (os == NULL) 497 return; 498 499 snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, 500 &snapnamesid); 501 zap_write(zfs, zap_alloc(zfs->mos, snapnames)); 502 503 dir->phys->dd_head_dataset_obj = headds->dsid; 504 dir->phys->dd_clone_parent_obj = zfs->snapds->dsid; 505 headds->phys->ds_prev_snap_obj = zfs->snapds->dsid; 506 headds->phys->ds_snapnames_zapobj = snapnamesid; 507 objset_root_blkptr_copy(os, &headds->phys->ds_bp); 508 509 zfs->snapds->phys->ds_num_children++; 510 snprintf(key, sizeof(key), "%jx", (uintmax_t)headds->dsid); 511 zap_add_uint64(zfs->cloneszap, key, headds->dsid); 512 513 bytes = objset_space(os); 514 headds->phys->ds_used_bytes = bytes; 515 headds->phys->ds_uncompressed_bytes = bytes; 516 headds->phys->ds_compressed_bytes = bytes; 517 518 STAILQ_FOREACH(cdir, &dir->children, next) 519 bytes += cdir->phys->dd_used_bytes; 520 dsl_dir_size_set(dir, bytes); 521 } 522 523 void 524 dsl_write(zfs_opt_t *zfs) 525 { 526 zfs_zap_t *snapnameszap; 527 dnode_phys_t *snapnames; 528 uint64_t snapmapid; 529 530 /* 531 * Perform accounting, starting from the leaves of the DSL directory 532 * tree. Accounting for $MOS is done later, once we've finished 533 * allocating space. 534 */ 535 dsl_dir_foreach_post(zfs, zfs->rootdsldir, dsl_dir_finalize, NULL); 536 537 snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, 538 &snapmapid); 539 snapnameszap = zap_alloc(zfs->mos, snapnames); 540 zap_add_uint64(snapnameszap, "$ORIGIN", zfs->snapds->dsid); 541 zap_write(zfs, snapnameszap); 542 543 zfs->origindsldir->phys->dd_head_dataset_obj = zfs->originds->dsid; 544 zfs->originds->phys->ds_prev_snap_obj = zfs->snapds->dsid; 545 zfs->originds->phys->ds_snapnames_zapobj = snapmapid; 546 547 zfs->snapds->phys->ds_next_snap_obj = zfs->originds->dsid; 548 assert(zfs->snapds->phys->ds_num_children > 0); 549 zfs->snapds->phys->ds_num_children++; 550 551 zap_write(zfs, zfs->cloneszap); 552 553 /* XXX-MJ dirs and datasets are leaked */ 554 } 555 556 void 557 dsl_dir_dataset_write(zfs_opt_t *zfs, zfs_objset_t *os, zfs_dsl_dir_t *dir) 558 { 559 dir->headds->os = os; 560 objset_write(zfs, os); 561 } 562 563 bool 564 dsl_dir_has_dataset(zfs_dsl_dir_t *dir) 565 { 566 return (dir->headds != NULL); 567 } 568 569 bool 570 dsl_dir_dataset_has_objset(zfs_dsl_dir_t *dir) 571 { 572 return (dsl_dir_has_dataset(dir) && dir->headds->os != NULL); 573 } 574 575 static zfs_dsl_dataset_t * 576 dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) 577 { 578 zfs_dsl_dataset_t *ds; 579 dnode_phys_t *dnode; 580 uint64_t deadlistid; 581 582 ds = ecalloc(1, sizeof(*ds)); 583 584 dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DATASET, 585 DMU_OT_DSL_DATASET, sizeof(dsl_dataset_phys_t), &ds->dsid); 586 ds->phys = (dsl_dataset_phys_t *)DN_BONUS(dnode); 587 588 dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DEADLIST, 589 DMU_OT_DEADLIST_HDR, sizeof(dsl_deadlist_phys_t), &deadlistid); 590 zap_write(zfs, zap_alloc(zfs->mos, dnode)); 591 592 ds->phys->ds_dir_obj = dir->dirid; 593 ds->phys->ds_deadlist_obj = deadlistid; 594 ds->phys->ds_creation_txg = TXG - 1; 595 if (ds != zfs->snapds) 596 ds->phys->ds_prev_snap_txg = TXG - 1; 597 ds->phys->ds_guid = ((uint64_t)random() << 32) | random(); 598 ds->dir = dir; 599 600 return (ds); 601 } 602