1240afd8cSMark Johnston /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3240afd8cSMark Johnston * 4240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation 5240afd8cSMark Johnston * 6240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from 7240afd8cSMark Johnston * the FreeBSD Foundation. 8240afd8cSMark Johnston * 9240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without 10240afd8cSMark Johnston * modification, are permitted provided that the following conditions are 11240afd8cSMark Johnston * met: 12240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright 13240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer. 14240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright 15240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in 16240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution. 17240afd8cSMark Johnston * 18240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21240afd8cSMark Johnston * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28240afd8cSMark Johnston * SUCH DAMAGE. 29240afd8cSMark Johnston */ 30240afd8cSMark Johnston 31240afd8cSMark Johnston #include <sys/stat.h> 32240afd8cSMark Johnston 33240afd8cSMark Johnston #include <assert.h> 34c6890399SJessica Clarke #include <dirent.h> 35240afd8cSMark Johnston #include <fcntl.h> 36c6890399SJessica Clarke #include <stdlib.h> 37240afd8cSMark Johnston #include <string.h> 38240afd8cSMark Johnston #include <unistd.h> 39240afd8cSMark Johnston 40240afd8cSMark Johnston #include <util.h> 41240afd8cSMark Johnston 42240afd8cSMark Johnston #include "makefs.h" 43240afd8cSMark Johnston #include "zfs.h" 44240afd8cSMark Johnston 45240afd8cSMark Johnston typedef struct { 46240afd8cSMark Johnston const char *name; 47240afd8cSMark Johnston unsigned int id; 48240afd8cSMark Johnston uint16_t size; 49240afd8cSMark Johnston sa_bswap_type_t bs; 50240afd8cSMark Johnston } zfs_sattr_t; 51240afd8cSMark Johnston 52240afd8cSMark Johnston typedef struct zfs_fs { 53240afd8cSMark Johnston zfs_objset_t *os; 54240afd8cSMark Johnston 55240afd8cSMark Johnston /* Offset table for system attributes, indexed by a zpl_attr_t. */ 56240afd8cSMark Johnston uint16_t *saoffs; 57240afd8cSMark Johnston size_t sacnt; 58240afd8cSMark Johnston const zfs_sattr_t *satab; 59240afd8cSMark Johnston } zfs_fs_t; 60240afd8cSMark Johnston 61240afd8cSMark Johnston /* 62240afd8cSMark Johnston * The order of the attributes doesn't matter, this is simply the one hard-coded 63240afd8cSMark Johnston * by OpenZFS, based on a zdb dump of the SA_REGISTRY table. 64240afd8cSMark Johnston */ 65240afd8cSMark Johnston typedef enum zpl_attr { 66240afd8cSMark Johnston ZPL_ATIME, 67240afd8cSMark Johnston ZPL_MTIME, 68240afd8cSMark Johnston ZPL_CTIME, 69240afd8cSMark Johnston ZPL_CRTIME, 70240afd8cSMark Johnston ZPL_GEN, 71240afd8cSMark Johnston ZPL_MODE, 72240afd8cSMark Johnston ZPL_SIZE, 73240afd8cSMark Johnston ZPL_PARENT, 74240afd8cSMark Johnston ZPL_LINKS, 75240afd8cSMark Johnston ZPL_XATTR, 76240afd8cSMark Johnston ZPL_RDEV, 77240afd8cSMark Johnston ZPL_FLAGS, 78240afd8cSMark Johnston ZPL_UID, 79240afd8cSMark Johnston ZPL_GID, 80240afd8cSMark Johnston ZPL_PAD, 81240afd8cSMark Johnston ZPL_ZNODE_ACL, 82240afd8cSMark Johnston ZPL_DACL_COUNT, 83240afd8cSMark Johnston ZPL_SYMLINK, 84240afd8cSMark Johnston ZPL_SCANSTAMP, 85240afd8cSMark Johnston ZPL_DACL_ACES, 86240afd8cSMark Johnston ZPL_DXATTR, 87240afd8cSMark Johnston ZPL_PROJID, 88240afd8cSMark Johnston } zpl_attr_t; 89240afd8cSMark Johnston 90240afd8cSMark Johnston /* 91240afd8cSMark Johnston * This table must be kept in sync with zpl_attr_layout[] and zpl_attr_t. 92240afd8cSMark Johnston */ 93240afd8cSMark Johnston static const zfs_sattr_t zpl_attrs[] = { 94240afd8cSMark Johnston #define _ZPL_ATTR(n, s, b) { .name = #n, .id = n, .size = s, .bs = b } 95240afd8cSMark Johnston _ZPL_ATTR(ZPL_ATIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 96240afd8cSMark Johnston _ZPL_ATTR(ZPL_MTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 97240afd8cSMark Johnston _ZPL_ATTR(ZPL_CTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 98240afd8cSMark Johnston _ZPL_ATTR(ZPL_CRTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 99240afd8cSMark Johnston _ZPL_ATTR(ZPL_GEN, sizeof(uint64_t), SA_UINT64_ARRAY), 100240afd8cSMark Johnston _ZPL_ATTR(ZPL_MODE, sizeof(uint64_t), SA_UINT64_ARRAY), 101240afd8cSMark Johnston _ZPL_ATTR(ZPL_SIZE, sizeof(uint64_t), SA_UINT64_ARRAY), 102240afd8cSMark Johnston _ZPL_ATTR(ZPL_PARENT, sizeof(uint64_t), SA_UINT64_ARRAY), 103240afd8cSMark Johnston _ZPL_ATTR(ZPL_LINKS, sizeof(uint64_t), SA_UINT64_ARRAY), 104240afd8cSMark Johnston _ZPL_ATTR(ZPL_XATTR, sizeof(uint64_t), SA_UINT64_ARRAY), 105240afd8cSMark Johnston _ZPL_ATTR(ZPL_RDEV, sizeof(uint64_t), SA_UINT64_ARRAY), 106240afd8cSMark Johnston _ZPL_ATTR(ZPL_FLAGS, sizeof(uint64_t), SA_UINT64_ARRAY), 107240afd8cSMark Johnston _ZPL_ATTR(ZPL_UID, sizeof(uint64_t), SA_UINT64_ARRAY), 108240afd8cSMark Johnston _ZPL_ATTR(ZPL_GID, sizeof(uint64_t), SA_UINT64_ARRAY), 109240afd8cSMark Johnston _ZPL_ATTR(ZPL_PAD, sizeof(uint64_t), SA_UINT64_ARRAY), 110240afd8cSMark Johnston _ZPL_ATTR(ZPL_ZNODE_ACL, 88, SA_UINT64_ARRAY), 111240afd8cSMark Johnston _ZPL_ATTR(ZPL_DACL_COUNT, sizeof(uint64_t), SA_UINT64_ARRAY), 112240afd8cSMark Johnston _ZPL_ATTR(ZPL_SYMLINK, 0, SA_UINT8_ARRAY), 113240afd8cSMark Johnston _ZPL_ATTR(ZPL_SCANSTAMP, sizeof(uint64_t) * 4, SA_UINT8_ARRAY), 114240afd8cSMark Johnston _ZPL_ATTR(ZPL_DACL_ACES, 0, SA_ACL), 115240afd8cSMark Johnston _ZPL_ATTR(ZPL_DXATTR, 0, SA_UINT8_ARRAY), 116240afd8cSMark Johnston _ZPL_ATTR(ZPL_PROJID, sizeof(uint64_t), SA_UINT64_ARRAY), 117240afd8cSMark Johnston #undef ZPL_ATTR 118240afd8cSMark Johnston }; 119240afd8cSMark Johnston 120240afd8cSMark Johnston /* 121240afd8cSMark Johnston * This layout matches that of a filesystem created using OpenZFS on FreeBSD. 122240afd8cSMark Johnston * It need not match in general, but FreeBSD's loader doesn't bother parsing the 123240afd8cSMark Johnston * layout and just hard-codes attribute offsets. 124240afd8cSMark Johnston */ 125240afd8cSMark Johnston static const sa_attr_type_t zpl_attr_layout[] = { 126240afd8cSMark Johnston ZPL_MODE, 127240afd8cSMark Johnston ZPL_SIZE, 128240afd8cSMark Johnston ZPL_GEN, 129240afd8cSMark Johnston ZPL_UID, 130240afd8cSMark Johnston ZPL_GID, 131240afd8cSMark Johnston ZPL_PARENT, 132240afd8cSMark Johnston ZPL_FLAGS, 133240afd8cSMark Johnston ZPL_ATIME, 134240afd8cSMark Johnston ZPL_MTIME, 135240afd8cSMark Johnston ZPL_CTIME, 136240afd8cSMark Johnston ZPL_CRTIME, 137240afd8cSMark Johnston ZPL_LINKS, 138240afd8cSMark Johnston ZPL_DACL_COUNT, 139240afd8cSMark Johnston ZPL_DACL_ACES, 140240afd8cSMark Johnston ZPL_SYMLINK, 141240afd8cSMark Johnston }; 142240afd8cSMark Johnston 143240afd8cSMark Johnston /* 144240afd8cSMark Johnston * Keys for the ZPL attribute tables in the SA layout ZAP. The first two 145240afd8cSMark Johnston * indices are reserved for legacy attribute encoding. 146240afd8cSMark Johnston */ 147240afd8cSMark Johnston #define SA_LAYOUT_INDEX_DEFAULT 2 148240afd8cSMark Johnston #define SA_LAYOUT_INDEX_SYMLINK 3 149240afd8cSMark Johnston 150240afd8cSMark Johnston struct fs_populate_dir { 151240afd8cSMark Johnston SLIST_ENTRY(fs_populate_dir) next; 152240afd8cSMark Johnston int dirfd; 153240afd8cSMark Johnston uint64_t objid; 154240afd8cSMark Johnston zfs_zap_t *zap; 155240afd8cSMark Johnston }; 156240afd8cSMark Johnston 157240afd8cSMark Johnston struct fs_populate_arg { 158240afd8cSMark Johnston zfs_opt_t *zfs; 159240afd8cSMark Johnston zfs_fs_t *fs; /* owning filesystem */ 160240afd8cSMark Johnston uint64_t rootdirid; /* root directory dnode ID */ 1618eca3207SMark Johnston int rootdirfd; /* root directory fd */ 162240afd8cSMark Johnston SLIST_HEAD(, fs_populate_dir) dirs; /* stack of directories */ 163240afd8cSMark Johnston }; 164240afd8cSMark Johnston 165240afd8cSMark Johnston static void fs_build_one(zfs_opt_t *, zfs_dsl_dir_t *, fsnode *, int); 166240afd8cSMark Johnston 1678eca3207SMark Johnston static void 1688eca3207SMark Johnston eclose(int fd) 1698eca3207SMark Johnston { 1708eca3207SMark Johnston if (close(fd) != 0) 1718eca3207SMark Johnston err(1, "close"); 1728eca3207SMark Johnston } 1738eca3207SMark Johnston 174240afd8cSMark Johnston static bool 175240afd8cSMark Johnston fsnode_isroot(const fsnode *cur) 176240afd8cSMark Johnston { 177240afd8cSMark Johnston return (strcmp(cur->name, ".") == 0); 178240afd8cSMark Johnston } 179240afd8cSMark Johnston 180240afd8cSMark Johnston /* 181240afd8cSMark Johnston * Visit each node in a directory hierarchy, in pre-order depth-first order. 182240afd8cSMark Johnston */ 183240afd8cSMark Johnston static void 184240afd8cSMark Johnston fsnode_foreach(fsnode *root, int (*cb)(fsnode *, void *), void *arg) 185240afd8cSMark Johnston { 186240afd8cSMark Johnston assert(root->type == S_IFDIR); 187240afd8cSMark Johnston 188240afd8cSMark Johnston for (fsnode *cur = root; cur != NULL; cur = cur->next) { 189240afd8cSMark Johnston assert(cur->type == S_IFREG || cur->type == S_IFDIR || 190240afd8cSMark Johnston cur->type == S_IFLNK); 191240afd8cSMark Johnston 192240afd8cSMark Johnston if (cb(cur, arg) == 0) 193240afd8cSMark Johnston continue; 194240afd8cSMark Johnston if (cur->type == S_IFDIR && cur->child != NULL) 195240afd8cSMark Johnston fsnode_foreach(cur->child, cb, arg); 196240afd8cSMark Johnston } 197240afd8cSMark Johnston } 198240afd8cSMark Johnston 199240afd8cSMark Johnston static void 200240afd8cSMark Johnston fs_populate_dirent(struct fs_populate_arg *arg, fsnode *cur, uint64_t dnid) 201240afd8cSMark Johnston { 202240afd8cSMark Johnston struct fs_populate_dir *dir; 203240afd8cSMark Johnston uint64_t type; 204240afd8cSMark Johnston 205240afd8cSMark Johnston switch (cur->type) { 206240afd8cSMark Johnston case S_IFREG: 207240afd8cSMark Johnston type = DT_REG; 208240afd8cSMark Johnston break; 209240afd8cSMark Johnston case S_IFDIR: 210240afd8cSMark Johnston type = DT_DIR; 211240afd8cSMark Johnston break; 212240afd8cSMark Johnston case S_IFLNK: 213240afd8cSMark Johnston type = DT_LNK; 214240afd8cSMark Johnston break; 215240afd8cSMark Johnston default: 216240afd8cSMark Johnston assert(0); 217240afd8cSMark Johnston } 218240afd8cSMark Johnston 219240afd8cSMark Johnston dir = SLIST_FIRST(&arg->dirs); 220240afd8cSMark Johnston zap_add_uint64(dir->zap, cur->name, ZFS_DIRENT_MAKE(type, dnid)); 221240afd8cSMark Johnston } 222240afd8cSMark Johnston 223240afd8cSMark Johnston static void 224240afd8cSMark Johnston fs_populate_attr(zfs_fs_t *fs, char *attrbuf, const void *val, uint16_t ind, 225240afd8cSMark Johnston size_t *szp) 226240afd8cSMark Johnston { 227240afd8cSMark Johnston assert(ind < fs->sacnt); 228240afd8cSMark Johnston assert(fs->saoffs[ind] != 0xffff); 229240afd8cSMark Johnston 230240afd8cSMark Johnston memcpy(attrbuf + fs->saoffs[ind], val, fs->satab[ind].size); 231240afd8cSMark Johnston *szp += fs->satab[ind].size; 232240afd8cSMark Johnston } 233240afd8cSMark Johnston 234240afd8cSMark Johnston static void 235240afd8cSMark Johnston fs_populate_varszattr(zfs_fs_t *fs, char *attrbuf, const void *val, 236240afd8cSMark Johnston size_t valsz, size_t varoff, uint16_t ind, size_t *szp) 237240afd8cSMark Johnston { 238240afd8cSMark Johnston assert(ind < fs->sacnt); 239240afd8cSMark Johnston assert(fs->saoffs[ind] != 0xffff); 240240afd8cSMark Johnston assert(fs->satab[ind].size == 0); 241240afd8cSMark Johnston 242240afd8cSMark Johnston memcpy(attrbuf + fs->saoffs[ind] + varoff, val, valsz); 243240afd8cSMark Johnston *szp += valsz; 244240afd8cSMark Johnston } 245240afd8cSMark Johnston 2468eca3207SMark Johnston /* 2478eca3207SMark Johnston * Derive the relative fd/path combo needed to access a file. Ideally we'd 2488eca3207SMark Johnston * always be able to use relative lookups (i.e., use the *at() system calls), 2498eca3207SMark Johnston * since they require less path translation and are more amenable to sandboxing, 2508eca3207SMark Johnston * but the handling of multiple staging directories makes that difficult. To 2518eca3207SMark Johnston * make matters worse, we have no choice but to use relative lookups when 2528eca3207SMark Johnston * dealing with an mtree manifest, so both mechanisms are implemented. 2538eca3207SMark Johnston */ 2548eca3207SMark Johnston static void 2558eca3207SMark Johnston fs_populate_path(const fsnode *cur, struct fs_populate_arg *arg, 2568eca3207SMark Johnston char *path, size_t sz, int *dirfdp) 2578eca3207SMark Johnston { 258aac389a3SBrooks Davis if (cur->contents != NULL) { 259aac389a3SBrooks Davis size_t n; 260aac389a3SBrooks Davis 261aac389a3SBrooks Davis *dirfdp = AT_FDCWD; 262aac389a3SBrooks Davis n = strlcpy(path, cur->contents, sz); 263aac389a3SBrooks Davis assert(n < sz); 264aac389a3SBrooks Davis } else if (cur->root == NULL) { 2658eca3207SMark Johnston size_t n; 2668eca3207SMark Johnston 2678eca3207SMark Johnston *dirfdp = SLIST_FIRST(&arg->dirs)->dirfd; 2688eca3207SMark Johnston n = strlcpy(path, cur->name, sz); 2698eca3207SMark Johnston assert(n < sz); 2708eca3207SMark Johnston } else { 2718eca3207SMark Johnston int n; 2728eca3207SMark Johnston 2738eca3207SMark Johnston *dirfdp = AT_FDCWD; 2748eca3207SMark Johnston n = snprintf(path, sz, "%s/%s/%s", 2758eca3207SMark Johnston cur->root, cur->path, cur->name); 2768eca3207SMark Johnston assert(n >= 0); 2778eca3207SMark Johnston assert((size_t)n < sz); 2788eca3207SMark Johnston } 2798eca3207SMark Johnston } 2808eca3207SMark Johnston 2818eca3207SMark Johnston static int 2828eca3207SMark Johnston fs_open(const fsnode *cur, struct fs_populate_arg *arg, int flags) 2838eca3207SMark Johnston { 2848eca3207SMark Johnston char path[PATH_MAX]; 2858eca3207SMark Johnston int fd; 2868eca3207SMark Johnston 2878eca3207SMark Johnston fs_populate_path(cur, arg, path, sizeof(path), &fd); 2888eca3207SMark Johnston 2898eca3207SMark Johnston fd = openat(fd, path, flags); 2908eca3207SMark Johnston if (fd < 0) 2918eca3207SMark Johnston err(1, "openat(%s)", path); 2928eca3207SMark Johnston return (fd); 2938eca3207SMark Johnston } 2948eca3207SMark Johnston 2956e011d15SBrooks Davis static int 2966e011d15SBrooks Davis fs_open_can_fail(const fsnode *cur, struct fs_populate_arg *arg, int flags) 2976e011d15SBrooks Davis { 2986e011d15SBrooks Davis int fd; 2996e011d15SBrooks Davis char path[PATH_MAX]; 3006e011d15SBrooks Davis 3016e011d15SBrooks Davis fs_populate_path(cur, arg, path, sizeof(path), &fd); 3026e011d15SBrooks Davis 3036e011d15SBrooks Davis return (openat(fd, path, flags)); 3046e011d15SBrooks Davis } 3056e011d15SBrooks Davis 3068eca3207SMark Johnston static void 3078eca3207SMark Johnston fs_readlink(const fsnode *cur, struct fs_populate_arg *arg, 3088eca3207SMark Johnston char *buf, size_t bufsz) 3098eca3207SMark Johnston { 3108eca3207SMark Johnston char path[PATH_MAX]; 3118eca3207SMark Johnston int fd; 3128eca3207SMark Johnston 313b78d5b42SBrooks Davis if (cur->symlink != NULL) { 314b78d5b42SBrooks Davis size_t n; 315b78d5b42SBrooks Davis 316b78d5b42SBrooks Davis n = strlcpy(buf, cur->symlink, bufsz); 317b78d5b42SBrooks Davis assert(n < bufsz); 318b78d5b42SBrooks Davis } else { 319b78d5b42SBrooks Davis ssize_t n; 320b78d5b42SBrooks Davis 3218eca3207SMark Johnston fs_populate_path(cur, arg, path, sizeof(path), &fd); 3228eca3207SMark Johnston 3238eca3207SMark Johnston n = readlinkat(fd, path, buf, bufsz - 1); 3248eca3207SMark Johnston if (n == -1) 3258eca3207SMark Johnston err(1, "readlinkat(%s)", cur->name); 3268eca3207SMark Johnston buf[n] = '\0'; 3278eca3207SMark Johnston } 328b78d5b42SBrooks Davis } 3298eca3207SMark Johnston 330240afd8cSMark Johnston static void 331b0ce7dfcSJessica Clarke fs_populate_time(zfs_fs_t *fs, char *attrbuf, struct timespec *ts, 332b0ce7dfcSJessica Clarke uint16_t ind, size_t *szp) 333b0ce7dfcSJessica Clarke { 334b0ce7dfcSJessica Clarke uint64_t timebuf[2]; 335b0ce7dfcSJessica Clarke 336b0ce7dfcSJessica Clarke assert(ind < fs->sacnt); 337b0ce7dfcSJessica Clarke assert(fs->saoffs[ind] != 0xffff); 338b0ce7dfcSJessica Clarke assert(fs->satab[ind].size == sizeof(timebuf)); 339b0ce7dfcSJessica Clarke 340b0ce7dfcSJessica Clarke timebuf[0] = ts->tv_sec; 341b0ce7dfcSJessica Clarke timebuf[1] = ts->tv_nsec; 342b0ce7dfcSJessica Clarke fs_populate_attr(fs, attrbuf, timebuf, ind, szp); 343b0ce7dfcSJessica Clarke } 344b0ce7dfcSJessica Clarke 345b0ce7dfcSJessica Clarke static void 346240afd8cSMark Johnston fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur, 347240afd8cSMark Johnston dnode_phys_t *dnode) 348240afd8cSMark Johnston { 349240afd8cSMark Johnston char target[PATH_MAX]; 350240afd8cSMark Johnston zfs_fs_t *fs; 351240afd8cSMark Johnston zfs_ace_hdr_t aces[3]; 352240afd8cSMark Johnston struct stat *sb; 353240afd8cSMark Johnston sa_hdr_phys_t *sahdr; 354240afd8cSMark Johnston uint64_t daclcount, flags, gen, gid, links, mode, parent, objsize, uid; 355240afd8cSMark Johnston char *attrbuf; 356240afd8cSMark Johnston size_t bonussz, hdrsz; 357240afd8cSMark Johnston int layout; 358240afd8cSMark Johnston 359240afd8cSMark Johnston assert(dnode->dn_bonustype == DMU_OT_SA); 360240afd8cSMark Johnston assert(dnode->dn_nblkptr == 1); 361240afd8cSMark Johnston 362240afd8cSMark Johnston fs = arg->fs; 363240afd8cSMark Johnston sb = &cur->inode->st; 364240afd8cSMark Johnston 365240afd8cSMark Johnston switch (cur->type) { 366240afd8cSMark Johnston case S_IFREG: 367240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_DEFAULT; 368240afd8cSMark Johnston links = cur->inode->nlink; 369240afd8cSMark Johnston objsize = sb->st_size; 370240afd8cSMark Johnston parent = SLIST_FIRST(&arg->dirs)->objid; 371240afd8cSMark Johnston break; 372240afd8cSMark Johnston case S_IFDIR: 373240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_DEFAULT; 374240afd8cSMark Johnston links = 1; /* .. */ 375240afd8cSMark Johnston objsize = 1; /* .. */ 376240afd8cSMark Johnston 377240afd8cSMark Johnston /* 378240afd8cSMark Johnston * The size of a ZPL directory is the number of entries 379240afd8cSMark Johnston * (including "." and ".."), and the link count is the number of 380240afd8cSMark Johnston * entries which are directories (including "." and ".."). 381240afd8cSMark Johnston */ 382240afd8cSMark Johnston for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child; 383240afd8cSMark Johnston c != NULL; c = c->next) { 384240afd8cSMark Johnston if (c->type == S_IFDIR) 385240afd8cSMark Johnston links++; 386240afd8cSMark Johnston objsize++; 387240afd8cSMark Johnston } 388240afd8cSMark Johnston 389240afd8cSMark Johnston /* The root directory is its own parent. */ 390240afd8cSMark Johnston parent = SLIST_EMPTY(&arg->dirs) ? 391240afd8cSMark Johnston arg->rootdirid : SLIST_FIRST(&arg->dirs)->objid; 392240afd8cSMark Johnston break; 3938eca3207SMark Johnston case S_IFLNK: 3948eca3207SMark Johnston fs_readlink(cur, arg, target, sizeof(target)); 395240afd8cSMark Johnston 396240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_SYMLINK; 397240afd8cSMark Johnston links = 1; 398240afd8cSMark Johnston objsize = strlen(target); 399240afd8cSMark Johnston parent = SLIST_FIRST(&arg->dirs)->objid; 400240afd8cSMark Johnston break; 401240afd8cSMark Johnston default: 402240afd8cSMark Johnston assert(0); 403240afd8cSMark Johnston } 404240afd8cSMark Johnston 405240afd8cSMark Johnston daclcount = nitems(aces); 406240afd8cSMark Johnston flags = ZFS_ACL_TRIVIAL | ZFS_ACL_AUTO_INHERIT | ZFS_NO_EXECS_DENIED | 407240afd8cSMark Johnston ZFS_ARCHIVE | ZFS_AV_MODIFIED; /* XXX-MJ */ 408240afd8cSMark Johnston gen = 1; 409240afd8cSMark Johnston gid = sb->st_gid; 410240afd8cSMark Johnston mode = sb->st_mode; 411240afd8cSMark Johnston uid = sb->st_uid; 412240afd8cSMark Johnston 413240afd8cSMark Johnston memset(aces, 0, sizeof(aces)); 414240afd8cSMark Johnston aces[0].z_flags = ACE_OWNER; 415240afd8cSMark Johnston aces[0].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 416240afd8cSMark Johnston aces[0].z_access_mask = ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER | 417240afd8cSMark Johnston ACE_WRITE_ACL | ACE_WRITE_NAMED_ATTRS | ACE_READ_ACL | 418240afd8cSMark Johnston ACE_READ_ATTRIBUTES | ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 419240afd8cSMark Johnston if ((mode & S_IRUSR) != 0) 420240afd8cSMark Johnston aces[0].z_access_mask |= ACE_READ_DATA; 421240afd8cSMark Johnston if ((mode & S_IWUSR) != 0) 422240afd8cSMark Johnston aces[0].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 423240afd8cSMark Johnston if ((mode & S_IXUSR) != 0) 424240afd8cSMark Johnston aces[0].z_access_mask |= ACE_EXECUTE; 425240afd8cSMark Johnston 426240afd8cSMark Johnston aces[1].z_flags = ACE_GROUP | ACE_IDENTIFIER_GROUP; 427240afd8cSMark Johnston aces[1].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 428240afd8cSMark Johnston aces[1].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES | 429240afd8cSMark Johnston ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 430240afd8cSMark Johnston if ((mode & S_IRGRP) != 0) 431240afd8cSMark Johnston aces[1].z_access_mask |= ACE_READ_DATA; 432240afd8cSMark Johnston if ((mode & S_IWGRP) != 0) 433240afd8cSMark Johnston aces[1].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 434240afd8cSMark Johnston if ((mode & S_IXGRP) != 0) 435240afd8cSMark Johnston aces[1].z_access_mask |= ACE_EXECUTE; 436240afd8cSMark Johnston 437240afd8cSMark Johnston aces[2].z_flags = ACE_EVERYONE; 438240afd8cSMark Johnston aces[2].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 439240afd8cSMark Johnston aces[2].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES | 440240afd8cSMark Johnston ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 441240afd8cSMark Johnston if ((mode & S_IROTH) != 0) 442240afd8cSMark Johnston aces[2].z_access_mask |= ACE_READ_DATA; 443240afd8cSMark Johnston if ((mode & S_IWOTH) != 0) 444240afd8cSMark Johnston aces[2].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 445240afd8cSMark Johnston if ((mode & S_IXOTH) != 0) 446240afd8cSMark Johnston aces[2].z_access_mask |= ACE_EXECUTE; 447240afd8cSMark Johnston 448240afd8cSMark Johnston switch (layout) { 449240afd8cSMark Johnston case SA_LAYOUT_INDEX_DEFAULT: 450240afd8cSMark Johnston /* At most one variable-length attribute. */ 451240afd8cSMark Johnston hdrsz = sizeof(uint64_t); 452240afd8cSMark Johnston break; 453240afd8cSMark Johnston case SA_LAYOUT_INDEX_SYMLINK: 454240afd8cSMark Johnston /* At most five variable-length attributes. */ 455240afd8cSMark Johnston hdrsz = sizeof(uint64_t) * 2; 456240afd8cSMark Johnston break; 457240afd8cSMark Johnston default: 458240afd8cSMark Johnston assert(0); 459240afd8cSMark Johnston } 460240afd8cSMark Johnston 461240afd8cSMark Johnston sahdr = (sa_hdr_phys_t *)DN_BONUS(dnode); 462240afd8cSMark Johnston sahdr->sa_magic = SA_MAGIC; 463240afd8cSMark Johnston SA_HDR_LAYOUT_INFO_ENCODE(sahdr->sa_layout_info, layout, hdrsz); 464240afd8cSMark Johnston 465240afd8cSMark Johnston bonussz = SA_HDR_SIZE(sahdr); 466240afd8cSMark Johnston attrbuf = (char *)sahdr + SA_HDR_SIZE(sahdr); 467240afd8cSMark Johnston 468240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &daclcount, ZPL_DACL_COUNT, &bonussz); 469240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &flags, ZPL_FLAGS, &bonussz); 470240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &gen, ZPL_GEN, &bonussz); 471240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &gid, ZPL_GID, &bonussz); 472240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &links, ZPL_LINKS, &bonussz); 473240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &mode, ZPL_MODE, &bonussz); 474240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &parent, ZPL_PARENT, &bonussz); 475240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &objsize, ZPL_SIZE, &bonussz); 476240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &uid, ZPL_UID, &bonussz); 477240afd8cSMark Johnston 478240afd8cSMark Johnston /* 479240afd8cSMark Johnston * We deliberately set atime = mtime here to ensure that images are 480240afd8cSMark Johnston * reproducible. 481240afd8cSMark Johnston */ 482b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_ATIME, &bonussz); 483b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CTIME, &bonussz); 484b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_MTIME, &bonussz); 485c6890399SJessica Clarke #ifdef __linux__ 486c6890399SJessica Clarke /* Linux has no st_birthtim; approximate with st_ctim */ 487c6890399SJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CRTIME, &bonussz); 488c6890399SJessica Clarke #else 489b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_birthtim, ZPL_CRTIME, &bonussz); 490c6890399SJessica Clarke #endif 491240afd8cSMark Johnston 492240afd8cSMark Johnston fs_populate_varszattr(fs, attrbuf, aces, sizeof(aces), 0, 493240afd8cSMark Johnston ZPL_DACL_ACES, &bonussz); 494240afd8cSMark Johnston sahdr->sa_lengths[0] = sizeof(aces); 495240afd8cSMark Johnston 496240afd8cSMark Johnston if (cur->type == S_IFLNK) { 497240afd8cSMark Johnston assert(layout == SA_LAYOUT_INDEX_SYMLINK); 498240afd8cSMark Johnston /* Need to use a spill block pointer if the target is long. */ 499240afd8cSMark Johnston assert(bonussz + objsize <= DN_OLD_MAX_BONUSLEN); 500240afd8cSMark Johnston fs_populate_varszattr(fs, attrbuf, target, objsize, 501240afd8cSMark Johnston sahdr->sa_lengths[0], ZPL_SYMLINK, &bonussz); 502240afd8cSMark Johnston sahdr->sa_lengths[1] = (uint16_t)objsize; 503240afd8cSMark Johnston } 504240afd8cSMark Johnston 505240afd8cSMark Johnston dnode->dn_bonuslen = bonussz; 506240afd8cSMark Johnston } 507240afd8cSMark Johnston 508240afd8cSMark Johnston static void 509240afd8cSMark Johnston fs_populate_file(fsnode *cur, struct fs_populate_arg *arg) 510240afd8cSMark Johnston { 511240afd8cSMark Johnston struct dnode_cursor *c; 512240afd8cSMark Johnston dnode_phys_t *dnode; 513240afd8cSMark Johnston zfs_opt_t *zfs; 514240afd8cSMark Johnston char *buf; 515240afd8cSMark Johnston uint64_t dnid; 516240afd8cSMark Johnston ssize_t n; 517240afd8cSMark Johnston size_t bufsz; 518*ef20cd33SMark Johnston off_t nbytes, reqbytes, size; 519240afd8cSMark Johnston int fd; 520240afd8cSMark Johnston 521240afd8cSMark Johnston assert(cur->type == S_IFREG); 522240afd8cSMark Johnston assert((cur->inode->flags & FI_ROOT) == 0); 523240afd8cSMark Johnston 524240afd8cSMark Johnston zfs = arg->zfs; 525240afd8cSMark Johnston 526240afd8cSMark Johnston assert(cur->inode->ino != 0); 527240afd8cSMark Johnston if ((cur->inode->flags & FI_ALLOCATED) != 0) { 528240afd8cSMark Johnston /* 529240afd8cSMark Johnston * This is a hard link of an existing file. 530240afd8cSMark Johnston * 531240afd8cSMark Johnston * XXX-MJ need to check whether it crosses datasets, add a test 532240afd8cSMark Johnston * case for that 533240afd8cSMark Johnston */ 534240afd8cSMark Johnston fs_populate_dirent(arg, cur, cur->inode->ino); 535240afd8cSMark Johnston return; 536240afd8cSMark Johnston } 537240afd8cSMark Johnston 538240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(arg->fs->os, 539240afd8cSMark Johnston DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid); 540240afd8cSMark Johnston cur->inode->ino = dnid; 541240afd8cSMark Johnston cur->inode->flags |= FI_ALLOCATED; 542240afd8cSMark Johnston 5438eca3207SMark Johnston fd = fs_open(cur, arg, O_RDONLY); 544240afd8cSMark Johnston 545240afd8cSMark Johnston buf = zfs->filebuf; 546240afd8cSMark Johnston bufsz = sizeof(zfs->filebuf); 547240afd8cSMark Johnston size = cur->inode->st.st_size; 548240afd8cSMark Johnston c = dnode_cursor_init(zfs, arg->fs->os, dnode, size, 0); 549*ef20cd33SMark Johnston for (off_t foff = 0; foff < size; foff += nbytes) { 550240afd8cSMark Johnston off_t loc, sofar; 551240afd8cSMark Johnston 552240afd8cSMark Johnston /* 553240afd8cSMark Johnston * Fill up our buffer, handling partial reads. 554240afd8cSMark Johnston */ 555240afd8cSMark Johnston sofar = 0; 556*ef20cd33SMark Johnston nbytes = MIN(size - foff, (off_t)bufsz); 557240afd8cSMark Johnston do { 558*ef20cd33SMark Johnston n = read(fd, buf + sofar, nbytes); 559240afd8cSMark Johnston if (n < 0) 560240afd8cSMark Johnston err(1, "reading from '%s'", cur->name); 561240afd8cSMark Johnston if (n == 0) 562240afd8cSMark Johnston errx(1, "unexpected EOF reading '%s'", 563240afd8cSMark Johnston cur->name); 564240afd8cSMark Johnston sofar += n; 565*ef20cd33SMark Johnston } while (sofar < nbytes); 566240afd8cSMark Johnston 567*ef20cd33SMark Johnston if (nbytes < (off_t)bufsz) 568*ef20cd33SMark Johnston memset(buf + nbytes, 0, bufsz - nbytes); 569240afd8cSMark Johnston 570*ef20cd33SMark Johnston reqbytes = foff == 0 ? nbytes : MAXBLOCKSIZE; 571*ef20cd33SMark Johnston loc = objset_space_alloc(zfs, arg->fs->os, &reqbytes); 572*ef20cd33SMark Johnston vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, buf, reqbytes, loc, 573240afd8cSMark Johnston dnode_cursor_next(zfs, c, foff)); 574240afd8cSMark Johnston } 5758eca3207SMark Johnston eclose(fd); 576240afd8cSMark Johnston dnode_cursor_finish(zfs, c); 577240afd8cSMark Johnston 578240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 579240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 580240afd8cSMark Johnston } 581240afd8cSMark Johnston 582240afd8cSMark Johnston static void 583240afd8cSMark Johnston fs_populate_dir(fsnode *cur, struct fs_populate_arg *arg) 584240afd8cSMark Johnston { 585240afd8cSMark Johnston dnode_phys_t *dnode; 586240afd8cSMark Johnston zfs_objset_t *os; 587240afd8cSMark Johnston uint64_t dnid; 588240afd8cSMark Johnston int dirfd; 589240afd8cSMark Johnston 590240afd8cSMark Johnston assert(cur->type == S_IFDIR); 591240afd8cSMark Johnston assert((cur->inode->flags & FI_ALLOCATED) == 0); 592240afd8cSMark Johnston 593240afd8cSMark Johnston os = arg->fs->os; 594240afd8cSMark Johnston 595240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(os, DMU_OT_DIRECTORY_CONTENTS, 596240afd8cSMark Johnston DMU_OT_SA, 0, &dnid); 597240afd8cSMark Johnston 598240afd8cSMark Johnston /* 599240afd8cSMark Johnston * Add an entry to the parent directory and open this directory. 600240afd8cSMark Johnston */ 601240afd8cSMark Johnston if (!SLIST_EMPTY(&arg->dirs)) { 602240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 6036e011d15SBrooks Davis /* 6046e011d15SBrooks Davis * We only need the directory fd if we're finding files in 6056e011d15SBrooks Davis * it. If it's just there for other directories or 6066e011d15SBrooks Davis * files using contents= we don't need to succeed here. 6076e011d15SBrooks Davis */ 6086e011d15SBrooks Davis dirfd = fs_open_can_fail(cur, arg, O_DIRECTORY | O_RDONLY); 609240afd8cSMark Johnston } else { 610240afd8cSMark Johnston arg->rootdirid = dnid; 6118eca3207SMark Johnston dirfd = arg->rootdirfd; 6128eca3207SMark Johnston arg->rootdirfd = -1; 613240afd8cSMark Johnston } 614240afd8cSMark Johnston 615240afd8cSMark Johnston /* 616240afd8cSMark Johnston * Set ZPL attributes. 617240afd8cSMark Johnston */ 618240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 619240afd8cSMark Johnston 620240afd8cSMark Johnston /* 621240afd8cSMark Johnston * If this is a root directory, then its children belong to a different 622240afd8cSMark Johnston * dataset and this directory remains empty in the current objset. 623240afd8cSMark Johnston */ 624240afd8cSMark Johnston if ((cur->inode->flags & FI_ROOT) == 0) { 625240afd8cSMark Johnston struct fs_populate_dir *dir; 626240afd8cSMark Johnston 627240afd8cSMark Johnston dir = ecalloc(1, sizeof(*dir)); 628240afd8cSMark Johnston dir->dirfd = dirfd; 629240afd8cSMark Johnston dir->objid = dnid; 630240afd8cSMark Johnston dir->zap = zap_alloc(os, dnode); 631240afd8cSMark Johnston SLIST_INSERT_HEAD(&arg->dirs, dir, next); 632240afd8cSMark Johnston } else { 633240afd8cSMark Johnston zap_write(arg->zfs, zap_alloc(os, dnode)); 634240afd8cSMark Johnston fs_build_one(arg->zfs, cur->inode->param, cur->child, dirfd); 635240afd8cSMark Johnston } 636240afd8cSMark Johnston } 637240afd8cSMark Johnston 638240afd8cSMark Johnston static void 639240afd8cSMark Johnston fs_populate_symlink(fsnode *cur, struct fs_populate_arg *arg) 640240afd8cSMark Johnston { 641240afd8cSMark Johnston dnode_phys_t *dnode; 642240afd8cSMark Johnston uint64_t dnid; 643240afd8cSMark Johnston 644240afd8cSMark Johnston assert(cur->type == S_IFLNK); 645240afd8cSMark Johnston assert((cur->inode->flags & (FI_ALLOCATED | FI_ROOT)) == 0); 646240afd8cSMark Johnston 647240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(arg->fs->os, 648240afd8cSMark Johnston DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid); 649240afd8cSMark Johnston 650240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 651240afd8cSMark Johnston 652240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 653240afd8cSMark Johnston } 654240afd8cSMark Johnston 655240afd8cSMark Johnston static int 656240afd8cSMark Johnston fs_foreach_populate(fsnode *cur, void *_arg) 657240afd8cSMark Johnston { 658240afd8cSMark Johnston struct fs_populate_arg *arg; 659240afd8cSMark Johnston struct fs_populate_dir *dir; 660240afd8cSMark Johnston int ret; 661240afd8cSMark Johnston 662240afd8cSMark Johnston arg = _arg; 663240afd8cSMark Johnston switch (cur->type) { 664240afd8cSMark Johnston case S_IFREG: 665240afd8cSMark Johnston fs_populate_file(cur, arg); 666240afd8cSMark Johnston break; 667240afd8cSMark Johnston case S_IFDIR: 668240afd8cSMark Johnston if (fsnode_isroot(cur)) 669240afd8cSMark Johnston break; 670240afd8cSMark Johnston fs_populate_dir(cur, arg); 671240afd8cSMark Johnston break; 672240afd8cSMark Johnston case S_IFLNK: 673240afd8cSMark Johnston fs_populate_symlink(cur, arg); 674240afd8cSMark Johnston break; 675240afd8cSMark Johnston default: 676240afd8cSMark Johnston assert(0); 677240afd8cSMark Johnston } 678240afd8cSMark Johnston 679240afd8cSMark Johnston ret = (cur->inode->flags & FI_ROOT) != 0 ? 0 : 1; 680240afd8cSMark Johnston 681240afd8cSMark Johnston if (cur->next == NULL && 682240afd8cSMark Johnston (cur->child == NULL || (cur->inode->flags & FI_ROOT) != 0)) { 683240afd8cSMark Johnston /* 684240afd8cSMark Johnston * We reached a terminal node in a subtree. Walk back up and 685240afd8cSMark Johnston * write out directories. We're done once we hit the root of a 686240afd8cSMark Johnston * dataset or find a level where we're not on the edge of the 687240afd8cSMark Johnston * tree. 688240afd8cSMark Johnston */ 689240afd8cSMark Johnston do { 690240afd8cSMark Johnston dir = SLIST_FIRST(&arg->dirs); 691240afd8cSMark Johnston SLIST_REMOVE_HEAD(&arg->dirs, next); 692240afd8cSMark Johnston zap_write(arg->zfs, dir->zap); 6938eca3207SMark Johnston if (dir->dirfd != -1) 6948eca3207SMark Johnston eclose(dir->dirfd); 695240afd8cSMark Johnston free(dir); 696240afd8cSMark Johnston cur = cur->parent; 697240afd8cSMark Johnston } while (cur != NULL && cur->next == NULL && 698240afd8cSMark Johnston (cur->inode->flags & FI_ROOT) == 0); 699240afd8cSMark Johnston } 700240afd8cSMark Johnston 701240afd8cSMark Johnston return (ret); 702240afd8cSMark Johnston } 703240afd8cSMark Johnston 704240afd8cSMark Johnston static void 705240afd8cSMark Johnston fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index, 706240afd8cSMark Johnston const sa_attr_type_t layout[], size_t sacnt) 707240afd8cSMark Johnston { 708240afd8cSMark Johnston char ti[16]; 709240afd8cSMark Johnston 710240afd8cSMark Johnston assert(sizeof(layout[0]) == 2); 711240afd8cSMark Johnston 712240afd8cSMark Johnston snprintf(ti, sizeof(ti), "%u", index); 713240afd8cSMark Johnston zap_add(zap, ti, sizeof(sa_attr_type_t), sacnt, 714240afd8cSMark Johnston (const uint8_t *)layout); 715240afd8cSMark Johnston } 716240afd8cSMark Johnston 717240afd8cSMark Johnston /* 718240afd8cSMark Johnston * Initialize system attribute tables. 719240afd8cSMark Johnston * 720240afd8cSMark Johnston * There are two elements to this. First, we write the zpl_attrs[] and 721240afd8cSMark Johnston * zpl_attr_layout[] tables to disk. Then we create a lookup table which 722240afd8cSMark Johnston * allows us to set file attributes quickly. 723240afd8cSMark Johnston */ 724240afd8cSMark Johnston static uint64_t 725240afd8cSMark Johnston fs_set_zpl_attrs(zfs_opt_t *zfs, zfs_fs_t *fs) 726240afd8cSMark Johnston { 727240afd8cSMark Johnston zfs_zap_t *sazap, *salzap, *sarzap; 728240afd8cSMark Johnston zfs_objset_t *os; 729240afd8cSMark Johnston dnode_phys_t *saobj, *salobj, *sarobj; 730240afd8cSMark Johnston uint64_t saobjid, salobjid, sarobjid; 731240afd8cSMark Johnston uint16_t offset; 732240afd8cSMark Johnston 733240afd8cSMark Johnston os = fs->os; 734240afd8cSMark Johnston 735240afd8cSMark Johnston /* 736240afd8cSMark Johnston * The on-disk tables are stored in two ZAP objects, the registry object 737240afd8cSMark Johnston * and the layout object. Individual attributes are described by 738240afd8cSMark Johnston * entries in the registry object; for example, the value for the 739240afd8cSMark Johnston * "ZPL_SIZE" key gives the size and encoding of the ZPL_SIZE attribute. 740240afd8cSMark Johnston * The attributes of a file are ordered according to one of the layouts 741240afd8cSMark Johnston * defined in the layout object. The master node object is simply used 742240afd8cSMark Johnston * to locate the registry and layout objects. 743240afd8cSMark Johnston */ 744240afd8cSMark Johnston saobj = objset_dnode_alloc(os, DMU_OT_SA_MASTER_NODE, &saobjid); 745240afd8cSMark Johnston salobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_LAYOUTS, &salobjid); 746240afd8cSMark Johnston sarobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_REGISTRATION, &sarobjid); 747240afd8cSMark Johnston 748240afd8cSMark Johnston sarzap = zap_alloc(os, sarobj); 749240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attrs); i++) { 750240afd8cSMark Johnston const zfs_sattr_t *sa; 751240afd8cSMark Johnston uint64_t attr; 752240afd8cSMark Johnston 753240afd8cSMark Johnston attr = 0; 754240afd8cSMark Johnston sa = &zpl_attrs[i]; 755240afd8cSMark Johnston SA_ATTR_ENCODE(attr, (uint64_t)i, sa->size, sa->bs); 756240afd8cSMark Johnston zap_add_uint64(sarzap, sa->name, attr); 757240afd8cSMark Johnston } 758240afd8cSMark Johnston zap_write(zfs, sarzap); 759240afd8cSMark Johnston 760240afd8cSMark Johnston /* 761240afd8cSMark Johnston * Layouts are arrays of indices into the registry. We define two 762240afd8cSMark Johnston * layouts for use by the ZPL, one for non-symlinks and one for 763240afd8cSMark Johnston * symlinks. They are identical except that the symlink layout includes 764240afd8cSMark Johnston * ZPL_SYMLINK as its final attribute. 765240afd8cSMark Johnston */ 766240afd8cSMark Johnston salzap = zap_alloc(os, salobj); 767240afd8cSMark Johnston assert(zpl_attr_layout[nitems(zpl_attr_layout) - 1] == ZPL_SYMLINK); 768240afd8cSMark Johnston fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_DEFAULT, 769240afd8cSMark Johnston zpl_attr_layout, nitems(zpl_attr_layout) - 1); 770240afd8cSMark Johnston fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_SYMLINK, 771240afd8cSMark Johnston zpl_attr_layout, nitems(zpl_attr_layout)); 772240afd8cSMark Johnston zap_write(zfs, salzap); 773240afd8cSMark Johnston 774240afd8cSMark Johnston sazap = zap_alloc(os, saobj); 775240afd8cSMark Johnston zap_add_uint64(sazap, SA_LAYOUTS, salobjid); 776240afd8cSMark Johnston zap_add_uint64(sazap, SA_REGISTRY, sarobjid); 777240afd8cSMark Johnston zap_write(zfs, sazap); 778240afd8cSMark Johnston 779240afd8cSMark Johnston /* Sanity check. */ 780240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attrs); i++) 781240afd8cSMark Johnston assert(i == zpl_attrs[i].id); 782240afd8cSMark Johnston 783240afd8cSMark Johnston /* 784240afd8cSMark Johnston * Build the offset table used when setting file attributes. File 785240afd8cSMark Johnston * attributes are stored in the object's bonus buffer; this table 786240afd8cSMark Johnston * provides the buffer offset of attributes referenced by the layout 787240afd8cSMark Johnston * table. 788240afd8cSMark Johnston */ 789240afd8cSMark Johnston fs->sacnt = nitems(zpl_attrs); 790240afd8cSMark Johnston fs->saoffs = ecalloc(fs->sacnt, sizeof(*fs->saoffs)); 791240afd8cSMark Johnston for (size_t i = 0; i < fs->sacnt; i++) 792240afd8cSMark Johnston fs->saoffs[i] = 0xffff; 793240afd8cSMark Johnston offset = 0; 794240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attr_layout); i++) { 795240afd8cSMark Johnston uint16_t size; 796240afd8cSMark Johnston 797240afd8cSMark Johnston assert(zpl_attr_layout[i] < fs->sacnt); 798240afd8cSMark Johnston 799240afd8cSMark Johnston fs->saoffs[zpl_attr_layout[i]] = offset; 800240afd8cSMark Johnston size = zpl_attrs[zpl_attr_layout[i]].size; 801240afd8cSMark Johnston offset += size; 802240afd8cSMark Johnston } 803240afd8cSMark Johnston fs->satab = zpl_attrs; 804240afd8cSMark Johnston 805240afd8cSMark Johnston return (saobjid); 806240afd8cSMark Johnston } 807240afd8cSMark Johnston 808240afd8cSMark Johnston static void 809240afd8cSMark Johnston fs_layout_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg) 810240afd8cSMark Johnston { 811240afd8cSMark Johnston char *mountpoint, *origmountpoint, *name, *next; 812240afd8cSMark Johnston fsnode *cur, *root; 813240afd8cSMark Johnston uint64_t canmount; 814240afd8cSMark Johnston 815240afd8cSMark Johnston if (!dsl_dir_has_dataset(dsldir)) 816240afd8cSMark Johnston return; 817240afd8cSMark Johnston 81878d7704bSMark Johnston if (dsl_dir_get_canmount(dsldir, &canmount) == 0 && canmount == 0) 81978d7704bSMark Johnston return; 820240afd8cSMark Johnston mountpoint = dsl_dir_get_mountpoint(zfs, dsldir); 821240afd8cSMark Johnston if (mountpoint == NULL) 822240afd8cSMark Johnston return; 823240afd8cSMark Johnston 824240afd8cSMark Johnston /* 825240afd8cSMark Johnston * If we were asked to specify a bootfs, set it here. 826240afd8cSMark Johnston */ 827240afd8cSMark Johnston if (zfs->bootfs != NULL && strcmp(zfs->bootfs, 828240afd8cSMark Johnston dsl_dir_fullname(dsldir)) == 0) { 829240afd8cSMark Johnston zap_add_uint64(zfs->poolprops, "bootfs", 830240afd8cSMark Johnston dsl_dir_dataset_id(dsldir)); 831240afd8cSMark Johnston } 832240afd8cSMark Johnston 833240afd8cSMark Johnston origmountpoint = mountpoint; 834240afd8cSMark Johnston 835240afd8cSMark Johnston /* 836240afd8cSMark Johnston * Figure out which fsnode corresponds to our mountpoint. 837240afd8cSMark Johnston */ 838240afd8cSMark Johnston root = arg; 839240afd8cSMark Johnston cur = root; 840240afd8cSMark Johnston if (strcmp(mountpoint, zfs->rootpath) != 0) { 841240afd8cSMark Johnston mountpoint += strlen(zfs->rootpath); 842240afd8cSMark Johnston 843240afd8cSMark Johnston /* 844240afd8cSMark Johnston * Look up the directory in the staged tree. For example, if 845240afd8cSMark Johnston * the dataset's mount point is /foo/bar/baz, we'll search the 846240afd8cSMark Johnston * root directory for "foo", search "foo" for "baz", and so on. 847240afd8cSMark Johnston * Each intermediate name must refer to a directory; the final 848240afd8cSMark Johnston * component need not exist. 849240afd8cSMark Johnston */ 850240afd8cSMark Johnston cur = root; 851240afd8cSMark Johnston for (next = name = mountpoint; next != NULL;) { 852240afd8cSMark Johnston for (; *next == '/'; next++) 853240afd8cSMark Johnston ; 854240afd8cSMark Johnston name = strsep(&next, "/"); 855240afd8cSMark Johnston 856240afd8cSMark Johnston for (; cur != NULL && strcmp(cur->name, name) != 0; 857240afd8cSMark Johnston cur = cur->next) 858240afd8cSMark Johnston ; 859240afd8cSMark Johnston if (cur == NULL) { 860240afd8cSMark Johnston if (next == NULL) 861240afd8cSMark Johnston break; 862240afd8cSMark Johnston errx(1, "missing mountpoint directory for `%s'", 863240afd8cSMark Johnston dsl_dir_fullname(dsldir)); 864240afd8cSMark Johnston } 865240afd8cSMark Johnston if (cur->type != S_IFDIR) { 866240afd8cSMark Johnston errx(1, 867240afd8cSMark Johnston "mountpoint for `%s' is not a directory", 868240afd8cSMark Johnston dsl_dir_fullname(dsldir)); 869240afd8cSMark Johnston } 870240afd8cSMark Johnston if (next != NULL) 871240afd8cSMark Johnston cur = cur->child; 872240afd8cSMark Johnston } 873240afd8cSMark Johnston } 874240afd8cSMark Johnston 875240afd8cSMark Johnston if (cur != NULL) { 876240afd8cSMark Johnston assert(cur->type == S_IFDIR); 877240afd8cSMark Johnston 878240afd8cSMark Johnston /* 879240afd8cSMark Johnston * Multiple datasets shouldn't share a mountpoint. It's 880240afd8cSMark Johnston * technically allowed, but it's not clear what makefs should do 881240afd8cSMark Johnston * in that case. 882240afd8cSMark Johnston */ 883240afd8cSMark Johnston assert((cur->inode->flags & FI_ROOT) == 0); 884240afd8cSMark Johnston if (cur != root) 885240afd8cSMark Johnston cur->inode->flags |= FI_ROOT; 886240afd8cSMark Johnston assert(cur->inode->param == NULL); 887240afd8cSMark Johnston cur->inode->param = dsldir; 888240afd8cSMark Johnston } 889240afd8cSMark Johnston 890240afd8cSMark Johnston free(origmountpoint); 891240afd8cSMark Johnston } 892240afd8cSMark Johnston 893240afd8cSMark Johnston static int 894240afd8cSMark Johnston fs_foreach_mark(fsnode *cur, void *arg) 895240afd8cSMark Johnston { 896240afd8cSMark Johnston uint64_t *countp; 897240afd8cSMark Johnston 898240afd8cSMark Johnston countp = arg; 899240afd8cSMark Johnston if (cur->type == S_IFDIR && fsnode_isroot(cur)) 900240afd8cSMark Johnston return (1); 901240afd8cSMark Johnston 902240afd8cSMark Johnston if (cur->inode->ino == 0) { 903240afd8cSMark Johnston cur->inode->ino = ++(*countp); 904240afd8cSMark Johnston cur->inode->nlink = 1; 905240afd8cSMark Johnston } else { 906240afd8cSMark Johnston cur->inode->nlink++; 907240afd8cSMark Johnston } 908240afd8cSMark Johnston 909240afd8cSMark Johnston return ((cur->inode->flags & FI_ROOT) != 0 ? 0 : 1); 910240afd8cSMark Johnston } 911240afd8cSMark Johnston 912240afd8cSMark Johnston /* 913240afd8cSMark Johnston * Create a filesystem dataset. More specifically: 914240afd8cSMark Johnston * - create an object set for the dataset, 915240afd8cSMark Johnston * - add required metadata (SA tables, property definitions, etc.) to that 916240afd8cSMark Johnston * object set, 917240afd8cSMark Johnston * - optionally populate the object set with file objects, using "root" as the 918240afd8cSMark Johnston * root directory. 919240afd8cSMark Johnston * 920240afd8cSMark Johnston * "dirfd" is a directory descriptor for the directory referenced by "root". It 921240afd8cSMark Johnston * is closed before returning. 922240afd8cSMark Johnston */ 923240afd8cSMark Johnston static void 924240afd8cSMark Johnston fs_build_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, fsnode *root, int dirfd) 925240afd8cSMark Johnston { 926240afd8cSMark Johnston struct fs_populate_arg arg; 927240afd8cSMark Johnston zfs_fs_t fs; 928240afd8cSMark Johnston zfs_zap_t *masterzap; 929240afd8cSMark Johnston zfs_objset_t *os; 930240afd8cSMark Johnston dnode_phys_t *deleteq, *masterobj; 931240afd8cSMark Johnston uint64_t deleteqid, dnodecount, moid, rootdirid, saobjid; 932240afd8cSMark Johnston bool fakedroot; 933240afd8cSMark Johnston 934240afd8cSMark Johnston /* 935240afd8cSMark Johnston * This dataset's mountpoint doesn't exist in the staging tree, or the 936240afd8cSMark Johnston * dataset doesn't have a mountpoint at all. In either case we still 937240afd8cSMark Johnston * need a root directory. Fake up a root fsnode to handle this case. 938240afd8cSMark Johnston */ 939240afd8cSMark Johnston fakedroot = root == NULL; 940240afd8cSMark Johnston if (fakedroot) { 941240afd8cSMark Johnston struct stat *stp; 942240afd8cSMark Johnston 943240afd8cSMark Johnston assert(dirfd == -1); 944240afd8cSMark Johnston 945240afd8cSMark Johnston root = ecalloc(1, sizeof(*root)); 946240afd8cSMark Johnston root->inode = ecalloc(1, sizeof(*root->inode)); 947240afd8cSMark Johnston root->name = estrdup("."); 948240afd8cSMark Johnston root->type = S_IFDIR; 949240afd8cSMark Johnston 950240afd8cSMark Johnston stp = &root->inode->st; 951240afd8cSMark Johnston stp->st_uid = 0; 952240afd8cSMark Johnston stp->st_gid = 0; 953240afd8cSMark Johnston stp->st_mode = S_IFDIR | 0755; 954240afd8cSMark Johnston } 955240afd8cSMark Johnston assert(root->type == S_IFDIR); 956240afd8cSMark Johnston assert(fsnode_isroot(root)); 957240afd8cSMark Johnston 958240afd8cSMark Johnston /* 959240afd8cSMark Johnston * Initialize the object set for this dataset. 960240afd8cSMark Johnston */ 961240afd8cSMark Johnston os = objset_alloc(zfs, DMU_OST_ZFS); 962240afd8cSMark Johnston masterobj = objset_dnode_alloc(os, DMU_OT_MASTER_NODE, &moid); 963240afd8cSMark Johnston assert(moid == MASTER_NODE_OBJ); 964240afd8cSMark Johnston 965240afd8cSMark Johnston memset(&fs, 0, sizeof(fs)); 966240afd8cSMark Johnston fs.os = os; 967240afd8cSMark Johnston 968240afd8cSMark Johnston /* 969240afd8cSMark Johnston * Create the ZAP SA layout now since filesystem object dnodes will 970240afd8cSMark Johnston * refer to those attributes. 971240afd8cSMark Johnston */ 972240afd8cSMark Johnston saobjid = fs_set_zpl_attrs(zfs, &fs); 973240afd8cSMark Johnston 974240afd8cSMark Johnston /* 975240afd8cSMark Johnston * Make a pass over the staged directory to detect hard links and assign 976240afd8cSMark Johnston * virtual dnode numbers. 977240afd8cSMark Johnston */ 978240afd8cSMark Johnston dnodecount = 1; /* root directory */ 979240afd8cSMark Johnston fsnode_foreach(root, fs_foreach_mark, &dnodecount); 980240afd8cSMark Johnston 981240afd8cSMark Johnston /* 982240afd8cSMark Johnston * Make a second pass to populate the dataset with files from the 983240afd8cSMark Johnston * staged directory. Most of our runtime is spent here. 984240afd8cSMark Johnston */ 9858eca3207SMark Johnston arg.rootdirfd = dirfd; 986240afd8cSMark Johnston arg.zfs = zfs; 987240afd8cSMark Johnston arg.fs = &fs; 988240afd8cSMark Johnston SLIST_INIT(&arg.dirs); 989240afd8cSMark Johnston fs_populate_dir(root, &arg); 990240afd8cSMark Johnston assert(!SLIST_EMPTY(&arg.dirs)); 991240afd8cSMark Johnston fsnode_foreach(root, fs_foreach_populate, &arg); 992240afd8cSMark Johnston assert(SLIST_EMPTY(&arg.dirs)); 993240afd8cSMark Johnston rootdirid = arg.rootdirid; 994240afd8cSMark Johnston 995240afd8cSMark Johnston /* 996240afd8cSMark Johnston * Create an empty delete queue. We don't do anything with it, but 997240afd8cSMark Johnston * OpenZFS will refuse to mount filesystems that don't have one. 998240afd8cSMark Johnston */ 999240afd8cSMark Johnston deleteq = objset_dnode_alloc(os, DMU_OT_UNLINKED_SET, &deleteqid); 1000240afd8cSMark Johnston zap_write(zfs, zap_alloc(os, deleteq)); 1001240afd8cSMark Johnston 1002240afd8cSMark Johnston /* 1003240afd8cSMark Johnston * Populate and write the master node object. This is a ZAP object 1004240afd8cSMark Johnston * containing various dataset properties and the object IDs of the root 1005240afd8cSMark Johnston * directory and delete queue. 1006240afd8cSMark Johnston */ 1007240afd8cSMark Johnston masterzap = zap_alloc(os, masterobj); 1008240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_ROOT_OBJ, rootdirid); 1009240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_UNLINKED_SET, deleteqid); 1010240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_SA_ATTRS, saobjid); 1011240afd8cSMark Johnston zap_add_uint64(masterzap, ZPL_VERSION_OBJ, 5 /* ZPL_VERSION_SA */); 1012240afd8cSMark Johnston zap_add_uint64(masterzap, "normalization", 0 /* off */); 1013240afd8cSMark Johnston zap_add_uint64(masterzap, "utf8only", 0 /* off */); 1014240afd8cSMark Johnston zap_add_uint64(masterzap, "casesensitivity", 0 /* case sensitive */); 1015240afd8cSMark Johnston zap_add_uint64(masterzap, "acltype", 2 /* NFSv4 */); 1016240afd8cSMark Johnston zap_write(zfs, masterzap); 1017240afd8cSMark Johnston 1018240afd8cSMark Johnston /* 1019240afd8cSMark Johnston * All finished with this object set, we may as well write it now. 1020240afd8cSMark Johnston * The DSL layer will sum up the bytes consumed by each dataset using 1021240afd8cSMark Johnston * information stored in the object set, so it can't be freed just yet. 1022240afd8cSMark Johnston */ 1023240afd8cSMark Johnston dsl_dir_dataset_write(zfs, os, dsldir); 1024240afd8cSMark Johnston 1025240afd8cSMark Johnston if (fakedroot) { 1026240afd8cSMark Johnston free(root->inode); 1027240afd8cSMark Johnston free(root->name); 1028240afd8cSMark Johnston free(root); 1029240afd8cSMark Johnston } 1030240afd8cSMark Johnston free(fs.saoffs); 1031240afd8cSMark Johnston } 1032240afd8cSMark Johnston 1033240afd8cSMark Johnston /* 1034240afd8cSMark Johnston * Create an object set for each DSL directory which has a dataset and doesn't 1035240afd8cSMark Johnston * already have an object set. 1036240afd8cSMark Johnston */ 1037240afd8cSMark Johnston static void 1038240afd8cSMark Johnston fs_build_unmounted(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg __unused) 1039240afd8cSMark Johnston { 1040240afd8cSMark Johnston if (dsl_dir_has_dataset(dsldir) && !dsl_dir_dataset_has_objset(dsldir)) 1041240afd8cSMark Johnston fs_build_one(zfs, dsldir, NULL, -1); 1042240afd8cSMark Johnston } 1043240afd8cSMark Johnston 1044240afd8cSMark Johnston /* 1045240afd8cSMark Johnston * Create our datasets and populate them with files. 1046240afd8cSMark Johnston */ 1047240afd8cSMark Johnston void 1048240afd8cSMark Johnston fs_build(zfs_opt_t *zfs, int dirfd, fsnode *root) 1049240afd8cSMark Johnston { 1050240afd8cSMark Johnston /* 1051240afd8cSMark Johnston * Run through our datasets and find the root fsnode for each one. Each 1052240afd8cSMark Johnston * root fsnode is flagged so that we can figure out which dataset it 1053240afd8cSMark Johnston * belongs to. 1054240afd8cSMark Johnston */ 1055240afd8cSMark Johnston dsl_dir_foreach(zfs, zfs->rootdsldir, fs_layout_one, root); 1056240afd8cSMark Johnston 1057240afd8cSMark Johnston /* 1058240afd8cSMark Johnston * Did we find our boot filesystem? 1059240afd8cSMark Johnston */ 1060240afd8cSMark Johnston if (zfs->bootfs != NULL && !zap_entry_exists(zfs->poolprops, "bootfs")) 1061240afd8cSMark Johnston errx(1, "no mounted dataset matches bootfs property `%s'", 1062240afd8cSMark Johnston zfs->bootfs); 1063240afd8cSMark Johnston 1064240afd8cSMark Johnston /* 1065240afd8cSMark Johnston * Traverse the file hierarchy starting from the root fsnode. One 1066240afd8cSMark Johnston * dataset, not necessarily the root dataset, must "own" the root 1067240afd8cSMark Johnston * directory by having its mountpoint be equal to the root path. 1068240afd8cSMark Johnston * 1069240afd8cSMark Johnston * As roots of other datasets are encountered during the traversal, 1070240afd8cSMark Johnston * fs_build_one() recursively creates the corresponding object sets and 1071240afd8cSMark Johnston * populates them. Once this function has returned, all datasets will 1072240afd8cSMark Johnston * have been fully populated. 1073240afd8cSMark Johnston */ 1074240afd8cSMark Johnston fs_build_one(zfs, root->inode->param, root, dirfd); 1075240afd8cSMark Johnston 1076240afd8cSMark Johnston /* 1077240afd8cSMark Johnston * Now create object sets for datasets whose mountpoints weren't found 1078240afd8cSMark Johnston * in the staging directory, either because there is no mountpoint, or 1079240afd8cSMark Johnston * because the mountpoint doesn't correspond to an existing directory. 1080240afd8cSMark Johnston */ 1081240afd8cSMark Johnston dsl_dir_foreach(zfs, zfs->rootdsldir, fs_build_unmounted, NULL); 1082240afd8cSMark Johnston } 1083