/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2022 The FreeBSD Foundation
 *
 * This software was developed by Mark Johnston under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28240afd8cSMark Johnston * SUCH DAMAGE. 29240afd8cSMark Johnston */ 30240afd8cSMark Johnston 31240afd8cSMark Johnston #include <sys/stat.h> 32240afd8cSMark Johnston 33240afd8cSMark Johnston #include <assert.h> 34c6890399SJessica Clarke #include <dirent.h> 35240afd8cSMark Johnston #include <fcntl.h> 36c6890399SJessica Clarke #include <stdlib.h> 37240afd8cSMark Johnston #include <string.h> 38240afd8cSMark Johnston #include <unistd.h> 39240afd8cSMark Johnston 40240afd8cSMark Johnston #include <util.h> 41240afd8cSMark Johnston 42240afd8cSMark Johnston #include "makefs.h" 43240afd8cSMark Johnston #include "zfs.h" 44240afd8cSMark Johnston 45240afd8cSMark Johnston typedef struct { 46240afd8cSMark Johnston const char *name; 47240afd8cSMark Johnston unsigned int id; 48240afd8cSMark Johnston uint16_t size; 49240afd8cSMark Johnston sa_bswap_type_t bs; 50240afd8cSMark Johnston } zfs_sattr_t; 51240afd8cSMark Johnston 52240afd8cSMark Johnston typedef struct zfs_fs { 53240afd8cSMark Johnston zfs_objset_t *os; 54240afd8cSMark Johnston 55240afd8cSMark Johnston /* Offset table for system attributes, indexed by a zpl_attr_t. 
*/ 56240afd8cSMark Johnston uint16_t *saoffs; 57240afd8cSMark Johnston size_t sacnt; 58240afd8cSMark Johnston const zfs_sattr_t *satab; 59240afd8cSMark Johnston } zfs_fs_t; 60240afd8cSMark Johnston 61240afd8cSMark Johnston /* 62240afd8cSMark Johnston * The order of the attributes doesn't matter, this is simply the one hard-coded 63240afd8cSMark Johnston * by OpenZFS, based on a zdb dump of the SA_REGISTRY table. 64240afd8cSMark Johnston */ 65240afd8cSMark Johnston typedef enum zpl_attr { 66240afd8cSMark Johnston ZPL_ATIME, 67240afd8cSMark Johnston ZPL_MTIME, 68240afd8cSMark Johnston ZPL_CTIME, 69240afd8cSMark Johnston ZPL_CRTIME, 70240afd8cSMark Johnston ZPL_GEN, 71240afd8cSMark Johnston ZPL_MODE, 72240afd8cSMark Johnston ZPL_SIZE, 73240afd8cSMark Johnston ZPL_PARENT, 74240afd8cSMark Johnston ZPL_LINKS, 75240afd8cSMark Johnston ZPL_XATTR, 76240afd8cSMark Johnston ZPL_RDEV, 77240afd8cSMark Johnston ZPL_FLAGS, 78240afd8cSMark Johnston ZPL_UID, 79240afd8cSMark Johnston ZPL_GID, 80240afd8cSMark Johnston ZPL_PAD, 81240afd8cSMark Johnston ZPL_ZNODE_ACL, 82240afd8cSMark Johnston ZPL_DACL_COUNT, 83240afd8cSMark Johnston ZPL_SYMLINK, 84240afd8cSMark Johnston ZPL_SCANSTAMP, 85240afd8cSMark Johnston ZPL_DACL_ACES, 86240afd8cSMark Johnston ZPL_DXATTR, 87240afd8cSMark Johnston ZPL_PROJID, 88240afd8cSMark Johnston } zpl_attr_t; 89240afd8cSMark Johnston 90240afd8cSMark Johnston /* 91240afd8cSMark Johnston * This table must be kept in sync with zpl_attr_layout[] and zpl_attr_t. 
92240afd8cSMark Johnston */ 93240afd8cSMark Johnston static const zfs_sattr_t zpl_attrs[] = { 94240afd8cSMark Johnston #define _ZPL_ATTR(n, s, b) { .name = #n, .id = n, .size = s, .bs = b } 95240afd8cSMark Johnston _ZPL_ATTR(ZPL_ATIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 96240afd8cSMark Johnston _ZPL_ATTR(ZPL_MTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 97240afd8cSMark Johnston _ZPL_ATTR(ZPL_CTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 98240afd8cSMark Johnston _ZPL_ATTR(ZPL_CRTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 99240afd8cSMark Johnston _ZPL_ATTR(ZPL_GEN, sizeof(uint64_t), SA_UINT64_ARRAY), 100240afd8cSMark Johnston _ZPL_ATTR(ZPL_MODE, sizeof(uint64_t), SA_UINT64_ARRAY), 101240afd8cSMark Johnston _ZPL_ATTR(ZPL_SIZE, sizeof(uint64_t), SA_UINT64_ARRAY), 102240afd8cSMark Johnston _ZPL_ATTR(ZPL_PARENT, sizeof(uint64_t), SA_UINT64_ARRAY), 103240afd8cSMark Johnston _ZPL_ATTR(ZPL_LINKS, sizeof(uint64_t), SA_UINT64_ARRAY), 104240afd8cSMark Johnston _ZPL_ATTR(ZPL_XATTR, sizeof(uint64_t), SA_UINT64_ARRAY), 105240afd8cSMark Johnston _ZPL_ATTR(ZPL_RDEV, sizeof(uint64_t), SA_UINT64_ARRAY), 106240afd8cSMark Johnston _ZPL_ATTR(ZPL_FLAGS, sizeof(uint64_t), SA_UINT64_ARRAY), 107240afd8cSMark Johnston _ZPL_ATTR(ZPL_UID, sizeof(uint64_t), SA_UINT64_ARRAY), 108240afd8cSMark Johnston _ZPL_ATTR(ZPL_GID, sizeof(uint64_t), SA_UINT64_ARRAY), 109240afd8cSMark Johnston _ZPL_ATTR(ZPL_PAD, sizeof(uint64_t), SA_UINT64_ARRAY), 110240afd8cSMark Johnston _ZPL_ATTR(ZPL_ZNODE_ACL, 88, SA_UINT64_ARRAY), 111240afd8cSMark Johnston _ZPL_ATTR(ZPL_DACL_COUNT, sizeof(uint64_t), SA_UINT64_ARRAY), 112240afd8cSMark Johnston _ZPL_ATTR(ZPL_SYMLINK, 0, SA_UINT8_ARRAY), 113240afd8cSMark Johnston _ZPL_ATTR(ZPL_SCANSTAMP, sizeof(uint64_t) * 4, SA_UINT8_ARRAY), 114240afd8cSMark Johnston _ZPL_ATTR(ZPL_DACL_ACES, 0, SA_ACL), 115240afd8cSMark Johnston _ZPL_ATTR(ZPL_DXATTR, 0, SA_UINT8_ARRAY), 116240afd8cSMark Johnston _ZPL_ATTR(ZPL_PROJID, sizeof(uint64_t), SA_UINT64_ARRAY), 117240afd8cSMark Johnston 
#undef ZPL_ATTR 118240afd8cSMark Johnston }; 119240afd8cSMark Johnston 120240afd8cSMark Johnston /* 121240afd8cSMark Johnston * This layout matches that of a filesystem created using OpenZFS on FreeBSD. 122240afd8cSMark Johnston * It need not match in general, but FreeBSD's loader doesn't bother parsing the 123240afd8cSMark Johnston * layout and just hard-codes attribute offsets. 124240afd8cSMark Johnston */ 125240afd8cSMark Johnston static const sa_attr_type_t zpl_attr_layout[] = { 126240afd8cSMark Johnston ZPL_MODE, 127240afd8cSMark Johnston ZPL_SIZE, 128240afd8cSMark Johnston ZPL_GEN, 129240afd8cSMark Johnston ZPL_UID, 130240afd8cSMark Johnston ZPL_GID, 131240afd8cSMark Johnston ZPL_PARENT, 132240afd8cSMark Johnston ZPL_FLAGS, 133240afd8cSMark Johnston ZPL_ATIME, 134240afd8cSMark Johnston ZPL_MTIME, 135240afd8cSMark Johnston ZPL_CTIME, 136240afd8cSMark Johnston ZPL_CRTIME, 137240afd8cSMark Johnston ZPL_LINKS, 138240afd8cSMark Johnston ZPL_DACL_COUNT, 139240afd8cSMark Johnston ZPL_DACL_ACES, 140240afd8cSMark Johnston ZPL_SYMLINK, 141240afd8cSMark Johnston }; 142240afd8cSMark Johnston 143240afd8cSMark Johnston /* 144240afd8cSMark Johnston * Keys for the ZPL attribute tables in the SA layout ZAP. The first two 145240afd8cSMark Johnston * indices are reserved for legacy attribute encoding. 
146240afd8cSMark Johnston */ 147240afd8cSMark Johnston #define SA_LAYOUT_INDEX_DEFAULT 2 148240afd8cSMark Johnston #define SA_LAYOUT_INDEX_SYMLINK 3 149240afd8cSMark Johnston 150240afd8cSMark Johnston struct fs_populate_dir { 151240afd8cSMark Johnston SLIST_ENTRY(fs_populate_dir) next; 152240afd8cSMark Johnston int dirfd; 153240afd8cSMark Johnston uint64_t objid; 154240afd8cSMark Johnston zfs_zap_t *zap; 155240afd8cSMark Johnston }; 156240afd8cSMark Johnston 157240afd8cSMark Johnston struct fs_populate_arg { 158240afd8cSMark Johnston zfs_opt_t *zfs; 159240afd8cSMark Johnston zfs_fs_t *fs; /* owning filesystem */ 160240afd8cSMark Johnston uint64_t rootdirid; /* root directory dnode ID */ 1618eca3207SMark Johnston int rootdirfd; /* root directory fd */ 162240afd8cSMark Johnston SLIST_HEAD(, fs_populate_dir) dirs; /* stack of directories */ 163240afd8cSMark Johnston }; 164240afd8cSMark Johnston 165240afd8cSMark Johnston static void fs_build_one(zfs_opt_t *, zfs_dsl_dir_t *, fsnode *, int); 166240afd8cSMark Johnston 1678eca3207SMark Johnston static void 1688eca3207SMark Johnston eclose(int fd) 1698eca3207SMark Johnston { 1708eca3207SMark Johnston if (close(fd) != 0) 1718eca3207SMark Johnston err(1, "close"); 1728eca3207SMark Johnston } 1738eca3207SMark Johnston 174240afd8cSMark Johnston static bool 175240afd8cSMark Johnston fsnode_isroot(const fsnode *cur) 176240afd8cSMark Johnston { 177240afd8cSMark Johnston return (strcmp(cur->name, ".") == 0); 178240afd8cSMark Johnston } 179240afd8cSMark Johnston 180240afd8cSMark Johnston /* 181240afd8cSMark Johnston * Visit each node in a directory hierarchy, in pre-order depth-first order. 
182240afd8cSMark Johnston */ 183240afd8cSMark Johnston static void 184240afd8cSMark Johnston fsnode_foreach(fsnode *root, int (*cb)(fsnode *, void *), void *arg) 185240afd8cSMark Johnston { 186240afd8cSMark Johnston assert(root->type == S_IFDIR); 187240afd8cSMark Johnston 188240afd8cSMark Johnston for (fsnode *cur = root; cur != NULL; cur = cur->next) { 189240afd8cSMark Johnston assert(cur->type == S_IFREG || cur->type == S_IFDIR || 190240afd8cSMark Johnston cur->type == S_IFLNK); 191240afd8cSMark Johnston 192240afd8cSMark Johnston if (cb(cur, arg) == 0) 193240afd8cSMark Johnston continue; 194240afd8cSMark Johnston if (cur->type == S_IFDIR && cur->child != NULL) 195240afd8cSMark Johnston fsnode_foreach(cur->child, cb, arg); 196240afd8cSMark Johnston } 197240afd8cSMark Johnston } 198240afd8cSMark Johnston 199240afd8cSMark Johnston static void 200240afd8cSMark Johnston fs_populate_dirent(struct fs_populate_arg *arg, fsnode *cur, uint64_t dnid) 201240afd8cSMark Johnston { 202240afd8cSMark Johnston struct fs_populate_dir *dir; 203240afd8cSMark Johnston uint64_t type; 204240afd8cSMark Johnston 205240afd8cSMark Johnston switch (cur->type) { 206240afd8cSMark Johnston case S_IFREG: 207240afd8cSMark Johnston type = DT_REG; 208240afd8cSMark Johnston break; 209240afd8cSMark Johnston case S_IFDIR: 210240afd8cSMark Johnston type = DT_DIR; 211240afd8cSMark Johnston break; 212240afd8cSMark Johnston case S_IFLNK: 213240afd8cSMark Johnston type = DT_LNK; 214240afd8cSMark Johnston break; 215240afd8cSMark Johnston default: 216240afd8cSMark Johnston assert(0); 217240afd8cSMark Johnston } 218240afd8cSMark Johnston 219240afd8cSMark Johnston dir = SLIST_FIRST(&arg->dirs); 220240afd8cSMark Johnston zap_add_uint64(dir->zap, cur->name, ZFS_DIRENT_MAKE(type, dnid)); 221240afd8cSMark Johnston } 222240afd8cSMark Johnston 223240afd8cSMark Johnston static void 224240afd8cSMark Johnston fs_populate_attr(zfs_fs_t *fs, char *attrbuf, const void *val, uint16_t ind, 225240afd8cSMark Johnston size_t 
*szp) 226240afd8cSMark Johnston { 227240afd8cSMark Johnston assert(ind < fs->sacnt); 228240afd8cSMark Johnston assert(fs->saoffs[ind] != 0xffff); 229240afd8cSMark Johnston 230240afd8cSMark Johnston memcpy(attrbuf + fs->saoffs[ind], val, fs->satab[ind].size); 231240afd8cSMark Johnston *szp += fs->satab[ind].size; 232240afd8cSMark Johnston } 233240afd8cSMark Johnston 234240afd8cSMark Johnston static void 235240afd8cSMark Johnston fs_populate_varszattr(zfs_fs_t *fs, char *attrbuf, const void *val, 236240afd8cSMark Johnston size_t valsz, size_t varoff, uint16_t ind, size_t *szp) 237240afd8cSMark Johnston { 238240afd8cSMark Johnston assert(ind < fs->sacnt); 239240afd8cSMark Johnston assert(fs->saoffs[ind] != 0xffff); 240240afd8cSMark Johnston assert(fs->satab[ind].size == 0); 241240afd8cSMark Johnston 242240afd8cSMark Johnston memcpy(attrbuf + fs->saoffs[ind] + varoff, val, valsz); 243240afd8cSMark Johnston *szp += valsz; 244240afd8cSMark Johnston } 245240afd8cSMark Johnston 2468eca3207SMark Johnston /* 2478eca3207SMark Johnston * Derive the relative fd/path combo needed to access a file. Ideally we'd 2488eca3207SMark Johnston * always be able to use relative lookups (i.e., use the *at() system calls), 2498eca3207SMark Johnston * since they require less path translation and are more amenable to sandboxing, 2508eca3207SMark Johnston * but the handling of multiple staging directories makes that difficult. To 2518eca3207SMark Johnston * make matters worse, we have no choice but to use relative lookups when 2528eca3207SMark Johnston * dealing with an mtree manifest, so both mechanisms are implemented. 
2538eca3207SMark Johnston */ 2548eca3207SMark Johnston static void 2558eca3207SMark Johnston fs_populate_path(const fsnode *cur, struct fs_populate_arg *arg, 2568eca3207SMark Johnston char *path, size_t sz, int *dirfdp) 2578eca3207SMark Johnston { 258*aac389a3SBrooks Davis if (cur->contents != NULL) { 259*aac389a3SBrooks Davis size_t n; 260*aac389a3SBrooks Davis 261*aac389a3SBrooks Davis *dirfdp = AT_FDCWD; 262*aac389a3SBrooks Davis n = strlcpy(path, cur->contents, sz); 263*aac389a3SBrooks Davis assert(n < sz); 264*aac389a3SBrooks Davis } else if (cur->root == NULL) { 2658eca3207SMark Johnston size_t n; 2668eca3207SMark Johnston 2678eca3207SMark Johnston *dirfdp = SLIST_FIRST(&arg->dirs)->dirfd; 2688eca3207SMark Johnston n = strlcpy(path, cur->name, sz); 2698eca3207SMark Johnston assert(n < sz); 2708eca3207SMark Johnston } else { 2718eca3207SMark Johnston int n; 2728eca3207SMark Johnston 2738eca3207SMark Johnston *dirfdp = AT_FDCWD; 2748eca3207SMark Johnston n = snprintf(path, sz, "%s/%s/%s", 2758eca3207SMark Johnston cur->root, cur->path, cur->name); 2768eca3207SMark Johnston assert(n >= 0); 2778eca3207SMark Johnston assert((size_t)n < sz); 2788eca3207SMark Johnston } 2798eca3207SMark Johnston } 2808eca3207SMark Johnston 2818eca3207SMark Johnston static int 2828eca3207SMark Johnston fs_open(const fsnode *cur, struct fs_populate_arg *arg, int flags) 2838eca3207SMark Johnston { 2848eca3207SMark Johnston char path[PATH_MAX]; 2858eca3207SMark Johnston int fd; 2868eca3207SMark Johnston 2878eca3207SMark Johnston fs_populate_path(cur, arg, path, sizeof(path), &fd); 2888eca3207SMark Johnston 2898eca3207SMark Johnston fd = openat(fd, path, flags); 2908eca3207SMark Johnston if (fd < 0) 2918eca3207SMark Johnston err(1, "openat(%s)", path); 2928eca3207SMark Johnston return (fd); 2938eca3207SMark Johnston } 2948eca3207SMark Johnston 2958eca3207SMark Johnston static void 2968eca3207SMark Johnston fs_readlink(const fsnode *cur, struct fs_populate_arg *arg, 2978eca3207SMark 
Johnston char *buf, size_t bufsz) 2988eca3207SMark Johnston { 2998eca3207SMark Johnston char path[PATH_MAX]; 3008eca3207SMark Johnston ssize_t n; 3018eca3207SMark Johnston int fd; 3028eca3207SMark Johnston 3038eca3207SMark Johnston fs_populate_path(cur, arg, path, sizeof(path), &fd); 3048eca3207SMark Johnston 3058eca3207SMark Johnston n = readlinkat(fd, path, buf, bufsz - 1); 3068eca3207SMark Johnston if (n == -1) 3078eca3207SMark Johnston err(1, "readlinkat(%s)", cur->name); 3088eca3207SMark Johnston buf[n] = '\0'; 3098eca3207SMark Johnston } 3108eca3207SMark Johnston 311240afd8cSMark Johnston static void 312b0ce7dfcSJessica Clarke fs_populate_time(zfs_fs_t *fs, char *attrbuf, struct timespec *ts, 313b0ce7dfcSJessica Clarke uint16_t ind, size_t *szp) 314b0ce7dfcSJessica Clarke { 315b0ce7dfcSJessica Clarke uint64_t timebuf[2]; 316b0ce7dfcSJessica Clarke 317b0ce7dfcSJessica Clarke assert(ind < fs->sacnt); 318b0ce7dfcSJessica Clarke assert(fs->saoffs[ind] != 0xffff); 319b0ce7dfcSJessica Clarke assert(fs->satab[ind].size == sizeof(timebuf)); 320b0ce7dfcSJessica Clarke 321b0ce7dfcSJessica Clarke timebuf[0] = ts->tv_sec; 322b0ce7dfcSJessica Clarke timebuf[1] = ts->tv_nsec; 323b0ce7dfcSJessica Clarke fs_populate_attr(fs, attrbuf, timebuf, ind, szp); 324b0ce7dfcSJessica Clarke } 325b0ce7dfcSJessica Clarke 326b0ce7dfcSJessica Clarke static void 327240afd8cSMark Johnston fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur, 328240afd8cSMark Johnston dnode_phys_t *dnode) 329240afd8cSMark Johnston { 330240afd8cSMark Johnston char target[PATH_MAX]; 331240afd8cSMark Johnston zfs_fs_t *fs; 332240afd8cSMark Johnston zfs_ace_hdr_t aces[3]; 333240afd8cSMark Johnston struct stat *sb; 334240afd8cSMark Johnston sa_hdr_phys_t *sahdr; 335240afd8cSMark Johnston uint64_t daclcount, flags, gen, gid, links, mode, parent, objsize, uid; 336240afd8cSMark Johnston char *attrbuf; 337240afd8cSMark Johnston size_t bonussz, hdrsz; 338240afd8cSMark Johnston int layout; 339240afd8cSMark 
Johnston 340240afd8cSMark Johnston assert(dnode->dn_bonustype == DMU_OT_SA); 341240afd8cSMark Johnston assert(dnode->dn_nblkptr == 1); 342240afd8cSMark Johnston 343240afd8cSMark Johnston fs = arg->fs; 344240afd8cSMark Johnston sb = &cur->inode->st; 345240afd8cSMark Johnston 346240afd8cSMark Johnston switch (cur->type) { 347240afd8cSMark Johnston case S_IFREG: 348240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_DEFAULT; 349240afd8cSMark Johnston links = cur->inode->nlink; 350240afd8cSMark Johnston objsize = sb->st_size; 351240afd8cSMark Johnston parent = SLIST_FIRST(&arg->dirs)->objid; 352240afd8cSMark Johnston break; 353240afd8cSMark Johnston case S_IFDIR: 354240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_DEFAULT; 355240afd8cSMark Johnston links = 1; /* .. */ 356240afd8cSMark Johnston objsize = 1; /* .. */ 357240afd8cSMark Johnston 358240afd8cSMark Johnston /* 359240afd8cSMark Johnston * The size of a ZPL directory is the number of entries 360240afd8cSMark Johnston * (including "." and ".."), and the link count is the number of 361240afd8cSMark Johnston * entries which are directories (including "." and ".."). 362240afd8cSMark Johnston */ 363240afd8cSMark Johnston for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child; 364240afd8cSMark Johnston c != NULL; c = c->next) { 365240afd8cSMark Johnston if (c->type == S_IFDIR) 366240afd8cSMark Johnston links++; 367240afd8cSMark Johnston objsize++; 368240afd8cSMark Johnston } 369240afd8cSMark Johnston 370240afd8cSMark Johnston /* The root directory is its own parent. */ 371240afd8cSMark Johnston parent = SLIST_EMPTY(&arg->dirs) ? 
372240afd8cSMark Johnston arg->rootdirid : SLIST_FIRST(&arg->dirs)->objid; 373240afd8cSMark Johnston break; 3748eca3207SMark Johnston case S_IFLNK: 3758eca3207SMark Johnston fs_readlink(cur, arg, target, sizeof(target)); 376240afd8cSMark Johnston 377240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_SYMLINK; 378240afd8cSMark Johnston links = 1; 379240afd8cSMark Johnston objsize = strlen(target); 380240afd8cSMark Johnston parent = SLIST_FIRST(&arg->dirs)->objid; 381240afd8cSMark Johnston break; 382240afd8cSMark Johnston default: 383240afd8cSMark Johnston assert(0); 384240afd8cSMark Johnston } 385240afd8cSMark Johnston 386240afd8cSMark Johnston daclcount = nitems(aces); 387240afd8cSMark Johnston flags = ZFS_ACL_TRIVIAL | ZFS_ACL_AUTO_INHERIT | ZFS_NO_EXECS_DENIED | 388240afd8cSMark Johnston ZFS_ARCHIVE | ZFS_AV_MODIFIED; /* XXX-MJ */ 389240afd8cSMark Johnston gen = 1; 390240afd8cSMark Johnston gid = sb->st_gid; 391240afd8cSMark Johnston mode = sb->st_mode; 392240afd8cSMark Johnston uid = sb->st_uid; 393240afd8cSMark Johnston 394240afd8cSMark Johnston memset(aces, 0, sizeof(aces)); 395240afd8cSMark Johnston aces[0].z_flags = ACE_OWNER; 396240afd8cSMark Johnston aces[0].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 397240afd8cSMark Johnston aces[0].z_access_mask = ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER | 398240afd8cSMark Johnston ACE_WRITE_ACL | ACE_WRITE_NAMED_ATTRS | ACE_READ_ACL | 399240afd8cSMark Johnston ACE_READ_ATTRIBUTES | ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 400240afd8cSMark Johnston if ((mode & S_IRUSR) != 0) 401240afd8cSMark Johnston aces[0].z_access_mask |= ACE_READ_DATA; 402240afd8cSMark Johnston if ((mode & S_IWUSR) != 0) 403240afd8cSMark Johnston aces[0].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 404240afd8cSMark Johnston if ((mode & S_IXUSR) != 0) 405240afd8cSMark Johnston aces[0].z_access_mask |= ACE_EXECUTE; 406240afd8cSMark Johnston 407240afd8cSMark Johnston aces[1].z_flags = ACE_GROUP | ACE_IDENTIFIER_GROUP; 408240afd8cSMark Johnston aces[1].z_type 
= ACE_ACCESS_ALLOWED_ACE_TYPE; 409240afd8cSMark Johnston aces[1].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES | 410240afd8cSMark Johnston ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 411240afd8cSMark Johnston if ((mode & S_IRGRP) != 0) 412240afd8cSMark Johnston aces[1].z_access_mask |= ACE_READ_DATA; 413240afd8cSMark Johnston if ((mode & S_IWGRP) != 0) 414240afd8cSMark Johnston aces[1].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 415240afd8cSMark Johnston if ((mode & S_IXGRP) != 0) 416240afd8cSMark Johnston aces[1].z_access_mask |= ACE_EXECUTE; 417240afd8cSMark Johnston 418240afd8cSMark Johnston aces[2].z_flags = ACE_EVERYONE; 419240afd8cSMark Johnston aces[2].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 420240afd8cSMark Johnston aces[2].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES | 421240afd8cSMark Johnston ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 422240afd8cSMark Johnston if ((mode & S_IROTH) != 0) 423240afd8cSMark Johnston aces[2].z_access_mask |= ACE_READ_DATA; 424240afd8cSMark Johnston if ((mode & S_IWOTH) != 0) 425240afd8cSMark Johnston aces[2].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 426240afd8cSMark Johnston if ((mode & S_IXOTH) != 0) 427240afd8cSMark Johnston aces[2].z_access_mask |= ACE_EXECUTE; 428240afd8cSMark Johnston 429240afd8cSMark Johnston switch (layout) { 430240afd8cSMark Johnston case SA_LAYOUT_INDEX_DEFAULT: 431240afd8cSMark Johnston /* At most one variable-length attribute. */ 432240afd8cSMark Johnston hdrsz = sizeof(uint64_t); 433240afd8cSMark Johnston break; 434240afd8cSMark Johnston case SA_LAYOUT_INDEX_SYMLINK: 435240afd8cSMark Johnston /* At most five variable-length attributes. 
*/ 436240afd8cSMark Johnston hdrsz = sizeof(uint64_t) * 2; 437240afd8cSMark Johnston break; 438240afd8cSMark Johnston default: 439240afd8cSMark Johnston assert(0); 440240afd8cSMark Johnston } 441240afd8cSMark Johnston 442240afd8cSMark Johnston sahdr = (sa_hdr_phys_t *)DN_BONUS(dnode); 443240afd8cSMark Johnston sahdr->sa_magic = SA_MAGIC; 444240afd8cSMark Johnston SA_HDR_LAYOUT_INFO_ENCODE(sahdr->sa_layout_info, layout, hdrsz); 445240afd8cSMark Johnston 446240afd8cSMark Johnston bonussz = SA_HDR_SIZE(sahdr); 447240afd8cSMark Johnston attrbuf = (char *)sahdr + SA_HDR_SIZE(sahdr); 448240afd8cSMark Johnston 449240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &daclcount, ZPL_DACL_COUNT, &bonussz); 450240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &flags, ZPL_FLAGS, &bonussz); 451240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &gen, ZPL_GEN, &bonussz); 452240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &gid, ZPL_GID, &bonussz); 453240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &links, ZPL_LINKS, &bonussz); 454240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &mode, ZPL_MODE, &bonussz); 455240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &parent, ZPL_PARENT, &bonussz); 456240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &objsize, ZPL_SIZE, &bonussz); 457240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &uid, ZPL_UID, &bonussz); 458240afd8cSMark Johnston 459240afd8cSMark Johnston /* 460240afd8cSMark Johnston * We deliberately set atime = mtime here to ensure that images are 461240afd8cSMark Johnston * reproducible. 
462240afd8cSMark Johnston */ 463b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_ATIME, &bonussz); 464b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CTIME, &bonussz); 465b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_MTIME, &bonussz); 466c6890399SJessica Clarke #ifdef __linux__ 467c6890399SJessica Clarke /* Linux has no st_birthtim; approximate with st_ctim */ 468c6890399SJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CRTIME, &bonussz); 469c6890399SJessica Clarke #else 470b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_birthtim, ZPL_CRTIME, &bonussz); 471c6890399SJessica Clarke #endif 472240afd8cSMark Johnston 473240afd8cSMark Johnston fs_populate_varszattr(fs, attrbuf, aces, sizeof(aces), 0, 474240afd8cSMark Johnston ZPL_DACL_ACES, &bonussz); 475240afd8cSMark Johnston sahdr->sa_lengths[0] = sizeof(aces); 476240afd8cSMark Johnston 477240afd8cSMark Johnston if (cur->type == S_IFLNK) { 478240afd8cSMark Johnston assert(layout == SA_LAYOUT_INDEX_SYMLINK); 479240afd8cSMark Johnston /* Need to use a spill block pointer if the target is long. 
*/ 480240afd8cSMark Johnston assert(bonussz + objsize <= DN_OLD_MAX_BONUSLEN); 481240afd8cSMark Johnston fs_populate_varszattr(fs, attrbuf, target, objsize, 482240afd8cSMark Johnston sahdr->sa_lengths[0], ZPL_SYMLINK, &bonussz); 483240afd8cSMark Johnston sahdr->sa_lengths[1] = (uint16_t)objsize; 484240afd8cSMark Johnston } 485240afd8cSMark Johnston 486240afd8cSMark Johnston dnode->dn_bonuslen = bonussz; 487240afd8cSMark Johnston } 488240afd8cSMark Johnston 489240afd8cSMark Johnston static void 490240afd8cSMark Johnston fs_populate_file(fsnode *cur, struct fs_populate_arg *arg) 491240afd8cSMark Johnston { 492240afd8cSMark Johnston struct dnode_cursor *c; 493240afd8cSMark Johnston dnode_phys_t *dnode; 494240afd8cSMark Johnston zfs_opt_t *zfs; 495240afd8cSMark Johnston char *buf; 496240afd8cSMark Johnston uint64_t dnid; 497240afd8cSMark Johnston ssize_t n; 498240afd8cSMark Johnston size_t bufsz; 499240afd8cSMark Johnston off_t size, target; 500240afd8cSMark Johnston int fd; 501240afd8cSMark Johnston 502240afd8cSMark Johnston assert(cur->type == S_IFREG); 503240afd8cSMark Johnston assert((cur->inode->flags & FI_ROOT) == 0); 504240afd8cSMark Johnston 505240afd8cSMark Johnston zfs = arg->zfs; 506240afd8cSMark Johnston 507240afd8cSMark Johnston assert(cur->inode->ino != 0); 508240afd8cSMark Johnston if ((cur->inode->flags & FI_ALLOCATED) != 0) { 509240afd8cSMark Johnston /* 510240afd8cSMark Johnston * This is a hard link of an existing file. 
511240afd8cSMark Johnston * 512240afd8cSMark Johnston * XXX-MJ need to check whether it crosses datasets, add a test 513240afd8cSMark Johnston * case for that 514240afd8cSMark Johnston */ 515240afd8cSMark Johnston fs_populate_dirent(arg, cur, cur->inode->ino); 516240afd8cSMark Johnston return; 517240afd8cSMark Johnston } 518240afd8cSMark Johnston 519240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(arg->fs->os, 520240afd8cSMark Johnston DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid); 521240afd8cSMark Johnston cur->inode->ino = dnid; 522240afd8cSMark Johnston cur->inode->flags |= FI_ALLOCATED; 523240afd8cSMark Johnston 5248eca3207SMark Johnston fd = fs_open(cur, arg, O_RDONLY); 525240afd8cSMark Johnston 526240afd8cSMark Johnston buf = zfs->filebuf; 527240afd8cSMark Johnston bufsz = sizeof(zfs->filebuf); 528240afd8cSMark Johnston size = cur->inode->st.st_size; 529240afd8cSMark Johnston c = dnode_cursor_init(zfs, arg->fs->os, dnode, size, 0); 530240afd8cSMark Johnston for (off_t foff = 0; foff < size; foff += target) { 531240afd8cSMark Johnston off_t loc, sofar; 532240afd8cSMark Johnston 533240afd8cSMark Johnston /* 534240afd8cSMark Johnston * Fill up our buffer, handling partial reads. 535240afd8cSMark Johnston * 536240afd8cSMark Johnston * It might be profitable to use copy_file_range(2) here. 
537240afd8cSMark Johnston */ 538240afd8cSMark Johnston sofar = 0; 539240afd8cSMark Johnston target = MIN(size - foff, (off_t)bufsz); 540240afd8cSMark Johnston do { 541240afd8cSMark Johnston n = read(fd, buf + sofar, target); 542240afd8cSMark Johnston if (n < 0) 543240afd8cSMark Johnston err(1, "reading from '%s'", cur->name); 544240afd8cSMark Johnston if (n == 0) 545240afd8cSMark Johnston errx(1, "unexpected EOF reading '%s'", 546240afd8cSMark Johnston cur->name); 547240afd8cSMark Johnston sofar += n; 548240afd8cSMark Johnston } while (sofar < target); 549240afd8cSMark Johnston 550240afd8cSMark Johnston if (target < (off_t)bufsz) 551240afd8cSMark Johnston memset(buf + target, 0, bufsz - target); 552240afd8cSMark Johnston 553240afd8cSMark Johnston loc = objset_space_alloc(zfs, arg->fs->os, &target); 554240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, buf, target, loc, 555240afd8cSMark Johnston dnode_cursor_next(zfs, c, foff)); 556240afd8cSMark Johnston } 5578eca3207SMark Johnston eclose(fd); 558240afd8cSMark Johnston dnode_cursor_finish(zfs, c); 559240afd8cSMark Johnston 560240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 561240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 562240afd8cSMark Johnston } 563240afd8cSMark Johnston 564240afd8cSMark Johnston static void 565240afd8cSMark Johnston fs_populate_dir(fsnode *cur, struct fs_populate_arg *arg) 566240afd8cSMark Johnston { 567240afd8cSMark Johnston dnode_phys_t *dnode; 568240afd8cSMark Johnston zfs_objset_t *os; 569240afd8cSMark Johnston uint64_t dnid; 570240afd8cSMark Johnston int dirfd; 571240afd8cSMark Johnston 572240afd8cSMark Johnston assert(cur->type == S_IFDIR); 573240afd8cSMark Johnston assert((cur->inode->flags & FI_ALLOCATED) == 0); 574240afd8cSMark Johnston 575240afd8cSMark Johnston os = arg->fs->os; 576240afd8cSMark Johnston 577240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(os, DMU_OT_DIRECTORY_CONTENTS, 578240afd8cSMark Johnston DMU_OT_SA, 0, &dnid); 
579240afd8cSMark Johnston 580240afd8cSMark Johnston /* 581240afd8cSMark Johnston * Add an entry to the parent directory and open this directory. 582240afd8cSMark Johnston */ 583240afd8cSMark Johnston if (!SLIST_EMPTY(&arg->dirs)) { 584240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 5858eca3207SMark Johnston dirfd = fs_open(cur, arg, O_DIRECTORY | O_RDONLY); 586240afd8cSMark Johnston } else { 587240afd8cSMark Johnston arg->rootdirid = dnid; 5888eca3207SMark Johnston dirfd = arg->rootdirfd; 5898eca3207SMark Johnston arg->rootdirfd = -1; 590240afd8cSMark Johnston } 591240afd8cSMark Johnston 592240afd8cSMark Johnston /* 593240afd8cSMark Johnston * Set ZPL attributes. 594240afd8cSMark Johnston */ 595240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 596240afd8cSMark Johnston 597240afd8cSMark Johnston /* 598240afd8cSMark Johnston * If this is a root directory, then its children belong to a different 599240afd8cSMark Johnston * dataset and this directory remains empty in the current objset. 
600240afd8cSMark Johnston */ 601240afd8cSMark Johnston if ((cur->inode->flags & FI_ROOT) == 0) { 602240afd8cSMark Johnston struct fs_populate_dir *dir; 603240afd8cSMark Johnston 604240afd8cSMark Johnston dir = ecalloc(1, sizeof(*dir)); 605240afd8cSMark Johnston dir->dirfd = dirfd; 606240afd8cSMark Johnston dir->objid = dnid; 607240afd8cSMark Johnston dir->zap = zap_alloc(os, dnode); 608240afd8cSMark Johnston SLIST_INSERT_HEAD(&arg->dirs, dir, next); 609240afd8cSMark Johnston } else { 610240afd8cSMark Johnston zap_write(arg->zfs, zap_alloc(os, dnode)); 611240afd8cSMark Johnston fs_build_one(arg->zfs, cur->inode->param, cur->child, dirfd); 612240afd8cSMark Johnston } 613240afd8cSMark Johnston } 614240afd8cSMark Johnston 615240afd8cSMark Johnston static void 616240afd8cSMark Johnston fs_populate_symlink(fsnode *cur, struct fs_populate_arg *arg) 617240afd8cSMark Johnston { 618240afd8cSMark Johnston dnode_phys_t *dnode; 619240afd8cSMark Johnston uint64_t dnid; 620240afd8cSMark Johnston 621240afd8cSMark Johnston assert(cur->type == S_IFLNK); 622240afd8cSMark Johnston assert((cur->inode->flags & (FI_ALLOCATED | FI_ROOT)) == 0); 623240afd8cSMark Johnston 624240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(arg->fs->os, 625240afd8cSMark Johnston DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid); 626240afd8cSMark Johnston 627240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 628240afd8cSMark Johnston 629240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 630240afd8cSMark Johnston } 631240afd8cSMark Johnston 632240afd8cSMark Johnston static int 633240afd8cSMark Johnston fs_foreach_populate(fsnode *cur, void *_arg) 634240afd8cSMark Johnston { 635240afd8cSMark Johnston struct fs_populate_arg *arg; 636240afd8cSMark Johnston struct fs_populate_dir *dir; 637240afd8cSMark Johnston int ret; 638240afd8cSMark Johnston 639240afd8cSMark Johnston arg = _arg; 640240afd8cSMark Johnston switch (cur->type) { 641240afd8cSMark Johnston case S_IFREG: 642240afd8cSMark 
Johnston fs_populate_file(cur, arg); 643240afd8cSMark Johnston break; 644240afd8cSMark Johnston case S_IFDIR: 645240afd8cSMark Johnston if (fsnode_isroot(cur)) 646240afd8cSMark Johnston break; 647240afd8cSMark Johnston fs_populate_dir(cur, arg); 648240afd8cSMark Johnston break; 649240afd8cSMark Johnston case S_IFLNK: 650240afd8cSMark Johnston fs_populate_symlink(cur, arg); 651240afd8cSMark Johnston break; 652240afd8cSMark Johnston default: 653240afd8cSMark Johnston assert(0); 654240afd8cSMark Johnston } 655240afd8cSMark Johnston 656240afd8cSMark Johnston ret = (cur->inode->flags & FI_ROOT) != 0 ? 0 : 1; 657240afd8cSMark Johnston 658240afd8cSMark Johnston if (cur->next == NULL && 659240afd8cSMark Johnston (cur->child == NULL || (cur->inode->flags & FI_ROOT) != 0)) { 660240afd8cSMark Johnston /* 661240afd8cSMark Johnston * We reached a terminal node in a subtree. Walk back up and 662240afd8cSMark Johnston * write out directories. We're done once we hit the root of a 663240afd8cSMark Johnston * dataset or find a level where we're not on the edge of the 664240afd8cSMark Johnston * tree. 
665240afd8cSMark Johnston */ 666240afd8cSMark Johnston do { 667240afd8cSMark Johnston dir = SLIST_FIRST(&arg->dirs); 668240afd8cSMark Johnston SLIST_REMOVE_HEAD(&arg->dirs, next); 669240afd8cSMark Johnston zap_write(arg->zfs, dir->zap); 6708eca3207SMark Johnston if (dir->dirfd != -1) 6718eca3207SMark Johnston eclose(dir->dirfd); 672240afd8cSMark Johnston free(dir); 673240afd8cSMark Johnston cur = cur->parent; 674240afd8cSMark Johnston } while (cur != NULL && cur->next == NULL && 675240afd8cSMark Johnston (cur->inode->flags & FI_ROOT) == 0); 676240afd8cSMark Johnston } 677240afd8cSMark Johnston 678240afd8cSMark Johnston return (ret); 679240afd8cSMark Johnston } 680240afd8cSMark Johnston 681240afd8cSMark Johnston static void 682240afd8cSMark Johnston fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index, 683240afd8cSMark Johnston const sa_attr_type_t layout[], size_t sacnt) 684240afd8cSMark Johnston { 685240afd8cSMark Johnston char ti[16]; 686240afd8cSMark Johnston 687240afd8cSMark Johnston assert(sizeof(layout[0]) == 2); 688240afd8cSMark Johnston 689240afd8cSMark Johnston snprintf(ti, sizeof(ti), "%u", index); 690240afd8cSMark Johnston zap_add(zap, ti, sizeof(sa_attr_type_t), sacnt, 691240afd8cSMark Johnston (const uint8_t *)layout); 692240afd8cSMark Johnston } 693240afd8cSMark Johnston 694240afd8cSMark Johnston /* 695240afd8cSMark Johnston * Initialize system attribute tables. 696240afd8cSMark Johnston * 697240afd8cSMark Johnston * There are two elements to this. First, we write the zpl_attrs[] and 698240afd8cSMark Johnston * zpl_attr_layout[] tables to disk. Then we create a lookup table which 699240afd8cSMark Johnston * allows us to set file attributes quickly. 
700240afd8cSMark Johnston */ 701240afd8cSMark Johnston static uint64_t 702240afd8cSMark Johnston fs_set_zpl_attrs(zfs_opt_t *zfs, zfs_fs_t *fs) 703240afd8cSMark Johnston { 704240afd8cSMark Johnston zfs_zap_t *sazap, *salzap, *sarzap; 705240afd8cSMark Johnston zfs_objset_t *os; 706240afd8cSMark Johnston dnode_phys_t *saobj, *salobj, *sarobj; 707240afd8cSMark Johnston uint64_t saobjid, salobjid, sarobjid; 708240afd8cSMark Johnston uint16_t offset; 709240afd8cSMark Johnston 710240afd8cSMark Johnston os = fs->os; 711240afd8cSMark Johnston 712240afd8cSMark Johnston /* 713240afd8cSMark Johnston * The on-disk tables are stored in two ZAP objects, the registry object 714240afd8cSMark Johnston * and the layout object. Individual attributes are described by 715240afd8cSMark Johnston * entries in the registry object; for example, the value for the 716240afd8cSMark Johnston * "ZPL_SIZE" key gives the size and encoding of the ZPL_SIZE attribute. 717240afd8cSMark Johnston * The attributes of a file are ordered according to one of the layouts 718240afd8cSMark Johnston * defined in the layout object. The master node object is simply used 719240afd8cSMark Johnston * to locate the registry and layout objects. 
720240afd8cSMark Johnston */ 721240afd8cSMark Johnston saobj = objset_dnode_alloc(os, DMU_OT_SA_MASTER_NODE, &saobjid); 722240afd8cSMark Johnston salobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_LAYOUTS, &salobjid); 723240afd8cSMark Johnston sarobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_REGISTRATION, &sarobjid); 724240afd8cSMark Johnston 725240afd8cSMark Johnston sarzap = zap_alloc(os, sarobj); 726240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attrs); i++) { 727240afd8cSMark Johnston const zfs_sattr_t *sa; 728240afd8cSMark Johnston uint64_t attr; 729240afd8cSMark Johnston 730240afd8cSMark Johnston attr = 0; 731240afd8cSMark Johnston sa = &zpl_attrs[i]; 732240afd8cSMark Johnston SA_ATTR_ENCODE(attr, (uint64_t)i, sa->size, sa->bs); 733240afd8cSMark Johnston zap_add_uint64(sarzap, sa->name, attr); 734240afd8cSMark Johnston } 735240afd8cSMark Johnston zap_write(zfs, sarzap); 736240afd8cSMark Johnston 737240afd8cSMark Johnston /* 738240afd8cSMark Johnston * Layouts are arrays of indices into the registry. We define two 739240afd8cSMark Johnston * layouts for use by the ZPL, one for non-symlinks and one for 740240afd8cSMark Johnston * symlinks. They are identical except that the symlink layout includes 741240afd8cSMark Johnston * ZPL_SYMLINK as its final attribute. 
742240afd8cSMark Johnston */ 743240afd8cSMark Johnston salzap = zap_alloc(os, salobj); 744240afd8cSMark Johnston assert(zpl_attr_layout[nitems(zpl_attr_layout) - 1] == ZPL_SYMLINK); 745240afd8cSMark Johnston fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_DEFAULT, 746240afd8cSMark Johnston zpl_attr_layout, nitems(zpl_attr_layout) - 1); 747240afd8cSMark Johnston fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_SYMLINK, 748240afd8cSMark Johnston zpl_attr_layout, nitems(zpl_attr_layout)); 749240afd8cSMark Johnston zap_write(zfs, salzap); 750240afd8cSMark Johnston 751240afd8cSMark Johnston sazap = zap_alloc(os, saobj); 752240afd8cSMark Johnston zap_add_uint64(sazap, SA_LAYOUTS, salobjid); 753240afd8cSMark Johnston zap_add_uint64(sazap, SA_REGISTRY, sarobjid); 754240afd8cSMark Johnston zap_write(zfs, sazap); 755240afd8cSMark Johnston 756240afd8cSMark Johnston /* Sanity check. */ 757240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attrs); i++) 758240afd8cSMark Johnston assert(i == zpl_attrs[i].id); 759240afd8cSMark Johnston 760240afd8cSMark Johnston /* 761240afd8cSMark Johnston * Build the offset table used when setting file attributes. File 762240afd8cSMark Johnston * attributes are stored in the object's bonus buffer; this table 763240afd8cSMark Johnston * provides the buffer offset of attributes referenced by the layout 764240afd8cSMark Johnston * table. 
765240afd8cSMark Johnston */ 766240afd8cSMark Johnston fs->sacnt = nitems(zpl_attrs); 767240afd8cSMark Johnston fs->saoffs = ecalloc(fs->sacnt, sizeof(*fs->saoffs)); 768240afd8cSMark Johnston for (size_t i = 0; i < fs->sacnt; i++) 769240afd8cSMark Johnston fs->saoffs[i] = 0xffff; 770240afd8cSMark Johnston offset = 0; 771240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attr_layout); i++) { 772240afd8cSMark Johnston uint16_t size; 773240afd8cSMark Johnston 774240afd8cSMark Johnston assert(zpl_attr_layout[i] < fs->sacnt); 775240afd8cSMark Johnston 776240afd8cSMark Johnston fs->saoffs[zpl_attr_layout[i]] = offset; 777240afd8cSMark Johnston size = zpl_attrs[zpl_attr_layout[i]].size; 778240afd8cSMark Johnston offset += size; 779240afd8cSMark Johnston } 780240afd8cSMark Johnston fs->satab = zpl_attrs; 781240afd8cSMark Johnston 782240afd8cSMark Johnston return (saobjid); 783240afd8cSMark Johnston } 784240afd8cSMark Johnston 785240afd8cSMark Johnston static void 786240afd8cSMark Johnston fs_layout_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg) 787240afd8cSMark Johnston { 788240afd8cSMark Johnston char *mountpoint, *origmountpoint, *name, *next; 789240afd8cSMark Johnston fsnode *cur, *root; 790240afd8cSMark Johnston uint64_t canmount; 791240afd8cSMark Johnston 792240afd8cSMark Johnston if (!dsl_dir_has_dataset(dsldir)) 793240afd8cSMark Johnston return; 794240afd8cSMark Johnston 79578d7704bSMark Johnston if (dsl_dir_get_canmount(dsldir, &canmount) == 0 && canmount == 0) 79678d7704bSMark Johnston return; 797240afd8cSMark Johnston mountpoint = dsl_dir_get_mountpoint(zfs, dsldir); 798240afd8cSMark Johnston if (mountpoint == NULL) 799240afd8cSMark Johnston return; 800240afd8cSMark Johnston 801240afd8cSMark Johnston /* 802240afd8cSMark Johnston * If we were asked to specify a bootfs, set it here. 
803240afd8cSMark Johnston */ 804240afd8cSMark Johnston if (zfs->bootfs != NULL && strcmp(zfs->bootfs, 805240afd8cSMark Johnston dsl_dir_fullname(dsldir)) == 0) { 806240afd8cSMark Johnston zap_add_uint64(zfs->poolprops, "bootfs", 807240afd8cSMark Johnston dsl_dir_dataset_id(dsldir)); 808240afd8cSMark Johnston } 809240afd8cSMark Johnston 810240afd8cSMark Johnston origmountpoint = mountpoint; 811240afd8cSMark Johnston 812240afd8cSMark Johnston /* 813240afd8cSMark Johnston * Figure out which fsnode corresponds to our mountpoint. 814240afd8cSMark Johnston */ 815240afd8cSMark Johnston root = arg; 816240afd8cSMark Johnston cur = root; 817240afd8cSMark Johnston if (strcmp(mountpoint, zfs->rootpath) != 0) { 818240afd8cSMark Johnston mountpoint += strlen(zfs->rootpath); 819240afd8cSMark Johnston 820240afd8cSMark Johnston /* 821240afd8cSMark Johnston * Look up the directory in the staged tree. For example, if 822240afd8cSMark Johnston * the dataset's mount point is /foo/bar/baz, we'll search the 823240afd8cSMark Johnston * root directory for "foo", search "foo" for "baz", and so on. 824240afd8cSMark Johnston * Each intermediate name must refer to a directory; the final 825240afd8cSMark Johnston * component need not exist. 
826240afd8cSMark Johnston */ 827240afd8cSMark Johnston cur = root; 828240afd8cSMark Johnston for (next = name = mountpoint; next != NULL;) { 829240afd8cSMark Johnston for (; *next == '/'; next++) 830240afd8cSMark Johnston ; 831240afd8cSMark Johnston name = strsep(&next, "/"); 832240afd8cSMark Johnston 833240afd8cSMark Johnston for (; cur != NULL && strcmp(cur->name, name) != 0; 834240afd8cSMark Johnston cur = cur->next) 835240afd8cSMark Johnston ; 836240afd8cSMark Johnston if (cur == NULL) { 837240afd8cSMark Johnston if (next == NULL) 838240afd8cSMark Johnston break; 839240afd8cSMark Johnston errx(1, "missing mountpoint directory for `%s'", 840240afd8cSMark Johnston dsl_dir_fullname(dsldir)); 841240afd8cSMark Johnston } 842240afd8cSMark Johnston if (cur->type != S_IFDIR) { 843240afd8cSMark Johnston errx(1, 844240afd8cSMark Johnston "mountpoint for `%s' is not a directory", 845240afd8cSMark Johnston dsl_dir_fullname(dsldir)); 846240afd8cSMark Johnston } 847240afd8cSMark Johnston if (next != NULL) 848240afd8cSMark Johnston cur = cur->child; 849240afd8cSMark Johnston } 850240afd8cSMark Johnston } 851240afd8cSMark Johnston 852240afd8cSMark Johnston if (cur != NULL) { 853240afd8cSMark Johnston assert(cur->type == S_IFDIR); 854240afd8cSMark Johnston 855240afd8cSMark Johnston /* 856240afd8cSMark Johnston * Multiple datasets shouldn't share a mountpoint. It's 857240afd8cSMark Johnston * technically allowed, but it's not clear what makefs should do 858240afd8cSMark Johnston * in that case. 
859240afd8cSMark Johnston */ 860240afd8cSMark Johnston assert((cur->inode->flags & FI_ROOT) == 0); 861240afd8cSMark Johnston if (cur != root) 862240afd8cSMark Johnston cur->inode->flags |= FI_ROOT; 863240afd8cSMark Johnston assert(cur->inode->param == NULL); 864240afd8cSMark Johnston cur->inode->param = dsldir; 865240afd8cSMark Johnston } 866240afd8cSMark Johnston 867240afd8cSMark Johnston free(origmountpoint); 868240afd8cSMark Johnston } 869240afd8cSMark Johnston 870240afd8cSMark Johnston static int 871240afd8cSMark Johnston fs_foreach_mark(fsnode *cur, void *arg) 872240afd8cSMark Johnston { 873240afd8cSMark Johnston uint64_t *countp; 874240afd8cSMark Johnston 875240afd8cSMark Johnston countp = arg; 876240afd8cSMark Johnston if (cur->type == S_IFDIR && fsnode_isroot(cur)) 877240afd8cSMark Johnston return (1); 878240afd8cSMark Johnston 879240afd8cSMark Johnston if (cur->inode->ino == 0) { 880240afd8cSMark Johnston cur->inode->ino = ++(*countp); 881240afd8cSMark Johnston cur->inode->nlink = 1; 882240afd8cSMark Johnston } else { 883240afd8cSMark Johnston cur->inode->nlink++; 884240afd8cSMark Johnston } 885240afd8cSMark Johnston 886240afd8cSMark Johnston return ((cur->inode->flags & FI_ROOT) != 0 ? 0 : 1); 887240afd8cSMark Johnston } 888240afd8cSMark Johnston 889240afd8cSMark Johnston /* 890240afd8cSMark Johnston * Create a filesystem dataset. More specifically: 891240afd8cSMark Johnston * - create an object set for the dataset, 892240afd8cSMark Johnston * - add required metadata (SA tables, property definitions, etc.) to that 893240afd8cSMark Johnston * object set, 894240afd8cSMark Johnston * - optionally populate the object set with file objects, using "root" as the 895240afd8cSMark Johnston * root directory. 896240afd8cSMark Johnston * 897240afd8cSMark Johnston * "dirfd" is a directory descriptor for the directory referenced by "root". It 898240afd8cSMark Johnston * is closed before returning. 
899240afd8cSMark Johnston */ 900240afd8cSMark Johnston static void 901240afd8cSMark Johnston fs_build_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, fsnode *root, int dirfd) 902240afd8cSMark Johnston { 903240afd8cSMark Johnston struct fs_populate_arg arg; 904240afd8cSMark Johnston zfs_fs_t fs; 905240afd8cSMark Johnston zfs_zap_t *masterzap; 906240afd8cSMark Johnston zfs_objset_t *os; 907240afd8cSMark Johnston dnode_phys_t *deleteq, *masterobj; 908240afd8cSMark Johnston uint64_t deleteqid, dnodecount, moid, rootdirid, saobjid; 909240afd8cSMark Johnston bool fakedroot; 910240afd8cSMark Johnston 911240afd8cSMark Johnston /* 912240afd8cSMark Johnston * This dataset's mountpoint doesn't exist in the staging tree, or the 913240afd8cSMark Johnston * dataset doesn't have a mountpoint at all. In either case we still 914240afd8cSMark Johnston * need a root directory. Fake up a root fsnode to handle this case. 915240afd8cSMark Johnston */ 916240afd8cSMark Johnston fakedroot = root == NULL; 917240afd8cSMark Johnston if (fakedroot) { 918240afd8cSMark Johnston struct stat *stp; 919240afd8cSMark Johnston 920240afd8cSMark Johnston assert(dirfd == -1); 921240afd8cSMark Johnston 922240afd8cSMark Johnston root = ecalloc(1, sizeof(*root)); 923240afd8cSMark Johnston root->inode = ecalloc(1, sizeof(*root->inode)); 924240afd8cSMark Johnston root->name = estrdup("."); 925240afd8cSMark Johnston root->type = S_IFDIR; 926240afd8cSMark Johnston 927240afd8cSMark Johnston stp = &root->inode->st; 928240afd8cSMark Johnston stp->st_uid = 0; 929240afd8cSMark Johnston stp->st_gid = 0; 930240afd8cSMark Johnston stp->st_mode = S_IFDIR | 0755; 931240afd8cSMark Johnston } 932240afd8cSMark Johnston assert(root->type == S_IFDIR); 933240afd8cSMark Johnston assert(fsnode_isroot(root)); 934240afd8cSMark Johnston 935240afd8cSMark Johnston /* 936240afd8cSMark Johnston * Initialize the object set for this dataset. 
937240afd8cSMark Johnston */ 938240afd8cSMark Johnston os = objset_alloc(zfs, DMU_OST_ZFS); 939240afd8cSMark Johnston masterobj = objset_dnode_alloc(os, DMU_OT_MASTER_NODE, &moid); 940240afd8cSMark Johnston assert(moid == MASTER_NODE_OBJ); 941240afd8cSMark Johnston 942240afd8cSMark Johnston memset(&fs, 0, sizeof(fs)); 943240afd8cSMark Johnston fs.os = os; 944240afd8cSMark Johnston 945240afd8cSMark Johnston /* 946240afd8cSMark Johnston * Create the ZAP SA layout now since filesystem object dnodes will 947240afd8cSMark Johnston * refer to those attributes. 948240afd8cSMark Johnston */ 949240afd8cSMark Johnston saobjid = fs_set_zpl_attrs(zfs, &fs); 950240afd8cSMark Johnston 951240afd8cSMark Johnston /* 952240afd8cSMark Johnston * Make a pass over the staged directory to detect hard links and assign 953240afd8cSMark Johnston * virtual dnode numbers. 954240afd8cSMark Johnston */ 955240afd8cSMark Johnston dnodecount = 1; /* root directory */ 956240afd8cSMark Johnston fsnode_foreach(root, fs_foreach_mark, &dnodecount); 957240afd8cSMark Johnston 958240afd8cSMark Johnston /* 959240afd8cSMark Johnston * Make a second pass to populate the dataset with files from the 960240afd8cSMark Johnston * staged directory. Most of our runtime is spent here. 961240afd8cSMark Johnston */ 9628eca3207SMark Johnston arg.rootdirfd = dirfd; 963240afd8cSMark Johnston arg.zfs = zfs; 964240afd8cSMark Johnston arg.fs = &fs; 965240afd8cSMark Johnston SLIST_INIT(&arg.dirs); 966240afd8cSMark Johnston fs_populate_dir(root, &arg); 967240afd8cSMark Johnston assert(!SLIST_EMPTY(&arg.dirs)); 968240afd8cSMark Johnston fsnode_foreach(root, fs_foreach_populate, &arg); 969240afd8cSMark Johnston assert(SLIST_EMPTY(&arg.dirs)); 970240afd8cSMark Johnston rootdirid = arg.rootdirid; 971240afd8cSMark Johnston 972240afd8cSMark Johnston /* 973240afd8cSMark Johnston * Create an empty delete queue. We don't do anything with it, but 974240afd8cSMark Johnston * OpenZFS will refuse to mount filesystems that don't have one. 
975240afd8cSMark Johnston */ 976240afd8cSMark Johnston deleteq = objset_dnode_alloc(os, DMU_OT_UNLINKED_SET, &deleteqid); 977240afd8cSMark Johnston zap_write(zfs, zap_alloc(os, deleteq)); 978240afd8cSMark Johnston 979240afd8cSMark Johnston /* 980240afd8cSMark Johnston * Populate and write the master node object. This is a ZAP object 981240afd8cSMark Johnston * containing various dataset properties and the object IDs of the root 982240afd8cSMark Johnston * directory and delete queue. 983240afd8cSMark Johnston */ 984240afd8cSMark Johnston masterzap = zap_alloc(os, masterobj); 985240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_ROOT_OBJ, rootdirid); 986240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_UNLINKED_SET, deleteqid); 987240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_SA_ATTRS, saobjid); 988240afd8cSMark Johnston zap_add_uint64(masterzap, ZPL_VERSION_OBJ, 5 /* ZPL_VERSION_SA */); 989240afd8cSMark Johnston zap_add_uint64(masterzap, "normalization", 0 /* off */); 990240afd8cSMark Johnston zap_add_uint64(masterzap, "utf8only", 0 /* off */); 991240afd8cSMark Johnston zap_add_uint64(masterzap, "casesensitivity", 0 /* case sensitive */); 992240afd8cSMark Johnston zap_add_uint64(masterzap, "acltype", 2 /* NFSv4 */); 993240afd8cSMark Johnston zap_write(zfs, masterzap); 994240afd8cSMark Johnston 995240afd8cSMark Johnston /* 996240afd8cSMark Johnston * All finished with this object set, we may as well write it now. 997240afd8cSMark Johnston * The DSL layer will sum up the bytes consumed by each dataset using 998240afd8cSMark Johnston * information stored in the object set, so it can't be freed just yet. 
999240afd8cSMark Johnston */ 1000240afd8cSMark Johnston dsl_dir_dataset_write(zfs, os, dsldir); 1001240afd8cSMark Johnston 1002240afd8cSMark Johnston if (fakedroot) { 1003240afd8cSMark Johnston free(root->inode); 1004240afd8cSMark Johnston free(root->name); 1005240afd8cSMark Johnston free(root); 1006240afd8cSMark Johnston } 1007240afd8cSMark Johnston free(fs.saoffs); 1008240afd8cSMark Johnston } 1009240afd8cSMark Johnston 1010240afd8cSMark Johnston /* 1011240afd8cSMark Johnston * Create an object set for each DSL directory which has a dataset and doesn't 1012240afd8cSMark Johnston * already have an object set. 1013240afd8cSMark Johnston */ 1014240afd8cSMark Johnston static void 1015240afd8cSMark Johnston fs_build_unmounted(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg __unused) 1016240afd8cSMark Johnston { 1017240afd8cSMark Johnston if (dsl_dir_has_dataset(dsldir) && !dsl_dir_dataset_has_objset(dsldir)) 1018240afd8cSMark Johnston fs_build_one(zfs, dsldir, NULL, -1); 1019240afd8cSMark Johnston } 1020240afd8cSMark Johnston 1021240afd8cSMark Johnston /* 1022240afd8cSMark Johnston * Create our datasets and populate them with files. 1023240afd8cSMark Johnston */ 1024240afd8cSMark Johnston void 1025240afd8cSMark Johnston fs_build(zfs_opt_t *zfs, int dirfd, fsnode *root) 1026240afd8cSMark Johnston { 1027240afd8cSMark Johnston /* 1028240afd8cSMark Johnston * Run through our datasets and find the root fsnode for each one. Each 1029240afd8cSMark Johnston * root fsnode is flagged so that we can figure out which dataset it 1030240afd8cSMark Johnston * belongs to. 1031240afd8cSMark Johnston */ 1032240afd8cSMark Johnston dsl_dir_foreach(zfs, zfs->rootdsldir, fs_layout_one, root); 1033240afd8cSMark Johnston 1034240afd8cSMark Johnston /* 1035240afd8cSMark Johnston * Did we find our boot filesystem? 
1036240afd8cSMark Johnston */ 1037240afd8cSMark Johnston if (zfs->bootfs != NULL && !zap_entry_exists(zfs->poolprops, "bootfs")) 1038240afd8cSMark Johnston errx(1, "no mounted dataset matches bootfs property `%s'", 1039240afd8cSMark Johnston zfs->bootfs); 1040240afd8cSMark Johnston 1041240afd8cSMark Johnston /* 1042240afd8cSMark Johnston * Traverse the file hierarchy starting from the root fsnode. One 1043240afd8cSMark Johnston * dataset, not necessarily the root dataset, must "own" the root 1044240afd8cSMark Johnston * directory by having its mountpoint be equal to the root path. 1045240afd8cSMark Johnston * 1046240afd8cSMark Johnston * As roots of other datasets are encountered during the traversal, 1047240afd8cSMark Johnston * fs_build_one() recursively creates the corresponding object sets and 1048240afd8cSMark Johnston * populates them. Once this function has returned, all datasets will 1049240afd8cSMark Johnston * have been fully populated. 1050240afd8cSMark Johnston */ 1051240afd8cSMark Johnston fs_build_one(zfs, root->inode->param, root, dirfd); 1052240afd8cSMark Johnston 1053240afd8cSMark Johnston /* 1054240afd8cSMark Johnston * Now create object sets for datasets whose mountpoints weren't found 1055240afd8cSMark Johnston * in the staging directory, either because there is no mountpoint, or 1056240afd8cSMark Johnston * because the mountpoint doesn't correspond to an existing directory. 1057240afd8cSMark Johnston */ 1058240afd8cSMark Johnston dsl_dir_foreach(zfs, zfs->rootdsldir, fs_build_unmounted, NULL); 1059240afd8cSMark Johnston } 1060