1240afd8cSMark Johnston /*- 2240afd8cSMark Johnston * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3240afd8cSMark Johnston * 4240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation 5240afd8cSMark Johnston * 6240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from 7240afd8cSMark Johnston * the FreeBSD Foundation. 8240afd8cSMark Johnston * 9240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without 10240afd8cSMark Johnston * modification, are permitted provided that the following conditions are 11240afd8cSMark Johnston * met: 12240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright 13240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer. 14240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright 15240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in 16240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution. 17240afd8cSMark Johnston * 18240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21240afd8cSMark Johnston * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28240afd8cSMark Johnston * SUCH DAMAGE. 29240afd8cSMark Johnston */ 30240afd8cSMark Johnston 31240afd8cSMark Johnston #include <sys/stat.h> 32240afd8cSMark Johnston 33240afd8cSMark Johnston #include <assert.h> 34*c6890399SJessica Clarke #include <dirent.h> 35240afd8cSMark Johnston #include <fcntl.h> 36*c6890399SJessica Clarke #include <stdlib.h> 37240afd8cSMark Johnston #include <string.h> 38240afd8cSMark Johnston #include <unistd.h> 39240afd8cSMark Johnston 40240afd8cSMark Johnston #include <util.h> 41240afd8cSMark Johnston 42240afd8cSMark Johnston #include "makefs.h" 43240afd8cSMark Johnston #include "zfs.h" 44240afd8cSMark Johnston 45240afd8cSMark Johnston typedef struct { 46240afd8cSMark Johnston const char *name; 47240afd8cSMark Johnston unsigned int id; 48240afd8cSMark Johnston uint16_t size; 49240afd8cSMark Johnston sa_bswap_type_t bs; 50240afd8cSMark Johnston } zfs_sattr_t; 51240afd8cSMark Johnston 52240afd8cSMark Johnston typedef struct zfs_fs { 53240afd8cSMark Johnston zfs_objset_t *os; 54240afd8cSMark Johnston 55240afd8cSMark Johnston /* Offset table for system attributes, indexed by a zpl_attr_t. 
*/ 56240afd8cSMark Johnston uint16_t *saoffs; 57240afd8cSMark Johnston size_t sacnt; 58240afd8cSMark Johnston const zfs_sattr_t *satab; 59240afd8cSMark Johnston } zfs_fs_t; 60240afd8cSMark Johnston 61240afd8cSMark Johnston /* 62240afd8cSMark Johnston * The order of the attributes doesn't matter, this is simply the one hard-coded 63240afd8cSMark Johnston * by OpenZFS, based on a zdb dump of the SA_REGISTRY table. 64240afd8cSMark Johnston */ 65240afd8cSMark Johnston typedef enum zpl_attr { 66240afd8cSMark Johnston ZPL_ATIME, 67240afd8cSMark Johnston ZPL_MTIME, 68240afd8cSMark Johnston ZPL_CTIME, 69240afd8cSMark Johnston ZPL_CRTIME, 70240afd8cSMark Johnston ZPL_GEN, 71240afd8cSMark Johnston ZPL_MODE, 72240afd8cSMark Johnston ZPL_SIZE, 73240afd8cSMark Johnston ZPL_PARENT, 74240afd8cSMark Johnston ZPL_LINKS, 75240afd8cSMark Johnston ZPL_XATTR, 76240afd8cSMark Johnston ZPL_RDEV, 77240afd8cSMark Johnston ZPL_FLAGS, 78240afd8cSMark Johnston ZPL_UID, 79240afd8cSMark Johnston ZPL_GID, 80240afd8cSMark Johnston ZPL_PAD, 81240afd8cSMark Johnston ZPL_ZNODE_ACL, 82240afd8cSMark Johnston ZPL_DACL_COUNT, 83240afd8cSMark Johnston ZPL_SYMLINK, 84240afd8cSMark Johnston ZPL_SCANSTAMP, 85240afd8cSMark Johnston ZPL_DACL_ACES, 86240afd8cSMark Johnston ZPL_DXATTR, 87240afd8cSMark Johnston ZPL_PROJID, 88240afd8cSMark Johnston } zpl_attr_t; 89240afd8cSMark Johnston 90240afd8cSMark Johnston /* 91240afd8cSMark Johnston * This table must be kept in sync with zpl_attr_layout[] and zpl_attr_t. 
92240afd8cSMark Johnston */ 93240afd8cSMark Johnston static const zfs_sattr_t zpl_attrs[] = { 94240afd8cSMark Johnston #define _ZPL_ATTR(n, s, b) { .name = #n, .id = n, .size = s, .bs = b } 95240afd8cSMark Johnston _ZPL_ATTR(ZPL_ATIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 96240afd8cSMark Johnston _ZPL_ATTR(ZPL_MTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 97240afd8cSMark Johnston _ZPL_ATTR(ZPL_CTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 98240afd8cSMark Johnston _ZPL_ATTR(ZPL_CRTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 99240afd8cSMark Johnston _ZPL_ATTR(ZPL_GEN, sizeof(uint64_t), SA_UINT64_ARRAY), 100240afd8cSMark Johnston _ZPL_ATTR(ZPL_MODE, sizeof(uint64_t), SA_UINT64_ARRAY), 101240afd8cSMark Johnston _ZPL_ATTR(ZPL_SIZE, sizeof(uint64_t), SA_UINT64_ARRAY), 102240afd8cSMark Johnston _ZPL_ATTR(ZPL_PARENT, sizeof(uint64_t), SA_UINT64_ARRAY), 103240afd8cSMark Johnston _ZPL_ATTR(ZPL_LINKS, sizeof(uint64_t), SA_UINT64_ARRAY), 104240afd8cSMark Johnston _ZPL_ATTR(ZPL_XATTR, sizeof(uint64_t), SA_UINT64_ARRAY), 105240afd8cSMark Johnston _ZPL_ATTR(ZPL_RDEV, sizeof(uint64_t), SA_UINT64_ARRAY), 106240afd8cSMark Johnston _ZPL_ATTR(ZPL_FLAGS, sizeof(uint64_t), SA_UINT64_ARRAY), 107240afd8cSMark Johnston _ZPL_ATTR(ZPL_UID, sizeof(uint64_t), SA_UINT64_ARRAY), 108240afd8cSMark Johnston _ZPL_ATTR(ZPL_GID, sizeof(uint64_t), SA_UINT64_ARRAY), 109240afd8cSMark Johnston _ZPL_ATTR(ZPL_PAD, sizeof(uint64_t), SA_UINT64_ARRAY), 110240afd8cSMark Johnston _ZPL_ATTR(ZPL_ZNODE_ACL, 88, SA_UINT64_ARRAY), 111240afd8cSMark Johnston _ZPL_ATTR(ZPL_DACL_COUNT, sizeof(uint64_t), SA_UINT64_ARRAY), 112240afd8cSMark Johnston _ZPL_ATTR(ZPL_SYMLINK, 0, SA_UINT8_ARRAY), 113240afd8cSMark Johnston _ZPL_ATTR(ZPL_SCANSTAMP, sizeof(uint64_t) * 4, SA_UINT8_ARRAY), 114240afd8cSMark Johnston _ZPL_ATTR(ZPL_DACL_ACES, 0, SA_ACL), 115240afd8cSMark Johnston _ZPL_ATTR(ZPL_DXATTR, 0, SA_UINT8_ARRAY), 116240afd8cSMark Johnston _ZPL_ATTR(ZPL_PROJID, sizeof(uint64_t), SA_UINT64_ARRAY), 117240afd8cSMark Johnston 
#undef ZPL_ATTR 118240afd8cSMark Johnston }; 119240afd8cSMark Johnston 120240afd8cSMark Johnston /* 121240afd8cSMark Johnston * This layout matches that of a filesystem created using OpenZFS on FreeBSD. 122240afd8cSMark Johnston * It need not match in general, but FreeBSD's loader doesn't bother parsing the 123240afd8cSMark Johnston * layout and just hard-codes attribute offsets. 124240afd8cSMark Johnston */ 125240afd8cSMark Johnston static const sa_attr_type_t zpl_attr_layout[] = { 126240afd8cSMark Johnston ZPL_MODE, 127240afd8cSMark Johnston ZPL_SIZE, 128240afd8cSMark Johnston ZPL_GEN, 129240afd8cSMark Johnston ZPL_UID, 130240afd8cSMark Johnston ZPL_GID, 131240afd8cSMark Johnston ZPL_PARENT, 132240afd8cSMark Johnston ZPL_FLAGS, 133240afd8cSMark Johnston ZPL_ATIME, 134240afd8cSMark Johnston ZPL_MTIME, 135240afd8cSMark Johnston ZPL_CTIME, 136240afd8cSMark Johnston ZPL_CRTIME, 137240afd8cSMark Johnston ZPL_LINKS, 138240afd8cSMark Johnston ZPL_DACL_COUNT, 139240afd8cSMark Johnston ZPL_DACL_ACES, 140240afd8cSMark Johnston ZPL_SYMLINK, 141240afd8cSMark Johnston }; 142240afd8cSMark Johnston 143240afd8cSMark Johnston /* 144240afd8cSMark Johnston * Keys for the ZPL attribute tables in the SA layout ZAP. The first two 145240afd8cSMark Johnston * indices are reserved for legacy attribute encoding. 
146240afd8cSMark Johnston */ 147240afd8cSMark Johnston #define SA_LAYOUT_INDEX_DEFAULT 2 148240afd8cSMark Johnston #define SA_LAYOUT_INDEX_SYMLINK 3 149240afd8cSMark Johnston 150240afd8cSMark Johnston struct fs_populate_dir { 151240afd8cSMark Johnston SLIST_ENTRY(fs_populate_dir) next; 152240afd8cSMark Johnston int dirfd; 153240afd8cSMark Johnston uint64_t objid; 154240afd8cSMark Johnston zfs_zap_t *zap; 155240afd8cSMark Johnston }; 156240afd8cSMark Johnston 157240afd8cSMark Johnston struct fs_populate_arg { 158240afd8cSMark Johnston zfs_opt_t *zfs; 159240afd8cSMark Johnston zfs_fs_t *fs; /* owning filesystem */ 160240afd8cSMark Johnston uint64_t rootdirid; /* root directory dnode ID */ 1618eca3207SMark Johnston int rootdirfd; /* root directory fd */ 162240afd8cSMark Johnston SLIST_HEAD(, fs_populate_dir) dirs; /* stack of directories */ 163240afd8cSMark Johnston }; 164240afd8cSMark Johnston 165240afd8cSMark Johnston static void fs_build_one(zfs_opt_t *, zfs_dsl_dir_t *, fsnode *, int); 166240afd8cSMark Johnston 1678eca3207SMark Johnston static void 1688eca3207SMark Johnston eclose(int fd) 1698eca3207SMark Johnston { 1708eca3207SMark Johnston if (close(fd) != 0) 1718eca3207SMark Johnston err(1, "close"); 1728eca3207SMark Johnston } 1738eca3207SMark Johnston 174240afd8cSMark Johnston static bool 175240afd8cSMark Johnston fsnode_isroot(const fsnode *cur) 176240afd8cSMark Johnston { 177240afd8cSMark Johnston return (strcmp(cur->name, ".") == 0); 178240afd8cSMark Johnston } 179240afd8cSMark Johnston 180240afd8cSMark Johnston /* 181240afd8cSMark Johnston * Visit each node in a directory hierarchy, in pre-order depth-first order. 
182240afd8cSMark Johnston */ 183240afd8cSMark Johnston static void 184240afd8cSMark Johnston fsnode_foreach(fsnode *root, int (*cb)(fsnode *, void *), void *arg) 185240afd8cSMark Johnston { 186240afd8cSMark Johnston assert(root->type == S_IFDIR); 187240afd8cSMark Johnston 188240afd8cSMark Johnston for (fsnode *cur = root; cur != NULL; cur = cur->next) { 189240afd8cSMark Johnston assert(cur->type == S_IFREG || cur->type == S_IFDIR || 190240afd8cSMark Johnston cur->type == S_IFLNK); 191240afd8cSMark Johnston 192240afd8cSMark Johnston if (cb(cur, arg) == 0) 193240afd8cSMark Johnston continue; 194240afd8cSMark Johnston if (cur->type == S_IFDIR && cur->child != NULL) 195240afd8cSMark Johnston fsnode_foreach(cur->child, cb, arg); 196240afd8cSMark Johnston } 197240afd8cSMark Johnston } 198240afd8cSMark Johnston 199240afd8cSMark Johnston static void 200240afd8cSMark Johnston fs_populate_dirent(struct fs_populate_arg *arg, fsnode *cur, uint64_t dnid) 201240afd8cSMark Johnston { 202240afd8cSMark Johnston struct fs_populate_dir *dir; 203240afd8cSMark Johnston uint64_t type; 204240afd8cSMark Johnston 205240afd8cSMark Johnston switch (cur->type) { 206240afd8cSMark Johnston case S_IFREG: 207240afd8cSMark Johnston type = DT_REG; 208240afd8cSMark Johnston break; 209240afd8cSMark Johnston case S_IFDIR: 210240afd8cSMark Johnston type = DT_DIR; 211240afd8cSMark Johnston break; 212240afd8cSMark Johnston case S_IFLNK: 213240afd8cSMark Johnston type = DT_LNK; 214240afd8cSMark Johnston break; 215240afd8cSMark Johnston default: 216240afd8cSMark Johnston assert(0); 217240afd8cSMark Johnston } 218240afd8cSMark Johnston 219240afd8cSMark Johnston dir = SLIST_FIRST(&arg->dirs); 220240afd8cSMark Johnston zap_add_uint64(dir->zap, cur->name, ZFS_DIRENT_MAKE(type, dnid)); 221240afd8cSMark Johnston } 222240afd8cSMark Johnston 223240afd8cSMark Johnston static void 224240afd8cSMark Johnston fs_populate_attr(zfs_fs_t *fs, char *attrbuf, const void *val, uint16_t ind, 225240afd8cSMark Johnston size_t 
*szp) 226240afd8cSMark Johnston { 227240afd8cSMark Johnston assert(ind < fs->sacnt); 228240afd8cSMark Johnston assert(fs->saoffs[ind] != 0xffff); 229240afd8cSMark Johnston 230240afd8cSMark Johnston memcpy(attrbuf + fs->saoffs[ind], val, fs->satab[ind].size); 231240afd8cSMark Johnston *szp += fs->satab[ind].size; 232240afd8cSMark Johnston } 233240afd8cSMark Johnston 234240afd8cSMark Johnston static void 235240afd8cSMark Johnston fs_populate_varszattr(zfs_fs_t *fs, char *attrbuf, const void *val, 236240afd8cSMark Johnston size_t valsz, size_t varoff, uint16_t ind, size_t *szp) 237240afd8cSMark Johnston { 238240afd8cSMark Johnston assert(ind < fs->sacnt); 239240afd8cSMark Johnston assert(fs->saoffs[ind] != 0xffff); 240240afd8cSMark Johnston assert(fs->satab[ind].size == 0); 241240afd8cSMark Johnston 242240afd8cSMark Johnston memcpy(attrbuf + fs->saoffs[ind] + varoff, val, valsz); 243240afd8cSMark Johnston *szp += valsz; 244240afd8cSMark Johnston } 245240afd8cSMark Johnston 2468eca3207SMark Johnston /* 2478eca3207SMark Johnston * Derive the relative fd/path combo needed to access a file. Ideally we'd 2488eca3207SMark Johnston * always be able to use relative lookups (i.e., use the *at() system calls), 2498eca3207SMark Johnston * since they require less path translation and are more amenable to sandboxing, 2508eca3207SMark Johnston * but the handling of multiple staging directories makes that difficult. To 2518eca3207SMark Johnston * make matters worse, we have no choice but to use relative lookups when 2528eca3207SMark Johnston * dealing with an mtree manifest, so both mechanisms are implemented. 
2538eca3207SMark Johnston */ 2548eca3207SMark Johnston static void 2558eca3207SMark Johnston fs_populate_path(const fsnode *cur, struct fs_populate_arg *arg, 2568eca3207SMark Johnston char *path, size_t sz, int *dirfdp) 2578eca3207SMark Johnston { 2588eca3207SMark Johnston if (cur->root == NULL) { 2598eca3207SMark Johnston size_t n; 2608eca3207SMark Johnston 2618eca3207SMark Johnston *dirfdp = SLIST_FIRST(&arg->dirs)->dirfd; 2628eca3207SMark Johnston n = strlcpy(path, cur->name, sz); 2638eca3207SMark Johnston assert(n < sz); 2648eca3207SMark Johnston } else { 2658eca3207SMark Johnston int n; 2668eca3207SMark Johnston 2678eca3207SMark Johnston *dirfdp = AT_FDCWD; 2688eca3207SMark Johnston n = snprintf(path, sz, "%s/%s/%s", 2698eca3207SMark Johnston cur->root, cur->path, cur->name); 2708eca3207SMark Johnston assert(n >= 0); 2718eca3207SMark Johnston assert((size_t)n < sz); 2728eca3207SMark Johnston } 2738eca3207SMark Johnston } 2748eca3207SMark Johnston 2758eca3207SMark Johnston static int 2768eca3207SMark Johnston fs_open(const fsnode *cur, struct fs_populate_arg *arg, int flags) 2778eca3207SMark Johnston { 2788eca3207SMark Johnston char path[PATH_MAX]; 2798eca3207SMark Johnston int fd; 2808eca3207SMark Johnston 2818eca3207SMark Johnston fs_populate_path(cur, arg, path, sizeof(path), &fd); 2828eca3207SMark Johnston 2838eca3207SMark Johnston fd = openat(fd, path, flags); 2848eca3207SMark Johnston if (fd < 0) 2858eca3207SMark Johnston err(1, "openat(%s)", path); 2868eca3207SMark Johnston return (fd); 2878eca3207SMark Johnston } 2888eca3207SMark Johnston 2898eca3207SMark Johnston static void 2908eca3207SMark Johnston fs_readlink(const fsnode *cur, struct fs_populate_arg *arg, 2918eca3207SMark Johnston char *buf, size_t bufsz) 2928eca3207SMark Johnston { 2938eca3207SMark Johnston char path[PATH_MAX]; 2948eca3207SMark Johnston ssize_t n; 2958eca3207SMark Johnston int fd; 2968eca3207SMark Johnston 2978eca3207SMark Johnston fs_populate_path(cur, arg, path, sizeof(path), 
&fd); 2988eca3207SMark Johnston 2998eca3207SMark Johnston n = readlinkat(fd, path, buf, bufsz - 1); 3008eca3207SMark Johnston if (n == -1) 3018eca3207SMark Johnston err(1, "readlinkat(%s)", cur->name); 3028eca3207SMark Johnston buf[n] = '\0'; 3038eca3207SMark Johnston } 3048eca3207SMark Johnston 305240afd8cSMark Johnston static void 306b0ce7dfcSJessica Clarke fs_populate_time(zfs_fs_t *fs, char *attrbuf, struct timespec *ts, 307b0ce7dfcSJessica Clarke uint16_t ind, size_t *szp) 308b0ce7dfcSJessica Clarke { 309b0ce7dfcSJessica Clarke uint64_t timebuf[2]; 310b0ce7dfcSJessica Clarke 311b0ce7dfcSJessica Clarke assert(ind < fs->sacnt); 312b0ce7dfcSJessica Clarke assert(fs->saoffs[ind] != 0xffff); 313b0ce7dfcSJessica Clarke assert(fs->satab[ind].size == sizeof(timebuf)); 314b0ce7dfcSJessica Clarke 315b0ce7dfcSJessica Clarke timebuf[0] = ts->tv_sec; 316b0ce7dfcSJessica Clarke timebuf[1] = ts->tv_nsec; 317b0ce7dfcSJessica Clarke fs_populate_attr(fs, attrbuf, timebuf, ind, szp); 318b0ce7dfcSJessica Clarke } 319b0ce7dfcSJessica Clarke 320b0ce7dfcSJessica Clarke static void 321240afd8cSMark Johnston fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur, 322240afd8cSMark Johnston dnode_phys_t *dnode) 323240afd8cSMark Johnston { 324240afd8cSMark Johnston char target[PATH_MAX]; 325240afd8cSMark Johnston zfs_fs_t *fs; 326240afd8cSMark Johnston zfs_ace_hdr_t aces[3]; 327240afd8cSMark Johnston struct stat *sb; 328240afd8cSMark Johnston sa_hdr_phys_t *sahdr; 329240afd8cSMark Johnston uint64_t daclcount, flags, gen, gid, links, mode, parent, objsize, uid; 330240afd8cSMark Johnston char *attrbuf; 331240afd8cSMark Johnston size_t bonussz, hdrsz; 332240afd8cSMark Johnston int layout; 333240afd8cSMark Johnston 334240afd8cSMark Johnston assert(dnode->dn_bonustype == DMU_OT_SA); 335240afd8cSMark Johnston assert(dnode->dn_nblkptr == 1); 336240afd8cSMark Johnston 337240afd8cSMark Johnston fs = arg->fs; 338240afd8cSMark Johnston sb = &cur->inode->st; 339240afd8cSMark Johnston 
340240afd8cSMark Johnston switch (cur->type) { 341240afd8cSMark Johnston case S_IFREG: 342240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_DEFAULT; 343240afd8cSMark Johnston links = cur->inode->nlink; 344240afd8cSMark Johnston objsize = sb->st_size; 345240afd8cSMark Johnston parent = SLIST_FIRST(&arg->dirs)->objid; 346240afd8cSMark Johnston break; 347240afd8cSMark Johnston case S_IFDIR: 348240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_DEFAULT; 349240afd8cSMark Johnston links = 1; /* .. */ 350240afd8cSMark Johnston objsize = 1; /* .. */ 351240afd8cSMark Johnston 352240afd8cSMark Johnston /* 353240afd8cSMark Johnston * The size of a ZPL directory is the number of entries 354240afd8cSMark Johnston * (including "." and ".."), and the link count is the number of 355240afd8cSMark Johnston * entries which are directories (including "." and ".."). 356240afd8cSMark Johnston */ 357240afd8cSMark Johnston for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child; 358240afd8cSMark Johnston c != NULL; c = c->next) { 359240afd8cSMark Johnston if (c->type == S_IFDIR) 360240afd8cSMark Johnston links++; 361240afd8cSMark Johnston objsize++; 362240afd8cSMark Johnston } 363240afd8cSMark Johnston 364240afd8cSMark Johnston /* The root directory is its own parent. */ 365240afd8cSMark Johnston parent = SLIST_EMPTY(&arg->dirs) ? 
366240afd8cSMark Johnston arg->rootdirid : SLIST_FIRST(&arg->dirs)->objid; 367240afd8cSMark Johnston break; 3688eca3207SMark Johnston case S_IFLNK: 3698eca3207SMark Johnston fs_readlink(cur, arg, target, sizeof(target)); 370240afd8cSMark Johnston 371240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_SYMLINK; 372240afd8cSMark Johnston links = 1; 373240afd8cSMark Johnston objsize = strlen(target); 374240afd8cSMark Johnston parent = SLIST_FIRST(&arg->dirs)->objid; 375240afd8cSMark Johnston break; 376240afd8cSMark Johnston default: 377240afd8cSMark Johnston assert(0); 378240afd8cSMark Johnston } 379240afd8cSMark Johnston 380240afd8cSMark Johnston daclcount = nitems(aces); 381240afd8cSMark Johnston flags = ZFS_ACL_TRIVIAL | ZFS_ACL_AUTO_INHERIT | ZFS_NO_EXECS_DENIED | 382240afd8cSMark Johnston ZFS_ARCHIVE | ZFS_AV_MODIFIED; /* XXX-MJ */ 383240afd8cSMark Johnston gen = 1; 384240afd8cSMark Johnston gid = sb->st_gid; 385240afd8cSMark Johnston mode = sb->st_mode; 386240afd8cSMark Johnston uid = sb->st_uid; 387240afd8cSMark Johnston 388240afd8cSMark Johnston memset(aces, 0, sizeof(aces)); 389240afd8cSMark Johnston aces[0].z_flags = ACE_OWNER; 390240afd8cSMark Johnston aces[0].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 391240afd8cSMark Johnston aces[0].z_access_mask = ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER | 392240afd8cSMark Johnston ACE_WRITE_ACL | ACE_WRITE_NAMED_ATTRS | ACE_READ_ACL | 393240afd8cSMark Johnston ACE_READ_ATTRIBUTES | ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 394240afd8cSMark Johnston if ((mode & S_IRUSR) != 0) 395240afd8cSMark Johnston aces[0].z_access_mask |= ACE_READ_DATA; 396240afd8cSMark Johnston if ((mode & S_IWUSR) != 0) 397240afd8cSMark Johnston aces[0].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 398240afd8cSMark Johnston if ((mode & S_IXUSR) != 0) 399240afd8cSMark Johnston aces[0].z_access_mask |= ACE_EXECUTE; 400240afd8cSMark Johnston 401240afd8cSMark Johnston aces[1].z_flags = ACE_GROUP | ACE_IDENTIFIER_GROUP; 402240afd8cSMark Johnston aces[1].z_type 
= ACE_ACCESS_ALLOWED_ACE_TYPE; 403240afd8cSMark Johnston aces[1].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES | 404240afd8cSMark Johnston ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 405240afd8cSMark Johnston if ((mode & S_IRGRP) != 0) 406240afd8cSMark Johnston aces[1].z_access_mask |= ACE_READ_DATA; 407240afd8cSMark Johnston if ((mode & S_IWGRP) != 0) 408240afd8cSMark Johnston aces[1].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 409240afd8cSMark Johnston if ((mode & S_IXGRP) != 0) 410240afd8cSMark Johnston aces[1].z_access_mask |= ACE_EXECUTE; 411240afd8cSMark Johnston 412240afd8cSMark Johnston aces[2].z_flags = ACE_EVERYONE; 413240afd8cSMark Johnston aces[2].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 414240afd8cSMark Johnston aces[2].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES | 415240afd8cSMark Johnston ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 416240afd8cSMark Johnston if ((mode & S_IROTH) != 0) 417240afd8cSMark Johnston aces[2].z_access_mask |= ACE_READ_DATA; 418240afd8cSMark Johnston if ((mode & S_IWOTH) != 0) 419240afd8cSMark Johnston aces[2].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 420240afd8cSMark Johnston if ((mode & S_IXOTH) != 0) 421240afd8cSMark Johnston aces[2].z_access_mask |= ACE_EXECUTE; 422240afd8cSMark Johnston 423240afd8cSMark Johnston switch (layout) { 424240afd8cSMark Johnston case SA_LAYOUT_INDEX_DEFAULT: 425240afd8cSMark Johnston /* At most one variable-length attribute. */ 426240afd8cSMark Johnston hdrsz = sizeof(uint64_t); 427240afd8cSMark Johnston break; 428240afd8cSMark Johnston case SA_LAYOUT_INDEX_SYMLINK: 429240afd8cSMark Johnston /* At most five variable-length attributes. 
*/ 430240afd8cSMark Johnston hdrsz = sizeof(uint64_t) * 2; 431240afd8cSMark Johnston break; 432240afd8cSMark Johnston default: 433240afd8cSMark Johnston assert(0); 434240afd8cSMark Johnston } 435240afd8cSMark Johnston 436240afd8cSMark Johnston sahdr = (sa_hdr_phys_t *)DN_BONUS(dnode); 437240afd8cSMark Johnston sahdr->sa_magic = SA_MAGIC; 438240afd8cSMark Johnston SA_HDR_LAYOUT_INFO_ENCODE(sahdr->sa_layout_info, layout, hdrsz); 439240afd8cSMark Johnston 440240afd8cSMark Johnston bonussz = SA_HDR_SIZE(sahdr); 441240afd8cSMark Johnston attrbuf = (char *)sahdr + SA_HDR_SIZE(sahdr); 442240afd8cSMark Johnston 443240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &daclcount, ZPL_DACL_COUNT, &bonussz); 444240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &flags, ZPL_FLAGS, &bonussz); 445240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &gen, ZPL_GEN, &bonussz); 446240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &gid, ZPL_GID, &bonussz); 447240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &links, ZPL_LINKS, &bonussz); 448240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &mode, ZPL_MODE, &bonussz); 449240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &parent, ZPL_PARENT, &bonussz); 450240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &objsize, ZPL_SIZE, &bonussz); 451240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &uid, ZPL_UID, &bonussz); 452240afd8cSMark Johnston 453240afd8cSMark Johnston /* 454240afd8cSMark Johnston * We deliberately set atime = mtime here to ensure that images are 455240afd8cSMark Johnston * reproducible. 
456240afd8cSMark Johnston */ 457b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_ATIME, &bonussz); 458b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CTIME, &bonussz); 459b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_MTIME, &bonussz); 460*c6890399SJessica Clarke #ifdef __linux__ 461*c6890399SJessica Clarke /* Linux has no st_birthtim; approximate with st_ctim */ 462*c6890399SJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CRTIME, &bonussz); 463*c6890399SJessica Clarke #else 464b0ce7dfcSJessica Clarke fs_populate_time(fs, attrbuf, &sb->st_birthtim, ZPL_CRTIME, &bonussz); 465*c6890399SJessica Clarke #endif 466240afd8cSMark Johnston 467240afd8cSMark Johnston fs_populate_varszattr(fs, attrbuf, aces, sizeof(aces), 0, 468240afd8cSMark Johnston ZPL_DACL_ACES, &bonussz); 469240afd8cSMark Johnston sahdr->sa_lengths[0] = sizeof(aces); 470240afd8cSMark Johnston 471240afd8cSMark Johnston if (cur->type == S_IFLNK) { 472240afd8cSMark Johnston assert(layout == SA_LAYOUT_INDEX_SYMLINK); 473240afd8cSMark Johnston /* Need to use a spill block pointer if the target is long. 
*/ 474240afd8cSMark Johnston assert(bonussz + objsize <= DN_OLD_MAX_BONUSLEN); 475240afd8cSMark Johnston fs_populate_varszattr(fs, attrbuf, target, objsize, 476240afd8cSMark Johnston sahdr->sa_lengths[0], ZPL_SYMLINK, &bonussz); 477240afd8cSMark Johnston sahdr->sa_lengths[1] = (uint16_t)objsize; 478240afd8cSMark Johnston } 479240afd8cSMark Johnston 480240afd8cSMark Johnston dnode->dn_bonuslen = bonussz; 481240afd8cSMark Johnston } 482240afd8cSMark Johnston 483240afd8cSMark Johnston static void 484240afd8cSMark Johnston fs_populate_file(fsnode *cur, struct fs_populate_arg *arg) 485240afd8cSMark Johnston { 486240afd8cSMark Johnston struct dnode_cursor *c; 487240afd8cSMark Johnston dnode_phys_t *dnode; 488240afd8cSMark Johnston zfs_opt_t *zfs; 489240afd8cSMark Johnston char *buf; 490240afd8cSMark Johnston uint64_t dnid; 491240afd8cSMark Johnston ssize_t n; 492240afd8cSMark Johnston size_t bufsz; 493240afd8cSMark Johnston off_t size, target; 494240afd8cSMark Johnston int fd; 495240afd8cSMark Johnston 496240afd8cSMark Johnston assert(cur->type == S_IFREG); 497240afd8cSMark Johnston assert((cur->inode->flags & FI_ROOT) == 0); 498240afd8cSMark Johnston 499240afd8cSMark Johnston zfs = arg->zfs; 500240afd8cSMark Johnston 501240afd8cSMark Johnston assert(cur->inode->ino != 0); 502240afd8cSMark Johnston if ((cur->inode->flags & FI_ALLOCATED) != 0) { 503240afd8cSMark Johnston /* 504240afd8cSMark Johnston * This is a hard link of an existing file. 
505240afd8cSMark Johnston * 506240afd8cSMark Johnston * XXX-MJ need to check whether it crosses datasets, add a test 507240afd8cSMark Johnston * case for that 508240afd8cSMark Johnston */ 509240afd8cSMark Johnston fs_populate_dirent(arg, cur, cur->inode->ino); 510240afd8cSMark Johnston return; 511240afd8cSMark Johnston } 512240afd8cSMark Johnston 513240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(arg->fs->os, 514240afd8cSMark Johnston DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid); 515240afd8cSMark Johnston cur->inode->ino = dnid; 516240afd8cSMark Johnston cur->inode->flags |= FI_ALLOCATED; 517240afd8cSMark Johnston 5188eca3207SMark Johnston fd = fs_open(cur, arg, O_RDONLY); 519240afd8cSMark Johnston 520240afd8cSMark Johnston buf = zfs->filebuf; 521240afd8cSMark Johnston bufsz = sizeof(zfs->filebuf); 522240afd8cSMark Johnston size = cur->inode->st.st_size; 523240afd8cSMark Johnston c = dnode_cursor_init(zfs, arg->fs->os, dnode, size, 0); 524240afd8cSMark Johnston for (off_t foff = 0; foff < size; foff += target) { 525240afd8cSMark Johnston off_t loc, sofar; 526240afd8cSMark Johnston 527240afd8cSMark Johnston /* 528240afd8cSMark Johnston * Fill up our buffer, handling partial reads. 529240afd8cSMark Johnston * 530240afd8cSMark Johnston * It might be profitable to use copy_file_range(2) here. 
531240afd8cSMark Johnston */ 532240afd8cSMark Johnston sofar = 0; 533240afd8cSMark Johnston target = MIN(size - foff, (off_t)bufsz); 534240afd8cSMark Johnston do { 535240afd8cSMark Johnston n = read(fd, buf + sofar, target); 536240afd8cSMark Johnston if (n < 0) 537240afd8cSMark Johnston err(1, "reading from '%s'", cur->name); 538240afd8cSMark Johnston if (n == 0) 539240afd8cSMark Johnston errx(1, "unexpected EOF reading '%s'", 540240afd8cSMark Johnston cur->name); 541240afd8cSMark Johnston sofar += n; 542240afd8cSMark Johnston } while (sofar < target); 543240afd8cSMark Johnston 544240afd8cSMark Johnston if (target < (off_t)bufsz) 545240afd8cSMark Johnston memset(buf + target, 0, bufsz - target); 546240afd8cSMark Johnston 547240afd8cSMark Johnston loc = objset_space_alloc(zfs, arg->fs->os, &target); 548240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, buf, target, loc, 549240afd8cSMark Johnston dnode_cursor_next(zfs, c, foff)); 550240afd8cSMark Johnston } 5518eca3207SMark Johnston eclose(fd); 552240afd8cSMark Johnston dnode_cursor_finish(zfs, c); 553240afd8cSMark Johnston 554240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 555240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 556240afd8cSMark Johnston } 557240afd8cSMark Johnston 558240afd8cSMark Johnston static void 559240afd8cSMark Johnston fs_populate_dir(fsnode *cur, struct fs_populate_arg *arg) 560240afd8cSMark Johnston { 561240afd8cSMark Johnston dnode_phys_t *dnode; 562240afd8cSMark Johnston zfs_objset_t *os; 563240afd8cSMark Johnston uint64_t dnid; 564240afd8cSMark Johnston int dirfd; 565240afd8cSMark Johnston 566240afd8cSMark Johnston assert(cur->type == S_IFDIR); 567240afd8cSMark Johnston assert((cur->inode->flags & FI_ALLOCATED) == 0); 568240afd8cSMark Johnston 569240afd8cSMark Johnston os = arg->fs->os; 570240afd8cSMark Johnston 571240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(os, DMU_OT_DIRECTORY_CONTENTS, 572240afd8cSMark Johnston DMU_OT_SA, 0, &dnid); 
573240afd8cSMark Johnston 574240afd8cSMark Johnston /* 575240afd8cSMark Johnston * Add an entry to the parent directory and open this directory. 576240afd8cSMark Johnston */ 577240afd8cSMark Johnston if (!SLIST_EMPTY(&arg->dirs)) { 578240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 5798eca3207SMark Johnston dirfd = fs_open(cur, arg, O_DIRECTORY | O_RDONLY); 580240afd8cSMark Johnston } else { 581240afd8cSMark Johnston arg->rootdirid = dnid; 5828eca3207SMark Johnston dirfd = arg->rootdirfd; 5838eca3207SMark Johnston arg->rootdirfd = -1; 584240afd8cSMark Johnston } 585240afd8cSMark Johnston 586240afd8cSMark Johnston /* 587240afd8cSMark Johnston * Set ZPL attributes. 588240afd8cSMark Johnston */ 589240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 590240afd8cSMark Johnston 591240afd8cSMark Johnston /* 592240afd8cSMark Johnston * If this is a root directory, then its children belong to a different 593240afd8cSMark Johnston * dataset and this directory remains empty in the current objset. 
594240afd8cSMark Johnston */ 595240afd8cSMark Johnston if ((cur->inode->flags & FI_ROOT) == 0) { 596240afd8cSMark Johnston struct fs_populate_dir *dir; 597240afd8cSMark Johnston 598240afd8cSMark Johnston dir = ecalloc(1, sizeof(*dir)); 599240afd8cSMark Johnston dir->dirfd = dirfd; 600240afd8cSMark Johnston dir->objid = dnid; 601240afd8cSMark Johnston dir->zap = zap_alloc(os, dnode); 602240afd8cSMark Johnston SLIST_INSERT_HEAD(&arg->dirs, dir, next); 603240afd8cSMark Johnston } else { 604240afd8cSMark Johnston zap_write(arg->zfs, zap_alloc(os, dnode)); 605240afd8cSMark Johnston fs_build_one(arg->zfs, cur->inode->param, cur->child, dirfd); 606240afd8cSMark Johnston } 607240afd8cSMark Johnston }

/*
 * Allocate a dnode for the symbolic link "cur", add a directory entry for it
 * and set its ZPL attributes.
 *
 * The node must be of type S_IFLNK and its inode must have neither
 * FI_ALLOCATED nor FI_ROOT set.
 *
 * NOTE(review): the link target is not handled here; presumably
 * fs_populate_sattrs() stores it -- confirm there.
 */
static void
fs_populate_symlink(fsnode *cur, struct fs_populate_arg *arg)
{
	zfs_objset_t *objset;
	dnode_phys_t *linkdn;
	uint64_t linkid;

	assert(cur->type == S_IFLNK);
	assert((cur->inode->flags & (FI_ALLOCATED | FI_ROOT)) == 0);

	objset = arg->fs->os;
	linkdn = objset_dnode_bonus_alloc(objset, DMU_OT_PLAIN_FILE_CONTENTS,
	    DMU_OT_SA, 0, &linkid);

	/* Name the new object in its directory, then fill in its attributes. */
	fs_populate_dirent(arg, cur, linkid);
	fs_populate_sattrs(arg, cur, linkdn);
}

/*
 * fsnode_foreach() callback used to populate a dataset: create the on-disk
 * object for "cur" and, when "cur" ends a subtree, write out the directories
 * which have just been completed.
 *
 * "_arg" is the struct fs_populate_arg describing the dataset being built.
 *
 * Returns 0 if the node is flagged FI_ROOT and 1 otherwise.
 * NOTE(review): presumably this tells fsnode_foreach() whether to descend
 * into the node's children -- confirm against fsnode_foreach().
 */
static int
fs_foreach_populate(fsnode *cur, void *_arg)
{
	struct fs_populate_arg *arg = _arg;
	struct fs_populate_dir *top;
	fsnode *node;
	int descend;

	switch (cur->type) {
	case S_IFREG:
		fs_populate_file(cur, arg);
		break;
	case S_IFDIR:
		/* Nodes for which fsnode_isroot() holds are skipped here. */
		if (!fsnode_isroot(cur))
			fs_populate_dir(cur, arg);
		break;
	case S_IFLNK:
		fs_populate_symlink(cur, arg);
		break;
	default:
		assert(0);
	}

	descend = (cur->inode->flags & FI_ROOT) == 0 ? 1 : 0;

	/*
	 * Nothing to unwind unless this is the last entry at its level and
	 * there is nothing below it to visit in this dataset.
	 */
	if (cur->next != NULL || (cur->child != NULL && descend != 0))
		return (descend);

	/*
	 * We are at a terminal node of a subtree.  Pop and write directories
	 * while walking towards the root.  The walk ends at the root of a
	 * dataset, or at the first ancestor that still has a sibling to
	 * visit.
	 */
	node = cur;
	for (;;) {
		top = SLIST_FIRST(&arg->dirs);
		SLIST_REMOVE_HEAD(&arg->dirs, next);
		zap_write(arg->zfs, top->zap);
		if (top->dirfd != -1)
			eclose(top->dirfd);
		free(top);

		node = node->parent;
		if (node == NULL || node->next != NULL ||
		    (node->inode->flags & FI_ROOT) != 0)
			break;
	}

	return (descend);
}

/*
 * Add one attribute layout to the layout ZAP.
 *
 * The entry's key is "index" printed in decimal.  Its value is the first
 * "sacnt" elements of "layout", stored as an array of 2-byte integers.
 */
static void
fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index,
    const sa_attr_type_t layout[], size_t sacnt)
{
	char key[16];

	/* Layout elements are written as 16-bit integers. */
	assert(sizeof(layout[0]) == 2);

	snprintf(key, sizeof(key), "%u", index);
	zap_add(zap, key, sizeof(sa_attr_type_t), sacnt,
	    (const uint8_t *)layout);
}

/*
 * Initialize system attribute tables.
 *
 * There are two elements to this.  First, we write the zpl_attrs[] and
 * zpl_attr_layout[] tables to disk.  Then we create a lookup table which
 * allows us to set file attributes quickly.
694240afd8cSMark Johnston */ 695240afd8cSMark Johnston static uint64_t 696240afd8cSMark Johnston fs_set_zpl_attrs(zfs_opt_t *zfs, zfs_fs_t *fs) 697240afd8cSMark Johnston { 698240afd8cSMark Johnston zfs_zap_t *sazap, *salzap, *sarzap; 699240afd8cSMark Johnston zfs_objset_t *os; 700240afd8cSMark Johnston dnode_phys_t *saobj, *salobj, *sarobj; 701240afd8cSMark Johnston uint64_t saobjid, salobjid, sarobjid; 702240afd8cSMark Johnston uint16_t offset; 703240afd8cSMark Johnston 704240afd8cSMark Johnston os = fs->os; 705240afd8cSMark Johnston 706240afd8cSMark Johnston /* 707240afd8cSMark Johnston * The on-disk tables are stored in two ZAP objects, the registry object 708240afd8cSMark Johnston * and the layout object. Individual attributes are described by 709240afd8cSMark Johnston * entries in the registry object; for example, the value for the 710240afd8cSMark Johnston * "ZPL_SIZE" key gives the size and encoding of the ZPL_SIZE attribute. 711240afd8cSMark Johnston * The attributes of a file are ordered according to one of the layouts 712240afd8cSMark Johnston * defined in the layout object. The master node object is simply used 713240afd8cSMark Johnston * to locate the registry and layout objects. 
714240afd8cSMark Johnston */ 715240afd8cSMark Johnston saobj = objset_dnode_alloc(os, DMU_OT_SA_MASTER_NODE, &saobjid); 716240afd8cSMark Johnston salobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_LAYOUTS, &salobjid); 717240afd8cSMark Johnston sarobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_REGISTRATION, &sarobjid); 718240afd8cSMark Johnston 719240afd8cSMark Johnston sarzap = zap_alloc(os, sarobj); 720240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attrs); i++) { 721240afd8cSMark Johnston const zfs_sattr_t *sa; 722240afd8cSMark Johnston uint64_t attr; 723240afd8cSMark Johnston 724240afd8cSMark Johnston attr = 0; 725240afd8cSMark Johnston sa = &zpl_attrs[i]; 726240afd8cSMark Johnston SA_ATTR_ENCODE(attr, (uint64_t)i, sa->size, sa->bs); 727240afd8cSMark Johnston zap_add_uint64(sarzap, sa->name, attr); 728240afd8cSMark Johnston } 729240afd8cSMark Johnston zap_write(zfs, sarzap); 730240afd8cSMark Johnston 731240afd8cSMark Johnston /* 732240afd8cSMark Johnston * Layouts are arrays of indices into the registry. We define two 733240afd8cSMark Johnston * layouts for use by the ZPL, one for non-symlinks and one for 734240afd8cSMark Johnston * symlinks. They are identical except that the symlink layout includes 735240afd8cSMark Johnston * ZPL_SYMLINK as its final attribute. 
736240afd8cSMark Johnston */ 737240afd8cSMark Johnston salzap = zap_alloc(os, salobj); 738240afd8cSMark Johnston assert(zpl_attr_layout[nitems(zpl_attr_layout) - 1] == ZPL_SYMLINK); 739240afd8cSMark Johnston fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_DEFAULT, 740240afd8cSMark Johnston zpl_attr_layout, nitems(zpl_attr_layout) - 1); 741240afd8cSMark Johnston fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_SYMLINK, 742240afd8cSMark Johnston zpl_attr_layout, nitems(zpl_attr_layout)); 743240afd8cSMark Johnston zap_write(zfs, salzap); 744240afd8cSMark Johnston 745240afd8cSMark Johnston sazap = zap_alloc(os, saobj); 746240afd8cSMark Johnston zap_add_uint64(sazap, SA_LAYOUTS, salobjid); 747240afd8cSMark Johnston zap_add_uint64(sazap, SA_REGISTRY, sarobjid); 748240afd8cSMark Johnston zap_write(zfs, sazap); 749240afd8cSMark Johnston 750240afd8cSMark Johnston /* Sanity check. */ 751240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attrs); i++) 752240afd8cSMark Johnston assert(i == zpl_attrs[i].id); 753240afd8cSMark Johnston 754240afd8cSMark Johnston /* 755240afd8cSMark Johnston * Build the offset table used when setting file attributes. File 756240afd8cSMark Johnston * attributes are stored in the object's bonus buffer; this table 757240afd8cSMark Johnston * provides the buffer offset of attributes referenced by the layout 758240afd8cSMark Johnston * table. 
759240afd8cSMark Johnston */ 760240afd8cSMark Johnston fs->sacnt = nitems(zpl_attrs); 761240afd8cSMark Johnston fs->saoffs = ecalloc(fs->sacnt, sizeof(*fs->saoffs)); 762240afd8cSMark Johnston for (size_t i = 0; i < fs->sacnt; i++) 763240afd8cSMark Johnston fs->saoffs[i] = 0xffff; 764240afd8cSMark Johnston offset = 0; 765240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attr_layout); i++) { 766240afd8cSMark Johnston uint16_t size; 767240afd8cSMark Johnston 768240afd8cSMark Johnston assert(zpl_attr_layout[i] < fs->sacnt); 769240afd8cSMark Johnston 770240afd8cSMark Johnston fs->saoffs[zpl_attr_layout[i]] = offset; 771240afd8cSMark Johnston size = zpl_attrs[zpl_attr_layout[i]].size; 772240afd8cSMark Johnston offset += size; 773240afd8cSMark Johnston } 774240afd8cSMark Johnston fs->satab = zpl_attrs; 775240afd8cSMark Johnston 776240afd8cSMark Johnston return (saobjid); 777240afd8cSMark Johnston } 778240afd8cSMark Johnston 779240afd8cSMark Johnston static void 780240afd8cSMark Johnston fs_layout_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg) 781240afd8cSMark Johnston { 782240afd8cSMark Johnston char *mountpoint, *origmountpoint, *name, *next; 783240afd8cSMark Johnston fsnode *cur, *root; 784240afd8cSMark Johnston uint64_t canmount; 785240afd8cSMark Johnston 786240afd8cSMark Johnston if (!dsl_dir_has_dataset(dsldir)) 787240afd8cSMark Johnston return; 788240afd8cSMark Johnston 78978d7704bSMark Johnston if (dsl_dir_get_canmount(dsldir, &canmount) == 0 && canmount == 0) 79078d7704bSMark Johnston return; 791240afd8cSMark Johnston mountpoint = dsl_dir_get_mountpoint(zfs, dsldir); 792240afd8cSMark Johnston if (mountpoint == NULL) 793240afd8cSMark Johnston return; 794240afd8cSMark Johnston 795240afd8cSMark Johnston /* 796240afd8cSMark Johnston * If we were asked to specify a bootfs, set it here. 
797240afd8cSMark Johnston */ 798240afd8cSMark Johnston if (zfs->bootfs != NULL && strcmp(zfs->bootfs, 799240afd8cSMark Johnston dsl_dir_fullname(dsldir)) == 0) { 800240afd8cSMark Johnston zap_add_uint64(zfs->poolprops, "bootfs", 801240afd8cSMark Johnston dsl_dir_dataset_id(dsldir)); 802240afd8cSMark Johnston } 803240afd8cSMark Johnston 804240afd8cSMark Johnston origmountpoint = mountpoint; 805240afd8cSMark Johnston 806240afd8cSMark Johnston /* 807240afd8cSMark Johnston * Figure out which fsnode corresponds to our mountpoint. 808240afd8cSMark Johnston */ 809240afd8cSMark Johnston root = arg; 810240afd8cSMark Johnston cur = root; 811240afd8cSMark Johnston if (strcmp(mountpoint, zfs->rootpath) != 0) { 812240afd8cSMark Johnston mountpoint += strlen(zfs->rootpath); 813240afd8cSMark Johnston 814240afd8cSMark Johnston /* 815240afd8cSMark Johnston * Look up the directory in the staged tree. For example, if 816240afd8cSMark Johnston * the dataset's mount point is /foo/bar/baz, we'll search the 817240afd8cSMark Johnston * root directory for "foo", search "foo" for "baz", and so on. 818240afd8cSMark Johnston * Each intermediate name must refer to a directory; the final 819240afd8cSMark Johnston * component need not exist. 
820240afd8cSMark Johnston */ 821240afd8cSMark Johnston cur = root; 822240afd8cSMark Johnston for (next = name = mountpoint; next != NULL;) { 823240afd8cSMark Johnston for (; *next == '/'; next++) 824240afd8cSMark Johnston ; 825240afd8cSMark Johnston name = strsep(&next, "/"); 826240afd8cSMark Johnston 827240afd8cSMark Johnston for (; cur != NULL && strcmp(cur->name, name) != 0; 828240afd8cSMark Johnston cur = cur->next) 829240afd8cSMark Johnston ; 830240afd8cSMark Johnston if (cur == NULL) { 831240afd8cSMark Johnston if (next == NULL) 832240afd8cSMark Johnston break; 833240afd8cSMark Johnston errx(1, "missing mountpoint directory for `%s'", 834240afd8cSMark Johnston dsl_dir_fullname(dsldir)); 835240afd8cSMark Johnston } 836240afd8cSMark Johnston if (cur->type != S_IFDIR) { 837240afd8cSMark Johnston errx(1, 838240afd8cSMark Johnston "mountpoint for `%s' is not a directory", 839240afd8cSMark Johnston dsl_dir_fullname(dsldir)); 840240afd8cSMark Johnston } 841240afd8cSMark Johnston if (next != NULL) 842240afd8cSMark Johnston cur = cur->child; 843240afd8cSMark Johnston } 844240afd8cSMark Johnston } 845240afd8cSMark Johnston 846240afd8cSMark Johnston if (cur != NULL) { 847240afd8cSMark Johnston assert(cur->type == S_IFDIR); 848240afd8cSMark Johnston 849240afd8cSMark Johnston /* 850240afd8cSMark Johnston * Multiple datasets shouldn't share a mountpoint. It's 851240afd8cSMark Johnston * technically allowed, but it's not clear what makefs should do 852240afd8cSMark Johnston * in that case. 
853240afd8cSMark Johnston */ 854240afd8cSMark Johnston assert((cur->inode->flags & FI_ROOT) == 0); 855240afd8cSMark Johnston if (cur != root) 856240afd8cSMark Johnston cur->inode->flags |= FI_ROOT; 857240afd8cSMark Johnston assert(cur->inode->param == NULL); 858240afd8cSMark Johnston cur->inode->param = dsldir; 859240afd8cSMark Johnston } 860240afd8cSMark Johnston 861240afd8cSMark Johnston free(origmountpoint); 862240afd8cSMark Johnston } 863240afd8cSMark Johnston 864240afd8cSMark Johnston static int 865240afd8cSMark Johnston fs_foreach_mark(fsnode *cur, void *arg) 866240afd8cSMark Johnston { 867240afd8cSMark Johnston uint64_t *countp; 868240afd8cSMark Johnston 869240afd8cSMark Johnston countp = arg; 870240afd8cSMark Johnston if (cur->type == S_IFDIR && fsnode_isroot(cur)) 871240afd8cSMark Johnston return (1); 872240afd8cSMark Johnston 873240afd8cSMark Johnston if (cur->inode->ino == 0) { 874240afd8cSMark Johnston cur->inode->ino = ++(*countp); 875240afd8cSMark Johnston cur->inode->nlink = 1; 876240afd8cSMark Johnston } else { 877240afd8cSMark Johnston cur->inode->nlink++; 878240afd8cSMark Johnston } 879240afd8cSMark Johnston 880240afd8cSMark Johnston return ((cur->inode->flags & FI_ROOT) != 0 ? 0 : 1); 881240afd8cSMark Johnston } 882240afd8cSMark Johnston 883240afd8cSMark Johnston /* 884240afd8cSMark Johnston * Create a filesystem dataset. More specifically: 885240afd8cSMark Johnston * - create an object set for the dataset, 886240afd8cSMark Johnston * - add required metadata (SA tables, property definitions, etc.) to that 887240afd8cSMark Johnston * object set, 888240afd8cSMark Johnston * - optionally populate the object set with file objects, using "root" as the 889240afd8cSMark Johnston * root directory. 890240afd8cSMark Johnston * 891240afd8cSMark Johnston * "dirfd" is a directory descriptor for the directory referenced by "root". It 892240afd8cSMark Johnston * is closed before returning. 
893240afd8cSMark Johnston */ 894240afd8cSMark Johnston static void 895240afd8cSMark Johnston fs_build_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, fsnode *root, int dirfd) 896240afd8cSMark Johnston { 897240afd8cSMark Johnston struct fs_populate_arg arg; 898240afd8cSMark Johnston zfs_fs_t fs; 899240afd8cSMark Johnston zfs_zap_t *masterzap; 900240afd8cSMark Johnston zfs_objset_t *os; 901240afd8cSMark Johnston dnode_phys_t *deleteq, *masterobj; 902240afd8cSMark Johnston uint64_t deleteqid, dnodecount, moid, rootdirid, saobjid; 903240afd8cSMark Johnston bool fakedroot; 904240afd8cSMark Johnston 905240afd8cSMark Johnston /* 906240afd8cSMark Johnston * This dataset's mountpoint doesn't exist in the staging tree, or the 907240afd8cSMark Johnston * dataset doesn't have a mountpoint at all. In either case we still 908240afd8cSMark Johnston * need a root directory. Fake up a root fsnode to handle this case. 909240afd8cSMark Johnston */ 910240afd8cSMark Johnston fakedroot = root == NULL; 911240afd8cSMark Johnston if (fakedroot) { 912240afd8cSMark Johnston struct stat *stp; 913240afd8cSMark Johnston 914240afd8cSMark Johnston assert(dirfd == -1); 915240afd8cSMark Johnston 916240afd8cSMark Johnston root = ecalloc(1, sizeof(*root)); 917240afd8cSMark Johnston root->inode = ecalloc(1, sizeof(*root->inode)); 918240afd8cSMark Johnston root->name = estrdup("."); 919240afd8cSMark Johnston root->type = S_IFDIR; 920240afd8cSMark Johnston 921240afd8cSMark Johnston stp = &root->inode->st; 922240afd8cSMark Johnston stp->st_uid = 0; 923240afd8cSMark Johnston stp->st_gid = 0; 924240afd8cSMark Johnston stp->st_mode = S_IFDIR | 0755; 925240afd8cSMark Johnston } 926240afd8cSMark Johnston assert(root->type == S_IFDIR); 927240afd8cSMark Johnston assert(fsnode_isroot(root)); 928240afd8cSMark Johnston 929240afd8cSMark Johnston /* 930240afd8cSMark Johnston * Initialize the object set for this dataset. 
931240afd8cSMark Johnston */ 932240afd8cSMark Johnston os = objset_alloc(zfs, DMU_OST_ZFS); 933240afd8cSMark Johnston masterobj = objset_dnode_alloc(os, DMU_OT_MASTER_NODE, &moid); 934240afd8cSMark Johnston assert(moid == MASTER_NODE_OBJ); 935240afd8cSMark Johnston 936240afd8cSMark Johnston memset(&fs, 0, sizeof(fs)); 937240afd8cSMark Johnston fs.os = os; 938240afd8cSMark Johnston 939240afd8cSMark Johnston /* 940240afd8cSMark Johnston * Create the ZAP SA layout now since filesystem object dnodes will 941240afd8cSMark Johnston * refer to those attributes. 942240afd8cSMark Johnston */ 943240afd8cSMark Johnston saobjid = fs_set_zpl_attrs(zfs, &fs); 944240afd8cSMark Johnston 945240afd8cSMark Johnston /* 946240afd8cSMark Johnston * Make a pass over the staged directory to detect hard links and assign 947240afd8cSMark Johnston * virtual dnode numbers. 948240afd8cSMark Johnston */ 949240afd8cSMark Johnston dnodecount = 1; /* root directory */ 950240afd8cSMark Johnston fsnode_foreach(root, fs_foreach_mark, &dnodecount); 951240afd8cSMark Johnston 952240afd8cSMark Johnston /* 953240afd8cSMark Johnston * Make a second pass to populate the dataset with files from the 954240afd8cSMark Johnston * staged directory. Most of our runtime is spent here. 955240afd8cSMark Johnston */ 9568eca3207SMark Johnston arg.rootdirfd = dirfd; 957240afd8cSMark Johnston arg.zfs = zfs; 958240afd8cSMark Johnston arg.fs = &fs; 959240afd8cSMark Johnston SLIST_INIT(&arg.dirs); 960240afd8cSMark Johnston fs_populate_dir(root, &arg); 961240afd8cSMark Johnston assert(!SLIST_EMPTY(&arg.dirs)); 962240afd8cSMark Johnston fsnode_foreach(root, fs_foreach_populate, &arg); 963240afd8cSMark Johnston assert(SLIST_EMPTY(&arg.dirs)); 964240afd8cSMark Johnston rootdirid = arg.rootdirid; 965240afd8cSMark Johnston 966240afd8cSMark Johnston /* 967240afd8cSMark Johnston * Create an empty delete queue. We don't do anything with it, but 968240afd8cSMark Johnston * OpenZFS will refuse to mount filesystems that don't have one. 
969240afd8cSMark Johnston */ 970240afd8cSMark Johnston deleteq = objset_dnode_alloc(os, DMU_OT_UNLINKED_SET, &deleteqid); 971240afd8cSMark Johnston zap_write(zfs, zap_alloc(os, deleteq)); 972240afd8cSMark Johnston 973240afd8cSMark Johnston /* 974240afd8cSMark Johnston * Populate and write the master node object. This is a ZAP object 975240afd8cSMark Johnston * containing various dataset properties and the object IDs of the root 976240afd8cSMark Johnston * directory and delete queue. 977240afd8cSMark Johnston */ 978240afd8cSMark Johnston masterzap = zap_alloc(os, masterobj); 979240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_ROOT_OBJ, rootdirid); 980240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_UNLINKED_SET, deleteqid); 981240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_SA_ATTRS, saobjid); 982240afd8cSMark Johnston zap_add_uint64(masterzap, ZPL_VERSION_OBJ, 5 /* ZPL_VERSION_SA */); 983240afd8cSMark Johnston zap_add_uint64(masterzap, "normalization", 0 /* off */); 984240afd8cSMark Johnston zap_add_uint64(masterzap, "utf8only", 0 /* off */); 985240afd8cSMark Johnston zap_add_uint64(masterzap, "casesensitivity", 0 /* case sensitive */); 986240afd8cSMark Johnston zap_add_uint64(masterzap, "acltype", 2 /* NFSv4 */); 987240afd8cSMark Johnston zap_write(zfs, masterzap); 988240afd8cSMark Johnston 989240afd8cSMark Johnston /* 990240afd8cSMark Johnston * All finished with this object set, we may as well write it now. 991240afd8cSMark Johnston * The DSL layer will sum up the bytes consumed by each dataset using 992240afd8cSMark Johnston * information stored in the object set, so it can't be freed just yet. 
993240afd8cSMark Johnston */ 994240afd8cSMark Johnston dsl_dir_dataset_write(zfs, os, dsldir); 995240afd8cSMark Johnston 996240afd8cSMark Johnston if (fakedroot) { 997240afd8cSMark Johnston free(root->inode); 998240afd8cSMark Johnston free(root->name); 999240afd8cSMark Johnston free(root); 1000240afd8cSMark Johnston } 1001240afd8cSMark Johnston free(fs.saoffs); 1002240afd8cSMark Johnston } 1003240afd8cSMark Johnston 1004240afd8cSMark Johnston /* 1005240afd8cSMark Johnston * Create an object set for each DSL directory which has a dataset and doesn't 1006240afd8cSMark Johnston * already have an object set. 1007240afd8cSMark Johnston */ 1008240afd8cSMark Johnston static void 1009240afd8cSMark Johnston fs_build_unmounted(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg __unused) 1010240afd8cSMark Johnston { 1011240afd8cSMark Johnston if (dsl_dir_has_dataset(dsldir) && !dsl_dir_dataset_has_objset(dsldir)) 1012240afd8cSMark Johnston fs_build_one(zfs, dsldir, NULL, -1); 1013240afd8cSMark Johnston } 1014240afd8cSMark Johnston 1015240afd8cSMark Johnston /* 1016240afd8cSMark Johnston * Create our datasets and populate them with files. 1017240afd8cSMark Johnston */ 1018240afd8cSMark Johnston void 1019240afd8cSMark Johnston fs_build(zfs_opt_t *zfs, int dirfd, fsnode *root) 1020240afd8cSMark Johnston { 1021240afd8cSMark Johnston /* 1022240afd8cSMark Johnston * Run through our datasets and find the root fsnode for each one. Each 1023240afd8cSMark Johnston * root fsnode is flagged so that we can figure out which dataset it 1024240afd8cSMark Johnston * belongs to. 1025240afd8cSMark Johnston */ 1026240afd8cSMark Johnston dsl_dir_foreach(zfs, zfs->rootdsldir, fs_layout_one, root); 1027240afd8cSMark Johnston 1028240afd8cSMark Johnston /* 1029240afd8cSMark Johnston * Did we find our boot filesystem? 
1030240afd8cSMark Johnston */ 1031240afd8cSMark Johnston if (zfs->bootfs != NULL && !zap_entry_exists(zfs->poolprops, "bootfs")) 1032240afd8cSMark Johnston errx(1, "no mounted dataset matches bootfs property `%s'", 1033240afd8cSMark Johnston zfs->bootfs); 1034240afd8cSMark Johnston 1035240afd8cSMark Johnston /* 1036240afd8cSMark Johnston * Traverse the file hierarchy starting from the root fsnode. One 1037240afd8cSMark Johnston * dataset, not necessarily the root dataset, must "own" the root 1038240afd8cSMark Johnston * directory by having its mountpoint be equal to the root path. 1039240afd8cSMark Johnston * 1040240afd8cSMark Johnston * As roots of other datasets are encountered during the traversal, 1041240afd8cSMark Johnston * fs_build_one() recursively creates the corresponding object sets and 1042240afd8cSMark Johnston * populates them. Once this function has returned, all datasets will 1043240afd8cSMark Johnston * have been fully populated. 1044240afd8cSMark Johnston */ 1045240afd8cSMark Johnston fs_build_one(zfs, root->inode->param, root, dirfd); 1046240afd8cSMark Johnston 1047240afd8cSMark Johnston /* 1048240afd8cSMark Johnston * Now create object sets for datasets whose mountpoints weren't found 1049240afd8cSMark Johnston * in the staging directory, either because there is no mountpoint, or 1050240afd8cSMark Johnston * because the mountpoint doesn't correspond to an existing directory. 1051240afd8cSMark Johnston */ 1052240afd8cSMark Johnston dsl_dir_foreach(zfs, zfs->rootdsldir, fs_build_unmounted, NULL); 1053240afd8cSMark Johnston } 1054