1*240afd8cSMark Johnston /*- 2*240afd8cSMark Johnston * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3*240afd8cSMark Johnston * 4*240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation 5*240afd8cSMark Johnston * 6*240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from 7*240afd8cSMark Johnston * the FreeBSD Foundation. 8*240afd8cSMark Johnston * 9*240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without 10*240afd8cSMark Johnston * modification, are permitted provided that the following conditions are 11*240afd8cSMark Johnston * met: 12*240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright 13*240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer. 14*240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright 15*240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in 16*240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution. 17*240afd8cSMark Johnston * 18*240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19*240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20*240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21*240afd8cSMark Johnston * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22*240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23*240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24*240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25*240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26*240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27*240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28*240afd8cSMark Johnston * SUCH DAMAGE. 29*240afd8cSMark Johnston */ 30*240afd8cSMark Johnston 31*240afd8cSMark Johnston #include <sys/dirent.h> 32*240afd8cSMark Johnston #include <sys/stat.h> 33*240afd8cSMark Johnston 34*240afd8cSMark Johnston #include <assert.h> 35*240afd8cSMark Johnston #include <fcntl.h> 36*240afd8cSMark Johnston #include <string.h> 37*240afd8cSMark Johnston #include <unistd.h> 38*240afd8cSMark Johnston 39*240afd8cSMark Johnston #include <util.h> 40*240afd8cSMark Johnston 41*240afd8cSMark Johnston #include "makefs.h" 42*240afd8cSMark Johnston #include "zfs.h" 43*240afd8cSMark Johnston 44*240afd8cSMark Johnston typedef struct { 45*240afd8cSMark Johnston const char *name; 46*240afd8cSMark Johnston unsigned int id; 47*240afd8cSMark Johnston uint16_t size; 48*240afd8cSMark Johnston sa_bswap_type_t bs; 49*240afd8cSMark Johnston } zfs_sattr_t; 50*240afd8cSMark Johnston 51*240afd8cSMark Johnston typedef struct zfs_fs { 52*240afd8cSMark Johnston zfs_objset_t *os; 53*240afd8cSMark Johnston 54*240afd8cSMark Johnston /* Offset table for system attributes, indexed by a zpl_attr_t. */ 55*240afd8cSMark Johnston uint16_t *saoffs; 56*240afd8cSMark Johnston size_t sacnt; 57*240afd8cSMark Johnston const zfs_sattr_t *satab; 58*240afd8cSMark Johnston } zfs_fs_t; 59*240afd8cSMark Johnston 60*240afd8cSMark Johnston /* 61*240afd8cSMark Johnston * The order of the attributes doesn't matter, this is simply the one hard-coded 62*240afd8cSMark Johnston * by OpenZFS, based on a zdb dump of the SA_REGISTRY table. 63*240afd8cSMark Johnston */ 64*240afd8cSMark Johnston typedef enum zpl_attr { 65*240afd8cSMark Johnston ZPL_ATIME, 66*240afd8cSMark Johnston ZPL_MTIME, 67*240afd8cSMark Johnston ZPL_CTIME, 68*240afd8cSMark Johnston ZPL_CRTIME, 69*240afd8cSMark Johnston ZPL_GEN, 70*240afd8cSMark Johnston ZPL_MODE, 71*240afd8cSMark Johnston ZPL_SIZE, 72*240afd8cSMark Johnston ZPL_PARENT, 73*240afd8cSMark Johnston ZPL_LINKS, 74*240afd8cSMark Johnston ZPL_XATTR, 75*240afd8cSMark Johnston ZPL_RDEV, 76*240afd8cSMark Johnston ZPL_FLAGS, 77*240afd8cSMark Johnston ZPL_UID, 78*240afd8cSMark Johnston ZPL_GID, 79*240afd8cSMark Johnston ZPL_PAD, 80*240afd8cSMark Johnston ZPL_ZNODE_ACL, 81*240afd8cSMark Johnston ZPL_DACL_COUNT, 82*240afd8cSMark Johnston ZPL_SYMLINK, 83*240afd8cSMark Johnston ZPL_SCANSTAMP, 84*240afd8cSMark Johnston ZPL_DACL_ACES, 85*240afd8cSMark Johnston ZPL_DXATTR, 86*240afd8cSMark Johnston ZPL_PROJID, 87*240afd8cSMark Johnston } zpl_attr_t; 88*240afd8cSMark Johnston 89*240afd8cSMark Johnston /* 90*240afd8cSMark Johnston * This table must be kept in sync with zpl_attr_layout[] and zpl_attr_t. 91*240afd8cSMark Johnston */ 92*240afd8cSMark Johnston static const zfs_sattr_t zpl_attrs[] = { 93*240afd8cSMark Johnston #define _ZPL_ATTR(n, s, b) { .name = #n, .id = n, .size = s, .bs = b } 94*240afd8cSMark Johnston _ZPL_ATTR(ZPL_ATIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 95*240afd8cSMark Johnston _ZPL_ATTR(ZPL_MTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 96*240afd8cSMark Johnston _ZPL_ATTR(ZPL_CTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 97*240afd8cSMark Johnston _ZPL_ATTR(ZPL_CRTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), 98*240afd8cSMark Johnston _ZPL_ATTR(ZPL_GEN, sizeof(uint64_t), SA_UINT64_ARRAY), 99*240afd8cSMark Johnston _ZPL_ATTR(ZPL_MODE, sizeof(uint64_t), SA_UINT64_ARRAY), 100*240afd8cSMark Johnston _ZPL_ATTR(ZPL_SIZE, sizeof(uint64_t), SA_UINT64_ARRAY), 101*240afd8cSMark Johnston _ZPL_ATTR(ZPL_PARENT, sizeof(uint64_t), SA_UINT64_ARRAY), 102*240afd8cSMark Johnston _ZPL_ATTR(ZPL_LINKS, sizeof(uint64_t), SA_UINT64_ARRAY), 103*240afd8cSMark Johnston _ZPL_ATTR(ZPL_XATTR, sizeof(uint64_t), SA_UINT64_ARRAY), 104*240afd8cSMark Johnston _ZPL_ATTR(ZPL_RDEV, sizeof(uint64_t), SA_UINT64_ARRAY), 105*240afd8cSMark Johnston _ZPL_ATTR(ZPL_FLAGS, sizeof(uint64_t), SA_UINT64_ARRAY), 106*240afd8cSMark Johnston _ZPL_ATTR(ZPL_UID, sizeof(uint64_t), SA_UINT64_ARRAY), 107*240afd8cSMark Johnston _ZPL_ATTR(ZPL_GID, sizeof(uint64_t), SA_UINT64_ARRAY), 108*240afd8cSMark Johnston _ZPL_ATTR(ZPL_PAD, sizeof(uint64_t), SA_UINT64_ARRAY), 109*240afd8cSMark Johnston _ZPL_ATTR(ZPL_ZNODE_ACL, 88, SA_UINT64_ARRAY), 110*240afd8cSMark Johnston _ZPL_ATTR(ZPL_DACL_COUNT, sizeof(uint64_t), SA_UINT64_ARRAY), 111*240afd8cSMark Johnston _ZPL_ATTR(ZPL_SYMLINK, 0, SA_UINT8_ARRAY), 112*240afd8cSMark Johnston _ZPL_ATTR(ZPL_SCANSTAMP, sizeof(uint64_t) * 4, SA_UINT8_ARRAY), 113*240afd8cSMark Johnston _ZPL_ATTR(ZPL_DACL_ACES, 0, SA_ACL), 114*240afd8cSMark Johnston _ZPL_ATTR(ZPL_DXATTR, 0, SA_UINT8_ARRAY), 115*240afd8cSMark Johnston _ZPL_ATTR(ZPL_PROJID, sizeof(uint64_t), SA_UINT64_ARRAY), 116*240afd8cSMark Johnston #undef ZPL_ATTR 117*240afd8cSMark Johnston }; 118*240afd8cSMark Johnston 119*240afd8cSMark Johnston /* 120*240afd8cSMark Johnston * This layout matches that of a filesystem created using OpenZFS on FreeBSD. 121*240afd8cSMark Johnston * It need not match in general, but FreeBSD's loader doesn't bother parsing the 122*240afd8cSMark Johnston * layout and just hard-codes attribute offsets. 123*240afd8cSMark Johnston */ 124*240afd8cSMark Johnston static const sa_attr_type_t zpl_attr_layout[] = { 125*240afd8cSMark Johnston ZPL_MODE, 126*240afd8cSMark Johnston ZPL_SIZE, 127*240afd8cSMark Johnston ZPL_GEN, 128*240afd8cSMark Johnston ZPL_UID, 129*240afd8cSMark Johnston ZPL_GID, 130*240afd8cSMark Johnston ZPL_PARENT, 131*240afd8cSMark Johnston ZPL_FLAGS, 132*240afd8cSMark Johnston ZPL_ATIME, 133*240afd8cSMark Johnston ZPL_MTIME, 134*240afd8cSMark Johnston ZPL_CTIME, 135*240afd8cSMark Johnston ZPL_CRTIME, 136*240afd8cSMark Johnston ZPL_LINKS, 137*240afd8cSMark Johnston ZPL_DACL_COUNT, 138*240afd8cSMark Johnston ZPL_DACL_ACES, 139*240afd8cSMark Johnston ZPL_SYMLINK, 140*240afd8cSMark Johnston }; 141*240afd8cSMark Johnston 142*240afd8cSMark Johnston /* 143*240afd8cSMark Johnston * Keys for the ZPL attribute tables in the SA layout ZAP. The first two 144*240afd8cSMark Johnston * indices are reserved for legacy attribute encoding. 145*240afd8cSMark Johnston */ 146*240afd8cSMark Johnston #define SA_LAYOUT_INDEX_DEFAULT 2 147*240afd8cSMark Johnston #define SA_LAYOUT_INDEX_SYMLINK 3 148*240afd8cSMark Johnston 149*240afd8cSMark Johnston struct fs_populate_dir { 150*240afd8cSMark Johnston SLIST_ENTRY(fs_populate_dir) next; 151*240afd8cSMark Johnston int dirfd; 152*240afd8cSMark Johnston uint64_t objid; 153*240afd8cSMark Johnston zfs_zap_t *zap; 154*240afd8cSMark Johnston }; 155*240afd8cSMark Johnston 156*240afd8cSMark Johnston struct fs_populate_arg { 157*240afd8cSMark Johnston zfs_opt_t *zfs; 158*240afd8cSMark Johnston zfs_fs_t *fs; /* owning filesystem */ 159*240afd8cSMark Johnston int dirfd; /* current directory fd */ 160*240afd8cSMark Johnston uint64_t rootdirid; /* root directory dnode ID */ 161*240afd8cSMark Johnston SLIST_HEAD(, fs_populate_dir) dirs; /* stack of directories */ 162*240afd8cSMark Johnston }; 163*240afd8cSMark Johnston 164*240afd8cSMark Johnston static void fs_build_one(zfs_opt_t *, zfs_dsl_dir_t *, fsnode *, int); 165*240afd8cSMark Johnston 166*240afd8cSMark Johnston static bool 167*240afd8cSMark Johnston fsnode_isroot(const fsnode *cur) 168*240afd8cSMark Johnston { 169*240afd8cSMark Johnston return (strcmp(cur->name, ".") == 0); 170*240afd8cSMark Johnston } 171*240afd8cSMark Johnston 172*240afd8cSMark Johnston /* 173*240afd8cSMark Johnston * Visit each node in a directory hierarchy, in pre-order depth-first order. 174*240afd8cSMark Johnston */ 175*240afd8cSMark Johnston static void 176*240afd8cSMark Johnston fsnode_foreach(fsnode *root, int (*cb)(fsnode *, void *), void *arg) 177*240afd8cSMark Johnston { 178*240afd8cSMark Johnston assert(root->type == S_IFDIR); 179*240afd8cSMark Johnston 180*240afd8cSMark Johnston for (fsnode *cur = root; cur != NULL; cur = cur->next) { 181*240afd8cSMark Johnston assert(cur->type == S_IFREG || cur->type == S_IFDIR || 182*240afd8cSMark Johnston cur->type == S_IFLNK); 183*240afd8cSMark Johnston 184*240afd8cSMark Johnston if (cb(cur, arg) == 0) 185*240afd8cSMark Johnston continue; 186*240afd8cSMark Johnston if (cur->type == S_IFDIR && cur->child != NULL) 187*240afd8cSMark Johnston fsnode_foreach(cur->child, cb, arg); 188*240afd8cSMark Johnston } 189*240afd8cSMark Johnston } 190*240afd8cSMark Johnston 191*240afd8cSMark Johnston static void 192*240afd8cSMark Johnston fs_populate_dirent(struct fs_populate_arg *arg, fsnode *cur, uint64_t dnid) 193*240afd8cSMark Johnston { 194*240afd8cSMark Johnston struct fs_populate_dir *dir; 195*240afd8cSMark Johnston uint64_t type; 196*240afd8cSMark Johnston 197*240afd8cSMark Johnston switch (cur->type) { 198*240afd8cSMark Johnston case S_IFREG: 199*240afd8cSMark Johnston type = DT_REG; 200*240afd8cSMark Johnston break; 201*240afd8cSMark Johnston case S_IFDIR: 202*240afd8cSMark Johnston type = DT_DIR; 203*240afd8cSMark Johnston break; 204*240afd8cSMark Johnston case S_IFLNK: 205*240afd8cSMark Johnston type = DT_LNK; 206*240afd8cSMark Johnston break; 207*240afd8cSMark Johnston default: 208*240afd8cSMark Johnston assert(0); 209*240afd8cSMark Johnston } 210*240afd8cSMark Johnston 211*240afd8cSMark Johnston dir = SLIST_FIRST(&arg->dirs); 212*240afd8cSMark Johnston zap_add_uint64(dir->zap, cur->name, ZFS_DIRENT_MAKE(type, dnid)); 213*240afd8cSMark Johnston } 214*240afd8cSMark Johnston 215*240afd8cSMark Johnston static void 216*240afd8cSMark Johnston fs_populate_attr(zfs_fs_t *fs, char *attrbuf, const void *val, uint16_t ind, 217*240afd8cSMark Johnston size_t *szp) 218*240afd8cSMark Johnston { 219*240afd8cSMark Johnston assert(ind < fs->sacnt); 220*240afd8cSMark Johnston assert(fs->saoffs[ind] != 0xffff); 221*240afd8cSMark Johnston 222*240afd8cSMark Johnston memcpy(attrbuf + fs->saoffs[ind], val, fs->satab[ind].size); 223*240afd8cSMark Johnston *szp += fs->satab[ind].size; 224*240afd8cSMark Johnston } 225*240afd8cSMark Johnston 226*240afd8cSMark Johnston static void 227*240afd8cSMark Johnston fs_populate_varszattr(zfs_fs_t *fs, char *attrbuf, const void *val, 228*240afd8cSMark Johnston size_t valsz, size_t varoff, uint16_t ind, size_t *szp) 229*240afd8cSMark Johnston { 230*240afd8cSMark Johnston assert(ind < fs->sacnt); 231*240afd8cSMark Johnston assert(fs->saoffs[ind] != 0xffff); 232*240afd8cSMark Johnston assert(fs->satab[ind].size == 0); 233*240afd8cSMark Johnston 234*240afd8cSMark Johnston memcpy(attrbuf + fs->saoffs[ind] + varoff, val, valsz); 235*240afd8cSMark Johnston *szp += valsz; 236*240afd8cSMark Johnston } 237*240afd8cSMark Johnston 238*240afd8cSMark Johnston static void 239*240afd8cSMark Johnston fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur, 240*240afd8cSMark Johnston dnode_phys_t *dnode) 241*240afd8cSMark Johnston { 242*240afd8cSMark Johnston char target[PATH_MAX]; 243*240afd8cSMark Johnston zfs_fs_t *fs; 244*240afd8cSMark Johnston zfs_ace_hdr_t aces[3]; 245*240afd8cSMark Johnston struct stat *sb; 246*240afd8cSMark Johnston sa_hdr_phys_t *sahdr; 247*240afd8cSMark Johnston uint64_t daclcount, flags, gen, gid, links, mode, parent, objsize, uid; 248*240afd8cSMark Johnston char *attrbuf; 249*240afd8cSMark Johnston size_t bonussz, hdrsz; 250*240afd8cSMark Johnston int layout; 251*240afd8cSMark Johnston 252*240afd8cSMark Johnston assert(dnode->dn_bonustype == DMU_OT_SA); 253*240afd8cSMark Johnston assert(dnode->dn_nblkptr == 1); 254*240afd8cSMark Johnston 255*240afd8cSMark Johnston fs = arg->fs; 256*240afd8cSMark Johnston sb = &cur->inode->st; 257*240afd8cSMark Johnston 258*240afd8cSMark Johnston switch (cur->type) { 259*240afd8cSMark Johnston case S_IFREG: 260*240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_DEFAULT; 261*240afd8cSMark Johnston links = cur->inode->nlink; 262*240afd8cSMark Johnston objsize = sb->st_size; 263*240afd8cSMark Johnston parent = SLIST_FIRST(&arg->dirs)->objid; 264*240afd8cSMark Johnston break; 265*240afd8cSMark Johnston case S_IFDIR: 266*240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_DEFAULT; 267*240afd8cSMark Johnston links = 1; /* .. */ 268*240afd8cSMark Johnston objsize = 1; /* .. */ 269*240afd8cSMark Johnston 270*240afd8cSMark Johnston /* 271*240afd8cSMark Johnston * The size of a ZPL directory is the number of entries 272*240afd8cSMark Johnston * (including "." and ".."), and the link count is the number of 273*240afd8cSMark Johnston * entries which are directories (including "." and ".."). 274*240afd8cSMark Johnston */ 275*240afd8cSMark Johnston for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child; 276*240afd8cSMark Johnston c != NULL; c = c->next) { 277*240afd8cSMark Johnston if (c->type == S_IFDIR) 278*240afd8cSMark Johnston links++; 279*240afd8cSMark Johnston objsize++; 280*240afd8cSMark Johnston } 281*240afd8cSMark Johnston 282*240afd8cSMark Johnston /* The root directory is its own parent. */ 283*240afd8cSMark Johnston parent = SLIST_EMPTY(&arg->dirs) ? 284*240afd8cSMark Johnston arg->rootdirid : SLIST_FIRST(&arg->dirs)->objid; 285*240afd8cSMark Johnston break; 286*240afd8cSMark Johnston case S_IFLNK: { 287*240afd8cSMark Johnston ssize_t n; 288*240afd8cSMark Johnston 289*240afd8cSMark Johnston if ((n = readlinkat(SLIST_FIRST(&arg->dirs)->dirfd, cur->name, 290*240afd8cSMark Johnston target, sizeof(target) - 1)) == -1) 291*240afd8cSMark Johnston err(1, "readlinkat(%s)", cur->name); 292*240afd8cSMark Johnston target[n] = '\0'; 293*240afd8cSMark Johnston 294*240afd8cSMark Johnston layout = SA_LAYOUT_INDEX_SYMLINK; 295*240afd8cSMark Johnston links = 1; 296*240afd8cSMark Johnston objsize = strlen(target); 297*240afd8cSMark Johnston parent = SLIST_FIRST(&arg->dirs)->objid; 298*240afd8cSMark Johnston break; 299*240afd8cSMark Johnston } 300*240afd8cSMark Johnston default: 301*240afd8cSMark Johnston assert(0); 302*240afd8cSMark Johnston } 303*240afd8cSMark Johnston 304*240afd8cSMark Johnston daclcount = nitems(aces); 305*240afd8cSMark Johnston flags = ZFS_ACL_TRIVIAL | ZFS_ACL_AUTO_INHERIT | ZFS_NO_EXECS_DENIED | 306*240afd8cSMark Johnston ZFS_ARCHIVE | ZFS_AV_MODIFIED; /* XXX-MJ */ 307*240afd8cSMark Johnston gen = 1; 308*240afd8cSMark Johnston gid = sb->st_gid; 309*240afd8cSMark Johnston mode = sb->st_mode; 310*240afd8cSMark Johnston uid = sb->st_uid; 311*240afd8cSMark Johnston 312*240afd8cSMark Johnston memset(aces, 0, sizeof(aces)); 313*240afd8cSMark Johnston aces[0].z_flags = ACE_OWNER; 314*240afd8cSMark Johnston aces[0].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 315*240afd8cSMark Johnston aces[0].z_access_mask = ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER | 316*240afd8cSMark Johnston ACE_WRITE_ACL | ACE_WRITE_NAMED_ATTRS | ACE_READ_ACL | 317*240afd8cSMark Johnston ACE_READ_ATTRIBUTES | ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 318*240afd8cSMark Johnston if ((mode & S_IRUSR) != 0) 319*240afd8cSMark Johnston aces[0].z_access_mask |= ACE_READ_DATA; 320*240afd8cSMark Johnston if ((mode & S_IWUSR) != 0) 321*240afd8cSMark Johnston aces[0].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 322*240afd8cSMark Johnston if ((mode & S_IXUSR) != 0) 323*240afd8cSMark Johnston aces[0].z_access_mask |= ACE_EXECUTE; 324*240afd8cSMark Johnston 325*240afd8cSMark Johnston aces[1].z_flags = ACE_GROUP | ACE_IDENTIFIER_GROUP; 326*240afd8cSMark Johnston aces[1].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 327*240afd8cSMark Johnston aces[1].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES | 328*240afd8cSMark Johnston ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 329*240afd8cSMark Johnston if ((mode & S_IRGRP) != 0) 330*240afd8cSMark Johnston aces[1].z_access_mask |= ACE_READ_DATA; 331*240afd8cSMark Johnston if ((mode & S_IWGRP) != 0) 332*240afd8cSMark Johnston aces[1].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 333*240afd8cSMark Johnston if ((mode & S_IXGRP) != 0) 334*240afd8cSMark Johnston aces[1].z_access_mask |= ACE_EXECUTE; 335*240afd8cSMark Johnston 336*240afd8cSMark Johnston aces[2].z_flags = ACE_EVERYONE; 337*240afd8cSMark Johnston aces[2].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; 338*240afd8cSMark Johnston aces[2].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES | 339*240afd8cSMark Johnston ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; 340*240afd8cSMark Johnston if ((mode & S_IROTH) != 0) 341*240afd8cSMark Johnston aces[2].z_access_mask |= ACE_READ_DATA; 342*240afd8cSMark Johnston if ((mode & S_IWOTH) != 0) 343*240afd8cSMark Johnston aces[2].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; 344*240afd8cSMark Johnston if ((mode & S_IXOTH) != 0) 345*240afd8cSMark Johnston aces[2].z_access_mask |= ACE_EXECUTE; 346*240afd8cSMark Johnston 347*240afd8cSMark Johnston switch (layout) { 348*240afd8cSMark Johnston case SA_LAYOUT_INDEX_DEFAULT: 349*240afd8cSMark Johnston /* At most one variable-length attribute. */ 350*240afd8cSMark Johnston hdrsz = sizeof(uint64_t); 351*240afd8cSMark Johnston break; 352*240afd8cSMark Johnston case SA_LAYOUT_INDEX_SYMLINK: 353*240afd8cSMark Johnston /* At most five variable-length attributes. */ 354*240afd8cSMark Johnston hdrsz = sizeof(uint64_t) * 2; 355*240afd8cSMark Johnston break; 356*240afd8cSMark Johnston default: 357*240afd8cSMark Johnston assert(0); 358*240afd8cSMark Johnston } 359*240afd8cSMark Johnston 360*240afd8cSMark Johnston sahdr = (sa_hdr_phys_t *)DN_BONUS(dnode); 361*240afd8cSMark Johnston sahdr->sa_magic = SA_MAGIC; 362*240afd8cSMark Johnston SA_HDR_LAYOUT_INFO_ENCODE(sahdr->sa_layout_info, layout, hdrsz); 363*240afd8cSMark Johnston 364*240afd8cSMark Johnston bonussz = SA_HDR_SIZE(sahdr); 365*240afd8cSMark Johnston attrbuf = (char *)sahdr + SA_HDR_SIZE(sahdr); 366*240afd8cSMark Johnston 367*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &daclcount, ZPL_DACL_COUNT, &bonussz); 368*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &flags, ZPL_FLAGS, &bonussz); 369*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &gen, ZPL_GEN, &bonussz); 370*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &gid, ZPL_GID, &bonussz); 371*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &links, ZPL_LINKS, &bonussz); 372*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &mode, ZPL_MODE, &bonussz); 373*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &parent, ZPL_PARENT, &bonussz); 374*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &objsize, ZPL_SIZE, &bonussz); 375*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &uid, ZPL_UID, &bonussz); 376*240afd8cSMark Johnston 377*240afd8cSMark Johnston /* 378*240afd8cSMark Johnston * We deliberately set atime = mtime here to ensure that images are 379*240afd8cSMark Johnston * reproducible. 380*240afd8cSMark Johnston */ 381*240afd8cSMark Johnston assert(sizeof(sb->st_mtim) == fs->satab[ZPL_ATIME].size); 382*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &sb->st_mtim, ZPL_ATIME, &bonussz); 383*240afd8cSMark Johnston assert(sizeof(sb->st_ctim) == fs->satab[ZPL_CTIME].size); 384*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &sb->st_ctim, ZPL_CTIME, &bonussz); 385*240afd8cSMark Johnston assert(sizeof(sb->st_mtim) == fs->satab[ZPL_MTIME].size); 386*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &sb->st_mtim, ZPL_MTIME, &bonussz); 387*240afd8cSMark Johnston assert(sizeof(sb->st_birthtim) == fs->satab[ZPL_CRTIME].size); 388*240afd8cSMark Johnston fs_populate_attr(fs, attrbuf, &sb->st_birthtim, ZPL_CRTIME, &bonussz); 389*240afd8cSMark Johnston 390*240afd8cSMark Johnston fs_populate_varszattr(fs, attrbuf, aces, sizeof(aces), 0, 391*240afd8cSMark Johnston ZPL_DACL_ACES, &bonussz); 392*240afd8cSMark Johnston sahdr->sa_lengths[0] = sizeof(aces); 393*240afd8cSMark Johnston 394*240afd8cSMark Johnston if (cur->type == S_IFLNK) { 395*240afd8cSMark Johnston assert(layout == SA_LAYOUT_INDEX_SYMLINK); 396*240afd8cSMark Johnston /* Need to use a spill block pointer if the target is long. */ 397*240afd8cSMark Johnston assert(bonussz + objsize <= DN_OLD_MAX_BONUSLEN); 398*240afd8cSMark Johnston fs_populate_varszattr(fs, attrbuf, target, objsize, 399*240afd8cSMark Johnston sahdr->sa_lengths[0], ZPL_SYMLINK, &bonussz); 400*240afd8cSMark Johnston sahdr->sa_lengths[1] = (uint16_t)objsize; 401*240afd8cSMark Johnston } 402*240afd8cSMark Johnston 403*240afd8cSMark Johnston dnode->dn_bonuslen = bonussz; 404*240afd8cSMark Johnston } 405*240afd8cSMark Johnston 406*240afd8cSMark Johnston static void 407*240afd8cSMark Johnston fs_populate_file(fsnode *cur, struct fs_populate_arg *arg) 408*240afd8cSMark Johnston { 409*240afd8cSMark Johnston struct dnode_cursor *c; 410*240afd8cSMark Johnston dnode_phys_t *dnode; 411*240afd8cSMark Johnston zfs_opt_t *zfs; 412*240afd8cSMark Johnston char *buf; 413*240afd8cSMark Johnston uint64_t dnid; 414*240afd8cSMark Johnston ssize_t n; 415*240afd8cSMark Johnston size_t bufsz; 416*240afd8cSMark Johnston off_t size, target; 417*240afd8cSMark Johnston int fd; 418*240afd8cSMark Johnston 419*240afd8cSMark Johnston assert(cur->type == S_IFREG); 420*240afd8cSMark Johnston assert((cur->inode->flags & FI_ROOT) == 0); 421*240afd8cSMark Johnston 422*240afd8cSMark Johnston zfs = arg->zfs; 423*240afd8cSMark Johnston 424*240afd8cSMark Johnston assert(cur->inode->ino != 0); 425*240afd8cSMark Johnston if ((cur->inode->flags & FI_ALLOCATED) != 0) { 426*240afd8cSMark Johnston /* 427*240afd8cSMark Johnston * This is a hard link of an existing file. 428*240afd8cSMark Johnston * 429*240afd8cSMark Johnston * XXX-MJ need to check whether it crosses datasets, add a test 430*240afd8cSMark Johnston * case for that 431*240afd8cSMark Johnston */ 432*240afd8cSMark Johnston fs_populate_dirent(arg, cur, cur->inode->ino); 433*240afd8cSMark Johnston return; 434*240afd8cSMark Johnston } 435*240afd8cSMark Johnston 436*240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(arg->fs->os, 437*240afd8cSMark Johnston DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid); 438*240afd8cSMark Johnston cur->inode->ino = dnid; 439*240afd8cSMark Johnston cur->inode->flags |= FI_ALLOCATED; 440*240afd8cSMark Johnston 441*240afd8cSMark Johnston fd = openat(SLIST_FIRST(&arg->dirs)->dirfd, cur->name, O_RDONLY); 442*240afd8cSMark Johnston if (fd == -1) 443*240afd8cSMark Johnston err(1, "openat(%s)", cur->name); 444*240afd8cSMark Johnston 445*240afd8cSMark Johnston buf = zfs->filebuf; 446*240afd8cSMark Johnston bufsz = sizeof(zfs->filebuf); 447*240afd8cSMark Johnston size = cur->inode->st.st_size; 448*240afd8cSMark Johnston c = dnode_cursor_init(zfs, arg->fs->os, dnode, size, 0); 449*240afd8cSMark Johnston for (off_t foff = 0; foff < size; foff += target) { 450*240afd8cSMark Johnston off_t loc, sofar; 451*240afd8cSMark Johnston 452*240afd8cSMark Johnston /* 453*240afd8cSMark Johnston * Fill up our buffer, handling partial reads. 454*240afd8cSMark Johnston * 455*240afd8cSMark Johnston * It might be profitable to use copy_file_range(2) here. 456*240afd8cSMark Johnston */ 457*240afd8cSMark Johnston sofar = 0; 458*240afd8cSMark Johnston target = MIN(size - foff, (off_t)bufsz); 459*240afd8cSMark Johnston do { 460*240afd8cSMark Johnston n = read(fd, buf + sofar, target); 461*240afd8cSMark Johnston if (n < 0) 462*240afd8cSMark Johnston err(1, "reading from '%s'", cur->name); 463*240afd8cSMark Johnston if (n == 0) 464*240afd8cSMark Johnston errx(1, "unexpected EOF reading '%s'", 465*240afd8cSMark Johnston cur->name); 466*240afd8cSMark Johnston sofar += n; 467*240afd8cSMark Johnston } while (sofar < target); 468*240afd8cSMark Johnston 469*240afd8cSMark Johnston if (target < (off_t)bufsz) 470*240afd8cSMark Johnston memset(buf + target, 0, bufsz - target); 471*240afd8cSMark Johnston 472*240afd8cSMark Johnston loc = objset_space_alloc(zfs, arg->fs->os, &target); 473*240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, buf, target, loc, 474*240afd8cSMark Johnston dnode_cursor_next(zfs, c, foff)); 475*240afd8cSMark Johnston } 476*240afd8cSMark Johnston if (close(fd) != 0) 477*240afd8cSMark Johnston err(1, "close"); 478*240afd8cSMark Johnston dnode_cursor_finish(zfs, c); 479*240afd8cSMark Johnston 480*240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 481*240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 482*240afd8cSMark Johnston } 483*240afd8cSMark Johnston 484*240afd8cSMark Johnston static void 485*240afd8cSMark Johnston fs_populate_dir(fsnode *cur, struct fs_populate_arg *arg) 486*240afd8cSMark Johnston { 487*240afd8cSMark Johnston dnode_phys_t *dnode; 488*240afd8cSMark Johnston zfs_objset_t *os; 489*240afd8cSMark Johnston uint64_t dnid; 490*240afd8cSMark Johnston int dirfd; 491*240afd8cSMark Johnston 492*240afd8cSMark Johnston assert(cur->type == S_IFDIR); 493*240afd8cSMark Johnston assert((cur->inode->flags & FI_ALLOCATED) == 0); 494*240afd8cSMark Johnston 495*240afd8cSMark Johnston os = arg->fs->os; 496*240afd8cSMark Johnston 497*240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(os, DMU_OT_DIRECTORY_CONTENTS, 498*240afd8cSMark Johnston DMU_OT_SA, 0, &dnid); 499*240afd8cSMark Johnston 500*240afd8cSMark Johnston /* 501*240afd8cSMark Johnston * Add an entry to the parent directory and open this directory. 502*240afd8cSMark Johnston */ 503*240afd8cSMark Johnston if (!SLIST_EMPTY(&arg->dirs)) { 504*240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 505*240afd8cSMark Johnston dirfd = openat(SLIST_FIRST(&arg->dirs)->dirfd, cur->name, 506*240afd8cSMark Johnston O_DIRECTORY); 507*240afd8cSMark Johnston if (dirfd < 0) 508*240afd8cSMark Johnston err(1, "open(%s)", cur->name); 509*240afd8cSMark Johnston } else { 510*240afd8cSMark Johnston arg->rootdirid = dnid; 511*240afd8cSMark Johnston dirfd = arg->dirfd; 512*240afd8cSMark Johnston } 513*240afd8cSMark Johnston 514*240afd8cSMark Johnston /* 515*240afd8cSMark Johnston * Set ZPL attributes. 516*240afd8cSMark Johnston */ 517*240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 518*240afd8cSMark Johnston 519*240afd8cSMark Johnston /* 520*240afd8cSMark Johnston * If this is a root directory, then its children belong to a different 521*240afd8cSMark Johnston * dataset and this directory remains empty in the current objset. 522*240afd8cSMark Johnston */ 523*240afd8cSMark Johnston if ((cur->inode->flags & FI_ROOT) == 0) { 524*240afd8cSMark Johnston struct fs_populate_dir *dir; 525*240afd8cSMark Johnston 526*240afd8cSMark Johnston dir = ecalloc(1, sizeof(*dir)); 527*240afd8cSMark Johnston dir->dirfd = dirfd; 528*240afd8cSMark Johnston dir->objid = dnid; 529*240afd8cSMark Johnston dir->zap = zap_alloc(os, dnode); 530*240afd8cSMark Johnston SLIST_INSERT_HEAD(&arg->dirs, dir, next); 531*240afd8cSMark Johnston } else { 532*240afd8cSMark Johnston zap_write(arg->zfs, zap_alloc(os, dnode)); 533*240afd8cSMark Johnston fs_build_one(arg->zfs, cur->inode->param, cur->child, dirfd); 534*240afd8cSMark Johnston } 535*240afd8cSMark Johnston } 536*240afd8cSMark Johnston 537*240afd8cSMark Johnston static void 538*240afd8cSMark Johnston fs_populate_symlink(fsnode *cur, struct fs_populate_arg *arg) 539*240afd8cSMark Johnston { 540*240afd8cSMark Johnston dnode_phys_t *dnode; 541*240afd8cSMark Johnston uint64_t dnid; 542*240afd8cSMark Johnston 543*240afd8cSMark Johnston assert(cur->type == S_IFLNK); 544*240afd8cSMark Johnston assert((cur->inode->flags & (FI_ALLOCATED | FI_ROOT)) == 0); 545*240afd8cSMark Johnston 546*240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(arg->fs->os, 547*240afd8cSMark Johnston DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid); 548*240afd8cSMark Johnston 549*240afd8cSMark Johnston fs_populate_dirent(arg, cur, dnid); 550*240afd8cSMark Johnston 551*240afd8cSMark Johnston fs_populate_sattrs(arg, cur, dnode); 552*240afd8cSMark Johnston } 553*240afd8cSMark Johnston 554*240afd8cSMark Johnston static int 555*240afd8cSMark Johnston fs_foreach_populate(fsnode *cur, void *_arg) 556*240afd8cSMark Johnston { 557*240afd8cSMark Johnston struct fs_populate_arg *arg; 558*240afd8cSMark Johnston struct fs_populate_dir *dir; 559*240afd8cSMark Johnston int ret; 560*240afd8cSMark Johnston 561*240afd8cSMark Johnston arg = _arg; 562*240afd8cSMark Johnston switch (cur->type) { 563*240afd8cSMark Johnston case S_IFREG: 564*240afd8cSMark Johnston fs_populate_file(cur, arg); 565*240afd8cSMark Johnston break; 566*240afd8cSMark Johnston case S_IFDIR: 567*240afd8cSMark Johnston if (fsnode_isroot(cur)) 568*240afd8cSMark Johnston break; 569*240afd8cSMark Johnston fs_populate_dir(cur, arg); 570*240afd8cSMark Johnston break; 571*240afd8cSMark Johnston case S_IFLNK: 572*240afd8cSMark Johnston fs_populate_symlink(cur, arg); 573*240afd8cSMark Johnston break; 574*240afd8cSMark Johnston default: 575*240afd8cSMark Johnston assert(0); 576*240afd8cSMark Johnston } 577*240afd8cSMark Johnston 578*240afd8cSMark Johnston ret = (cur->inode->flags & FI_ROOT) != 0 ? 0 : 1; 579*240afd8cSMark Johnston 580*240afd8cSMark Johnston if (cur->next == NULL && 581*240afd8cSMark Johnston (cur->child == NULL || (cur->inode->flags & FI_ROOT) != 0)) { 582*240afd8cSMark Johnston /* 583*240afd8cSMark Johnston * We reached a terminal node in a subtree. Walk back up and 584*240afd8cSMark Johnston * write out directories. We're done once we hit the root of a 585*240afd8cSMark Johnston * dataset or find a level where we're not on the edge of the 586*240afd8cSMark Johnston * tree. 587*240afd8cSMark Johnston */ 588*240afd8cSMark Johnston do { 589*240afd8cSMark Johnston dir = SLIST_FIRST(&arg->dirs); 590*240afd8cSMark Johnston SLIST_REMOVE_HEAD(&arg->dirs, next); 591*240afd8cSMark Johnston zap_write(arg->zfs, dir->zap); 592*240afd8cSMark Johnston if (dir->dirfd != -1 && close(dir->dirfd) != 0) 593*240afd8cSMark Johnston err(1, "close"); 594*240afd8cSMark Johnston free(dir); 595*240afd8cSMark Johnston cur = cur->parent; 596*240afd8cSMark Johnston } while (cur != NULL && cur->next == NULL && 597*240afd8cSMark Johnston (cur->inode->flags & FI_ROOT) == 0); 598*240afd8cSMark Johnston } 599*240afd8cSMark Johnston 600*240afd8cSMark Johnston return (ret); 601*240afd8cSMark Johnston } 602*240afd8cSMark Johnston 603*240afd8cSMark Johnston static void 604*240afd8cSMark Johnston fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index, 605*240afd8cSMark Johnston const sa_attr_type_t layout[], size_t sacnt) 606*240afd8cSMark Johnston { 607*240afd8cSMark Johnston char ti[16]; 608*240afd8cSMark Johnston 609*240afd8cSMark Johnston assert(sizeof(layout[0]) == 2); 610*240afd8cSMark Johnston 611*240afd8cSMark Johnston snprintf(ti, sizeof(ti), "%u", index); 612*240afd8cSMark Johnston zap_add(zap, ti, sizeof(sa_attr_type_t), sacnt, 613*240afd8cSMark Johnston (const uint8_t *)layout); 614*240afd8cSMark Johnston } 615*240afd8cSMark Johnston 616*240afd8cSMark Johnston /* 617*240afd8cSMark Johnston * Initialize system attribute tables. 618*240afd8cSMark Johnston * 619*240afd8cSMark Johnston * There are two elements to this. First, we write the zpl_attrs[] and 620*240afd8cSMark Johnston * zpl_attr_layout[] tables to disk. Then we create a lookup table which 621*240afd8cSMark Johnston * allows us to set file attributes quickly. 622*240afd8cSMark Johnston */ 623*240afd8cSMark Johnston static uint64_t 624*240afd8cSMark Johnston fs_set_zpl_attrs(zfs_opt_t *zfs, zfs_fs_t *fs) 625*240afd8cSMark Johnston { 626*240afd8cSMark Johnston zfs_zap_t *sazap, *salzap, *sarzap; 627*240afd8cSMark Johnston zfs_objset_t *os; 628*240afd8cSMark Johnston dnode_phys_t *saobj, *salobj, *sarobj; 629*240afd8cSMark Johnston uint64_t saobjid, salobjid, sarobjid; 630*240afd8cSMark Johnston uint16_t offset; 631*240afd8cSMark Johnston 632*240afd8cSMark Johnston os = fs->os; 633*240afd8cSMark Johnston 634*240afd8cSMark Johnston /* 635*240afd8cSMark Johnston * The on-disk tables are stored in two ZAP objects, the registry object 636*240afd8cSMark Johnston * and the layout object. Individual attributes are described by 637*240afd8cSMark Johnston * entries in the registry object; for example, the value for the 638*240afd8cSMark Johnston * "ZPL_SIZE" key gives the size and encoding of the ZPL_SIZE attribute. 639*240afd8cSMark Johnston * The attributes of a file are ordered according to one of the layouts 640*240afd8cSMark Johnston * defined in the layout object. The master node object is simply used 641*240afd8cSMark Johnston * to locate the registry and layout objects. 642*240afd8cSMark Johnston */ 643*240afd8cSMark Johnston saobj = objset_dnode_alloc(os, DMU_OT_SA_MASTER_NODE, &saobjid); 644*240afd8cSMark Johnston salobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_LAYOUTS, &salobjid); 645*240afd8cSMark Johnston sarobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_REGISTRATION, &sarobjid); 646*240afd8cSMark Johnston 647*240afd8cSMark Johnston sarzap = zap_alloc(os, sarobj); 648*240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attrs); i++) { 649*240afd8cSMark Johnston const zfs_sattr_t *sa; 650*240afd8cSMark Johnston uint64_t attr; 651*240afd8cSMark Johnston 652*240afd8cSMark Johnston attr = 0; 653*240afd8cSMark Johnston sa = &zpl_attrs[i]; 654*240afd8cSMark Johnston SA_ATTR_ENCODE(attr, (uint64_t)i, sa->size, sa->bs); 655*240afd8cSMark Johnston zap_add_uint64(sarzap, sa->name, attr); 656*240afd8cSMark Johnston } 657*240afd8cSMark Johnston zap_write(zfs, sarzap); 658*240afd8cSMark Johnston 659*240afd8cSMark Johnston /* 660*240afd8cSMark Johnston * Layouts are arrays of indices into the registry. We define two 661*240afd8cSMark Johnston * layouts for use by the ZPL, one for non-symlinks and one for 662*240afd8cSMark Johnston * symlinks. They are identical except that the symlink layout includes 663*240afd8cSMark Johnston * ZPL_SYMLINK as its final attribute. 664*240afd8cSMark Johnston */ 665*240afd8cSMark Johnston salzap = zap_alloc(os, salobj); 666*240afd8cSMark Johnston assert(zpl_attr_layout[nitems(zpl_attr_layout) - 1] == ZPL_SYMLINK); 667*240afd8cSMark Johnston fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_DEFAULT, 668*240afd8cSMark Johnston zpl_attr_layout, nitems(zpl_attr_layout) - 1); 669*240afd8cSMark Johnston fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_SYMLINK, 670*240afd8cSMark Johnston zpl_attr_layout, nitems(zpl_attr_layout)); 671*240afd8cSMark Johnston zap_write(zfs, salzap); 672*240afd8cSMark Johnston 673*240afd8cSMark Johnston sazap = zap_alloc(os, saobj); 674*240afd8cSMark Johnston zap_add_uint64(sazap, SA_LAYOUTS, salobjid); 675*240afd8cSMark Johnston zap_add_uint64(sazap, SA_REGISTRY, sarobjid); 676*240afd8cSMark Johnston zap_write(zfs, sazap); 677*240afd8cSMark Johnston 678*240afd8cSMark Johnston /* Sanity check. */ 679*240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attrs); i++) 680*240afd8cSMark Johnston assert(i == zpl_attrs[i].id); 681*240afd8cSMark Johnston 682*240afd8cSMark Johnston /* 683*240afd8cSMark Johnston * Build the offset table used when setting file attributes. File 684*240afd8cSMark Johnston * attributes are stored in the object's bonus buffer; this table 685*240afd8cSMark Johnston * provides the buffer offset of attributes referenced by the layout 686*240afd8cSMark Johnston * table. 687*240afd8cSMark Johnston */ 688*240afd8cSMark Johnston fs->sacnt = nitems(zpl_attrs); 689*240afd8cSMark Johnston fs->saoffs = ecalloc(fs->sacnt, sizeof(*fs->saoffs)); 690*240afd8cSMark Johnston for (size_t i = 0; i < fs->sacnt; i++) 691*240afd8cSMark Johnston fs->saoffs[i] = 0xffff; 692*240afd8cSMark Johnston offset = 0; 693*240afd8cSMark Johnston for (size_t i = 0; i < nitems(zpl_attr_layout); i++) { 694*240afd8cSMark Johnston uint16_t size; 695*240afd8cSMark Johnston 696*240afd8cSMark Johnston assert(zpl_attr_layout[i] < fs->sacnt); 697*240afd8cSMark Johnston 698*240afd8cSMark Johnston fs->saoffs[zpl_attr_layout[i]] = offset; 699*240afd8cSMark Johnston size = zpl_attrs[zpl_attr_layout[i]].size; 700*240afd8cSMark Johnston offset += size; 701*240afd8cSMark Johnston } 702*240afd8cSMark Johnston fs->satab = zpl_attrs; 703*240afd8cSMark Johnston 704*240afd8cSMark Johnston return (saobjid); 705*240afd8cSMark Johnston } 706*240afd8cSMark Johnston 707*240afd8cSMark Johnston static void 708*240afd8cSMark Johnston fs_layout_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg) 709*240afd8cSMark Johnston { 710*240afd8cSMark Johnston char *mountpoint, *origmountpoint, *name, *next; 711*240afd8cSMark Johnston fsnode *cur, *root; 712*240afd8cSMark Johnston uint64_t canmount; 713*240afd8cSMark Johnston 714*240afd8cSMark Johnston if (!dsl_dir_has_dataset(dsldir)) 715*240afd8cSMark Johnston return; 716*240afd8cSMark Johnston 717*240afd8cSMark Johnston mountpoint = dsl_dir_get_mountpoint(zfs, dsldir); 718*240afd8cSMark Johnston if (mountpoint == NULL) 719*240afd8cSMark Johnston return; 720*240afd8cSMark Johnston if (dsl_dir_get_canmount(dsldir, &canmount) == 0 && canmount == 0) 721*240afd8cSMark Johnston return; 722*240afd8cSMark Johnston 723*240afd8cSMark Johnston /* 724*240afd8cSMark Johnston * If we were asked to specify a bootfs, set it here. 725*240afd8cSMark Johnston */ 726*240afd8cSMark Johnston if (zfs->bootfs != NULL && strcmp(zfs->bootfs, 727*240afd8cSMark Johnston dsl_dir_fullname(dsldir)) == 0) { 728*240afd8cSMark Johnston zap_add_uint64(zfs->poolprops, "bootfs", 729*240afd8cSMark Johnston dsl_dir_dataset_id(dsldir)); 730*240afd8cSMark Johnston } 731*240afd8cSMark Johnston 732*240afd8cSMark Johnston origmountpoint = mountpoint; 733*240afd8cSMark Johnston 734*240afd8cSMark Johnston /* 735*240afd8cSMark Johnston * Figure out which fsnode corresponds to our mountpoint. 736*240afd8cSMark Johnston */ 737*240afd8cSMark Johnston root = arg; 738*240afd8cSMark Johnston cur = root; 739*240afd8cSMark Johnston if (strcmp(mountpoint, zfs->rootpath) != 0) { 740*240afd8cSMark Johnston mountpoint += strlen(zfs->rootpath); 741*240afd8cSMark Johnston 742*240afd8cSMark Johnston /* 743*240afd8cSMark Johnston * Look up the directory in the staged tree. For example, if 744*240afd8cSMark Johnston * the dataset's mount point is /foo/bar/baz, we'll search the 745*240afd8cSMark Johnston * root directory for "foo", search "foo" for "baz", and so on. 746*240afd8cSMark Johnston * Each intermediate name must refer to a directory; the final 747*240afd8cSMark Johnston * component need not exist. 748*240afd8cSMark Johnston */ 749*240afd8cSMark Johnston cur = root; 750*240afd8cSMark Johnston for (next = name = mountpoint; next != NULL;) { 751*240afd8cSMark Johnston for (; *next == '/'; next++) 752*240afd8cSMark Johnston ; 753*240afd8cSMark Johnston name = strsep(&next, "/"); 754*240afd8cSMark Johnston 755*240afd8cSMark Johnston for (; cur != NULL && strcmp(cur->name, name) != 0; 756*240afd8cSMark Johnston cur = cur->next) 757*240afd8cSMark Johnston ; 758*240afd8cSMark Johnston if (cur == NULL) { 759*240afd8cSMark Johnston if (next == NULL) 760*240afd8cSMark Johnston break; 761*240afd8cSMark Johnston errx(1, "missing mountpoint directory for `%s'", 762*240afd8cSMark Johnston dsl_dir_fullname(dsldir)); 763*240afd8cSMark Johnston } 764*240afd8cSMark Johnston if (cur->type != S_IFDIR) { 765*240afd8cSMark Johnston errx(1, 766*240afd8cSMark Johnston "mountpoint for `%s' is not a directory", 767*240afd8cSMark Johnston dsl_dir_fullname(dsldir)); 768*240afd8cSMark Johnston } 769*240afd8cSMark Johnston if (next != NULL) 770*240afd8cSMark Johnston cur = cur->child; 771*240afd8cSMark Johnston } 772*240afd8cSMark Johnston } 773*240afd8cSMark Johnston 774*240afd8cSMark Johnston if (cur != NULL) { 775*240afd8cSMark Johnston assert(cur->type == S_IFDIR); 776*240afd8cSMark Johnston 777*240afd8cSMark Johnston /* 778*240afd8cSMark Johnston * Multiple datasets shouldn't share a mountpoint. It's 779*240afd8cSMark Johnston * technically allowed, but it's not clear what makefs should do 780*240afd8cSMark Johnston * in that case. 781*240afd8cSMark Johnston */ 782*240afd8cSMark Johnston assert((cur->inode->flags & FI_ROOT) == 0); 783*240afd8cSMark Johnston if (cur != root) 784*240afd8cSMark Johnston cur->inode->flags |= FI_ROOT; 785*240afd8cSMark Johnston assert(cur->inode->param == NULL); 786*240afd8cSMark Johnston cur->inode->param = dsldir; 787*240afd8cSMark Johnston } 788*240afd8cSMark Johnston 789*240afd8cSMark Johnston free(origmountpoint); 790*240afd8cSMark Johnston } 791*240afd8cSMark Johnston 792*240afd8cSMark Johnston static int 793*240afd8cSMark Johnston fs_foreach_mark(fsnode *cur, void *arg) 794*240afd8cSMark Johnston { 795*240afd8cSMark Johnston uint64_t *countp; 796*240afd8cSMark Johnston 797*240afd8cSMark Johnston countp = arg; 798*240afd8cSMark Johnston if (cur->type == S_IFDIR && fsnode_isroot(cur)) 799*240afd8cSMark Johnston return (1); 800*240afd8cSMark Johnston 801*240afd8cSMark Johnston if (cur->inode->ino == 0) { 802*240afd8cSMark Johnston cur->inode->ino = ++(*countp); 803*240afd8cSMark Johnston cur->inode->nlink = 1; 804*240afd8cSMark Johnston } else { 805*240afd8cSMark Johnston cur->inode->nlink++; 806*240afd8cSMark Johnston } 807*240afd8cSMark Johnston 808*240afd8cSMark Johnston return ((cur->inode->flags & FI_ROOT) != 0 ? 0 : 1); 809*240afd8cSMark Johnston } 810*240afd8cSMark Johnston 811*240afd8cSMark Johnston /* 812*240afd8cSMark Johnston * Create a filesystem dataset. More specifically: 813*240afd8cSMark Johnston * - create an object set for the dataset, 814*240afd8cSMark Johnston * - add required metadata (SA tables, property definitions, etc.) to that 815*240afd8cSMark Johnston * object set, 816*240afd8cSMark Johnston * - optionally populate the object set with file objects, using "root" as the 817*240afd8cSMark Johnston * root directory. 818*240afd8cSMark Johnston * 819*240afd8cSMark Johnston * "dirfd" is a directory descriptor for the directory referenced by "root". It 820*240afd8cSMark Johnston * is closed before returning. 821*240afd8cSMark Johnston */ 822*240afd8cSMark Johnston static void 823*240afd8cSMark Johnston fs_build_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, fsnode *root, int dirfd) 824*240afd8cSMark Johnston { 825*240afd8cSMark Johnston struct fs_populate_arg arg; 826*240afd8cSMark Johnston zfs_fs_t fs; 827*240afd8cSMark Johnston zfs_zap_t *masterzap; 828*240afd8cSMark Johnston zfs_objset_t *os; 829*240afd8cSMark Johnston dnode_phys_t *deleteq, *masterobj; 830*240afd8cSMark Johnston uint64_t deleteqid, dnodecount, moid, rootdirid, saobjid; 831*240afd8cSMark Johnston bool fakedroot; 832*240afd8cSMark Johnston 833*240afd8cSMark Johnston /* 834*240afd8cSMark Johnston * This dataset's mountpoint doesn't exist in the staging tree, or the 835*240afd8cSMark Johnston * dataset doesn't have a mountpoint at all. In either case we still 836*240afd8cSMark Johnston * need a root directory. Fake up a root fsnode to handle this case. 837*240afd8cSMark Johnston */ 838*240afd8cSMark Johnston fakedroot = root == NULL; 839*240afd8cSMark Johnston if (fakedroot) { 840*240afd8cSMark Johnston struct stat *stp; 841*240afd8cSMark Johnston 842*240afd8cSMark Johnston assert(dirfd == -1); 843*240afd8cSMark Johnston 844*240afd8cSMark Johnston root = ecalloc(1, sizeof(*root)); 845*240afd8cSMark Johnston root->inode = ecalloc(1, sizeof(*root->inode)); 846*240afd8cSMark Johnston root->name = estrdup("."); 847*240afd8cSMark Johnston root->type = S_IFDIR; 848*240afd8cSMark Johnston 849*240afd8cSMark Johnston stp = &root->inode->st; 850*240afd8cSMark Johnston stp->st_uid = 0; 851*240afd8cSMark Johnston stp->st_gid = 0; 852*240afd8cSMark Johnston stp->st_mode = S_IFDIR | 0755; 853*240afd8cSMark Johnston } 854*240afd8cSMark Johnston assert(root->type == S_IFDIR); 855*240afd8cSMark Johnston assert(fsnode_isroot(root)); 856*240afd8cSMark Johnston 857*240afd8cSMark Johnston /* 858*240afd8cSMark Johnston * Initialize the object set for this dataset. 859*240afd8cSMark Johnston */ 860*240afd8cSMark Johnston os = objset_alloc(zfs, DMU_OST_ZFS); 861*240afd8cSMark Johnston masterobj = objset_dnode_alloc(os, DMU_OT_MASTER_NODE, &moid); 862*240afd8cSMark Johnston assert(moid == MASTER_NODE_OBJ); 863*240afd8cSMark Johnston 864*240afd8cSMark Johnston memset(&fs, 0, sizeof(fs)); 865*240afd8cSMark Johnston fs.os = os; 866*240afd8cSMark Johnston 867*240afd8cSMark Johnston /* 868*240afd8cSMark Johnston * Create the ZAP SA layout now since filesystem object dnodes will 869*240afd8cSMark Johnston * refer to those attributes. 870*240afd8cSMark Johnston */ 871*240afd8cSMark Johnston saobjid = fs_set_zpl_attrs(zfs, &fs); 872*240afd8cSMark Johnston 873*240afd8cSMark Johnston /* 874*240afd8cSMark Johnston * Make a pass over the staged directory to detect hard links and assign 875*240afd8cSMark Johnston * virtual dnode numbers. 876*240afd8cSMark Johnston */ 877*240afd8cSMark Johnston dnodecount = 1; /* root directory */ 878*240afd8cSMark Johnston fsnode_foreach(root, fs_foreach_mark, &dnodecount); 879*240afd8cSMark Johnston 880*240afd8cSMark Johnston /* 881*240afd8cSMark Johnston * Make a second pass to populate the dataset with files from the 882*240afd8cSMark Johnston * staged directory. Most of our runtime is spent here. 883*240afd8cSMark Johnston */ 884*240afd8cSMark Johnston arg.dirfd = dirfd; 885*240afd8cSMark Johnston arg.zfs = zfs; 886*240afd8cSMark Johnston arg.fs = &fs; 887*240afd8cSMark Johnston SLIST_INIT(&arg.dirs); 888*240afd8cSMark Johnston fs_populate_dir(root, &arg); 889*240afd8cSMark Johnston assert(!SLIST_EMPTY(&arg.dirs)); 890*240afd8cSMark Johnston fsnode_foreach(root, fs_foreach_populate, &arg); 891*240afd8cSMark Johnston assert(SLIST_EMPTY(&arg.dirs)); 892*240afd8cSMark Johnston rootdirid = arg.rootdirid; 893*240afd8cSMark Johnston 894*240afd8cSMark Johnston /* 895*240afd8cSMark Johnston * Create an empty delete queue. We don't do anything with it, but 896*240afd8cSMark Johnston * OpenZFS will refuse to mount filesystems that don't have one. 897*240afd8cSMark Johnston */ 898*240afd8cSMark Johnston deleteq = objset_dnode_alloc(os, DMU_OT_UNLINKED_SET, &deleteqid); 899*240afd8cSMark Johnston zap_write(zfs, zap_alloc(os, deleteq)); 900*240afd8cSMark Johnston 901*240afd8cSMark Johnston /* 902*240afd8cSMark Johnston * Populate and write the master node object. This is a ZAP object 903*240afd8cSMark Johnston * containing various dataset properties and the object IDs of the root 904*240afd8cSMark Johnston * directory and delete queue. 905*240afd8cSMark Johnston */ 906*240afd8cSMark Johnston masterzap = zap_alloc(os, masterobj); 907*240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_ROOT_OBJ, rootdirid); 908*240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_UNLINKED_SET, deleteqid); 909*240afd8cSMark Johnston zap_add_uint64(masterzap, ZFS_SA_ATTRS, saobjid); 910*240afd8cSMark Johnston zap_add_uint64(masterzap, ZPL_VERSION_OBJ, 5 /* ZPL_VERSION_SA */); 911*240afd8cSMark Johnston zap_add_uint64(masterzap, "normalization", 0 /* off */); 912*240afd8cSMark Johnston zap_add_uint64(masterzap, "utf8only", 0 /* off */); 913*240afd8cSMark Johnston zap_add_uint64(masterzap, "casesensitivity", 0 /* case sensitive */); 914*240afd8cSMark Johnston zap_add_uint64(masterzap, "acltype", 2 /* NFSv4 */); 915*240afd8cSMark Johnston zap_write(zfs, masterzap); 916*240afd8cSMark Johnston 917*240afd8cSMark Johnston /* 918*240afd8cSMark Johnston * All finished with this object set, we may as well write it now. 919*240afd8cSMark Johnston * The DSL layer will sum up the bytes consumed by each dataset using 920*240afd8cSMark Johnston * information stored in the object set, so it can't be freed just yet. 921*240afd8cSMark Johnston */ 922*240afd8cSMark Johnston dsl_dir_dataset_write(zfs, os, dsldir); 923*240afd8cSMark Johnston 924*240afd8cSMark Johnston if (fakedroot) { 925*240afd8cSMark Johnston free(root->inode); 926*240afd8cSMark Johnston free(root->name); 927*240afd8cSMark Johnston free(root); 928*240afd8cSMark Johnston } 929*240afd8cSMark Johnston free(fs.saoffs); 930*240afd8cSMark Johnston } 931*240afd8cSMark Johnston 932*240afd8cSMark Johnston /* 933*240afd8cSMark Johnston * Create an object set for each DSL directory which has a dataset and doesn't 934*240afd8cSMark Johnston * already have an object set. 935*240afd8cSMark Johnston */ 936*240afd8cSMark Johnston static void 937*240afd8cSMark Johnston fs_build_unmounted(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg __unused) 938*240afd8cSMark Johnston { 939*240afd8cSMark Johnston if (dsl_dir_has_dataset(dsldir) && !dsl_dir_dataset_has_objset(dsldir)) 940*240afd8cSMark Johnston fs_build_one(zfs, dsldir, NULL, -1); 941*240afd8cSMark Johnston } 942*240afd8cSMark Johnston 943*240afd8cSMark Johnston /* 944*240afd8cSMark Johnston * Create our datasets and populate them with files. 945*240afd8cSMark Johnston */ 946*240afd8cSMark Johnston void 947*240afd8cSMark Johnston fs_build(zfs_opt_t *zfs, int dirfd, fsnode *root) 948*240afd8cSMark Johnston { 949*240afd8cSMark Johnston /* 950*240afd8cSMark Johnston * Run through our datasets and find the root fsnode for each one. Each 951*240afd8cSMark Johnston * root fsnode is flagged so that we can figure out which dataset it 952*240afd8cSMark Johnston * belongs to. 953*240afd8cSMark Johnston */ 954*240afd8cSMark Johnston dsl_dir_foreach(zfs, zfs->rootdsldir, fs_layout_one, root); 955*240afd8cSMark Johnston 956*240afd8cSMark Johnston /* 957*240afd8cSMark Johnston * Did we find our boot filesystem? 958*240afd8cSMark Johnston */ 959*240afd8cSMark Johnston if (zfs->bootfs != NULL && !zap_entry_exists(zfs->poolprops, "bootfs")) 960*240afd8cSMark Johnston errx(1, "no mounted dataset matches bootfs property `%s'", 961*240afd8cSMark Johnston zfs->bootfs); 962*240afd8cSMark Johnston 963*240afd8cSMark Johnston /* 964*240afd8cSMark Johnston * Traverse the file hierarchy starting from the root fsnode. One 965*240afd8cSMark Johnston * dataset, not necessarily the root dataset, must "own" the root 966*240afd8cSMark Johnston * directory by having its mountpoint be equal to the root path. 967*240afd8cSMark Johnston * 968*240afd8cSMark Johnston * As roots of other datasets are encountered during the traversal, 969*240afd8cSMark Johnston * fs_build_one() recursively creates the corresponding object sets and 970*240afd8cSMark Johnston * populates them. Once this function has returned, all datasets will 971*240afd8cSMark Johnston * have been fully populated. 972*240afd8cSMark Johnston */ 973*240afd8cSMark Johnston fs_build_one(zfs, root->inode->param, root, dirfd); 974*240afd8cSMark Johnston 975*240afd8cSMark Johnston /* 976*240afd8cSMark Johnston * Now create object sets for datasets whose mountpoints weren't found 977*240afd8cSMark Johnston * in the staging directory, either because there is no mountpoint, or 978*240afd8cSMark Johnston * because the mountpoint doesn't correspond to an existing directory. 979*240afd8cSMark Johnston */ 980*240afd8cSMark Johnston dsl_dir_foreach(zfs, zfs->rootdsldir, fs_build_unmounted, NULL); 981*240afd8cSMark Johnston } 982