xref: /freebsd/usr.sbin/makefs/zfs/fs.c (revision 240afd8c1fcc8c5f29dbd4ff0c915795d414405d)
1*240afd8cSMark Johnston /*-
2*240afd8cSMark Johnston  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3*240afd8cSMark Johnston  *
4*240afd8cSMark Johnston  * Copyright (c) 2022 The FreeBSD Foundation
5*240afd8cSMark Johnston  *
6*240afd8cSMark Johnston  * This software was developed by Mark Johnston under sponsorship from
7*240afd8cSMark Johnston  * the FreeBSD Foundation.
8*240afd8cSMark Johnston  *
9*240afd8cSMark Johnston  * Redistribution and use in source and binary forms, with or without
10*240afd8cSMark Johnston  * modification, are permitted provided that the following conditions are
11*240afd8cSMark Johnston  * met:
12*240afd8cSMark Johnston  * 1. Redistributions of source code must retain the above copyright
13*240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer.
14*240afd8cSMark Johnston  * 2. Redistributions in binary form must reproduce the above copyright
15*240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer in
16*240afd8cSMark Johnston  *    the documentation and/or other materials provided with the distribution.
17*240afd8cSMark Johnston  *
18*240afd8cSMark Johnston  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19*240afd8cSMark Johnston  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20*240afd8cSMark Johnston  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21*240afd8cSMark Johnston  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22*240afd8cSMark Johnston  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23*240afd8cSMark Johnston  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24*240afd8cSMark Johnston  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25*240afd8cSMark Johnston  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26*240afd8cSMark Johnston  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27*240afd8cSMark Johnston  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28*240afd8cSMark Johnston  * SUCH DAMAGE.
29*240afd8cSMark Johnston  */
30*240afd8cSMark Johnston 
31*240afd8cSMark Johnston #include <sys/dirent.h>
32*240afd8cSMark Johnston #include <sys/stat.h>
33*240afd8cSMark Johnston 
34*240afd8cSMark Johnston #include <assert.h>
35*240afd8cSMark Johnston #include <fcntl.h>
36*240afd8cSMark Johnston #include <string.h>
37*240afd8cSMark Johnston #include <unistd.h>
38*240afd8cSMark Johnston 
39*240afd8cSMark Johnston #include <util.h>
40*240afd8cSMark Johnston 
41*240afd8cSMark Johnston #include "makefs.h"
42*240afd8cSMark Johnston #include "zfs.h"
43*240afd8cSMark Johnston 
44*240afd8cSMark Johnston typedef struct {
45*240afd8cSMark Johnston 	const char	*name;
46*240afd8cSMark Johnston 	unsigned int	id;
47*240afd8cSMark Johnston 	uint16_t	size;
48*240afd8cSMark Johnston 	sa_bswap_type_t	bs;
49*240afd8cSMark Johnston } zfs_sattr_t;
50*240afd8cSMark Johnston 
51*240afd8cSMark Johnston typedef struct zfs_fs {
52*240afd8cSMark Johnston 	zfs_objset_t	*os;
53*240afd8cSMark Johnston 
54*240afd8cSMark Johnston 	/* Offset table for system attributes, indexed by a zpl_attr_t. */
55*240afd8cSMark Johnston 	uint16_t	*saoffs;
56*240afd8cSMark Johnston 	size_t		sacnt;
57*240afd8cSMark Johnston 	const zfs_sattr_t *satab;
58*240afd8cSMark Johnston } zfs_fs_t;
59*240afd8cSMark Johnston 
/*
 * Numeric identifiers of the ZPL system attributes.
 *
 * The order of the attributes doesn't matter, this is simply the one
 * hard-coded by OpenZFS, based on a zdb dump of the SA_REGISTRY table.  The
 * values are used as indices into the attribute tables in this file, so the
 * enumerators must stay contiguous and start at zero.
 */
typedef enum zpl_attr {
	ZPL_ATIME = 0,
	ZPL_MTIME,
	ZPL_CTIME,
	ZPL_CRTIME,
	ZPL_GEN,
	ZPL_MODE,
	ZPL_SIZE,
	ZPL_PARENT,
	ZPL_LINKS,
	ZPL_XATTR,
	ZPL_RDEV,
	ZPL_FLAGS,
	ZPL_UID,
	ZPL_GID,
	ZPL_PAD,
	ZPL_ZNODE_ACL,
	ZPL_DACL_COUNT,
	ZPL_SYMLINK,
	ZPL_SCANSTAMP,
	ZPL_DACL_ACES,
	ZPL_DXATTR,
	ZPL_PROJID,
} zpl_attr_t;
88*240afd8cSMark Johnston 
89*240afd8cSMark Johnston /*
90*240afd8cSMark Johnston  * This table must be kept in sync with zpl_attr_layout[] and zpl_attr_t.
91*240afd8cSMark Johnston  */
92*240afd8cSMark Johnston static const zfs_sattr_t zpl_attrs[] = {
93*240afd8cSMark Johnston #define	_ZPL_ATTR(n, s, b)	{ .name = #n, .id = n, .size = s, .bs = b }
94*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_ATIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
95*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_MTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
96*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_CTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
97*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_CRTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
98*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_GEN, sizeof(uint64_t), SA_UINT64_ARRAY),
99*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_MODE, sizeof(uint64_t), SA_UINT64_ARRAY),
100*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_SIZE, sizeof(uint64_t), SA_UINT64_ARRAY),
101*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_PARENT, sizeof(uint64_t), SA_UINT64_ARRAY),
102*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_LINKS, sizeof(uint64_t), SA_UINT64_ARRAY),
103*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_XATTR, sizeof(uint64_t), SA_UINT64_ARRAY),
104*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_RDEV, sizeof(uint64_t), SA_UINT64_ARRAY),
105*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_FLAGS, sizeof(uint64_t), SA_UINT64_ARRAY),
106*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_UID, sizeof(uint64_t), SA_UINT64_ARRAY),
107*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_GID, sizeof(uint64_t), SA_UINT64_ARRAY),
108*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_PAD, sizeof(uint64_t), SA_UINT64_ARRAY),
109*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_ZNODE_ACL, 88, SA_UINT64_ARRAY),
110*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_DACL_COUNT, sizeof(uint64_t), SA_UINT64_ARRAY),
111*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_SYMLINK, 0, SA_UINT8_ARRAY),
112*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_SCANSTAMP, sizeof(uint64_t) * 4, SA_UINT8_ARRAY),
113*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_DACL_ACES, 0, SA_ACL),
114*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_DXATTR, 0, SA_UINT8_ARRAY),
115*240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_PROJID, sizeof(uint64_t), SA_UINT64_ARRAY),
116*240afd8cSMark Johnston #undef ZPL_ATTR
117*240afd8cSMark Johnston };
118*240afd8cSMark Johnston 
119*240afd8cSMark Johnston /*
120*240afd8cSMark Johnston  * This layout matches that of a filesystem created using OpenZFS on FreeBSD.
121*240afd8cSMark Johnston  * It need not match in general, but FreeBSD's loader doesn't bother parsing the
122*240afd8cSMark Johnston  * layout and just hard-codes attribute offsets.
123*240afd8cSMark Johnston  */
124*240afd8cSMark Johnston static const sa_attr_type_t zpl_attr_layout[] = {
125*240afd8cSMark Johnston 	ZPL_MODE,
126*240afd8cSMark Johnston 	ZPL_SIZE,
127*240afd8cSMark Johnston 	ZPL_GEN,
128*240afd8cSMark Johnston 	ZPL_UID,
129*240afd8cSMark Johnston 	ZPL_GID,
130*240afd8cSMark Johnston 	ZPL_PARENT,
131*240afd8cSMark Johnston 	ZPL_FLAGS,
132*240afd8cSMark Johnston 	ZPL_ATIME,
133*240afd8cSMark Johnston 	ZPL_MTIME,
134*240afd8cSMark Johnston 	ZPL_CTIME,
135*240afd8cSMark Johnston 	ZPL_CRTIME,
136*240afd8cSMark Johnston 	ZPL_LINKS,
137*240afd8cSMark Johnston 	ZPL_DACL_COUNT,
138*240afd8cSMark Johnston 	ZPL_DACL_ACES,
139*240afd8cSMark Johnston 	ZPL_SYMLINK,
140*240afd8cSMark Johnston };
141*240afd8cSMark Johnston 
/*
 * Keys for the ZPL attribute tables in the SA layout ZAP.  Indices 0 and 1 are
 * reserved for legacy attribute encoding, so the layouts used here start at 2.
 */
#define	SA_LAYOUT_INDEX_DEFAULT	2	/* regular files and directories */
#define	SA_LAYOUT_INDEX_SYMLINK	3	/* symbolic links */
148*240afd8cSMark Johnston 
149*240afd8cSMark Johnston struct fs_populate_dir {
150*240afd8cSMark Johnston 	SLIST_ENTRY(fs_populate_dir) next;
151*240afd8cSMark Johnston 	int			dirfd;
152*240afd8cSMark Johnston 	uint64_t		objid;
153*240afd8cSMark Johnston 	zfs_zap_t		*zap;
154*240afd8cSMark Johnston };
155*240afd8cSMark Johnston 
156*240afd8cSMark Johnston struct fs_populate_arg {
157*240afd8cSMark Johnston 	zfs_opt_t	*zfs;
158*240afd8cSMark Johnston 	zfs_fs_t	*fs;			/* owning filesystem */
159*240afd8cSMark Johnston 	int		dirfd;			/* current directory fd */
160*240afd8cSMark Johnston 	uint64_t	rootdirid;		/* root directory dnode ID */
161*240afd8cSMark Johnston 	SLIST_HEAD(, fs_populate_dir) dirs;	/* stack of directories */
162*240afd8cSMark Johnston };
163*240afd8cSMark Johnston 
164*240afd8cSMark Johnston static void fs_build_one(zfs_opt_t *, zfs_dsl_dir_t *, fsnode *, int);
165*240afd8cSMark Johnston 
166*240afd8cSMark Johnston static bool
167*240afd8cSMark Johnston fsnode_isroot(const fsnode *cur)
168*240afd8cSMark Johnston {
169*240afd8cSMark Johnston 	return (strcmp(cur->name, ".") == 0);
170*240afd8cSMark Johnston }
171*240afd8cSMark Johnston 
172*240afd8cSMark Johnston /*
173*240afd8cSMark Johnston  * Visit each node in a directory hierarchy, in pre-order depth-first order.
174*240afd8cSMark Johnston  */
175*240afd8cSMark Johnston static void
176*240afd8cSMark Johnston fsnode_foreach(fsnode *root, int (*cb)(fsnode *, void *), void *arg)
177*240afd8cSMark Johnston {
178*240afd8cSMark Johnston 	assert(root->type == S_IFDIR);
179*240afd8cSMark Johnston 
180*240afd8cSMark Johnston 	for (fsnode *cur = root; cur != NULL; cur = cur->next) {
181*240afd8cSMark Johnston 		assert(cur->type == S_IFREG || cur->type == S_IFDIR ||
182*240afd8cSMark Johnston 		    cur->type == S_IFLNK);
183*240afd8cSMark Johnston 
184*240afd8cSMark Johnston 		if (cb(cur, arg) == 0)
185*240afd8cSMark Johnston 			continue;
186*240afd8cSMark Johnston 		if (cur->type == S_IFDIR && cur->child != NULL)
187*240afd8cSMark Johnston 			fsnode_foreach(cur->child, cb, arg);
188*240afd8cSMark Johnston 	}
189*240afd8cSMark Johnston }
190*240afd8cSMark Johnston 
191*240afd8cSMark Johnston static void
192*240afd8cSMark Johnston fs_populate_dirent(struct fs_populate_arg *arg, fsnode *cur, uint64_t dnid)
193*240afd8cSMark Johnston {
194*240afd8cSMark Johnston 	struct fs_populate_dir *dir;
195*240afd8cSMark Johnston 	uint64_t type;
196*240afd8cSMark Johnston 
197*240afd8cSMark Johnston 	switch (cur->type) {
198*240afd8cSMark Johnston 	case S_IFREG:
199*240afd8cSMark Johnston 		type = DT_REG;
200*240afd8cSMark Johnston 		break;
201*240afd8cSMark Johnston 	case S_IFDIR:
202*240afd8cSMark Johnston 		type = DT_DIR;
203*240afd8cSMark Johnston 		break;
204*240afd8cSMark Johnston 	case S_IFLNK:
205*240afd8cSMark Johnston 		type = DT_LNK;
206*240afd8cSMark Johnston 		break;
207*240afd8cSMark Johnston 	default:
208*240afd8cSMark Johnston 		assert(0);
209*240afd8cSMark Johnston 	}
210*240afd8cSMark Johnston 
211*240afd8cSMark Johnston 	dir = SLIST_FIRST(&arg->dirs);
212*240afd8cSMark Johnston 	zap_add_uint64(dir->zap, cur->name, ZFS_DIRENT_MAKE(type, dnid));
213*240afd8cSMark Johnston }
214*240afd8cSMark Johnston 
215*240afd8cSMark Johnston static void
216*240afd8cSMark Johnston fs_populate_attr(zfs_fs_t *fs, char *attrbuf, const void *val, uint16_t ind,
217*240afd8cSMark Johnston     size_t *szp)
218*240afd8cSMark Johnston {
219*240afd8cSMark Johnston 	assert(ind < fs->sacnt);
220*240afd8cSMark Johnston 	assert(fs->saoffs[ind] != 0xffff);
221*240afd8cSMark Johnston 
222*240afd8cSMark Johnston 	memcpy(attrbuf + fs->saoffs[ind], val, fs->satab[ind].size);
223*240afd8cSMark Johnston 	*szp += fs->satab[ind].size;
224*240afd8cSMark Johnston }
225*240afd8cSMark Johnston 
226*240afd8cSMark Johnston static void
227*240afd8cSMark Johnston fs_populate_varszattr(zfs_fs_t *fs, char *attrbuf, const void *val,
228*240afd8cSMark Johnston     size_t valsz, size_t varoff, uint16_t ind, size_t *szp)
229*240afd8cSMark Johnston {
230*240afd8cSMark Johnston 	assert(ind < fs->sacnt);
231*240afd8cSMark Johnston 	assert(fs->saoffs[ind] != 0xffff);
232*240afd8cSMark Johnston 	assert(fs->satab[ind].size == 0);
233*240afd8cSMark Johnston 
234*240afd8cSMark Johnston 	memcpy(attrbuf + fs->saoffs[ind] + varoff, val, valsz);
235*240afd8cSMark Johnston 	*szp += valsz;
236*240afd8cSMark Johnston }
237*240afd8cSMark Johnston 
238*240afd8cSMark Johnston static void
239*240afd8cSMark Johnston fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur,
240*240afd8cSMark Johnston     dnode_phys_t *dnode)
241*240afd8cSMark Johnston {
242*240afd8cSMark Johnston 	char target[PATH_MAX];
243*240afd8cSMark Johnston 	zfs_fs_t *fs;
244*240afd8cSMark Johnston 	zfs_ace_hdr_t aces[3];
245*240afd8cSMark Johnston 	struct stat *sb;
246*240afd8cSMark Johnston 	sa_hdr_phys_t *sahdr;
247*240afd8cSMark Johnston 	uint64_t daclcount, flags, gen, gid, links, mode, parent, objsize, uid;
248*240afd8cSMark Johnston 	char *attrbuf;
249*240afd8cSMark Johnston 	size_t bonussz, hdrsz;
250*240afd8cSMark Johnston 	int layout;
251*240afd8cSMark Johnston 
252*240afd8cSMark Johnston 	assert(dnode->dn_bonustype == DMU_OT_SA);
253*240afd8cSMark Johnston 	assert(dnode->dn_nblkptr == 1);
254*240afd8cSMark Johnston 
255*240afd8cSMark Johnston 	fs = arg->fs;
256*240afd8cSMark Johnston 	sb = &cur->inode->st;
257*240afd8cSMark Johnston 
258*240afd8cSMark Johnston 	switch (cur->type) {
259*240afd8cSMark Johnston 	case S_IFREG:
260*240afd8cSMark Johnston 		layout = SA_LAYOUT_INDEX_DEFAULT;
261*240afd8cSMark Johnston 		links = cur->inode->nlink;
262*240afd8cSMark Johnston 		objsize = sb->st_size;
263*240afd8cSMark Johnston 		parent = SLIST_FIRST(&arg->dirs)->objid;
264*240afd8cSMark Johnston 		break;
265*240afd8cSMark Johnston 	case S_IFDIR:
266*240afd8cSMark Johnston 		layout = SA_LAYOUT_INDEX_DEFAULT;
267*240afd8cSMark Johnston 		links = 1; /* .. */
268*240afd8cSMark Johnston 		objsize = 1; /* .. */
269*240afd8cSMark Johnston 
270*240afd8cSMark Johnston 		/*
271*240afd8cSMark Johnston 		 * The size of a ZPL directory is the number of entries
272*240afd8cSMark Johnston 		 * (including "." and ".."), and the link count is the number of
273*240afd8cSMark Johnston 		 * entries which are directories (including "." and "..").
274*240afd8cSMark Johnston 		 */
275*240afd8cSMark Johnston 		for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child;
276*240afd8cSMark Johnston 		    c != NULL; c = c->next) {
277*240afd8cSMark Johnston 			if (c->type == S_IFDIR)
278*240afd8cSMark Johnston 				links++;
279*240afd8cSMark Johnston 			objsize++;
280*240afd8cSMark Johnston 		}
281*240afd8cSMark Johnston 
282*240afd8cSMark Johnston 		/* The root directory is its own parent. */
283*240afd8cSMark Johnston 		parent = SLIST_EMPTY(&arg->dirs) ?
284*240afd8cSMark Johnston 		    arg->rootdirid : SLIST_FIRST(&arg->dirs)->objid;
285*240afd8cSMark Johnston 		break;
286*240afd8cSMark Johnston 	case S_IFLNK: {
287*240afd8cSMark Johnston 		ssize_t n;
288*240afd8cSMark Johnston 
289*240afd8cSMark Johnston 		if ((n = readlinkat(SLIST_FIRST(&arg->dirs)->dirfd, cur->name,
290*240afd8cSMark Johnston 		    target, sizeof(target) - 1)) == -1)
291*240afd8cSMark Johnston 			err(1, "readlinkat(%s)", cur->name);
292*240afd8cSMark Johnston 		target[n] = '\0';
293*240afd8cSMark Johnston 
294*240afd8cSMark Johnston 		layout = SA_LAYOUT_INDEX_SYMLINK;
295*240afd8cSMark Johnston 		links = 1;
296*240afd8cSMark Johnston 		objsize = strlen(target);
297*240afd8cSMark Johnston 		parent = SLIST_FIRST(&arg->dirs)->objid;
298*240afd8cSMark Johnston 		break;
299*240afd8cSMark Johnston 		}
300*240afd8cSMark Johnston 	default:
301*240afd8cSMark Johnston 		assert(0);
302*240afd8cSMark Johnston 	}
303*240afd8cSMark Johnston 
304*240afd8cSMark Johnston 	daclcount = nitems(aces);
305*240afd8cSMark Johnston 	flags = ZFS_ACL_TRIVIAL | ZFS_ACL_AUTO_INHERIT | ZFS_NO_EXECS_DENIED |
306*240afd8cSMark Johnston 	    ZFS_ARCHIVE | ZFS_AV_MODIFIED; /* XXX-MJ */
307*240afd8cSMark Johnston 	gen = 1;
308*240afd8cSMark Johnston 	gid = sb->st_gid;
309*240afd8cSMark Johnston 	mode = sb->st_mode;
310*240afd8cSMark Johnston 	uid = sb->st_uid;
311*240afd8cSMark Johnston 
312*240afd8cSMark Johnston 	memset(aces, 0, sizeof(aces));
313*240afd8cSMark Johnston 	aces[0].z_flags = ACE_OWNER;
314*240afd8cSMark Johnston 	aces[0].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
315*240afd8cSMark Johnston 	aces[0].z_access_mask = ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER |
316*240afd8cSMark Johnston 	    ACE_WRITE_ACL | ACE_WRITE_NAMED_ATTRS | ACE_READ_ACL |
317*240afd8cSMark Johnston 	    ACE_READ_ATTRIBUTES | ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
318*240afd8cSMark Johnston 	if ((mode & S_IRUSR) != 0)
319*240afd8cSMark Johnston 		aces[0].z_access_mask |= ACE_READ_DATA;
320*240afd8cSMark Johnston 	if ((mode & S_IWUSR) != 0)
321*240afd8cSMark Johnston 		aces[0].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA;
322*240afd8cSMark Johnston 	if ((mode & S_IXUSR) != 0)
323*240afd8cSMark Johnston 		aces[0].z_access_mask |= ACE_EXECUTE;
324*240afd8cSMark Johnston 
325*240afd8cSMark Johnston 	aces[1].z_flags = ACE_GROUP | ACE_IDENTIFIER_GROUP;
326*240afd8cSMark Johnston 	aces[1].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
327*240afd8cSMark Johnston 	aces[1].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES |
328*240afd8cSMark Johnston 	    ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
329*240afd8cSMark Johnston 	if ((mode & S_IRGRP) != 0)
330*240afd8cSMark Johnston 		aces[1].z_access_mask |= ACE_READ_DATA;
331*240afd8cSMark Johnston 	if ((mode & S_IWGRP) != 0)
332*240afd8cSMark Johnston 		aces[1].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA;
333*240afd8cSMark Johnston 	if ((mode & S_IXGRP) != 0)
334*240afd8cSMark Johnston 		aces[1].z_access_mask |= ACE_EXECUTE;
335*240afd8cSMark Johnston 
336*240afd8cSMark Johnston 	aces[2].z_flags = ACE_EVERYONE;
337*240afd8cSMark Johnston 	aces[2].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
338*240afd8cSMark Johnston 	aces[2].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES |
339*240afd8cSMark Johnston 	    ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
340*240afd8cSMark Johnston 	if ((mode & S_IROTH) != 0)
341*240afd8cSMark Johnston 		aces[2].z_access_mask |= ACE_READ_DATA;
342*240afd8cSMark Johnston 	if ((mode & S_IWOTH) != 0)
343*240afd8cSMark Johnston 		aces[2].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA;
344*240afd8cSMark Johnston 	if ((mode & S_IXOTH) != 0)
345*240afd8cSMark Johnston 		aces[2].z_access_mask |= ACE_EXECUTE;
346*240afd8cSMark Johnston 
347*240afd8cSMark Johnston 	switch (layout) {
348*240afd8cSMark Johnston 	case SA_LAYOUT_INDEX_DEFAULT:
349*240afd8cSMark Johnston 		/* At most one variable-length attribute. */
350*240afd8cSMark Johnston 		hdrsz = sizeof(uint64_t);
351*240afd8cSMark Johnston 		break;
352*240afd8cSMark Johnston 	case SA_LAYOUT_INDEX_SYMLINK:
353*240afd8cSMark Johnston 		/* At most five variable-length attributes. */
354*240afd8cSMark Johnston 		hdrsz = sizeof(uint64_t) * 2;
355*240afd8cSMark Johnston 		break;
356*240afd8cSMark Johnston 	default:
357*240afd8cSMark Johnston 		assert(0);
358*240afd8cSMark Johnston 	}
359*240afd8cSMark Johnston 
360*240afd8cSMark Johnston 	sahdr = (sa_hdr_phys_t *)DN_BONUS(dnode);
361*240afd8cSMark Johnston 	sahdr->sa_magic = SA_MAGIC;
362*240afd8cSMark Johnston 	SA_HDR_LAYOUT_INFO_ENCODE(sahdr->sa_layout_info, layout, hdrsz);
363*240afd8cSMark Johnston 
364*240afd8cSMark Johnston 	bonussz = SA_HDR_SIZE(sahdr);
365*240afd8cSMark Johnston 	attrbuf = (char *)sahdr + SA_HDR_SIZE(sahdr);
366*240afd8cSMark Johnston 
367*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &daclcount, ZPL_DACL_COUNT, &bonussz);
368*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &flags, ZPL_FLAGS, &bonussz);
369*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &gen, ZPL_GEN, &bonussz);
370*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &gid, ZPL_GID, &bonussz);
371*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &links, ZPL_LINKS, &bonussz);
372*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &mode, ZPL_MODE, &bonussz);
373*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &parent, ZPL_PARENT, &bonussz);
374*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &objsize, ZPL_SIZE, &bonussz);
375*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &uid, ZPL_UID, &bonussz);
376*240afd8cSMark Johnston 
377*240afd8cSMark Johnston 	/*
378*240afd8cSMark Johnston 	 * We deliberately set atime = mtime here to ensure that images are
379*240afd8cSMark Johnston 	 * reproducible.
380*240afd8cSMark Johnston 	 */
381*240afd8cSMark Johnston 	assert(sizeof(sb->st_mtim) == fs->satab[ZPL_ATIME].size);
382*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &sb->st_mtim, ZPL_ATIME, &bonussz);
383*240afd8cSMark Johnston 	assert(sizeof(sb->st_ctim) == fs->satab[ZPL_CTIME].size);
384*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &sb->st_ctim, ZPL_CTIME, &bonussz);
385*240afd8cSMark Johnston 	assert(sizeof(sb->st_mtim) == fs->satab[ZPL_MTIME].size);
386*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &sb->st_mtim, ZPL_MTIME, &bonussz);
387*240afd8cSMark Johnston 	assert(sizeof(sb->st_birthtim) == fs->satab[ZPL_CRTIME].size);
388*240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &sb->st_birthtim, ZPL_CRTIME, &bonussz);
389*240afd8cSMark Johnston 
390*240afd8cSMark Johnston 	fs_populate_varszattr(fs, attrbuf, aces, sizeof(aces), 0,
391*240afd8cSMark Johnston 	    ZPL_DACL_ACES, &bonussz);
392*240afd8cSMark Johnston 	sahdr->sa_lengths[0] = sizeof(aces);
393*240afd8cSMark Johnston 
394*240afd8cSMark Johnston 	if (cur->type == S_IFLNK) {
395*240afd8cSMark Johnston 		assert(layout == SA_LAYOUT_INDEX_SYMLINK);
396*240afd8cSMark Johnston 		/* Need to use a spill block pointer if the target is long. */
397*240afd8cSMark Johnston 		assert(bonussz + objsize <= DN_OLD_MAX_BONUSLEN);
398*240afd8cSMark Johnston 		fs_populate_varszattr(fs, attrbuf, target, objsize,
399*240afd8cSMark Johnston 		    sahdr->sa_lengths[0], ZPL_SYMLINK, &bonussz);
400*240afd8cSMark Johnston 		sahdr->sa_lengths[1] = (uint16_t)objsize;
401*240afd8cSMark Johnston 	}
402*240afd8cSMark Johnston 
403*240afd8cSMark Johnston 	dnode->dn_bonuslen = bonussz;
404*240afd8cSMark Johnston }
405*240afd8cSMark Johnston 
406*240afd8cSMark Johnston static void
407*240afd8cSMark Johnston fs_populate_file(fsnode *cur, struct fs_populate_arg *arg)
408*240afd8cSMark Johnston {
409*240afd8cSMark Johnston 	struct dnode_cursor *c;
410*240afd8cSMark Johnston 	dnode_phys_t *dnode;
411*240afd8cSMark Johnston 	zfs_opt_t *zfs;
412*240afd8cSMark Johnston 	char *buf;
413*240afd8cSMark Johnston 	uint64_t dnid;
414*240afd8cSMark Johnston 	ssize_t n;
415*240afd8cSMark Johnston 	size_t bufsz;
416*240afd8cSMark Johnston 	off_t size, target;
417*240afd8cSMark Johnston 	int fd;
418*240afd8cSMark Johnston 
419*240afd8cSMark Johnston 	assert(cur->type == S_IFREG);
420*240afd8cSMark Johnston 	assert((cur->inode->flags & FI_ROOT) == 0);
421*240afd8cSMark Johnston 
422*240afd8cSMark Johnston 	zfs = arg->zfs;
423*240afd8cSMark Johnston 
424*240afd8cSMark Johnston 	assert(cur->inode->ino != 0);
425*240afd8cSMark Johnston 	if ((cur->inode->flags & FI_ALLOCATED) != 0) {
426*240afd8cSMark Johnston 		/*
427*240afd8cSMark Johnston 		 * This is a hard link of an existing file.
428*240afd8cSMark Johnston 		 *
429*240afd8cSMark Johnston 		 * XXX-MJ need to check whether it crosses datasets, add a test
430*240afd8cSMark Johnston 		 * case for that
431*240afd8cSMark Johnston 		 */
432*240afd8cSMark Johnston 		fs_populate_dirent(arg, cur, cur->inode->ino);
433*240afd8cSMark Johnston 		return;
434*240afd8cSMark Johnston 	}
435*240afd8cSMark Johnston 
436*240afd8cSMark Johnston 	dnode = objset_dnode_bonus_alloc(arg->fs->os,
437*240afd8cSMark Johnston 	    DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid);
438*240afd8cSMark Johnston 	cur->inode->ino = dnid;
439*240afd8cSMark Johnston 	cur->inode->flags |= FI_ALLOCATED;
440*240afd8cSMark Johnston 
441*240afd8cSMark Johnston 	fd = openat(SLIST_FIRST(&arg->dirs)->dirfd, cur->name, O_RDONLY);
442*240afd8cSMark Johnston 	if (fd == -1)
443*240afd8cSMark Johnston 		err(1, "openat(%s)", cur->name);
444*240afd8cSMark Johnston 
445*240afd8cSMark Johnston 	buf = zfs->filebuf;
446*240afd8cSMark Johnston 	bufsz = sizeof(zfs->filebuf);
447*240afd8cSMark Johnston 	size = cur->inode->st.st_size;
448*240afd8cSMark Johnston 	c = dnode_cursor_init(zfs, arg->fs->os, dnode, size, 0);
449*240afd8cSMark Johnston 	for (off_t foff = 0; foff < size; foff += target) {
450*240afd8cSMark Johnston 		off_t loc, sofar;
451*240afd8cSMark Johnston 
452*240afd8cSMark Johnston 		/*
453*240afd8cSMark Johnston 		 * Fill up our buffer, handling partial reads.
454*240afd8cSMark Johnston 		 *
455*240afd8cSMark Johnston 		 * It might be profitable to use copy_file_range(2) here.
456*240afd8cSMark Johnston 		 */
457*240afd8cSMark Johnston 		sofar = 0;
458*240afd8cSMark Johnston 		target = MIN(size - foff, (off_t)bufsz);
459*240afd8cSMark Johnston 		do {
460*240afd8cSMark Johnston 			n = read(fd, buf + sofar, target);
461*240afd8cSMark Johnston 			if (n < 0)
462*240afd8cSMark Johnston 				err(1, "reading from '%s'", cur->name);
463*240afd8cSMark Johnston 			if (n == 0)
464*240afd8cSMark Johnston 				errx(1, "unexpected EOF reading '%s'",
465*240afd8cSMark Johnston 				    cur->name);
466*240afd8cSMark Johnston 			sofar += n;
467*240afd8cSMark Johnston 		} while (sofar < target);
468*240afd8cSMark Johnston 
469*240afd8cSMark Johnston 		if (target < (off_t)bufsz)
470*240afd8cSMark Johnston 			memset(buf + target, 0, bufsz - target);
471*240afd8cSMark Johnston 
472*240afd8cSMark Johnston 		loc = objset_space_alloc(zfs, arg->fs->os, &target);
473*240afd8cSMark Johnston 		vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, buf, target, loc,
474*240afd8cSMark Johnston 		    dnode_cursor_next(zfs, c, foff));
475*240afd8cSMark Johnston 	}
476*240afd8cSMark Johnston 	if (close(fd) != 0)
477*240afd8cSMark Johnston 		err(1, "close");
478*240afd8cSMark Johnston 	dnode_cursor_finish(zfs, c);
479*240afd8cSMark Johnston 
480*240afd8cSMark Johnston 	fs_populate_sattrs(arg, cur, dnode);
481*240afd8cSMark Johnston 	fs_populate_dirent(arg, cur, dnid);
482*240afd8cSMark Johnston }
483*240afd8cSMark Johnston 
484*240afd8cSMark Johnston static void
485*240afd8cSMark Johnston fs_populate_dir(fsnode *cur, struct fs_populate_arg *arg)
486*240afd8cSMark Johnston {
487*240afd8cSMark Johnston 	dnode_phys_t *dnode;
488*240afd8cSMark Johnston 	zfs_objset_t *os;
489*240afd8cSMark Johnston 	uint64_t dnid;
490*240afd8cSMark Johnston 	int dirfd;
491*240afd8cSMark Johnston 
492*240afd8cSMark Johnston 	assert(cur->type == S_IFDIR);
493*240afd8cSMark Johnston 	assert((cur->inode->flags & FI_ALLOCATED) == 0);
494*240afd8cSMark Johnston 
495*240afd8cSMark Johnston 	os = arg->fs->os;
496*240afd8cSMark Johnston 
497*240afd8cSMark Johnston 	dnode = objset_dnode_bonus_alloc(os, DMU_OT_DIRECTORY_CONTENTS,
498*240afd8cSMark Johnston 	    DMU_OT_SA, 0, &dnid);
499*240afd8cSMark Johnston 
500*240afd8cSMark Johnston 	/*
501*240afd8cSMark Johnston 	 * Add an entry to the parent directory and open this directory.
502*240afd8cSMark Johnston 	 */
503*240afd8cSMark Johnston 	if (!SLIST_EMPTY(&arg->dirs)) {
504*240afd8cSMark Johnston 		fs_populate_dirent(arg, cur, dnid);
505*240afd8cSMark Johnston 		dirfd = openat(SLIST_FIRST(&arg->dirs)->dirfd, cur->name,
506*240afd8cSMark Johnston 		    O_DIRECTORY);
507*240afd8cSMark Johnston 		if (dirfd < 0)
508*240afd8cSMark Johnston 			err(1, "open(%s)", cur->name);
509*240afd8cSMark Johnston 	} else {
510*240afd8cSMark Johnston 		arg->rootdirid = dnid;
511*240afd8cSMark Johnston 		dirfd = arg->dirfd;
512*240afd8cSMark Johnston 	}
513*240afd8cSMark Johnston 
514*240afd8cSMark Johnston 	/*
515*240afd8cSMark Johnston 	 * Set ZPL attributes.
516*240afd8cSMark Johnston 	 */
517*240afd8cSMark Johnston 	fs_populate_sattrs(arg, cur, dnode);
518*240afd8cSMark Johnston 
519*240afd8cSMark Johnston 	/*
520*240afd8cSMark Johnston 	 * If this is a root directory, then its children belong to a different
521*240afd8cSMark Johnston 	 * dataset and this directory remains empty in the current objset.
522*240afd8cSMark Johnston 	 */
523*240afd8cSMark Johnston 	if ((cur->inode->flags & FI_ROOT) == 0) {
524*240afd8cSMark Johnston 		struct fs_populate_dir *dir;
525*240afd8cSMark Johnston 
526*240afd8cSMark Johnston 		dir = ecalloc(1, sizeof(*dir));
527*240afd8cSMark Johnston 		dir->dirfd = dirfd;
528*240afd8cSMark Johnston 		dir->objid = dnid;
529*240afd8cSMark Johnston 		dir->zap = zap_alloc(os, dnode);
530*240afd8cSMark Johnston 		SLIST_INSERT_HEAD(&arg->dirs, dir, next);
531*240afd8cSMark Johnston 	} else {
532*240afd8cSMark Johnston 		zap_write(arg->zfs, zap_alloc(os, dnode));
533*240afd8cSMark Johnston 		fs_build_one(arg->zfs, cur->inode->param, cur->child, dirfd);
534*240afd8cSMark Johnston 	}
535*240afd8cSMark Johnston }
536*240afd8cSMark Johnston 
537*240afd8cSMark Johnston static void
538*240afd8cSMark Johnston fs_populate_symlink(fsnode *cur, struct fs_populate_arg *arg)
539*240afd8cSMark Johnston {
540*240afd8cSMark Johnston 	dnode_phys_t *dnode;
541*240afd8cSMark Johnston 	uint64_t dnid;
542*240afd8cSMark Johnston 
543*240afd8cSMark Johnston 	assert(cur->type == S_IFLNK);
544*240afd8cSMark Johnston 	assert((cur->inode->flags & (FI_ALLOCATED | FI_ROOT)) == 0);
545*240afd8cSMark Johnston 
546*240afd8cSMark Johnston 	dnode = objset_dnode_bonus_alloc(arg->fs->os,
547*240afd8cSMark Johnston 	    DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid);
548*240afd8cSMark Johnston 
549*240afd8cSMark Johnston 	fs_populate_dirent(arg, cur, dnid);
550*240afd8cSMark Johnston 
551*240afd8cSMark Johnston 	fs_populate_sattrs(arg, cur, dnode);
552*240afd8cSMark Johnston }
553*240afd8cSMark Johnston 
554*240afd8cSMark Johnston static int
555*240afd8cSMark Johnston fs_foreach_populate(fsnode *cur, void *_arg)
556*240afd8cSMark Johnston {
557*240afd8cSMark Johnston 	struct fs_populate_arg *arg;
558*240afd8cSMark Johnston 	struct fs_populate_dir *dir;
559*240afd8cSMark Johnston 	int ret;
560*240afd8cSMark Johnston 
561*240afd8cSMark Johnston 	arg = _arg;
562*240afd8cSMark Johnston 	switch (cur->type) {
563*240afd8cSMark Johnston 	case S_IFREG:
564*240afd8cSMark Johnston 		fs_populate_file(cur, arg);
565*240afd8cSMark Johnston 		break;
566*240afd8cSMark Johnston 	case S_IFDIR:
567*240afd8cSMark Johnston 		if (fsnode_isroot(cur))
568*240afd8cSMark Johnston 			break;
569*240afd8cSMark Johnston 		fs_populate_dir(cur, arg);
570*240afd8cSMark Johnston 		break;
571*240afd8cSMark Johnston 	case S_IFLNK:
572*240afd8cSMark Johnston 		fs_populate_symlink(cur, arg);
573*240afd8cSMark Johnston 		break;
574*240afd8cSMark Johnston 	default:
575*240afd8cSMark Johnston 		assert(0);
576*240afd8cSMark Johnston 	}
577*240afd8cSMark Johnston 
578*240afd8cSMark Johnston 	ret = (cur->inode->flags & FI_ROOT) != 0 ? 0 : 1;
579*240afd8cSMark Johnston 
580*240afd8cSMark Johnston 	if (cur->next == NULL &&
581*240afd8cSMark Johnston 	    (cur->child == NULL || (cur->inode->flags & FI_ROOT) != 0)) {
582*240afd8cSMark Johnston 		/*
583*240afd8cSMark Johnston 		 * We reached a terminal node in a subtree.  Walk back up and
584*240afd8cSMark Johnston 		 * write out directories.  We're done once we hit the root of a
585*240afd8cSMark Johnston 		 * dataset or find a level where we're not on the edge of the
586*240afd8cSMark Johnston 		 * tree.
587*240afd8cSMark Johnston 		 */
588*240afd8cSMark Johnston 		do {
589*240afd8cSMark Johnston 			dir = SLIST_FIRST(&arg->dirs);
590*240afd8cSMark Johnston 			SLIST_REMOVE_HEAD(&arg->dirs, next);
591*240afd8cSMark Johnston 			zap_write(arg->zfs, dir->zap);
592*240afd8cSMark Johnston 			if (dir->dirfd != -1 && close(dir->dirfd) != 0)
593*240afd8cSMark Johnston 				err(1, "close");
594*240afd8cSMark Johnston 			free(dir);
595*240afd8cSMark Johnston 			cur = cur->parent;
596*240afd8cSMark Johnston 		} while (cur != NULL && cur->next == NULL &&
597*240afd8cSMark Johnston 		    (cur->inode->flags & FI_ROOT) == 0);
598*240afd8cSMark Johnston 	}
599*240afd8cSMark Johnston 
600*240afd8cSMark Johnston 	return (ret);
601*240afd8cSMark Johnston }
602*240afd8cSMark Johnston 
603*240afd8cSMark Johnston static void
604*240afd8cSMark Johnston fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index,
605*240afd8cSMark Johnston     const sa_attr_type_t layout[], size_t sacnt)
606*240afd8cSMark Johnston {
607*240afd8cSMark Johnston 	char ti[16];
608*240afd8cSMark Johnston 
609*240afd8cSMark Johnston 	assert(sizeof(layout[0]) == 2);
610*240afd8cSMark Johnston 
611*240afd8cSMark Johnston 	snprintf(ti, sizeof(ti), "%u", index);
612*240afd8cSMark Johnston 	zap_add(zap, ti, sizeof(sa_attr_type_t), sacnt,
613*240afd8cSMark Johnston 	    (const uint8_t *)layout);
614*240afd8cSMark Johnston }
615*240afd8cSMark Johnston 
616*240afd8cSMark Johnston /*
617*240afd8cSMark Johnston  * Initialize system attribute tables.
618*240afd8cSMark Johnston  *
619*240afd8cSMark Johnston  * There are two elements to this.  First, we write the zpl_attrs[] and
620*240afd8cSMark Johnston  * zpl_attr_layout[] tables to disk.  Then we create a lookup table which
621*240afd8cSMark Johnston  * allows us to set file attributes quickly.
622*240afd8cSMark Johnston  */
623*240afd8cSMark Johnston static uint64_t
624*240afd8cSMark Johnston fs_set_zpl_attrs(zfs_opt_t *zfs, zfs_fs_t *fs)
625*240afd8cSMark Johnston {
626*240afd8cSMark Johnston 	zfs_zap_t *sazap, *salzap, *sarzap;
627*240afd8cSMark Johnston 	zfs_objset_t *os;
628*240afd8cSMark Johnston 	dnode_phys_t *saobj, *salobj, *sarobj;
629*240afd8cSMark Johnston 	uint64_t saobjid, salobjid, sarobjid;
630*240afd8cSMark Johnston 	uint16_t offset;
631*240afd8cSMark Johnston 
632*240afd8cSMark Johnston 	os = fs->os;
633*240afd8cSMark Johnston 
634*240afd8cSMark Johnston 	/*
635*240afd8cSMark Johnston 	 * The on-disk tables are stored in two ZAP objects, the registry object
636*240afd8cSMark Johnston 	 * and the layout object.  Individual attributes are described by
637*240afd8cSMark Johnston 	 * entries in the registry object; for example, the value for the
638*240afd8cSMark Johnston 	 * "ZPL_SIZE" key gives the size and encoding of the ZPL_SIZE attribute.
639*240afd8cSMark Johnston 	 * The attributes of a file are ordered according to one of the layouts
640*240afd8cSMark Johnston 	 * defined in the layout object.  The master node object is simply used
641*240afd8cSMark Johnston 	 * to locate the registry and layout objects.
642*240afd8cSMark Johnston 	 */
643*240afd8cSMark Johnston 	saobj = objset_dnode_alloc(os, DMU_OT_SA_MASTER_NODE, &saobjid);
644*240afd8cSMark Johnston 	salobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_LAYOUTS, &salobjid);
645*240afd8cSMark Johnston 	sarobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_REGISTRATION, &sarobjid);
646*240afd8cSMark Johnston 
647*240afd8cSMark Johnston 	sarzap = zap_alloc(os, sarobj);
648*240afd8cSMark Johnston 	for (size_t i = 0; i < nitems(zpl_attrs); i++) {
649*240afd8cSMark Johnston 		const zfs_sattr_t *sa;
650*240afd8cSMark Johnston 		uint64_t attr;
651*240afd8cSMark Johnston 
652*240afd8cSMark Johnston 		attr = 0;
653*240afd8cSMark Johnston 		sa = &zpl_attrs[i];
654*240afd8cSMark Johnston 		SA_ATTR_ENCODE(attr, (uint64_t)i, sa->size, sa->bs);
655*240afd8cSMark Johnston 		zap_add_uint64(sarzap, sa->name, attr);
656*240afd8cSMark Johnston 	}
657*240afd8cSMark Johnston 	zap_write(zfs, sarzap);
658*240afd8cSMark Johnston 
659*240afd8cSMark Johnston 	/*
660*240afd8cSMark Johnston 	 * Layouts are arrays of indices into the registry.  We define two
661*240afd8cSMark Johnston 	 * layouts for use by the ZPL, one for non-symlinks and one for
662*240afd8cSMark Johnston 	 * symlinks.  They are identical except that the symlink layout includes
663*240afd8cSMark Johnston 	 * ZPL_SYMLINK as its final attribute.
664*240afd8cSMark Johnston 	 */
665*240afd8cSMark Johnston 	salzap = zap_alloc(os, salobj);
666*240afd8cSMark Johnston 	assert(zpl_attr_layout[nitems(zpl_attr_layout) - 1] == ZPL_SYMLINK);
667*240afd8cSMark Johnston 	fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_DEFAULT,
668*240afd8cSMark Johnston 	    zpl_attr_layout, nitems(zpl_attr_layout) - 1);
669*240afd8cSMark Johnston 	fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_SYMLINK,
670*240afd8cSMark Johnston 	    zpl_attr_layout, nitems(zpl_attr_layout));
671*240afd8cSMark Johnston 	zap_write(zfs, salzap);
672*240afd8cSMark Johnston 
673*240afd8cSMark Johnston 	sazap = zap_alloc(os, saobj);
674*240afd8cSMark Johnston 	zap_add_uint64(sazap, SA_LAYOUTS, salobjid);
675*240afd8cSMark Johnston 	zap_add_uint64(sazap, SA_REGISTRY, sarobjid);
676*240afd8cSMark Johnston 	zap_write(zfs, sazap);
677*240afd8cSMark Johnston 
678*240afd8cSMark Johnston 	/* Sanity check. */
679*240afd8cSMark Johnston 	for (size_t i = 0; i < nitems(zpl_attrs); i++)
680*240afd8cSMark Johnston 		assert(i == zpl_attrs[i].id);
681*240afd8cSMark Johnston 
682*240afd8cSMark Johnston 	/*
683*240afd8cSMark Johnston 	 * Build the offset table used when setting file attributes.  File
684*240afd8cSMark Johnston 	 * attributes are stored in the object's bonus buffer; this table
685*240afd8cSMark Johnston 	 * provides the buffer offset of attributes referenced by the layout
686*240afd8cSMark Johnston 	 * table.
687*240afd8cSMark Johnston 	 */
688*240afd8cSMark Johnston 	fs->sacnt = nitems(zpl_attrs);
689*240afd8cSMark Johnston 	fs->saoffs = ecalloc(fs->sacnt, sizeof(*fs->saoffs));
690*240afd8cSMark Johnston 	for (size_t i = 0; i < fs->sacnt; i++)
691*240afd8cSMark Johnston 		fs->saoffs[i] = 0xffff;
692*240afd8cSMark Johnston 	offset = 0;
693*240afd8cSMark Johnston 	for (size_t i = 0; i < nitems(zpl_attr_layout); i++) {
694*240afd8cSMark Johnston 		uint16_t size;
695*240afd8cSMark Johnston 
696*240afd8cSMark Johnston 		assert(zpl_attr_layout[i] < fs->sacnt);
697*240afd8cSMark Johnston 
698*240afd8cSMark Johnston 		fs->saoffs[zpl_attr_layout[i]] = offset;
699*240afd8cSMark Johnston 		size = zpl_attrs[zpl_attr_layout[i]].size;
700*240afd8cSMark Johnston 		offset += size;
701*240afd8cSMark Johnston 	}
702*240afd8cSMark Johnston 	fs->satab = zpl_attrs;
703*240afd8cSMark Johnston 
704*240afd8cSMark Johnston 	return (saobjid);
705*240afd8cSMark Johnston }
706*240afd8cSMark Johnston 
707*240afd8cSMark Johnston static void
708*240afd8cSMark Johnston fs_layout_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg)
709*240afd8cSMark Johnston {
710*240afd8cSMark Johnston 	char *mountpoint, *origmountpoint, *name, *next;
711*240afd8cSMark Johnston 	fsnode *cur, *root;
712*240afd8cSMark Johnston 	uint64_t canmount;
713*240afd8cSMark Johnston 
714*240afd8cSMark Johnston 	if (!dsl_dir_has_dataset(dsldir))
715*240afd8cSMark Johnston 		return;
716*240afd8cSMark Johnston 
717*240afd8cSMark Johnston 	mountpoint = dsl_dir_get_mountpoint(zfs, dsldir);
718*240afd8cSMark Johnston 	if (mountpoint == NULL)
719*240afd8cSMark Johnston 		return;
720*240afd8cSMark Johnston 	if (dsl_dir_get_canmount(dsldir, &canmount) == 0 && canmount == 0)
721*240afd8cSMark Johnston 		return;
722*240afd8cSMark Johnston 
723*240afd8cSMark Johnston 	/*
724*240afd8cSMark Johnston 	 * If we were asked to specify a bootfs, set it here.
725*240afd8cSMark Johnston 	 */
726*240afd8cSMark Johnston 	if (zfs->bootfs != NULL && strcmp(zfs->bootfs,
727*240afd8cSMark Johnston 	    dsl_dir_fullname(dsldir)) == 0) {
728*240afd8cSMark Johnston 		zap_add_uint64(zfs->poolprops, "bootfs",
729*240afd8cSMark Johnston 		    dsl_dir_dataset_id(dsldir));
730*240afd8cSMark Johnston 	}
731*240afd8cSMark Johnston 
732*240afd8cSMark Johnston 	origmountpoint = mountpoint;
733*240afd8cSMark Johnston 
734*240afd8cSMark Johnston 	/*
735*240afd8cSMark Johnston 	 * Figure out which fsnode corresponds to our mountpoint.
736*240afd8cSMark Johnston 	 */
737*240afd8cSMark Johnston 	root = arg;
738*240afd8cSMark Johnston 	cur = root;
739*240afd8cSMark Johnston 	if (strcmp(mountpoint, zfs->rootpath) != 0) {
740*240afd8cSMark Johnston 		mountpoint += strlen(zfs->rootpath);
741*240afd8cSMark Johnston 
742*240afd8cSMark Johnston 		/*
743*240afd8cSMark Johnston 		 * Look up the directory in the staged tree.  For example, if
744*240afd8cSMark Johnston 		 * the dataset's mount point is /foo/bar/baz, we'll search the
745*240afd8cSMark Johnston 		 * root directory for "foo", search "foo" for "baz", and so on.
746*240afd8cSMark Johnston 		 * Each intermediate name must refer to a directory; the final
747*240afd8cSMark Johnston 		 * component need not exist.
748*240afd8cSMark Johnston 		 */
749*240afd8cSMark Johnston 		cur = root;
750*240afd8cSMark Johnston 		for (next = name = mountpoint; next != NULL;) {
751*240afd8cSMark Johnston 			for (; *next == '/'; next++)
752*240afd8cSMark Johnston 				;
753*240afd8cSMark Johnston 			name = strsep(&next, "/");
754*240afd8cSMark Johnston 
755*240afd8cSMark Johnston 			for (; cur != NULL && strcmp(cur->name, name) != 0;
756*240afd8cSMark Johnston 			    cur = cur->next)
757*240afd8cSMark Johnston 				;
758*240afd8cSMark Johnston 			if (cur == NULL) {
759*240afd8cSMark Johnston 				if (next == NULL)
760*240afd8cSMark Johnston 					break;
761*240afd8cSMark Johnston 				errx(1, "missing mountpoint directory for `%s'",
762*240afd8cSMark Johnston 				    dsl_dir_fullname(dsldir));
763*240afd8cSMark Johnston 			}
764*240afd8cSMark Johnston 			if (cur->type != S_IFDIR) {
765*240afd8cSMark Johnston 				errx(1,
766*240afd8cSMark Johnston 				    "mountpoint for `%s' is not a directory",
767*240afd8cSMark Johnston 				    dsl_dir_fullname(dsldir));
768*240afd8cSMark Johnston 			}
769*240afd8cSMark Johnston 			if (next != NULL)
770*240afd8cSMark Johnston 				cur = cur->child;
771*240afd8cSMark Johnston 		}
772*240afd8cSMark Johnston 	}
773*240afd8cSMark Johnston 
774*240afd8cSMark Johnston 	if (cur != NULL) {
775*240afd8cSMark Johnston 		assert(cur->type == S_IFDIR);
776*240afd8cSMark Johnston 
777*240afd8cSMark Johnston 		/*
778*240afd8cSMark Johnston 		 * Multiple datasets shouldn't share a mountpoint.  It's
779*240afd8cSMark Johnston 		 * technically allowed, but it's not clear what makefs should do
780*240afd8cSMark Johnston 		 * in that case.
781*240afd8cSMark Johnston 		 */
782*240afd8cSMark Johnston 		assert((cur->inode->flags & FI_ROOT) == 0);
783*240afd8cSMark Johnston 		if (cur != root)
784*240afd8cSMark Johnston 			cur->inode->flags |= FI_ROOT;
785*240afd8cSMark Johnston 		assert(cur->inode->param == NULL);
786*240afd8cSMark Johnston 		cur->inode->param = dsldir;
787*240afd8cSMark Johnston 	}
788*240afd8cSMark Johnston 
789*240afd8cSMark Johnston 	free(origmountpoint);
790*240afd8cSMark Johnston }
791*240afd8cSMark Johnston 
792*240afd8cSMark Johnston static int
793*240afd8cSMark Johnston fs_foreach_mark(fsnode *cur, void *arg)
794*240afd8cSMark Johnston {
795*240afd8cSMark Johnston 	uint64_t *countp;
796*240afd8cSMark Johnston 
797*240afd8cSMark Johnston 	countp = arg;
798*240afd8cSMark Johnston 	if (cur->type == S_IFDIR && fsnode_isroot(cur))
799*240afd8cSMark Johnston 		return (1);
800*240afd8cSMark Johnston 
801*240afd8cSMark Johnston 	if (cur->inode->ino == 0) {
802*240afd8cSMark Johnston 		cur->inode->ino = ++(*countp);
803*240afd8cSMark Johnston 		cur->inode->nlink = 1;
804*240afd8cSMark Johnston 	} else {
805*240afd8cSMark Johnston 		cur->inode->nlink++;
806*240afd8cSMark Johnston 	}
807*240afd8cSMark Johnston 
808*240afd8cSMark Johnston 	return ((cur->inode->flags & FI_ROOT) != 0 ? 0 : 1);
809*240afd8cSMark Johnston }
810*240afd8cSMark Johnston 
811*240afd8cSMark Johnston /*
812*240afd8cSMark Johnston  * Create a filesystem dataset.  More specifically:
813*240afd8cSMark Johnston  * - create an object set for the dataset,
814*240afd8cSMark Johnston  * - add required metadata (SA tables, property definitions, etc.) to that
815*240afd8cSMark Johnston  *   object set,
816*240afd8cSMark Johnston  * - optionally populate the object set with file objects, using "root" as the
817*240afd8cSMark Johnston  *   root directory.
818*240afd8cSMark Johnston  *
819*240afd8cSMark Johnston  * "dirfd" is a directory descriptor for the directory referenced by "root".  It
820*240afd8cSMark Johnston  * is closed before returning.
821*240afd8cSMark Johnston  */
822*240afd8cSMark Johnston static void
823*240afd8cSMark Johnston fs_build_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, fsnode *root, int dirfd)
824*240afd8cSMark Johnston {
825*240afd8cSMark Johnston 	struct fs_populate_arg arg;
826*240afd8cSMark Johnston 	zfs_fs_t fs;
827*240afd8cSMark Johnston 	zfs_zap_t *masterzap;
828*240afd8cSMark Johnston 	zfs_objset_t *os;
829*240afd8cSMark Johnston 	dnode_phys_t *deleteq, *masterobj;
830*240afd8cSMark Johnston 	uint64_t deleteqid, dnodecount, moid, rootdirid, saobjid;
831*240afd8cSMark Johnston 	bool fakedroot;
832*240afd8cSMark Johnston 
833*240afd8cSMark Johnston 	/*
834*240afd8cSMark Johnston 	 * This dataset's mountpoint doesn't exist in the staging tree, or the
835*240afd8cSMark Johnston 	 * dataset doesn't have a mountpoint at all.  In either case we still
836*240afd8cSMark Johnston 	 * need a root directory.  Fake up a root fsnode to handle this case.
837*240afd8cSMark Johnston 	 */
838*240afd8cSMark Johnston 	fakedroot = root == NULL;
839*240afd8cSMark Johnston 	if (fakedroot) {
840*240afd8cSMark Johnston 		struct stat *stp;
841*240afd8cSMark Johnston 
842*240afd8cSMark Johnston 		assert(dirfd == -1);
843*240afd8cSMark Johnston 
844*240afd8cSMark Johnston 		root = ecalloc(1, sizeof(*root));
845*240afd8cSMark Johnston 		root->inode = ecalloc(1, sizeof(*root->inode));
846*240afd8cSMark Johnston 		root->name = estrdup(".");
847*240afd8cSMark Johnston 		root->type = S_IFDIR;
848*240afd8cSMark Johnston 
849*240afd8cSMark Johnston 		stp = &root->inode->st;
850*240afd8cSMark Johnston 		stp->st_uid = 0;
851*240afd8cSMark Johnston 		stp->st_gid = 0;
852*240afd8cSMark Johnston 		stp->st_mode = S_IFDIR | 0755;
853*240afd8cSMark Johnston 	}
854*240afd8cSMark Johnston 	assert(root->type == S_IFDIR);
855*240afd8cSMark Johnston 	assert(fsnode_isroot(root));
856*240afd8cSMark Johnston 
857*240afd8cSMark Johnston 	/*
858*240afd8cSMark Johnston 	 * Initialize the object set for this dataset.
859*240afd8cSMark Johnston 	 */
860*240afd8cSMark Johnston 	os = objset_alloc(zfs, DMU_OST_ZFS);
861*240afd8cSMark Johnston 	masterobj = objset_dnode_alloc(os, DMU_OT_MASTER_NODE, &moid);
862*240afd8cSMark Johnston 	assert(moid == MASTER_NODE_OBJ);
863*240afd8cSMark Johnston 
864*240afd8cSMark Johnston 	memset(&fs, 0, sizeof(fs));
865*240afd8cSMark Johnston 	fs.os = os;
866*240afd8cSMark Johnston 
867*240afd8cSMark Johnston 	/*
868*240afd8cSMark Johnston 	 * Create the ZAP SA layout now since filesystem object dnodes will
869*240afd8cSMark Johnston 	 * refer to those attributes.
870*240afd8cSMark Johnston 	 */
871*240afd8cSMark Johnston 	saobjid = fs_set_zpl_attrs(zfs, &fs);
872*240afd8cSMark Johnston 
873*240afd8cSMark Johnston 	/*
874*240afd8cSMark Johnston 	 * Make a pass over the staged directory to detect hard links and assign
875*240afd8cSMark Johnston 	 * virtual dnode numbers.
876*240afd8cSMark Johnston 	 */
877*240afd8cSMark Johnston 	dnodecount = 1; /* root directory */
878*240afd8cSMark Johnston 	fsnode_foreach(root, fs_foreach_mark, &dnodecount);
879*240afd8cSMark Johnston 
880*240afd8cSMark Johnston 	/*
881*240afd8cSMark Johnston 	 * Make a second pass to populate the dataset with files from the
882*240afd8cSMark Johnston 	 * staged directory.  Most of our runtime is spent here.
883*240afd8cSMark Johnston 	 */
884*240afd8cSMark Johnston 	arg.dirfd = dirfd;
885*240afd8cSMark Johnston 	arg.zfs = zfs;
886*240afd8cSMark Johnston 	arg.fs = &fs;
887*240afd8cSMark Johnston 	SLIST_INIT(&arg.dirs);
888*240afd8cSMark Johnston 	fs_populate_dir(root, &arg);
889*240afd8cSMark Johnston 	assert(!SLIST_EMPTY(&arg.dirs));
890*240afd8cSMark Johnston 	fsnode_foreach(root, fs_foreach_populate, &arg);
891*240afd8cSMark Johnston 	assert(SLIST_EMPTY(&arg.dirs));
892*240afd8cSMark Johnston 	rootdirid = arg.rootdirid;
893*240afd8cSMark Johnston 
894*240afd8cSMark Johnston 	/*
895*240afd8cSMark Johnston 	 * Create an empty delete queue.  We don't do anything with it, but
896*240afd8cSMark Johnston 	 * OpenZFS will refuse to mount filesystems that don't have one.
897*240afd8cSMark Johnston 	 */
898*240afd8cSMark Johnston 	deleteq = objset_dnode_alloc(os, DMU_OT_UNLINKED_SET, &deleteqid);
899*240afd8cSMark Johnston 	zap_write(zfs, zap_alloc(os, deleteq));
900*240afd8cSMark Johnston 
901*240afd8cSMark Johnston 	/*
902*240afd8cSMark Johnston 	 * Populate and write the master node object.  This is a ZAP object
903*240afd8cSMark Johnston 	 * containing various dataset properties and the object IDs of the root
904*240afd8cSMark Johnston 	 * directory and delete queue.
905*240afd8cSMark Johnston 	 */
906*240afd8cSMark Johnston 	masterzap = zap_alloc(os, masterobj);
907*240afd8cSMark Johnston 	zap_add_uint64(masterzap, ZFS_ROOT_OBJ, rootdirid);
908*240afd8cSMark Johnston 	zap_add_uint64(masterzap, ZFS_UNLINKED_SET, deleteqid);
909*240afd8cSMark Johnston 	zap_add_uint64(masterzap, ZFS_SA_ATTRS, saobjid);
910*240afd8cSMark Johnston 	zap_add_uint64(masterzap, ZPL_VERSION_OBJ, 5 /* ZPL_VERSION_SA */);
911*240afd8cSMark Johnston 	zap_add_uint64(masterzap, "normalization", 0 /* off */);
912*240afd8cSMark Johnston 	zap_add_uint64(masterzap, "utf8only", 0 /* off */);
913*240afd8cSMark Johnston 	zap_add_uint64(masterzap, "casesensitivity", 0 /* case sensitive */);
914*240afd8cSMark Johnston 	zap_add_uint64(masterzap, "acltype", 2 /* NFSv4 */);
915*240afd8cSMark Johnston 	zap_write(zfs, masterzap);
916*240afd8cSMark Johnston 
917*240afd8cSMark Johnston 	/*
918*240afd8cSMark Johnston 	 * All finished with this object set, we may as well write it now.
919*240afd8cSMark Johnston 	 * The DSL layer will sum up the bytes consumed by each dataset using
920*240afd8cSMark Johnston 	 * information stored in the object set, so it can't be freed just yet.
921*240afd8cSMark Johnston 	 */
922*240afd8cSMark Johnston 	dsl_dir_dataset_write(zfs, os, dsldir);
923*240afd8cSMark Johnston 
924*240afd8cSMark Johnston 	if (fakedroot) {
925*240afd8cSMark Johnston 		free(root->inode);
926*240afd8cSMark Johnston 		free(root->name);
927*240afd8cSMark Johnston 		free(root);
928*240afd8cSMark Johnston 	}
929*240afd8cSMark Johnston 	free(fs.saoffs);
930*240afd8cSMark Johnston }
931*240afd8cSMark Johnston 
932*240afd8cSMark Johnston /*
933*240afd8cSMark Johnston  * Create an object set for each DSL directory which has a dataset and doesn't
934*240afd8cSMark Johnston  * already have an object set.
935*240afd8cSMark Johnston  */
936*240afd8cSMark Johnston static void
937*240afd8cSMark Johnston fs_build_unmounted(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg __unused)
938*240afd8cSMark Johnston {
939*240afd8cSMark Johnston 	if (dsl_dir_has_dataset(dsldir) && !dsl_dir_dataset_has_objset(dsldir))
940*240afd8cSMark Johnston 		fs_build_one(zfs, dsldir, NULL, -1);
941*240afd8cSMark Johnston }
942*240afd8cSMark Johnston 
943*240afd8cSMark Johnston /*
944*240afd8cSMark Johnston  * Create our datasets and populate them with files.
945*240afd8cSMark Johnston  */
946*240afd8cSMark Johnston void
947*240afd8cSMark Johnston fs_build(zfs_opt_t *zfs, int dirfd, fsnode *root)
948*240afd8cSMark Johnston {
949*240afd8cSMark Johnston 	/*
950*240afd8cSMark Johnston 	 * Run through our datasets and find the root fsnode for each one.  Each
951*240afd8cSMark Johnston 	 * root fsnode is flagged so that we can figure out which dataset it
952*240afd8cSMark Johnston 	 * belongs to.
953*240afd8cSMark Johnston 	 */
954*240afd8cSMark Johnston 	dsl_dir_foreach(zfs, zfs->rootdsldir, fs_layout_one, root);
955*240afd8cSMark Johnston 
956*240afd8cSMark Johnston 	/*
957*240afd8cSMark Johnston 	 * Did we find our boot filesystem?
958*240afd8cSMark Johnston 	 */
959*240afd8cSMark Johnston 	if (zfs->bootfs != NULL && !zap_entry_exists(zfs->poolprops, "bootfs"))
960*240afd8cSMark Johnston 		errx(1, "no mounted dataset matches bootfs property `%s'",
961*240afd8cSMark Johnston 		    zfs->bootfs);
962*240afd8cSMark Johnston 
963*240afd8cSMark Johnston 	/*
964*240afd8cSMark Johnston 	 * Traverse the file hierarchy starting from the root fsnode.  One
965*240afd8cSMark Johnston 	 * dataset, not necessarily the root dataset, must "own" the root
966*240afd8cSMark Johnston 	 * directory by having its mountpoint be equal to the root path.
967*240afd8cSMark Johnston 	 *
968*240afd8cSMark Johnston 	 * As roots of other datasets are encountered during the traversal,
969*240afd8cSMark Johnston 	 * fs_build_one() recursively creates the corresponding object sets and
970*240afd8cSMark Johnston 	 * populates them.  Once this function has returned, all datasets will
971*240afd8cSMark Johnston 	 * have been fully populated.
972*240afd8cSMark Johnston 	 */
973*240afd8cSMark Johnston 	fs_build_one(zfs, root->inode->param, root, dirfd);
974*240afd8cSMark Johnston 
975*240afd8cSMark Johnston 	/*
976*240afd8cSMark Johnston 	 * Now create object sets for datasets whose mountpoints weren't found
977*240afd8cSMark Johnston 	 * in the staging directory, either because there is no mountpoint, or
978*240afd8cSMark Johnston 	 * because the mountpoint doesn't correspond to an existing directory.
979*240afd8cSMark Johnston 	 */
980*240afd8cSMark Johnston 	dsl_dir_foreach(zfs, zfs->rootdsldir, fs_build_unmounted, NULL);
981*240afd8cSMark Johnston }
982