xref: /freebsd/usr.sbin/makefs/zfs/fs.c (revision ce878284318e71217d8d8f43f7d590b6c338d3aa)
1240afd8cSMark Johnston /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
3240afd8cSMark Johnston  *
4240afd8cSMark Johnston  * Copyright (c) 2022 The FreeBSD Foundation
5240afd8cSMark Johnston  *
6240afd8cSMark Johnston  * This software was developed by Mark Johnston under sponsorship from
7240afd8cSMark Johnston  * the FreeBSD Foundation.
8240afd8cSMark Johnston  *
9240afd8cSMark Johnston  * Redistribution and use in source and binary forms, with or without
10240afd8cSMark Johnston  * modification, are permitted provided that the following conditions are
11240afd8cSMark Johnston  * met:
12240afd8cSMark Johnston  * 1. Redistributions of source code must retain the above copyright
13240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer.
14240afd8cSMark Johnston  * 2. Redistributions in binary form must reproduce the above copyright
15240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer in
16240afd8cSMark Johnston  *    the documentation and/or other materials provided with the distribution.
17240afd8cSMark Johnston  *
18240afd8cSMark Johnston  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19240afd8cSMark Johnston  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20240afd8cSMark Johnston  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21240afd8cSMark Johnston  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22240afd8cSMark Johnston  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23240afd8cSMark Johnston  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24240afd8cSMark Johnston  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25240afd8cSMark Johnston  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26240afd8cSMark Johnston  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27240afd8cSMark Johnston  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28240afd8cSMark Johnston  * SUCH DAMAGE.
29240afd8cSMark Johnston  */
30240afd8cSMark Johnston 
31240afd8cSMark Johnston #include <sys/stat.h>
32240afd8cSMark Johnston 
33240afd8cSMark Johnston #include <assert.h>
34c6890399SJessica Clarke #include <dirent.h>
35240afd8cSMark Johnston #include <fcntl.h>
36c6890399SJessica Clarke #include <stdlib.h>
37240afd8cSMark Johnston #include <string.h>
38240afd8cSMark Johnston #include <unistd.h>
39240afd8cSMark Johnston 
40240afd8cSMark Johnston #include <util.h>
41240afd8cSMark Johnston 
42240afd8cSMark Johnston #include "makefs.h"
43240afd8cSMark Johnston #include "zfs.h"
44240afd8cSMark Johnston 
45240afd8cSMark Johnston typedef struct {
46240afd8cSMark Johnston 	const char	*name;
47240afd8cSMark Johnston 	unsigned int	id;
48240afd8cSMark Johnston 	uint16_t	size;
49240afd8cSMark Johnston 	sa_bswap_type_t	bs;
50240afd8cSMark Johnston } zfs_sattr_t;
51240afd8cSMark Johnston 
52240afd8cSMark Johnston typedef struct zfs_fs {
53240afd8cSMark Johnston 	zfs_objset_t	*os;
54240afd8cSMark Johnston 
55240afd8cSMark Johnston 	/* Offset table for system attributes, indexed by a zpl_attr_t. */
56240afd8cSMark Johnston 	uint16_t	*saoffs;
57240afd8cSMark Johnston 	size_t		sacnt;
58240afd8cSMark Johnston 	const zfs_sattr_t *satab;
59240afd8cSMark Johnston } zfs_fs_t;
60240afd8cSMark Johnston 
/*
 * ZPL system attribute identifiers.
 *
 * The ordering carries no meaning of its own: it is simply the one hard-coded
 * by OpenZFS, taken from a zdb dump of the SA_REGISTRY table.  The values are
 * spelled out explicitly since they are used as table indices.
 */
typedef enum zpl_attr {
	ZPL_ATIME	= 0,
	ZPL_MTIME	= 1,
	ZPL_CTIME	= 2,
	ZPL_CRTIME	= 3,
	ZPL_GEN		= 4,
	ZPL_MODE	= 5,
	ZPL_SIZE	= 6,
	ZPL_PARENT	= 7,
	ZPL_LINKS	= 8,
	ZPL_XATTR	= 9,
	ZPL_RDEV	= 10,
	ZPL_FLAGS	= 11,
	ZPL_UID		= 12,
	ZPL_GID		= 13,
	ZPL_PAD		= 14,
	ZPL_ZNODE_ACL	= 15,
	ZPL_DACL_COUNT	= 16,
	ZPL_SYMLINK	= 17,
	ZPL_SCANSTAMP	= 18,
	ZPL_DACL_ACES	= 19,
	ZPL_DXATTR	= 20,
	ZPL_PROJID	= 21,
} zpl_attr_t;
89240afd8cSMark Johnston 
90240afd8cSMark Johnston /*
91240afd8cSMark Johnston  * This table must be kept in sync with zpl_attr_layout[] and zpl_attr_t.
92240afd8cSMark Johnston  */
93240afd8cSMark Johnston static const zfs_sattr_t zpl_attrs[] = {
94240afd8cSMark Johnston #define	_ZPL_ATTR(n, s, b)	{ .name = #n, .id = n, .size = s, .bs = b }
95240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_ATIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
96240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_MTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
97240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_CTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
98240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_CRTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY),
99240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_GEN, sizeof(uint64_t), SA_UINT64_ARRAY),
100240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_MODE, sizeof(uint64_t), SA_UINT64_ARRAY),
101240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_SIZE, sizeof(uint64_t), SA_UINT64_ARRAY),
102240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_PARENT, sizeof(uint64_t), SA_UINT64_ARRAY),
103240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_LINKS, sizeof(uint64_t), SA_UINT64_ARRAY),
104240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_XATTR, sizeof(uint64_t), SA_UINT64_ARRAY),
105240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_RDEV, sizeof(uint64_t), SA_UINT64_ARRAY),
106240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_FLAGS, sizeof(uint64_t), SA_UINT64_ARRAY),
107240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_UID, sizeof(uint64_t), SA_UINT64_ARRAY),
108240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_GID, sizeof(uint64_t), SA_UINT64_ARRAY),
109240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_PAD, sizeof(uint64_t), SA_UINT64_ARRAY),
110240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_ZNODE_ACL, 88, SA_UINT64_ARRAY),
111240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_DACL_COUNT, sizeof(uint64_t), SA_UINT64_ARRAY),
112240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_SYMLINK, 0, SA_UINT8_ARRAY),
113240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_SCANSTAMP, sizeof(uint64_t) * 4, SA_UINT8_ARRAY),
114240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_DACL_ACES, 0, SA_ACL),
115240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_DXATTR, 0, SA_UINT8_ARRAY),
116240afd8cSMark Johnston 	_ZPL_ATTR(ZPL_PROJID, sizeof(uint64_t), SA_UINT64_ARRAY),
117240afd8cSMark Johnston #undef ZPL_ATTR
118240afd8cSMark Johnston };
119240afd8cSMark Johnston 
120240afd8cSMark Johnston /*
121240afd8cSMark Johnston  * This layout matches that of a filesystem created using OpenZFS on FreeBSD.
122240afd8cSMark Johnston  * It need not match in general, but FreeBSD's loader doesn't bother parsing the
123240afd8cSMark Johnston  * layout and just hard-codes attribute offsets.
124240afd8cSMark Johnston  */
125240afd8cSMark Johnston static const sa_attr_type_t zpl_attr_layout[] = {
126240afd8cSMark Johnston 	ZPL_MODE,
127240afd8cSMark Johnston 	ZPL_SIZE,
128240afd8cSMark Johnston 	ZPL_GEN,
129240afd8cSMark Johnston 	ZPL_UID,
130240afd8cSMark Johnston 	ZPL_GID,
131240afd8cSMark Johnston 	ZPL_PARENT,
132240afd8cSMark Johnston 	ZPL_FLAGS,
133240afd8cSMark Johnston 	ZPL_ATIME,
134240afd8cSMark Johnston 	ZPL_MTIME,
135240afd8cSMark Johnston 	ZPL_CTIME,
136240afd8cSMark Johnston 	ZPL_CRTIME,
137240afd8cSMark Johnston 	ZPL_LINKS,
138240afd8cSMark Johnston 	ZPL_DACL_COUNT,
139240afd8cSMark Johnston 	ZPL_DACL_ACES,
140240afd8cSMark Johnston 	ZPL_SYMLINK,
141240afd8cSMark Johnston };
142240afd8cSMark Johnston 
/*
 * Keys for the ZPL attribute tables in the SA layout ZAP.  Indices 0 and 1
 * are reserved for legacy attribute encoding, so numbering starts at 2.
 */
/* Layout selected for regular files and directories. */
#define	SA_LAYOUT_INDEX_DEFAULT	2
/* Layout selected for symbolic links. */
#define	SA_LAYOUT_INDEX_SYMLINK	3
149240afd8cSMark Johnston 
150240afd8cSMark Johnston struct fs_populate_dir {
151240afd8cSMark Johnston 	SLIST_ENTRY(fs_populate_dir) next;
152240afd8cSMark Johnston 	int			dirfd;
153240afd8cSMark Johnston 	uint64_t		objid;
154240afd8cSMark Johnston 	zfs_zap_t		*zap;
155240afd8cSMark Johnston };
156240afd8cSMark Johnston 
157240afd8cSMark Johnston struct fs_populate_arg {
158240afd8cSMark Johnston 	zfs_opt_t	*zfs;
159240afd8cSMark Johnston 	zfs_fs_t	*fs;			/* owning filesystem */
160240afd8cSMark Johnston 	uint64_t	rootdirid;		/* root directory dnode ID */
1618eca3207SMark Johnston 	int		rootdirfd;		/* root directory fd */
162240afd8cSMark Johnston 	SLIST_HEAD(, fs_populate_dir) dirs;	/* stack of directories */
163240afd8cSMark Johnston };
164240afd8cSMark Johnston 
165240afd8cSMark Johnston static void fs_build_one(zfs_opt_t *, zfs_dsl_dir_t *, fsnode *, int);
166240afd8cSMark Johnston 
/*
 * Close a file descriptor, exiting with an error message if close(2) fails.
 */
static void
eclose(int fd)
{
	int error;

	error = close(fd);
	if (error != 0)
		err(1, "close");
}
1738eca3207SMark Johnston 
174240afd8cSMark Johnston static bool
fsnode_isroot(const fsnode * cur)175240afd8cSMark Johnston fsnode_isroot(const fsnode *cur)
176240afd8cSMark Johnston {
177240afd8cSMark Johnston 	return (strcmp(cur->name, ".") == 0);
178240afd8cSMark Johnston }
179240afd8cSMark Johnston 
180*ce878284SMark Johnston static bool
fsnode_valid(const fsnode * cur)181*ce878284SMark Johnston fsnode_valid(const fsnode *cur)
182*ce878284SMark Johnston {
183*ce878284SMark Johnston 	return (cur->type == S_IFREG || cur->type == S_IFDIR ||
184*ce878284SMark Johnston 	    cur->type == S_IFLNK);
185*ce878284SMark Johnston }
186*ce878284SMark Johnston 
187240afd8cSMark Johnston /*
188240afd8cSMark Johnston  * Visit each node in a directory hierarchy, in pre-order depth-first order.
189240afd8cSMark Johnston  */
190240afd8cSMark Johnston static void
fsnode_foreach(fsnode * root,int (* cb)(fsnode *,void *),void * arg)191240afd8cSMark Johnston fsnode_foreach(fsnode *root, int (*cb)(fsnode *, void *), void *arg)
192240afd8cSMark Johnston {
193240afd8cSMark Johnston 	assert(root->type == S_IFDIR);
194240afd8cSMark Johnston 
195240afd8cSMark Johnston 	for (fsnode *cur = root; cur != NULL; cur = cur->next) {
196*ce878284SMark Johnston 		if (!fsnode_valid(cur)) {
197*ce878284SMark Johnston 			warnx("skipping unhandled %s %s/%s",
198*ce878284SMark Johnston 			    inode_type(cur->type), cur->path, cur->name);
199*ce878284SMark Johnston 			continue;
200*ce878284SMark Johnston 		}
201240afd8cSMark Johnston 		if (cb(cur, arg) == 0)
202240afd8cSMark Johnston 			continue;
203240afd8cSMark Johnston 		if (cur->type == S_IFDIR && cur->child != NULL)
204240afd8cSMark Johnston 			fsnode_foreach(cur->child, cb, arg);
205240afd8cSMark Johnston 	}
206240afd8cSMark Johnston }
207240afd8cSMark Johnston 
208240afd8cSMark Johnston static void
fs_populate_dirent(struct fs_populate_arg * arg,fsnode * cur,uint64_t dnid)209240afd8cSMark Johnston fs_populate_dirent(struct fs_populate_arg *arg, fsnode *cur, uint64_t dnid)
210240afd8cSMark Johnston {
211240afd8cSMark Johnston 	struct fs_populate_dir *dir;
212240afd8cSMark Johnston 	uint64_t type;
213240afd8cSMark Johnston 
214240afd8cSMark Johnston 	switch (cur->type) {
215240afd8cSMark Johnston 	case S_IFREG:
216240afd8cSMark Johnston 		type = DT_REG;
217240afd8cSMark Johnston 		break;
218240afd8cSMark Johnston 	case S_IFDIR:
219240afd8cSMark Johnston 		type = DT_DIR;
220240afd8cSMark Johnston 		break;
221240afd8cSMark Johnston 	case S_IFLNK:
222240afd8cSMark Johnston 		type = DT_LNK;
223240afd8cSMark Johnston 		break;
224240afd8cSMark Johnston 	default:
225240afd8cSMark Johnston 		assert(0);
226240afd8cSMark Johnston 	}
227240afd8cSMark Johnston 
228240afd8cSMark Johnston 	dir = SLIST_FIRST(&arg->dirs);
229240afd8cSMark Johnston 	zap_add_uint64(dir->zap, cur->name, ZFS_DIRENT_MAKE(type, dnid));
230240afd8cSMark Johnston }
231240afd8cSMark Johnston 
232240afd8cSMark Johnston static void
fs_populate_attr(zfs_fs_t * fs,char * attrbuf,const void * val,uint16_t ind,size_t * szp)233240afd8cSMark Johnston fs_populate_attr(zfs_fs_t *fs, char *attrbuf, const void *val, uint16_t ind,
234240afd8cSMark Johnston     size_t *szp)
235240afd8cSMark Johnston {
236240afd8cSMark Johnston 	assert(ind < fs->sacnt);
237240afd8cSMark Johnston 	assert(fs->saoffs[ind] != 0xffff);
238240afd8cSMark Johnston 
239240afd8cSMark Johnston 	memcpy(attrbuf + fs->saoffs[ind], val, fs->satab[ind].size);
240240afd8cSMark Johnston 	*szp += fs->satab[ind].size;
241240afd8cSMark Johnston }
242240afd8cSMark Johnston 
243240afd8cSMark Johnston static void
fs_populate_varszattr(zfs_fs_t * fs,char * attrbuf,const void * val,size_t valsz,size_t varoff,uint16_t ind,size_t * szp)244240afd8cSMark Johnston fs_populate_varszattr(zfs_fs_t *fs, char *attrbuf, const void *val,
245240afd8cSMark Johnston     size_t valsz, size_t varoff, uint16_t ind, size_t *szp)
246240afd8cSMark Johnston {
247240afd8cSMark Johnston 	assert(ind < fs->sacnt);
248240afd8cSMark Johnston 	assert(fs->saoffs[ind] != 0xffff);
249240afd8cSMark Johnston 	assert(fs->satab[ind].size == 0);
250240afd8cSMark Johnston 
251240afd8cSMark Johnston 	memcpy(attrbuf + fs->saoffs[ind] + varoff, val, valsz);
252240afd8cSMark Johnston 	*szp += valsz;
253240afd8cSMark Johnston }
254240afd8cSMark Johnston 
2558eca3207SMark Johnston /*
2568eca3207SMark Johnston  * Derive the relative fd/path combo needed to access a file.  Ideally we'd
2578eca3207SMark Johnston  * always be able to use relative lookups (i.e., use the *at() system calls),
2588eca3207SMark Johnston  * since they require less path translation and are more amenable to sandboxing,
2598eca3207SMark Johnston  * but the handling of multiple staging directories makes that difficult.  To
2608eca3207SMark Johnston  * make matters worse, we have no choice but to use relative lookups when
2618eca3207SMark Johnston  * dealing with an mtree manifest, so both mechanisms are implemented.
2628eca3207SMark Johnston  */
2638eca3207SMark Johnston static void
fs_populate_path(const fsnode * cur,struct fs_populate_arg * arg,char * path,size_t sz,int * dirfdp)2648eca3207SMark Johnston fs_populate_path(const fsnode *cur, struct fs_populate_arg *arg,
2658eca3207SMark Johnston     char *path, size_t sz, int *dirfdp)
2668eca3207SMark Johnston {
267aac389a3SBrooks Davis 	if (cur->contents != NULL) {
268aac389a3SBrooks Davis 		size_t n;
269aac389a3SBrooks Davis 
270aac389a3SBrooks Davis 		*dirfdp = AT_FDCWD;
271aac389a3SBrooks Davis 		n = strlcpy(path, cur->contents, sz);
272aac389a3SBrooks Davis 		assert(n < sz);
273aac389a3SBrooks Davis 	} else if (cur->root == NULL) {
2748eca3207SMark Johnston 		size_t n;
2758eca3207SMark Johnston 
2768eca3207SMark Johnston 		*dirfdp = SLIST_FIRST(&arg->dirs)->dirfd;
2778eca3207SMark Johnston 		n = strlcpy(path, cur->name, sz);
2788eca3207SMark Johnston 		assert(n < sz);
2798eca3207SMark Johnston 	} else {
2808eca3207SMark Johnston 		int n;
2818eca3207SMark Johnston 
2828eca3207SMark Johnston 		*dirfdp = AT_FDCWD;
2838eca3207SMark Johnston 		n = snprintf(path, sz, "%s/%s/%s",
2848eca3207SMark Johnston 		    cur->root, cur->path, cur->name);
2858eca3207SMark Johnston 		assert(n >= 0);
2868eca3207SMark Johnston 		assert((size_t)n < sz);
2878eca3207SMark Johnston 	}
2888eca3207SMark Johnston }
2898eca3207SMark Johnston 
2908eca3207SMark Johnston static int
fs_open(const fsnode * cur,struct fs_populate_arg * arg,int flags)2918eca3207SMark Johnston fs_open(const fsnode *cur, struct fs_populate_arg *arg, int flags)
2928eca3207SMark Johnston {
2938eca3207SMark Johnston 	char path[PATH_MAX];
2948eca3207SMark Johnston 	int fd;
2958eca3207SMark Johnston 
2968eca3207SMark Johnston 	fs_populate_path(cur, arg, path, sizeof(path), &fd);
2978eca3207SMark Johnston 
2988eca3207SMark Johnston 	fd = openat(fd, path, flags);
2998eca3207SMark Johnston 	if (fd < 0)
3008eca3207SMark Johnston 		err(1, "openat(%s)", path);
3018eca3207SMark Johnston 	return (fd);
3028eca3207SMark Johnston }
3038eca3207SMark Johnston 
3046e011d15SBrooks Davis static int
fs_open_can_fail(const fsnode * cur,struct fs_populate_arg * arg,int flags)3056e011d15SBrooks Davis fs_open_can_fail(const fsnode *cur, struct fs_populate_arg *arg, int flags)
3066e011d15SBrooks Davis {
3076e011d15SBrooks Davis 	int fd;
3086e011d15SBrooks Davis 	char path[PATH_MAX];
3096e011d15SBrooks Davis 
3106e011d15SBrooks Davis 	fs_populate_path(cur, arg, path, sizeof(path), &fd);
3116e011d15SBrooks Davis 
3126e011d15SBrooks Davis 	return (openat(fd, path, flags));
3136e011d15SBrooks Davis }
3146e011d15SBrooks Davis 
3158eca3207SMark Johnston static void
fs_readlink(const fsnode * cur,struct fs_populate_arg * arg,char * buf,size_t bufsz)3168eca3207SMark Johnston fs_readlink(const fsnode *cur, struct fs_populate_arg *arg,
3178eca3207SMark Johnston     char *buf, size_t bufsz)
3188eca3207SMark Johnston {
3198eca3207SMark Johnston 	char path[PATH_MAX];
3208eca3207SMark Johnston 	int fd;
3218eca3207SMark Johnston 
322b78d5b42SBrooks Davis 	if (cur->symlink != NULL) {
323b78d5b42SBrooks Davis 		size_t n;
324b78d5b42SBrooks Davis 
325b78d5b42SBrooks Davis 		n = strlcpy(buf, cur->symlink, bufsz);
326b78d5b42SBrooks Davis 		assert(n < bufsz);
327b78d5b42SBrooks Davis 	} else {
328b78d5b42SBrooks Davis 		ssize_t n;
329b78d5b42SBrooks Davis 
3308eca3207SMark Johnston 		fs_populate_path(cur, arg, path, sizeof(path), &fd);
3318eca3207SMark Johnston 
3328eca3207SMark Johnston 		n = readlinkat(fd, path, buf, bufsz - 1);
3338eca3207SMark Johnston 		if (n == -1)
3348eca3207SMark Johnston 			err(1, "readlinkat(%s)", cur->name);
3358eca3207SMark Johnston 		buf[n] = '\0';
3368eca3207SMark Johnston 	}
337b78d5b42SBrooks Davis }
3388eca3207SMark Johnston 
339240afd8cSMark Johnston static void
fs_populate_time(zfs_fs_t * fs,char * attrbuf,struct timespec * ts,uint16_t ind,size_t * szp)340b0ce7dfcSJessica Clarke fs_populate_time(zfs_fs_t *fs, char *attrbuf, struct timespec *ts,
341b0ce7dfcSJessica Clarke     uint16_t ind, size_t *szp)
342b0ce7dfcSJessica Clarke {
343b0ce7dfcSJessica Clarke 	uint64_t timebuf[2];
344b0ce7dfcSJessica Clarke 
345b0ce7dfcSJessica Clarke 	assert(ind < fs->sacnt);
346b0ce7dfcSJessica Clarke 	assert(fs->saoffs[ind] != 0xffff);
347b0ce7dfcSJessica Clarke 	assert(fs->satab[ind].size == sizeof(timebuf));
348b0ce7dfcSJessica Clarke 
349b0ce7dfcSJessica Clarke 	timebuf[0] = ts->tv_sec;
350b0ce7dfcSJessica Clarke 	timebuf[1] = ts->tv_nsec;
351b0ce7dfcSJessica Clarke 	fs_populate_attr(fs, attrbuf, timebuf, ind, szp);
352b0ce7dfcSJessica Clarke }
353b0ce7dfcSJessica Clarke 
354b0ce7dfcSJessica Clarke static void
fs_populate_sattrs(struct fs_populate_arg * arg,const fsnode * cur,dnode_phys_t * dnode)355240afd8cSMark Johnston fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur,
356240afd8cSMark Johnston     dnode_phys_t *dnode)
357240afd8cSMark Johnston {
358240afd8cSMark Johnston 	char target[PATH_MAX];
359240afd8cSMark Johnston 	zfs_fs_t *fs;
360240afd8cSMark Johnston 	zfs_ace_hdr_t aces[3];
361240afd8cSMark Johnston 	struct stat *sb;
362240afd8cSMark Johnston 	sa_hdr_phys_t *sahdr;
363240afd8cSMark Johnston 	uint64_t daclcount, flags, gen, gid, links, mode, parent, objsize, uid;
364240afd8cSMark Johnston 	char *attrbuf;
365240afd8cSMark Johnston 	size_t bonussz, hdrsz;
366240afd8cSMark Johnston 	int layout;
367240afd8cSMark Johnston 
368240afd8cSMark Johnston 	assert(dnode->dn_bonustype == DMU_OT_SA);
369240afd8cSMark Johnston 	assert(dnode->dn_nblkptr == 1);
370240afd8cSMark Johnston 
371240afd8cSMark Johnston 	fs = arg->fs;
372240afd8cSMark Johnston 	sb = &cur->inode->st;
373240afd8cSMark Johnston 
374240afd8cSMark Johnston 	switch (cur->type) {
375240afd8cSMark Johnston 	case S_IFREG:
376240afd8cSMark Johnston 		layout = SA_LAYOUT_INDEX_DEFAULT;
377240afd8cSMark Johnston 		links = cur->inode->nlink;
378240afd8cSMark Johnston 		objsize = sb->st_size;
379240afd8cSMark Johnston 		parent = SLIST_FIRST(&arg->dirs)->objid;
380240afd8cSMark Johnston 		break;
381240afd8cSMark Johnston 	case S_IFDIR:
382240afd8cSMark Johnston 		layout = SA_LAYOUT_INDEX_DEFAULT;
383240afd8cSMark Johnston 		links = 1; /* .. */
384240afd8cSMark Johnston 		objsize = 1; /* .. */
385240afd8cSMark Johnston 
386240afd8cSMark Johnston 		/*
387240afd8cSMark Johnston 		 * The size of a ZPL directory is the number of entries
388240afd8cSMark Johnston 		 * (including "." and ".."), and the link count is the number of
389240afd8cSMark Johnston 		 * entries which are directories (including "." and "..").
390240afd8cSMark Johnston 		 */
391240afd8cSMark Johnston 		for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child;
392240afd8cSMark Johnston 		    c != NULL; c = c->next) {
393*ce878284SMark Johnston 			switch (c->type) {
394*ce878284SMark Johnston 			case S_IFDIR:
395240afd8cSMark Johnston 				links++;
396*ce878284SMark Johnston 				/* FALLTHROUGH */
397*ce878284SMark Johnston 			case S_IFREG:
398*ce878284SMark Johnston 			case S_IFLNK:
399240afd8cSMark Johnston 				objsize++;
400*ce878284SMark Johnston 				break;
401*ce878284SMark Johnston 			}
402240afd8cSMark Johnston 		}
403240afd8cSMark Johnston 
404240afd8cSMark Johnston 		/* The root directory is its own parent. */
405240afd8cSMark Johnston 		parent = SLIST_EMPTY(&arg->dirs) ?
406240afd8cSMark Johnston 		    arg->rootdirid : SLIST_FIRST(&arg->dirs)->objid;
407240afd8cSMark Johnston 		break;
4088eca3207SMark Johnston 	case S_IFLNK:
4098eca3207SMark Johnston 		fs_readlink(cur, arg, target, sizeof(target));
410240afd8cSMark Johnston 
411240afd8cSMark Johnston 		layout = SA_LAYOUT_INDEX_SYMLINK;
412240afd8cSMark Johnston 		links = 1;
413240afd8cSMark Johnston 		objsize = strlen(target);
414240afd8cSMark Johnston 		parent = SLIST_FIRST(&arg->dirs)->objid;
415240afd8cSMark Johnston 		break;
416240afd8cSMark Johnston 	default:
417240afd8cSMark Johnston 		assert(0);
418240afd8cSMark Johnston 	}
419240afd8cSMark Johnston 
420240afd8cSMark Johnston 	daclcount = nitems(aces);
42150565cf5SMark Johnston 	flags = ZFS_ACL_TRIVIAL | ZFS_ACL_AUTO_INHERIT | ZFS_ARCHIVE |
42250565cf5SMark Johnston 	    ZFS_AV_MODIFIED;
423240afd8cSMark Johnston 	gen = 1;
424240afd8cSMark Johnston 	gid = sb->st_gid;
425240afd8cSMark Johnston 	mode = sb->st_mode;
426240afd8cSMark Johnston 	uid = sb->st_uid;
427240afd8cSMark Johnston 
428240afd8cSMark Johnston 	memset(aces, 0, sizeof(aces));
429240afd8cSMark Johnston 	aces[0].z_flags = ACE_OWNER;
430240afd8cSMark Johnston 	aces[0].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
431240afd8cSMark Johnston 	aces[0].z_access_mask = ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER |
432240afd8cSMark Johnston 	    ACE_WRITE_ACL | ACE_WRITE_NAMED_ATTRS | ACE_READ_ACL |
433240afd8cSMark Johnston 	    ACE_READ_ATTRIBUTES | ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
434240afd8cSMark Johnston 	if ((mode & S_IRUSR) != 0)
435240afd8cSMark Johnston 		aces[0].z_access_mask |= ACE_READ_DATA;
436240afd8cSMark Johnston 	if ((mode & S_IWUSR) != 0)
437240afd8cSMark Johnston 		aces[0].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA;
438240afd8cSMark Johnston 	if ((mode & S_IXUSR) != 0)
439240afd8cSMark Johnston 		aces[0].z_access_mask |= ACE_EXECUTE;
440240afd8cSMark Johnston 
441240afd8cSMark Johnston 	aces[1].z_flags = ACE_GROUP | ACE_IDENTIFIER_GROUP;
442240afd8cSMark Johnston 	aces[1].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
443240afd8cSMark Johnston 	aces[1].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES |
444240afd8cSMark Johnston 	    ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
445240afd8cSMark Johnston 	if ((mode & S_IRGRP) != 0)
446240afd8cSMark Johnston 		aces[1].z_access_mask |= ACE_READ_DATA;
447240afd8cSMark Johnston 	if ((mode & S_IWGRP) != 0)
448240afd8cSMark Johnston 		aces[1].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA;
449240afd8cSMark Johnston 	if ((mode & S_IXGRP) != 0)
450240afd8cSMark Johnston 		aces[1].z_access_mask |= ACE_EXECUTE;
451240afd8cSMark Johnston 
452240afd8cSMark Johnston 	aces[2].z_flags = ACE_EVERYONE;
453240afd8cSMark Johnston 	aces[2].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
454240afd8cSMark Johnston 	aces[2].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES |
455240afd8cSMark Johnston 	    ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
456240afd8cSMark Johnston 	if ((mode & S_IROTH) != 0)
457240afd8cSMark Johnston 		aces[2].z_access_mask |= ACE_READ_DATA;
458240afd8cSMark Johnston 	if ((mode & S_IWOTH) != 0)
459240afd8cSMark Johnston 		aces[2].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA;
460240afd8cSMark Johnston 	if ((mode & S_IXOTH) != 0)
461240afd8cSMark Johnston 		aces[2].z_access_mask |= ACE_EXECUTE;
462240afd8cSMark Johnston 
463240afd8cSMark Johnston 	switch (layout) {
464240afd8cSMark Johnston 	case SA_LAYOUT_INDEX_DEFAULT:
465240afd8cSMark Johnston 		/* At most one variable-length attribute. */
466240afd8cSMark Johnston 		hdrsz = sizeof(uint64_t);
467240afd8cSMark Johnston 		break;
468240afd8cSMark Johnston 	case SA_LAYOUT_INDEX_SYMLINK:
469240afd8cSMark Johnston 		/* At most five variable-length attributes. */
470240afd8cSMark Johnston 		hdrsz = sizeof(uint64_t) * 2;
471240afd8cSMark Johnston 		break;
472240afd8cSMark Johnston 	default:
473240afd8cSMark Johnston 		assert(0);
474240afd8cSMark Johnston 	}
475240afd8cSMark Johnston 
476240afd8cSMark Johnston 	sahdr = (sa_hdr_phys_t *)DN_BONUS(dnode);
477240afd8cSMark Johnston 	sahdr->sa_magic = SA_MAGIC;
478240afd8cSMark Johnston 	SA_HDR_LAYOUT_INFO_ENCODE(sahdr->sa_layout_info, layout, hdrsz);
479240afd8cSMark Johnston 
480240afd8cSMark Johnston 	bonussz = SA_HDR_SIZE(sahdr);
481240afd8cSMark Johnston 	attrbuf = (char *)sahdr + SA_HDR_SIZE(sahdr);
482240afd8cSMark Johnston 
483240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &daclcount, ZPL_DACL_COUNT, &bonussz);
484240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &flags, ZPL_FLAGS, &bonussz);
485240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &gen, ZPL_GEN, &bonussz);
486240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &gid, ZPL_GID, &bonussz);
487240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &links, ZPL_LINKS, &bonussz);
488240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &mode, ZPL_MODE, &bonussz);
489240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &parent, ZPL_PARENT, &bonussz);
490240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &objsize, ZPL_SIZE, &bonussz);
491240afd8cSMark Johnston 	fs_populate_attr(fs, attrbuf, &uid, ZPL_UID, &bonussz);
492240afd8cSMark Johnston 
493240afd8cSMark Johnston 	/*
494240afd8cSMark Johnston 	 * We deliberately set atime = mtime here to ensure that images are
495240afd8cSMark Johnston 	 * reproducible.
496240afd8cSMark Johnston 	 */
497b0ce7dfcSJessica Clarke 	fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_ATIME, &bonussz);
498b0ce7dfcSJessica Clarke 	fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CTIME, &bonussz);
499b0ce7dfcSJessica Clarke 	fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_MTIME, &bonussz);
500c6890399SJessica Clarke #ifdef __linux__
501c6890399SJessica Clarke 	/* Linux has no st_birthtim; approximate with st_ctim */
502c6890399SJessica Clarke 	fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CRTIME, &bonussz);
503c6890399SJessica Clarke #else
504b0ce7dfcSJessica Clarke 	fs_populate_time(fs, attrbuf, &sb->st_birthtim, ZPL_CRTIME, &bonussz);
505c6890399SJessica Clarke #endif
506240afd8cSMark Johnston 
507240afd8cSMark Johnston 	fs_populate_varszattr(fs, attrbuf, aces, sizeof(aces), 0,
508240afd8cSMark Johnston 	    ZPL_DACL_ACES, &bonussz);
509240afd8cSMark Johnston 	sahdr->sa_lengths[0] = sizeof(aces);
510240afd8cSMark Johnston 
511240afd8cSMark Johnston 	if (cur->type == S_IFLNK) {
512240afd8cSMark Johnston 		assert(layout == SA_LAYOUT_INDEX_SYMLINK);
513240afd8cSMark Johnston 		/* Need to use a spill block pointer if the target is long. */
514240afd8cSMark Johnston 		assert(bonussz + objsize <= DN_OLD_MAX_BONUSLEN);
515240afd8cSMark Johnston 		fs_populate_varszattr(fs, attrbuf, target, objsize,
516240afd8cSMark Johnston 		    sahdr->sa_lengths[0], ZPL_SYMLINK, &bonussz);
517240afd8cSMark Johnston 		sahdr->sa_lengths[1] = (uint16_t)objsize;
518240afd8cSMark Johnston 	}
519240afd8cSMark Johnston 
520240afd8cSMark Johnston 	dnode->dn_bonuslen = bonussz;
521240afd8cSMark Johnston }
522240afd8cSMark Johnston 
523240afd8cSMark Johnston static void
fs_populate_file(fsnode * cur,struct fs_populate_arg * arg)524240afd8cSMark Johnston fs_populate_file(fsnode *cur, struct fs_populate_arg *arg)
525240afd8cSMark Johnston {
526240afd8cSMark Johnston 	struct dnode_cursor *c;
527240afd8cSMark Johnston 	dnode_phys_t *dnode;
528240afd8cSMark Johnston 	zfs_opt_t *zfs;
529240afd8cSMark Johnston 	char *buf;
530240afd8cSMark Johnston 	uint64_t dnid;
531240afd8cSMark Johnston 	ssize_t n;
532240afd8cSMark Johnston 	size_t bufsz;
533ef20cd33SMark Johnston 	off_t nbytes, reqbytes, size;
534240afd8cSMark Johnston 	int fd;
535240afd8cSMark Johnston 
536240afd8cSMark Johnston 	assert(cur->type == S_IFREG);
537240afd8cSMark Johnston 	assert((cur->inode->flags & FI_ROOT) == 0);
538240afd8cSMark Johnston 
539240afd8cSMark Johnston 	zfs = arg->zfs;
540240afd8cSMark Johnston 
541240afd8cSMark Johnston 	assert(cur->inode->ino != 0);
542240afd8cSMark Johnston 	if ((cur->inode->flags & FI_ALLOCATED) != 0) {
543240afd8cSMark Johnston 		/*
544240afd8cSMark Johnston 		 * This is a hard link of an existing file.
545240afd8cSMark Johnston 		 *
546240afd8cSMark Johnston 		 * XXX-MJ need to check whether it crosses datasets, add a test
547240afd8cSMark Johnston 		 * case for that
548240afd8cSMark Johnston 		 */
549240afd8cSMark Johnston 		fs_populate_dirent(arg, cur, cur->inode->ino);
550240afd8cSMark Johnston 		return;
551240afd8cSMark Johnston 	}
552240afd8cSMark Johnston 
553240afd8cSMark Johnston 	dnode = objset_dnode_bonus_alloc(arg->fs->os,
554240afd8cSMark Johnston 	    DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid);
555240afd8cSMark Johnston 	cur->inode->ino = dnid;
556240afd8cSMark Johnston 	cur->inode->flags |= FI_ALLOCATED;
557240afd8cSMark Johnston 
5588eca3207SMark Johnston 	fd = fs_open(cur, arg, O_RDONLY);
559240afd8cSMark Johnston 
560240afd8cSMark Johnston 	buf = zfs->filebuf;
561240afd8cSMark Johnston 	bufsz = sizeof(zfs->filebuf);
562240afd8cSMark Johnston 	size = cur->inode->st.st_size;
563240afd8cSMark Johnston 	c = dnode_cursor_init(zfs, arg->fs->os, dnode, size, 0);
564ef20cd33SMark Johnston 	for (off_t foff = 0; foff < size; foff += nbytes) {
565240afd8cSMark Johnston 		off_t loc, sofar;
566240afd8cSMark Johnston 
567240afd8cSMark Johnston 		/*
568240afd8cSMark Johnston 		 * Fill up our buffer, handling partial reads.
569240afd8cSMark Johnston 		 */
570240afd8cSMark Johnston 		sofar = 0;
571ef20cd33SMark Johnston 		nbytes = MIN(size - foff, (off_t)bufsz);
572240afd8cSMark Johnston 		do {
573ef20cd33SMark Johnston 			n = read(fd, buf + sofar, nbytes);
574240afd8cSMark Johnston 			if (n < 0)
575240afd8cSMark Johnston 				err(1, "reading from '%s'", cur->name);
576240afd8cSMark Johnston 			if (n == 0)
577240afd8cSMark Johnston 				errx(1, "unexpected EOF reading '%s'",
578240afd8cSMark Johnston 				    cur->name);
579240afd8cSMark Johnston 			sofar += n;
580ef20cd33SMark Johnston 		} while (sofar < nbytes);
581240afd8cSMark Johnston 
582ef20cd33SMark Johnston 		if (nbytes < (off_t)bufsz)
583ef20cd33SMark Johnston 			memset(buf + nbytes, 0, bufsz - nbytes);
584240afd8cSMark Johnston 
585ef20cd33SMark Johnston 		reqbytes = foff == 0 ? nbytes : MAXBLOCKSIZE;
586ef20cd33SMark Johnston 		loc = objset_space_alloc(zfs, arg->fs->os, &reqbytes);
587ef20cd33SMark Johnston 		vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, buf, reqbytes, loc,
588240afd8cSMark Johnston 		    dnode_cursor_next(zfs, c, foff));
589240afd8cSMark Johnston 	}
5908eca3207SMark Johnston 	eclose(fd);
591240afd8cSMark Johnston 	dnode_cursor_finish(zfs, c);
592240afd8cSMark Johnston 
593240afd8cSMark Johnston 	fs_populate_sattrs(arg, cur, dnode);
594240afd8cSMark Johnston 	fs_populate_dirent(arg, cur, dnid);
595240afd8cSMark Johnston }
596240afd8cSMark Johnston 
597240afd8cSMark Johnston static void
fs_populate_dir(fsnode * cur,struct fs_populate_arg * arg)598240afd8cSMark Johnston fs_populate_dir(fsnode *cur, struct fs_populate_arg *arg)
599240afd8cSMark Johnston {
600240afd8cSMark Johnston 	dnode_phys_t *dnode;
601240afd8cSMark Johnston 	zfs_objset_t *os;
602240afd8cSMark Johnston 	uint64_t dnid;
603240afd8cSMark Johnston 	int dirfd;
604240afd8cSMark Johnston 
605240afd8cSMark Johnston 	assert(cur->type == S_IFDIR);
606240afd8cSMark Johnston 	assert((cur->inode->flags & FI_ALLOCATED) == 0);
607240afd8cSMark Johnston 
608240afd8cSMark Johnston 	os = arg->fs->os;
609240afd8cSMark Johnston 
610240afd8cSMark Johnston 	dnode = objset_dnode_bonus_alloc(os, DMU_OT_DIRECTORY_CONTENTS,
611240afd8cSMark Johnston 	    DMU_OT_SA, 0, &dnid);
612240afd8cSMark Johnston 
613240afd8cSMark Johnston 	/*
614240afd8cSMark Johnston 	 * Add an entry to the parent directory and open this directory.
615240afd8cSMark Johnston 	 */
616240afd8cSMark Johnston 	if (!SLIST_EMPTY(&arg->dirs)) {
617240afd8cSMark Johnston 		fs_populate_dirent(arg, cur, dnid);
6186e011d15SBrooks Davis 		/*
6196e011d15SBrooks Davis 		 * We only need the directory fd if we're finding files in
6206e011d15SBrooks Davis 		 * it.  If it's just there for other directories or
6216e011d15SBrooks Davis 		 * files using contents= we don't need to succeed here.
6226e011d15SBrooks Davis 		 */
6236e011d15SBrooks Davis 		dirfd = fs_open_can_fail(cur, arg, O_DIRECTORY | O_RDONLY);
624240afd8cSMark Johnston 	} else {
625240afd8cSMark Johnston 		arg->rootdirid = dnid;
6268eca3207SMark Johnston 		dirfd = arg->rootdirfd;
6278eca3207SMark Johnston 		arg->rootdirfd = -1;
628240afd8cSMark Johnston 	}
629240afd8cSMark Johnston 
630240afd8cSMark Johnston 	/*
631240afd8cSMark Johnston 	 * Set ZPL attributes.
632240afd8cSMark Johnston 	 */
633240afd8cSMark Johnston 	fs_populate_sattrs(arg, cur, dnode);
634240afd8cSMark Johnston 
635240afd8cSMark Johnston 	/*
636240afd8cSMark Johnston 	 * If this is a root directory, then its children belong to a different
637240afd8cSMark Johnston 	 * dataset and this directory remains empty in the current objset.
638240afd8cSMark Johnston 	 */
639240afd8cSMark Johnston 	if ((cur->inode->flags & FI_ROOT) == 0) {
640240afd8cSMark Johnston 		struct fs_populate_dir *dir;
641240afd8cSMark Johnston 
642240afd8cSMark Johnston 		dir = ecalloc(1, sizeof(*dir));
643240afd8cSMark Johnston 		dir->dirfd = dirfd;
644240afd8cSMark Johnston 		dir->objid = dnid;
645240afd8cSMark Johnston 		dir->zap = zap_alloc(os, dnode);
646240afd8cSMark Johnston 		SLIST_INSERT_HEAD(&arg->dirs, dir, next);
647240afd8cSMark Johnston 	} else {
648240afd8cSMark Johnston 		zap_write(arg->zfs, zap_alloc(os, dnode));
649240afd8cSMark Johnston 		fs_build_one(arg->zfs, cur->inode->param, cur->child, dirfd);
650240afd8cSMark Johnston 	}
651240afd8cSMark Johnston }
652240afd8cSMark Johnston 
653240afd8cSMark Johnston static void
fs_populate_symlink(fsnode * cur,struct fs_populate_arg * arg)654240afd8cSMark Johnston fs_populate_symlink(fsnode *cur, struct fs_populate_arg *arg)
655240afd8cSMark Johnston {
656240afd8cSMark Johnston 	dnode_phys_t *dnode;
657240afd8cSMark Johnston 	uint64_t dnid;
658240afd8cSMark Johnston 
659240afd8cSMark Johnston 	assert(cur->type == S_IFLNK);
660240afd8cSMark Johnston 	assert((cur->inode->flags & (FI_ALLOCATED | FI_ROOT)) == 0);
661240afd8cSMark Johnston 
662240afd8cSMark Johnston 	dnode = objset_dnode_bonus_alloc(arg->fs->os,
663240afd8cSMark Johnston 	    DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid);
664240afd8cSMark Johnston 
665240afd8cSMark Johnston 	fs_populate_dirent(arg, cur, dnid);
666240afd8cSMark Johnston 
667240afd8cSMark Johnston 	fs_populate_sattrs(arg, cur, dnode);
668240afd8cSMark Johnston }
669240afd8cSMark Johnston 
670*ce878284SMark Johnston static fsnode *
fsnode_next(fsnode * cur)671*ce878284SMark Johnston fsnode_next(fsnode *cur)
672*ce878284SMark Johnston {
673*ce878284SMark Johnston 	for (cur = cur->next; cur != NULL; cur = cur->next) {
674*ce878284SMark Johnston 		if (fsnode_valid(cur))
675*ce878284SMark Johnston 			return (cur);
676*ce878284SMark Johnston 	}
677*ce878284SMark Johnston 	return (NULL);
678*ce878284SMark Johnston }
679*ce878284SMark Johnston 
680240afd8cSMark Johnston static int
fs_foreach_populate(fsnode * cur,void * _arg)681240afd8cSMark Johnston fs_foreach_populate(fsnode *cur, void *_arg)
682240afd8cSMark Johnston {
683240afd8cSMark Johnston 	struct fs_populate_arg *arg;
684240afd8cSMark Johnston 	struct fs_populate_dir *dir;
685240afd8cSMark Johnston 	int ret;
686240afd8cSMark Johnston 
687240afd8cSMark Johnston 	arg = _arg;
688240afd8cSMark Johnston 	switch (cur->type) {
689240afd8cSMark Johnston 	case S_IFREG:
690240afd8cSMark Johnston 		fs_populate_file(cur, arg);
691240afd8cSMark Johnston 		break;
692240afd8cSMark Johnston 	case S_IFDIR:
693240afd8cSMark Johnston 		if (fsnode_isroot(cur))
694240afd8cSMark Johnston 			break;
695240afd8cSMark Johnston 		fs_populate_dir(cur, arg);
696240afd8cSMark Johnston 		break;
697240afd8cSMark Johnston 	case S_IFLNK:
698240afd8cSMark Johnston 		fs_populate_symlink(cur, arg);
699240afd8cSMark Johnston 		break;
700240afd8cSMark Johnston 	default:
701240afd8cSMark Johnston 		assert(0);
702240afd8cSMark Johnston 	}
703240afd8cSMark Johnston 
704240afd8cSMark Johnston 	ret = (cur->inode->flags & FI_ROOT) != 0 ? 0 : 1;
705240afd8cSMark Johnston 
706*ce878284SMark Johnston 	if (fsnode_next(cur) == NULL &&
707240afd8cSMark Johnston 	    (cur->child == NULL || (cur->inode->flags & FI_ROOT) != 0)) {
708240afd8cSMark Johnston 		/*
709240afd8cSMark Johnston 		 * We reached a terminal node in a subtree.  Walk back up and
710240afd8cSMark Johnston 		 * write out directories.  We're done once we hit the root of a
711240afd8cSMark Johnston 		 * dataset or find a level where we're not on the edge of the
712240afd8cSMark Johnston 		 * tree.
713240afd8cSMark Johnston 		 */
714240afd8cSMark Johnston 		do {
715240afd8cSMark Johnston 			dir = SLIST_FIRST(&arg->dirs);
716240afd8cSMark Johnston 			SLIST_REMOVE_HEAD(&arg->dirs, next);
717240afd8cSMark Johnston 			zap_write(arg->zfs, dir->zap);
7188eca3207SMark Johnston 			if (dir->dirfd != -1)
7198eca3207SMark Johnston 				eclose(dir->dirfd);
720240afd8cSMark Johnston 			free(dir);
721240afd8cSMark Johnston 			cur = cur->parent;
722*ce878284SMark Johnston 		} while (cur != NULL && fsnode_next(cur) == NULL &&
723240afd8cSMark Johnston 		    (cur->inode->flags & FI_ROOT) == 0);
724240afd8cSMark Johnston 	}
725240afd8cSMark Johnston 
726240afd8cSMark Johnston 	return (ret);
727240afd8cSMark Johnston }
728240afd8cSMark Johnston 
729240afd8cSMark Johnston static void
fs_add_zpl_attr_layout(zfs_zap_t * zap,unsigned int index,const sa_attr_type_t layout[],size_t sacnt)730240afd8cSMark Johnston fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index,
731240afd8cSMark Johnston     const sa_attr_type_t layout[], size_t sacnt)
732240afd8cSMark Johnston {
733240afd8cSMark Johnston 	char ti[16];
734240afd8cSMark Johnston 
735240afd8cSMark Johnston 	assert(sizeof(layout[0]) == 2);
736240afd8cSMark Johnston 
737240afd8cSMark Johnston 	snprintf(ti, sizeof(ti), "%u", index);
738240afd8cSMark Johnston 	zap_add(zap, ti, sizeof(sa_attr_type_t), sacnt,
739240afd8cSMark Johnston 	    (const uint8_t *)layout);
740240afd8cSMark Johnston }
741240afd8cSMark Johnston 
742240afd8cSMark Johnston /*
743240afd8cSMark Johnston  * Initialize system attribute tables.
744240afd8cSMark Johnston  *
745240afd8cSMark Johnston  * There are two elements to this.  First, we write the zpl_attrs[] and
746240afd8cSMark Johnston  * zpl_attr_layout[] tables to disk.  Then we create a lookup table which
747240afd8cSMark Johnston  * allows us to set file attributes quickly.
748240afd8cSMark Johnston  */
749240afd8cSMark Johnston static uint64_t
fs_set_zpl_attrs(zfs_opt_t * zfs,zfs_fs_t * fs)750240afd8cSMark Johnston fs_set_zpl_attrs(zfs_opt_t *zfs, zfs_fs_t *fs)
751240afd8cSMark Johnston {
752240afd8cSMark Johnston 	zfs_zap_t *sazap, *salzap, *sarzap;
753240afd8cSMark Johnston 	zfs_objset_t *os;
754240afd8cSMark Johnston 	dnode_phys_t *saobj, *salobj, *sarobj;
755240afd8cSMark Johnston 	uint64_t saobjid, salobjid, sarobjid;
756240afd8cSMark Johnston 	uint16_t offset;
757240afd8cSMark Johnston 
758240afd8cSMark Johnston 	os = fs->os;
759240afd8cSMark Johnston 
760240afd8cSMark Johnston 	/*
761240afd8cSMark Johnston 	 * The on-disk tables are stored in two ZAP objects, the registry object
762240afd8cSMark Johnston 	 * and the layout object.  Individual attributes are described by
763240afd8cSMark Johnston 	 * entries in the registry object; for example, the value for the
764240afd8cSMark Johnston 	 * "ZPL_SIZE" key gives the size and encoding of the ZPL_SIZE attribute.
765240afd8cSMark Johnston 	 * The attributes of a file are ordered according to one of the layouts
766240afd8cSMark Johnston 	 * defined in the layout object.  The master node object is simply used
767240afd8cSMark Johnston 	 * to locate the registry and layout objects.
768240afd8cSMark Johnston 	 */
769240afd8cSMark Johnston 	saobj = objset_dnode_alloc(os, DMU_OT_SA_MASTER_NODE, &saobjid);
770240afd8cSMark Johnston 	salobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_LAYOUTS, &salobjid);
771240afd8cSMark Johnston 	sarobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_REGISTRATION, &sarobjid);
772240afd8cSMark Johnston 
773240afd8cSMark Johnston 	sarzap = zap_alloc(os, sarobj);
774240afd8cSMark Johnston 	for (size_t i = 0; i < nitems(zpl_attrs); i++) {
775240afd8cSMark Johnston 		const zfs_sattr_t *sa;
776240afd8cSMark Johnston 		uint64_t attr;
777240afd8cSMark Johnston 
778240afd8cSMark Johnston 		attr = 0;
779240afd8cSMark Johnston 		sa = &zpl_attrs[i];
780240afd8cSMark Johnston 		SA_ATTR_ENCODE(attr, (uint64_t)i, sa->size, sa->bs);
781240afd8cSMark Johnston 		zap_add_uint64(sarzap, sa->name, attr);
782240afd8cSMark Johnston 	}
783240afd8cSMark Johnston 	zap_write(zfs, sarzap);
784240afd8cSMark Johnston 
785240afd8cSMark Johnston 	/*
786240afd8cSMark Johnston 	 * Layouts are arrays of indices into the registry.  We define two
787240afd8cSMark Johnston 	 * layouts for use by the ZPL, one for non-symlinks and one for
788240afd8cSMark Johnston 	 * symlinks.  They are identical except that the symlink layout includes
789240afd8cSMark Johnston 	 * ZPL_SYMLINK as its final attribute.
790240afd8cSMark Johnston 	 */
791240afd8cSMark Johnston 	salzap = zap_alloc(os, salobj);
792240afd8cSMark Johnston 	assert(zpl_attr_layout[nitems(zpl_attr_layout) - 1] == ZPL_SYMLINK);
793240afd8cSMark Johnston 	fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_DEFAULT,
794240afd8cSMark Johnston 	    zpl_attr_layout, nitems(zpl_attr_layout) - 1);
795240afd8cSMark Johnston 	fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_SYMLINK,
796240afd8cSMark Johnston 	    zpl_attr_layout, nitems(zpl_attr_layout));
797240afd8cSMark Johnston 	zap_write(zfs, salzap);
798240afd8cSMark Johnston 
799240afd8cSMark Johnston 	sazap = zap_alloc(os, saobj);
800240afd8cSMark Johnston 	zap_add_uint64(sazap, SA_LAYOUTS, salobjid);
801240afd8cSMark Johnston 	zap_add_uint64(sazap, SA_REGISTRY, sarobjid);
802240afd8cSMark Johnston 	zap_write(zfs, sazap);
803240afd8cSMark Johnston 
804240afd8cSMark Johnston 	/* Sanity check. */
805240afd8cSMark Johnston 	for (size_t i = 0; i < nitems(zpl_attrs); i++)
806240afd8cSMark Johnston 		assert(i == zpl_attrs[i].id);
807240afd8cSMark Johnston 
808240afd8cSMark Johnston 	/*
809240afd8cSMark Johnston 	 * Build the offset table used when setting file attributes.  File
810240afd8cSMark Johnston 	 * attributes are stored in the object's bonus buffer; this table
811240afd8cSMark Johnston 	 * provides the buffer offset of attributes referenced by the layout
812240afd8cSMark Johnston 	 * table.
813240afd8cSMark Johnston 	 */
814240afd8cSMark Johnston 	fs->sacnt = nitems(zpl_attrs);
815240afd8cSMark Johnston 	fs->saoffs = ecalloc(fs->sacnt, sizeof(*fs->saoffs));
816240afd8cSMark Johnston 	for (size_t i = 0; i < fs->sacnt; i++)
817240afd8cSMark Johnston 		fs->saoffs[i] = 0xffff;
818240afd8cSMark Johnston 	offset = 0;
819240afd8cSMark Johnston 	for (size_t i = 0; i < nitems(zpl_attr_layout); i++) {
820240afd8cSMark Johnston 		uint16_t size;
821240afd8cSMark Johnston 
822240afd8cSMark Johnston 		assert(zpl_attr_layout[i] < fs->sacnt);
823240afd8cSMark Johnston 
824240afd8cSMark Johnston 		fs->saoffs[zpl_attr_layout[i]] = offset;
825240afd8cSMark Johnston 		size = zpl_attrs[zpl_attr_layout[i]].size;
826240afd8cSMark Johnston 		offset += size;
827240afd8cSMark Johnston 	}
828240afd8cSMark Johnston 	fs->satab = zpl_attrs;
829240afd8cSMark Johnston 
830240afd8cSMark Johnston 	return (saobjid);
831240afd8cSMark Johnston }
832240afd8cSMark Johnston 
/*
 * dsl_dir_foreach() callback which associates a DSL directory with the fsnode
 * at its mountpoint in the staged tree.  "arg" is the root fsnode of the
 * staged tree.
 *
 * DSL directories without a dataset, with canmount set to 0, or without a
 * mountpoint are ignored.  If the dataset's full name matches zfs->bootfs, the
 * pool's "bootfs" property is set to the dataset's ID.
 *
 * When the mountpoint is found in the staged tree, the fsnode's inode records
 * the DSL directory in its "param" field and, unless it is the root fsnode
 * itself, is flagged with FI_ROOT.  If only the final path component is
 * missing, nothing is recorded; a missing or non-directory intermediate
 * component is a fatal error.
 */
static void
fs_layout_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg)
{
	char *mountpoint, *origmountpoint, *name, *next;
	fsnode *cur, *root;
	uint64_t canmount;

	if (!dsl_dir_has_dataset(dsldir))
		return;

	if (dsl_dir_get_canmount(dsldir, &canmount) == 0 && canmount == 0)
		return;
	mountpoint = dsl_dir_get_mountpoint(zfs, dsldir);
	if (mountpoint == NULL)
		return;

	/*
	 * If we were asked to specify a bootfs, set it here.
	 */
	if (zfs->bootfs != NULL && strcmp(zfs->bootfs,
	    dsl_dir_fullname(dsldir)) == 0) {
		zap_add_uint64(zfs->poolprops, "bootfs",
		    dsl_dir_dataset_id(dsldir));
	}

	/*
	 * The lookup below advances "mountpoint" and strsep() modifies the
	 * string in place, so remember the original pointer for free().
	 */
	origmountpoint = mountpoint;

	/*
	 * Figure out which fsnode corresponds to our mountpoint.
	 */
	root = arg;
	cur = root;
	if (strcmp(mountpoint, zfs->rootpath) != 0) {
		/*
		 * Skip over the root path prefix.
		 * NOTE(review): this assumes that the mountpoint string always
		 * begins with zfs->rootpath -- confirm against
		 * dsl_dir_get_mountpoint().
		 */
		mountpoint += strlen(zfs->rootpath);

		/*
		 * Look up the directory in the staged tree.  For example, if
		 * the dataset's mount point is /foo/bar/baz, we'll search the
		 * root directory for "foo", search "foo" for "bar", and so on.
		 * Each intermediate name must refer to a directory; the final
		 * component need not exist.
		 */
		cur = root;
		for (next = name = mountpoint; next != NULL;) {
			/* Skip any run of path separators. */
			for (; *next == '/'; next++)
				;
			name = strsep(&next, "/");

			/* Scan the sibling list for a matching name. */
			for (; cur != NULL && strcmp(cur->name, name) != 0;
			    cur = cur->next)
				;
			if (cur == NULL) {
				/* Only the last component may be missing. */
				if (next == NULL)
					break;
				errx(1, "missing mountpoint directory for `%s'",
				    dsl_dir_fullname(dsldir));
			}
			if (cur->type != S_IFDIR) {
				errx(1,
				    "mountpoint for `%s' is not a directory",
				    dsl_dir_fullname(dsldir));
			}
			/* More components remain, so descend a level. */
			if (next != NULL)
				cur = cur->child;
		}
	}

	if (cur != NULL) {
		assert(cur->type == S_IFDIR);

		/*
		 * Multiple datasets shouldn't share a mountpoint.  It's
		 * technically allowed, but it's not clear what makefs should do
		 * in that case.
		 */
		assert((cur->inode->flags & FI_ROOT) == 0);
		if (cur != root)
			cur->inode->flags |= FI_ROOT;
		assert(cur->inode->param == NULL);
		cur->inode->param = dsldir;
	}

	free(origmountpoint);
}
917240afd8cSMark Johnston 
918240afd8cSMark Johnston static int
fs_foreach_mark(fsnode * cur,void * arg)919240afd8cSMark Johnston fs_foreach_mark(fsnode *cur, void *arg)
920240afd8cSMark Johnston {
921240afd8cSMark Johnston 	uint64_t *countp;
922240afd8cSMark Johnston 
923240afd8cSMark Johnston 	countp = arg;
924240afd8cSMark Johnston 	if (cur->type == S_IFDIR && fsnode_isroot(cur))
925240afd8cSMark Johnston 		return (1);
926240afd8cSMark Johnston 
927240afd8cSMark Johnston 	if (cur->inode->ino == 0) {
928240afd8cSMark Johnston 		cur->inode->ino = ++(*countp);
929240afd8cSMark Johnston 		cur->inode->nlink = 1;
930240afd8cSMark Johnston 	} else {
931240afd8cSMark Johnston 		cur->inode->nlink++;
932240afd8cSMark Johnston 	}
933240afd8cSMark Johnston 
934240afd8cSMark Johnston 	return ((cur->inode->flags & FI_ROOT) != 0 ? 0 : 1);
935240afd8cSMark Johnston }
936240afd8cSMark Johnston 
/*
 * Create a filesystem dataset.  More specifically:
 * - create an object set for the dataset,
 * - add required metadata (SA tables, property definitions, etc.) to that
 *   object set,
 * - optionally populate the object set with file objects, using "root" as the
 *   root directory.
 *
 * "dsldir" is the DSL directory to which the finished object set is attached.
 *
 * "root" may be NULL, in which case an empty root directory is synthesized and
 * "dirfd" must be -1.
 *
 * "dirfd" is a directory descriptor for the directory referenced by "root".  It
 * is closed before returning.
 */
static void
fs_build_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, fsnode *root, int dirfd)
{
	struct fs_populate_arg arg;
	zfs_fs_t fs;
	zfs_zap_t *masterzap;
	zfs_objset_t *os;
	dnode_phys_t *deleteq, *masterobj;
	uint64_t deleteqid, dnodecount, moid, rootdirid, saobjid;
	bool fakedroot;

	/*
	 * This dataset's mountpoint doesn't exist in the staging tree, or the
	 * dataset doesn't have a mountpoint at all.  In either case we still
	 * need a root directory.  Fake up a root fsnode to handle this case.
	 */
	fakedroot = root == NULL;
	if (fakedroot) {
		struct stat *stp;

		assert(dirfd == -1);

		root = ecalloc(1, sizeof(*root));
		root->inode = ecalloc(1, sizeof(*root->inode));
		root->name = estrdup(".");
		root->type = S_IFDIR;

		/* The fake root is owned by uid/gid 0 with mode 0755. */
		stp = &root->inode->st;
		stp->st_uid = 0;
		stp->st_gid = 0;
		stp->st_mode = S_IFDIR | 0755;
	}
	assert(root->type == S_IFDIR);
	assert(fsnode_isroot(root));

	/*
	 * Initialize the object set for this dataset.  The master node must
	 * be the first object allocated so that it receives the expected
	 * object ID.
	 */
	os = objset_alloc(zfs, DMU_OST_ZFS);
	masterobj = objset_dnode_alloc(os, DMU_OT_MASTER_NODE, &moid);
	assert(moid == MASTER_NODE_OBJ);

	memset(&fs, 0, sizeof(fs));
	fs.os = os;

	/*
	 * Create the ZAP SA layout now since filesystem object dnodes will
	 * refer to those attributes.
	 */
	saobjid = fs_set_zpl_attrs(zfs, &fs);

	/*
	 * Make a pass over the staged directory to detect hard links and assign
	 * virtual dnode numbers.
	 */
	dnodecount = 1; /* root directory */
	fsnode_foreach(root, fs_foreach_mark, &dnodecount);

	/*
	 * Make a second pass to populate the dataset with files from the
	 * staged directory.  Most of our runtime is spent here.
	 *
	 * The directory stack starts out empty, so the fs_populate_dir() call
	 * below treats "root" as the dataset's root directory: it records the
	 * root directory's object ID in arg.rootdirid and takes over
	 * arg.rootdirfd.  The stack must be empty again once the traversal has
	 * finished.
	 */
	arg.rootdirfd = dirfd;
	arg.zfs = zfs;
	arg.fs = &fs;
	SLIST_INIT(&arg.dirs);
	fs_populate_dir(root, &arg);
	assert(!SLIST_EMPTY(&arg.dirs));
	fsnode_foreach(root, fs_foreach_populate, &arg);
	assert(SLIST_EMPTY(&arg.dirs));
	rootdirid = arg.rootdirid;

	/*
	 * Create an empty delete queue.  We don't do anything with it, but
	 * OpenZFS will refuse to mount filesystems that don't have one.
	 */
	deleteq = objset_dnode_alloc(os, DMU_OT_UNLINKED_SET, &deleteqid);
	zap_write(zfs, zap_alloc(os, deleteq));

	/*
	 * Populate and write the master node object.  This is a ZAP object
	 * containing various dataset properties and the object IDs of the root
	 * directory and delete queue.
	 */
	masterzap = zap_alloc(os, masterobj);
	zap_add_uint64(masterzap, ZFS_ROOT_OBJ, rootdirid);
	zap_add_uint64(masterzap, ZFS_UNLINKED_SET, deleteqid);
	zap_add_uint64(masterzap, ZFS_SA_ATTRS, saobjid);
	zap_add_uint64(masterzap, ZPL_VERSION_OBJ, 5 /* ZPL_VERSION_SA */);
	zap_add_uint64(masterzap, "normalization", 0 /* off */);
	zap_add_uint64(masterzap, "utf8only", 0 /* off */);
	zap_add_uint64(masterzap, "casesensitivity", 0 /* case sensitive */);
	zap_add_uint64(masterzap, "acltype", 2 /* NFSv4 */);
	zap_write(zfs, masterzap);

	/*
	 * All finished with this object set, we may as well write it now.
	 * The DSL layer will sum up the bytes consumed by each dataset using
	 * information stored in the object set, so it can't be freed just yet.
	 */
	dsl_dir_dataset_write(zfs, os, dsldir);

	/* Release the synthesized root, if any. */
	if (fakedroot) {
		free(root->inode);
		free(root->name);
		free(root);
	}
	/* The offset table was allocated by fs_set_zpl_attrs(). */
	free(fs.saoffs);
}
1057240afd8cSMark Johnston 
1058240afd8cSMark Johnston /*
1059240afd8cSMark Johnston  * Create an object set for each DSL directory which has a dataset and doesn't
1060240afd8cSMark Johnston  * already have an object set.
1061240afd8cSMark Johnston  */
1062240afd8cSMark Johnston static void
fs_build_unmounted(zfs_opt_t * zfs,zfs_dsl_dir_t * dsldir,void * arg __unused)1063240afd8cSMark Johnston fs_build_unmounted(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg __unused)
1064240afd8cSMark Johnston {
1065240afd8cSMark Johnston 	if (dsl_dir_has_dataset(dsldir) && !dsl_dir_dataset_has_objset(dsldir))
1066240afd8cSMark Johnston 		fs_build_one(zfs, dsldir, NULL, -1);
1067240afd8cSMark Johnston }
1068240afd8cSMark Johnston 
1069240afd8cSMark Johnston /*
1070240afd8cSMark Johnston  * Create our datasets and populate them with files.
1071240afd8cSMark Johnston  */
1072240afd8cSMark Johnston void
fs_build(zfs_opt_t * zfs,int dirfd,fsnode * root)1073240afd8cSMark Johnston fs_build(zfs_opt_t *zfs, int dirfd, fsnode *root)
1074240afd8cSMark Johnston {
1075240afd8cSMark Johnston 	/*
1076240afd8cSMark Johnston 	 * Run through our datasets and find the root fsnode for each one.  Each
1077240afd8cSMark Johnston 	 * root fsnode is flagged so that we can figure out which dataset it
1078240afd8cSMark Johnston 	 * belongs to.
1079240afd8cSMark Johnston 	 */
1080240afd8cSMark Johnston 	dsl_dir_foreach(zfs, zfs->rootdsldir, fs_layout_one, root);
1081240afd8cSMark Johnston 
1082240afd8cSMark Johnston 	/*
1083240afd8cSMark Johnston 	 * Did we find our boot filesystem?
1084240afd8cSMark Johnston 	 */
1085240afd8cSMark Johnston 	if (zfs->bootfs != NULL && !zap_entry_exists(zfs->poolprops, "bootfs"))
1086240afd8cSMark Johnston 		errx(1, "no mounted dataset matches bootfs property `%s'",
1087240afd8cSMark Johnston 		    zfs->bootfs);
1088240afd8cSMark Johnston 
1089240afd8cSMark Johnston 	/*
1090240afd8cSMark Johnston 	 * Traverse the file hierarchy starting from the root fsnode.  One
1091240afd8cSMark Johnston 	 * dataset, not necessarily the root dataset, must "own" the root
1092240afd8cSMark Johnston 	 * directory by having its mountpoint be equal to the root path.
1093240afd8cSMark Johnston 	 *
1094240afd8cSMark Johnston 	 * As roots of other datasets are encountered during the traversal,
1095240afd8cSMark Johnston 	 * fs_build_one() recursively creates the corresponding object sets and
1096240afd8cSMark Johnston 	 * populates them.  Once this function has returned, all datasets will
1097240afd8cSMark Johnston 	 * have been fully populated.
1098240afd8cSMark Johnston 	 */
1099240afd8cSMark Johnston 	fs_build_one(zfs, root->inode->param, root, dirfd);
1100240afd8cSMark Johnston 
1101240afd8cSMark Johnston 	/*
1102240afd8cSMark Johnston 	 * Now create object sets for datasets whose mountpoints weren't found
1103240afd8cSMark Johnston 	 * in the staging directory, either because there is no mountpoint, or
1104240afd8cSMark Johnston 	 * because the mountpoint doesn't correspond to an existing directory.
1105240afd8cSMark Johnston 	 */
1106240afd8cSMark Johnston 	dsl_dir_foreach(zfs, zfs->rootdsldir, fs_build_unmounted, NULL);
1107240afd8cSMark Johnston }
1108