xref: /illumos-gate/usr/src/lib/lib9p/common/backend/fs.c (revision 1e56f352c1c208679012bca47d552e127f5b1072)
1 /*
2  * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
3  * All rights reserved
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted providing that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  *
26  * Copyright 2021 Joyent, Inc.
27  */
28 
29 /*
 * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
31  */
32 
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <stdbool.h>
37 #include <fcntl.h>
38 #include <errno.h>
39 #include <assert.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <sys/mount.h>
43 #include <sys/param.h>
44 #include <sys/queue.h>
45 #include <sys/socket.h>
46 #include <sys/un.h>
47 #include <dirent.h>
48 #include <pwd.h>
49 #include <grp.h>
50 #include <libgen.h>
51 #include <pthread.h>
52 #include "../lib9p.h"
53 #include "../lib9p_impl.h"
54 #include "../fid.h"
55 #include "../log.h"
56 #include "../rfuncs.h"
57 #include "../genacl.h"
58 #include "backend.h"
59 #include "fs.h"
60 
61 #if defined(WITH_CASPER)
62   #include <libcasper.h>
63   #include <casper/cap_pwd.h>
64   #include <casper/cap_grp.h>
65 #endif
66 
67 #if defined(__FreeBSD__)
68   #include <sys/param.h>
69   #if __FreeBSD_version >= 1000000
70     #define	HAVE_BINDAT
71   #endif
72 #endif
73 
74 #if defined(__FreeBSD__)
75   #define	HAVE_BIRTHTIME
76 #endif
77 
78 #if defined(__APPLE__)
79   #include <sys/syscall.h>
80   #include "Availability.h"
81   #define ACL_TYPE_NFS4 ACL_TYPE_EXTENDED
82 #endif
83 
84 #if defined (__illumos__)
85   #include <sys/sysmacros.h>
86   #include <sys/statvfs.h>
87   #include <sys/un.h>
88   #include <attr.h>
89   #include <sys/nvpair.h>
90 #endif
91 
/*
 * Backend-wide state.  Backend entry points receive a pointer to this
 * structure as their opaque softc argument (see the cast in fs_attach()).
 */
struct fs_softc {
	/*
	 * Descriptor for the exported root directory.  fs_attach()
	 * fstat()s it and hands it to open_fid() as the directory fd
	 * of the root fid.
	 */
	int 	fs_rootfd;
	/* When true, fs_nde() refuses to create new entries (EROFS). */
	bool	fs_readonly;
#if defined(__illumos__)
	/*
	 * On illumos, the file creation time (birthtime) is stored (on
	 * supported filesystems -- i.e. zfs) in an extended attribute.
	 * If for some reason the fs doesn't support extended attributes,
	 * we skip trying to read the creation time.
	 */
	bool	fs_hasxattr;
#endif
#if defined(WITH_CASPER)
	/* Casper channel used for passwd lookups (fs_getpwuid, fs_attach). */
	cap_channel_t *fs_cappwd;
	/* Casper channel used for group lookups (fs_getgrgid). */
	cap_channel_t *fs_capgrp;
#endif
};
109 
/*
 * Per-fid backend data, hung off l9p_fid's lo_aux pointer.  Allocated
 * (zero-filled) by open_fid().
 */
struct fs_fid {
	DIR	*ff_dir;	/* directory stream, if any; fs_clunk() closedir()s it */
	int	ff_dirfd;	/* directory fd that ff_name is resolved against */
	int	ff_fd;		/* open file descriptor, or -1 when not open */
	int	ff_flags;	/* FF_* flags, e.g. FF_NO_NFSV4_ACL */
	char	*ff_name;	/* strdup()ed path of this file */
	struct fs_authinfo *ff_ai;	/* shared credentials; open_fid() takes a ref */
	/*
	 * Initialised in open_fid().
	 * NOTE(review): what this mutex protects is not visible in this
	 * part of the file -- confirm against its lock/unlock sites.
	 */
	pthread_mutex_t ff_mtx;
	struct l9p_acl *ff_acl; /* cached ACL if any */
};
120 
121 #if defined(__FreeBSD__)
122 # define	STATFS_FSID(_s) \
123 	(((uint64_t)(_s)->f_fsid.val[0] << 32) | (uint64_t)(_s)->f_fsid.val[1])
124 
125 # define	STAT_ATIME(_s)	((_s)->st_atimespec)
126 # define	STAT_MTIME(_s)	((_s)->st_mtimespec)
127 # define	STAT_CTIME(_s)	((_s)->st_ctimespec)
128 #elif defined (__illumos__)
129 # define	STATFS_FSID(_s)	((_s)->f_fsid)
130 
131 # define	STAT_ATIME(_s)	((_s)->st_atim)
132 # define	STAT_MTIME(_s)	((_s)->st_mtim)
133 # define	STAT_CTIME(_s)	((_s)->st_ctim)
134 #else
135 #error "Port me"
136 #endif
137 
138 #define	FF_NO_NFSV4_ACL	0x01	/* don't go looking for NFSv4 ACLs */
139 /*	FF_NO_POSIX_ACL	0x02	-- not yet */
140 
141 /*
142  * Our authinfo consists of:
143  *
144  *  - a reference count
145  *  - a uid
146  *  - a gid-set
147  *
 * The "default" gid is the first gid in the gid-set, provided the
149  * set size is at least 1.  The set-size may be zero, though.
150  *
151  * Adjustments to the ref-count must be atomic, once it's shared.
152  * It would be nice to use C11 atomics here but they are not common
153  * enough to all systems just yet; for now, we use a mutex.
154  *
155  * Note that some ops (Linux style ones) pass an effective gid for
156  * the op, in which case, that gid may override.  To achieve this
157  * effect, permissions testing functions also take an extra gid.
158  * If this gid is (gid_t)-1 it is not used and only the remaining
159  * gids take part.
160  *
161  * The uid may also be (uid_t)-1, meaning "no uid was available
162  * at all at attach time".  In this case, new files inherit parent
163  * directory uids.
164  *
165  * The refcount is simply the number of "openfile"s using this
166  * authinfo (so that when the last ref goes away, we can free it).
167  *
168  * There are also master ACL flags (same as in ff_flags).
169  */
struct fs_authinfo {
	pthread_mutex_t ai_mtx;	/* lock for refcnt */
	uint32_t ai_refcnt;	/* bumped by open_fid() under ai_mtx */
	int	ai_flags;	/* master FF_* flags; dropacl() copies these to ff_flags */
	uid_t	ai_uid;		/* attaching user, or (uid_t)-1 if none was available */
	int	ai_ngids;	/* number of valid entries in ai_gids[]; may be 0 */
	gid_t	ai_gids[];	/* NB: flexible array member */
};
178 
179 /*
180  * We have a global-static mutex for single-threading Tattach
181  * requests, which use getpwnam (and indirectly, getgr* functions)
182  * which are not reentrant.
183  */
184 static bool fs_attach_mutex_inited;
185 static pthread_mutex_t fs_attach_mutex;
186 
187 static pthread_mutexattr_t fs_mutexattr;
188 
189 /*
190  * Internal functions (except inline functions).
191  */
192 static struct passwd *fs_getpwuid(struct fs_softc *, uid_t, struct r_pgdata *);
193 static struct group *fs_getgrgid(struct fs_softc *, gid_t, struct r_pgdata *);
194 static int fs_buildname(struct l9p_fid *, char *, char *, size_t);
195 static int fs_pdir(struct fs_softc *, struct l9p_fid *, char *, size_t,
196     struct stat *st);
197 static int fs_dpf(char *, char *, size_t);
198 static int fs_oflags_dotu(int, int *);
199 static int fs_oflags_dotl(uint32_t, int *, enum l9p_omode *);
200 static int fs_nde(struct fs_softc *, struct l9p_fid *, bool, gid_t,
201     struct stat *, uid_t *, gid_t *);
202 static struct fs_fid *open_fid(int, const char *, struct fs_authinfo *, bool);
203 static void dostat(struct fs_softc *, struct l9p_stat *, char *,
204     struct stat *, bool dotu);
205 #ifdef __illumos__
206 static void getcrtime(struct fs_softc *, int, const char *, uint64_t *,
207     uint64_t *);
208 static void dostatfs(struct l9p_statfs *, struct statvfs *, long);
209 #define	ACL_TYPE_NFS4 1
210 acl_t *acl_get_fd_np(int fd, int type);
211 #else
212 static void dostatfs(struct l9p_statfs *, struct statfs *, long);
213 #endif
214 static void fillacl(struct fs_fid *ff);
215 static struct l9p_acl *getacl(struct fs_fid *ff, int fd, const char *path);
216 static void dropacl(struct fs_fid *ff);
217 static struct l9p_acl *look_for_nfsv4_acl(struct fs_fid *ff, int fd,
218     const char *path);
219 static int check_access(int32_t,
220     struct l9p_acl *, struct stat *, struct l9p_acl *, struct stat *,
221     struct fs_authinfo *, gid_t);
222 static void generate_qid(struct stat *, struct l9p_qid *);
223 
224 static int fs_icreate(void *, struct l9p_fid *, char *, int,
225     bool, mode_t, gid_t, struct stat *);
226 static int fs_iopen(void *, struct l9p_fid *, int, enum l9p_omode,
227     gid_t, struct stat *);
228 static int fs_imkdir(void *, struct l9p_fid *, char *,
229     bool, mode_t, gid_t, struct stat *);
230 static int fs_imkfifo(void *, struct l9p_fid *, char *,
231     bool, mode_t, gid_t, struct stat *);
232 static int fs_imknod(void *, struct l9p_fid *, char *,
233     bool, mode_t, dev_t, gid_t, struct stat *);
234 static int fs_imksocket(void *, struct l9p_fid *, char *,
235     bool, mode_t, gid_t, struct stat *);
236 static int fs_isymlink(void *, struct l9p_fid *, char *, char *,
237     gid_t, struct stat *);
238 
239 /*
240  * Internal functions implementing backend.
241  */
242 static int fs_attach(void *, struct l9p_request *);
243 static int fs_clunk(void *, struct l9p_fid *);
244 static int fs_create(void *, struct l9p_request *);
245 static int fs_open(void *, struct l9p_request *);
246 static int fs_read(void *, struct l9p_request *);
247 static int fs_remove(void *, struct l9p_fid *);
248 static int fs_stat(void *, struct l9p_request *);
249 static int fs_walk(void *, struct l9p_request *);
250 static int fs_write(void *, struct l9p_request *);
251 static int fs_wstat(void *, struct l9p_request *);
252 static int fs_statfs(void *, struct l9p_request *);
253 static int fs_lopen(void *, struct l9p_request *);
254 static int fs_lcreate(void *, struct l9p_request *);
255 static int fs_symlink(void *, struct l9p_request *);
256 static int fs_mknod(void *, struct l9p_request *);
257 static int fs_rename(void *, struct l9p_request *);
258 static int fs_readlink(void *, struct l9p_request *);
259 static int fs_getattr(void *, struct l9p_request *);
260 static int fs_setattr(void *, struct l9p_request *);
261 static int fs_xattrwalk(void *, struct l9p_request *);
262 static int fs_xattrcreate(void *, struct l9p_request *);
263 static int fs_readdir(void *, struct l9p_request *);
264 static int fs_fsync(void *, struct l9p_request *);
265 static int fs_lock(void *, struct l9p_request *);
266 static int fs_getlock(void *, struct l9p_request *);
267 static int fs_link(void *, struct l9p_request *);
268 static int fs_renameat(void *, struct l9p_request *);
269 static int fs_unlinkat(void *, struct l9p_request *);
270 static void fs_freefid(void *, struct l9p_fid *);
271 
272 /*
273  * Convert from 9p2000 open/create mode to Unix-style O_* flags.
274  * This includes 9p2000.u extensions, but not 9p2000.L protocol,
275  * which has entirely different open, create, etc., flag bits.
276  *
277  * The <mode> given here is the one-byte (uint8_t) "mode"
278  * argument to Tcreate or Topen, so it can have at most 8 bits.
279  *
280  * https://swtch.com/plan9port/man/man9/open.html and
281  * http://plan9.bell-labs.com/magic/man2html/5/open
282  * both say:
283  *
284  *   The [low two bits of the] mode field determines the
285  *   type of I/O ... [I]f mode has the OTRUNC (0x10) bit
286  *   set, the file is to be truncated, which requires write
287  *   permission ...; if the mode has the ORCLOSE (0x40) bit
288  *   set, the file is to be removed when the fid is clunked,
289  *   which requires permission to remove the file from its
290  *   directory.  All other bits in mode should be zero.  It
291  *   is illegal to write a directory, truncate it, or
292  *   attempt to remove it on close.
293  *
294  * 9P2000.u may add ODIRECT (0x80); this is not completely clear.
295  * The fcall.h header defines OCEXEC (0x20) as well, but it makes
296  * no sense to send this to a server.  There seem to be no bits
297  * 0x04 and 0x08.
298  *
299  * We always turn on O_NOCTTY since as a server, we never want
300  * to gain a controlling terminal.  We always turn on O_NOFOLLOW
301  * for reasons described elsewhere.
302  */
303 static int
304 fs_oflags_dotu(int mode, int *aflags)
305 {
306 	int flags;
307 #define	CONVERT(theirs, ours) \
308 	do { \
309 		if (mode & (theirs)) { \
310 			mode &= ~(theirs); \
311 			flags |= ours; \
312 		} \
313 	} while (0)
314 
315 	switch (mode & L9P_OACCMODE) {
316 
317 	case L9P_OREAD:
318 	default:
319 		flags = O_RDONLY;
320 		break;
321 
322 	case L9P_OWRITE:
323 		flags = O_WRONLY;
324 		break;
325 
326 	case L9P_ORDWR:
327 		flags = O_RDWR;
328 		break;
329 
330 	case L9P_OEXEC:
331 		if (mode & L9P_OTRUNC)
332 			return (EINVAL);
333 		flags = O_RDONLY;
334 		break;
335 	}
336 
337 	flags |= O_NOCTTY | O_NOFOLLOW;
338 
339 	CONVERT(L9P_OTRUNC, O_TRUNC);
340 
341 	/*
342 	 * Now take away some flags locally:
343 	 *   the access mode (already translated)
344 	 *   ORCLOSE - caller only
345 	 *   OCEXEC - makes no sense in server
346 	 *   ODIRECT - not applicable here
347 	 * If there are any flag bits left after this,
348 	 * we were unable to translate them.  For now, let's
349 	 * treat this as EINVAL so that we can catch problems.
350 	 */
351 	mode &= ~(L9P_OACCMODE | L9P_ORCLOSE | L9P_OCEXEC | L9P_ODIRECT);
352 	if (mode != 0) {
353 		L9P_LOG(L9P_INFO,
354 		    "fs_oflags_dotu: untranslated bits: %#x",
355 		    (unsigned)mode);
356 		return (EINVAL);
357 	}
358 
359 	*aflags = flags;
360 	return (0);
361 #undef CONVERT
362 }
363 
364 /*
365  * Convert from 9P2000.L (Linux) open mode bits to O_* flags.
366  * See fs_oflags_dotu above.
367  *
368  * Linux currently does not have open-for-exec, but there is a
369  * proposal for it using O_PATH|O_NOFOLLOW, now handled here.
370  *
371  * We may eventually also set L9P_ORCLOSE for L_O_TMPFILE.
372  */
373 static int
374 fs_oflags_dotl(uint32_t l_mode, int *aflags, enum l9p_omode *ap9)
375 {
376 	int flags;
377 	enum l9p_omode p9;
378 #define	CLEAR(theirs)	l_mode &= ~(uint32_t)(theirs)
379 #define	CONVERT(theirs, ours) \
380 	do { \
381 		if (l_mode & (theirs)) { \
382 			CLEAR(theirs); \
383 			flags |= ours; \
384 		} \
385 	} while (0)
386 
387 	/*
388 	 * Linux O_RDONLY, O_WRONLY, O_RDWR (0,1,2) match BSD/MacOS.
389 	 */
390 	flags = l_mode & O_ACCMODE;
391 	if (flags == 3)
392 		return (EINVAL);
393 	CLEAR(O_ACCMODE);
394 
395 	if ((l_mode & (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) ==
396 		    (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) {
397 		CLEAR(L9P_L_O_PATH | L9P_L_O_NOFOLLOW);
398 		p9 = L9P_OEXEC;
399 	} else {
400 		/*
401 		 * Slightly dirty, but same dirt, really, as
402 		 * setting flags from l_mode & O_ACCMODE.
403 		 */
404 		p9 = (enum l9p_omode)flags;	/* slightly dirty */
405 	}
406 
407 	/* turn L_O_TMPFILE into L9P_ORCLOSE in *p9? */
408 	if (l_mode & L9P_L_O_TRUNC)
409 		p9 |= L9P_OTRUNC;	/* but don't CLEAR yet */
410 
411 	flags |= O_NOCTTY | O_NOFOLLOW;
412 
413 	/*
414 	 * L_O_CREAT seems to be noise, since we get separate open
415 	 * and create.  But it is actually set sometimes.  We just
416 	 * throw it out here; create ops must set it themselves and
417 	 * open ops have no permissions bits and hence cannot create.
418 	 *
419 	 * L_O_EXCL does make sense on create ops, i.e., we can
420 	 * take a create op with or without L_O_EXCL.  We pass that
421 	 * through.
422 	 */
423 	CLEAR(L9P_L_O_CREAT);
424 	CONVERT(L9P_L_O_EXCL, O_EXCL);
425 	CONVERT(L9P_L_O_TRUNC, O_TRUNC);
426 	CONVERT(L9P_L_O_DIRECTORY, O_DIRECTORY);
427 	CONVERT(L9P_L_O_APPEND, O_APPEND);
428 	CONVERT(L9P_L_O_NONBLOCK, O_NONBLOCK);
429 
430 	/*
431 	 * Discard these as useless noise at our (server) end.
432 	 * (NOATIME might be useful but we can only set it on a
433 	 * per-mount basis.)
434 	 */
435 	CLEAR(L9P_L_O_CLOEXEC);
436 	CLEAR(L9P_L_O_DIRECT);
437 	CLEAR(L9P_L_O_DSYNC);
438 	CLEAR(L9P_L_O_FASYNC);
439 	CLEAR(L9P_L_O_LARGEFILE);
440 	CLEAR(L9P_L_O_NOATIME);
441 	CLEAR(L9P_L_O_NOCTTY);
442 	CLEAR(L9P_L_O_NOFOLLOW);
443 	CLEAR(L9P_L_O_SYNC);
444 
445 	if (l_mode != 0) {
446 		L9P_LOG(L9P_INFO,
447 		    "fs_oflags_dotl: untranslated bits: %#x",
448 		    (unsigned)l_mode);
449 		return (EINVAL);
450 	}
451 
452 	*aflags = flags;
453 	*ap9 = p9;
454 	return (0);
455 #undef CLEAR
456 #undef CONVERT
457 }
458 
/*
 * Look up the passwd entry for <uid>.  With Casper the query goes
 * through the softc's passwd channel; otherwise r_getpwuid() is called
 * directly and <sc> is unused.  <pg> is handed to the lookup routine;
 * callers in this file release it with r_pgfree().
 */
static struct passwd *
fs_getpwuid(struct fs_softc *sc, uid_t uid, struct r_pgdata *pg)
{
#if !defined(WITH_CASPER)
	(void)sc;	/* only the Casper variant needs the softc */
	return (r_getpwuid(uid, pg));
#else
	return (r_cap_getpwuid(sc->fs_cappwd, uid, pg));
#endif
}
469 
/*
 * Look up the group entry for <gid>.  With Casper the query goes
 * through the softc's group channel; otherwise r_getgrgid() is called
 * directly and <sc> is unused.  <pg> is handed to the lookup routine;
 * callers in this file release it with r_pgfree().
 */
static struct group *
fs_getgrgid(struct fs_softc *sc, gid_t gid, struct r_pgdata *pg)
{
#if !defined(WITH_CASPER)
	(void)sc;	/* only the Casper variant needs the softc */
	return (r_getgrgid(gid, pg));
#else
	return (r_cap_getgrgid(sc->fs_capgrp, gid, pg));
#endif
}
480 
481 /*
482  * Build full name of file by appending given name to directory name.
483  */
484 static int
485 fs_buildname(struct l9p_fid *dir, char *name, char *buf, size_t size)
486 {
487 	struct fs_fid *dirf = dir->lo_aux;
488 	size_t dlen, nlen1;
489 
490 	assert(dirf != NULL);
491 	dlen = strlen(dirf->ff_name);
492 	nlen1 = strlen(name) + 1;	/* +1 for '\0' */
493 	if (dlen + 1 + nlen1 > size)
494 		return (ENAMETOOLONG);
495 	memcpy(buf, dirf->ff_name, dlen);
496 	buf[dlen] = '/';
497 	memcpy(buf + dlen + 1, name, nlen1);
498 	return (0);
499 }
500 
501 /*
502  * Build parent name of file by splitting it off.  Return an error
503  * if the given fid represents the root, so that there is no such
504  * parent, or if the discovered parent is not a directory.
505  */
506 static int
507 fs_pdir(struct fs_softc *sc __unused, struct l9p_fid *fid, char *buf,
508     size_t size, struct stat *st)
509 {
510 	struct fs_fid *ff;
511 	char *path;
512 
513 	ff = fid->lo_aux;
514 	assert(ff != NULL);
515 	path = ff->ff_name;
516 	path = r_dirname(path, buf, size);
517 	if (path == NULL)
518 		return (ENAMETOOLONG);
519 	if (fstatat(ff->ff_dirfd, path, st, AT_SYMLINK_NOFOLLOW) != 0)
520 		return (errno);
521 	if (!S_ISDIR(st->st_mode))
522 		return (ENOTDIR);
523 	return (0);
524 }
525 
526 /*
527  * Like fs_buildname() but for adding a file name to a buffer
528  * already holding a directory name.  Essentially does
529  *     strcat(dbuf, "/");
530  *     strcat(dbuf, fname);
531  * but with size checking and an ENAMETOOLONG error as needed.
532  *
533  * (Think of the function name as "directory plus-equals file".)
534  */
static int
fs_dpf(char *dbuf, char *fname, size_t size)
{
	size_t used = strlen(dbuf);
	size_t extra = strlen(fname) + 1;	/* file name plus its '\0' */
	char *tail;

	/* Room is needed for what is there, one '/', and the new name. */
	if (used + 1 + extra > size)
		return (ENAMETOOLONG);

	tail = dbuf + used;
	*tail++ = '/';
	memcpy(tail, fname, extra);
	return (0);
}
548 
549 /*
550  * Prepare to create a new directory entry (open with O_CREAT,
551  * mkdir, etc -- any operation that creates a new inode),
552  * operating in parent data <dir>, based on authinfo <ai> and
553  * effective gid <egid>.
554  *
555  * The new entity should be owned by user/group <*nuid, *ngid>,
556  * if it's really a new entity.  It will be a directory if isdir.
557  *
558  * Returns an error number if the entry should not be created
559  * (e.g., read-only file system or no permission to write in
560  * parent directory).  Always sets *nuid and *ngid on success:
561  * in the worst case, when there is no available ID, this will
562  * use the parent directory's IDs.  Fills in <*st> on success.
563  */
564 static int
565 fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir, gid_t egid,
566     struct stat *st, uid_t *nuid, gid_t *ngid)
567 {
568 	struct fs_fid *dirf;
569 	struct fs_authinfo *ai;
570 	int32_t op;
571 	int error;
572 
573 	if (sc->fs_readonly)
574 		return (EROFS);
575 	dirf = dir->lo_aux;
576 	assert(dirf != NULL);
577 	if (fstatat(dirf->ff_dirfd, dirf->ff_name, st,
578 	    AT_SYMLINK_NOFOLLOW) != 0)
579 		return (errno);
580 	if (!S_ISDIR(st->st_mode))
581 		return (ENOTDIR);
582 	dirf = dir->lo_aux;
583 	ai = dirf->ff_ai;
584 	fillacl(dirf);
585 	op = isdir ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
586 	error = check_access(op, dirf->ff_acl, st, NULL, NULL, ai, egid);
587 	if (error)
588 		return (EPERM);
589 
590 	*nuid = ai->ai_uid != (uid_t)-1 ? ai->ai_uid : st->st_uid;
591 	*ngid = egid != (gid_t)-1 ? egid :
592 	    ai->ai_ngids > 0 ?  ai->ai_gids[0] : st->st_gid;
593 	return (0);
594 }
595 
596 /*
597  * Allocate new open-file data structure to attach to a fid.
598  *
599  * The new file's authinfo is the same as the old one's, and
600  * we gain a reference.
601  */
602 static struct fs_fid *
603 open_fid(int dirfd, const char *path, struct fs_authinfo *ai, bool creating)
604 {
605 	struct fs_fid *ret;
606 	uint32_t newcount;
607 	int error;
608 
609 	ret = l9p_calloc(1, sizeof(*ret));
610 #ifdef __illumos__
611 	error = pthread_mutex_init(&ret->ff_mtx, &fs_mutexattr);
612 #else
613 	error = pthread_mutex_init(&ret->ff_mtx, NULL);
614 #endif
615 	if (error) {
616 		free(ret);
617 		return (NULL);
618 	}
619 	ret->ff_fd = -1;
620 	ret->ff_dirfd = dirfd;
621 	ret->ff_name = strdup(path);
622 	if (ret->ff_name == NULL) {
623 		(void) pthread_mutex_destroy(&ret->ff_mtx);
624 		free(ret);
625 		return (NULL);
626 	}
627 	if (pthread_mutex_lock(&ai->ai_mtx) != 0) {
628 		(void) pthread_mutex_destroy(&ret->ff_mtx);
629 		free(ret->ff_name);
630 		free(ret);
631 		return (NULL);
632 	}
633 	newcount = ++ai->ai_refcnt;
634 	(void) pthread_mutex_unlock(&ai->ai_mtx);
635 	/*
636 	 * If we just incremented the count to 1, we're the *first*
637 	 * reference.  This is only allowed when creating the authinfo,
638 	 * otherwise it means something has gone wrong.  This cannot
639 	 * catch every bad (re)use of a freed authinfo but it may catch
640 	 * a few.
641 	 */
642 	assert(newcount > 1 || creating);
643 	L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
644 	    (void *)ai, (u_long)newcount);
645 	ret->ff_ai = ai;
646 	return (ret);
647 }
648 
649 static void
650 dostat(struct fs_softc *sc, struct l9p_stat *s, char *name,
651     struct stat *buf, bool dotu)
652 {
653 	struct passwd *user;
654 	struct group *group;
655 
656 	memset(s, 0, sizeof(struct l9p_stat));
657 
658 	generate_qid(buf, &s->qid);
659 
660 	s->type = 0;
661 	s->dev = 0;
662 	s->mode = buf->st_mode & 0777;
663 
664 	if (S_ISDIR(buf->st_mode))
665 		s->mode |= L9P_DMDIR;
666 
667 	if (S_ISLNK(buf->st_mode) && dotu)
668 		s->mode |= L9P_DMSYMLINK;
669 
670 	if (S_ISCHR(buf->st_mode) || S_ISBLK(buf->st_mode))
671 		s->mode |= L9P_DMDEVICE;
672 
673 	if (S_ISSOCK(buf->st_mode))
674 		s->mode |= L9P_DMSOCKET;
675 
676 	if (S_ISFIFO(buf->st_mode))
677 		s->mode |= L9P_DMNAMEDPIPE;
678 
679 	s->atime = (uint32_t)buf->st_atime;
680 	s->mtime = (uint32_t)buf->st_mtime;
681 	s->length = (uint64_t)buf->st_size;
682 
683 	s->name = r_basename(name, NULL, 0);
684 
685 	if (!dotu) {
686 		struct r_pgdata udata, gdata;
687 
688 		user = fs_getpwuid(sc, buf->st_uid, &udata);
689 		group = fs_getgrgid(sc, buf->st_gid, &gdata);
690 		s->uid = user != NULL ? strdup(user->pw_name) : NULL;
691 		s->gid = group != NULL ? strdup(group->gr_name) : NULL;
692 		s->muid = user != NULL ? strdup(user->pw_name) : NULL;
693 		r_pgfree(&udata);
694 		r_pgfree(&gdata);
695 	} else {
696 		/*
697 		 * When using 9P2000.u, we don't need to bother about
698 		 * providing user and group names in textual form.
699 		 *
700 		 * NB: if the asprintf()s fail, s->extension should
701 		 * be unset so we can ignore these.
702 		 */
703 		s->n_uid = buf->st_uid;
704 		s->n_gid = buf->st_gid;
705 		s->n_muid = buf->st_uid;
706 
707 		if (S_ISLNK(buf->st_mode)) {
708 			char target[MAXPATHLEN];
709 			ssize_t ret = readlink(name, target, MAXPATHLEN);
710 
711 			if (ret < 0) {
712 				s->extension = NULL;
713 				return;
714 			}
715 
716 			s->extension = strndup(target, (size_t)ret);
717 		}
718 
719 		if (S_ISBLK(buf->st_mode)) {
720 			asprintf(&s->extension, "b %d %d", major(buf->st_rdev),
721 			    minor(buf->st_rdev));
722 		}
723 
724 		if (S_ISCHR(buf->st_mode)) {
725 			asprintf(&s->extension, "c %d %d", major(buf->st_rdev),
726 			    minor(buf->st_rdev));
727 		}
728 	}
729 }
730 
731 #ifndef __illumos__
732 static void
733 dostatfs(struct l9p_statfs *out, struct statfs *in, long namelen)
734 #else
735 static void
736 dostatfs(struct l9p_statfs *out, struct statvfs *in, long namelen)
737 #endif
738 {
739 
740 	out->type = L9P_FSTYPE;
741 	out->bsize = in->f_bsize;
742 #ifndef __illumos__
743 	out->blocks = in->f_blocks;
744 	out->bfree = in->f_bfree;
745 	out->bavail = in->f_bavail;
746 #else
747 	out->blocks = in->f_blocks * in->f_frsize / in->f_bsize;
748 	out->bfree = in->f_bfree * in->f_frsize / in->f_bsize;
749 	out->bavail = in->f_bavail * in->f_frsize / in->f_bsize;
750 #endif
751 	out->files = in->f_files;
752 	out->ffree = in->f_ffree;
753 	out->namelen = (uint32_t)namelen;
754 	out->fsid = STATFS_FSID(in);
755 }
756 
757 static void
758 generate_qid(struct stat *buf, struct l9p_qid *qid)
759 {
760 	qid->path = buf->st_ino;
761 	qid->version = 0;
762 
763 	if (S_ISREG(buf->st_mode))
764 		qid->type |= L9P_QTFILE;
765 
766 	if (S_ISDIR(buf->st_mode))
767 		qid->type |= L9P_QTDIR;
768 
769 	if (S_ISLNK(buf->st_mode))
770 		qid->type |= L9P_QTSYMLINK;
771 }
772 
773 /*
774  * Fill in ff->ff_acl if it's not set yet.  Skip if the "don't use
775  * ACLs" flag is set, and use the flag to remember failure so
776  * we don't bother retrying either.
777  */
778 static void
779 fillacl(struct fs_fid *ff)
780 {
781 
782 	if (ff->ff_acl == NULL && (ff->ff_flags & FF_NO_NFSV4_ACL) == 0) {
783 		ff->ff_acl = look_for_nfsv4_acl(ff, ff->ff_fd, ff->ff_name);
784 		if (ff->ff_acl == NULL)
785 			ff->ff_flags |= FF_NO_NFSV4_ACL;
786 	}
787 }
788 
789 /*
790  * Get an ACL given fd and/or path name.  We check for the "don't get
791  * ACL" flag in the given ff_fid data structure first, but don't set
792  * the flag here.  The fillacl() code is similar but will set the
793  * flag; it also uses the ff_fd and ff_name directly.
794  *
795  * (This is used to get ACLs for parent directories, for instance.)
796  */
797 static struct l9p_acl *
798 getacl(struct fs_fid *ff, int fd, const char *path)
799 {
800 
801 	if (ff->ff_flags & FF_NO_NFSV4_ACL)
802 		return (NULL);
803 	return look_for_nfsv4_acl(ff, fd, path);
804 }
805 
806 /*
807  * Drop cached ff->ff_acl, e.g., after moving from one directory to
808  * another, where inherited ACLs might change.
809  */
810 static void
811 dropacl(struct fs_fid *ff)
812 {
813 
814 	l9p_acl_free(ff->ff_acl);
815 	ff->ff_acl = NULL;
816 	ff->ff_flags = ff->ff_ai->ai_flags;
817 }
818 
819 /*
820  * Check to see if we can find NFSv4 ACLs for the given file.
821  * If we have an open fd, we can use that, otherwise we need
822  * to use the path.
823  */
824 static struct l9p_acl *
825 look_for_nfsv4_acl(struct fs_fid *ff, int fd, const char *path)
826 {
827 	struct l9p_acl *acl;
828 #ifdef __illumos__
829 	acl_t *sysacl;
830 #else
831 	acl_t sysacl;
832 #endif
833 	int doclose = 0;
834 
835 	if (fd < 0) {
836 		fd = openat(ff->ff_dirfd, path, 0);
837 		doclose = 1;
838 	}
839 
840 	sysacl = acl_get_fd_np(fd, ACL_TYPE_NFS4);
841 	if (sysacl == NULL) {
842 		/*
843 		 * EINVAL means no NFSv4 ACLs apply for this file.
844 		 * Other error numbers indicate some kind of problem.
845 		 */
846 		if (errno != EINVAL) {
847 			L9P_LOG(L9P_ERROR,
848 			    "error retrieving NFSv4 ACL from "
849 			    "fdesc %d (%s): %s", fd,
850 			    path, strerror(errno));
851 		}
852 
853 		if (doclose)
854 			close(fd);
855 
856 		return (NULL);
857 	}
858 #if defined(HAVE_FREEBSD_ACLS)
859 	acl = l9p_freebsd_nfsv4acl_to_acl(sysacl);
860 #elif defined(HAVE__ILLUMOS_ACLS)
861 	acl = l9p_illumos_nfsv4acl_to_acl(sysacl);
862 #else
863 	acl = NULL; /* XXX need a l9p_darwin_acl_to_acl */
864 #endif
865 	acl_free(sysacl);
866 
867 	if (doclose)
868 		close(fd);
869 
870 	return (acl);
871 }
872 
873 /*
874  * Verify that the user whose authinfo is in <ai> and effective
875  * group ID is <egid> ((gid_t)-1 means no egid supplied) has
876  * permission to do something.
877  *
878  * The "something" may be rather complex: we allow NFSv4 style
879  * operation masks here, and provide parent and child ACLs and
880  * stat data.  At most one of pacl+pst and cacl+cst can be NULL,
881  * unless ACLs are not supported; then pacl and cacl can both
882  * be NULL but pst or cst must be non-NULL depending on the
883  * operation.
884  */
885 static int
886 check_access(int32_t opmask,
887     struct l9p_acl *pacl, struct stat *pst,
888     struct l9p_acl *cacl, struct stat *cst,
889     struct fs_authinfo *ai, gid_t egid)
890 {
891 	struct l9p_acl_check_args args;
892 
893 	/*
894 	 * If we have ACLs, use them exclusively, ignoring Unix
895 	 * permissions.  Otherwise, fall back on stat st_mode
896 	 * bits, and allow super-user as well.
897 	 */
898 	args.aca_uid = ai->ai_uid;
899 	args.aca_gid = egid;
900 	args.aca_groups = ai->ai_gids;
901 	args.aca_ngroups = (size_t)ai->ai_ngids;
902 	args.aca_parent = pacl;
903 	args.aca_pstat = pst;
904 	args.aca_child = cacl;
905 	args.aca_cstat = cst;
906 	args.aca_aclmode = pacl == NULL && cacl == NULL
907 	    ? L9P_ACM_STAT_MODE
908 	    : L9P_ACM_NFS_ACL | L9P_ACM_ZFS_ACL;
909 
910 	args.aca_superuser = true;
911 	return (l9p_acl_check_access(opmask, &args));
912 }
913 
914 static int
915 fs_attach(void *softc, struct l9p_request *req)
916 {
917 	struct fs_authinfo *ai;
918 	struct fs_softc *sc = (struct fs_softc *)softc;
919 	struct fs_fid *file;
920 	struct passwd *pwd;
921 	struct stat st;
922 	struct r_pgdata udata;
923 	uint32_t n_uname;
924 	gid_t *gids;
925 	uid_t uid;
926 	int error;
927 	int ngroups;
928 
929 	assert(req->lr_fid != NULL);
930 
931 	/*
932 	 * Single-thread pwd/group related items.  We have a reentrant
933 	 * r_getpwuid but not a reentrant r_getpwnam, and l9p_getgrlist
934 	 * may use non-reentrant C library getgr* routines.
935 	 */
936 	if ((error = pthread_mutex_lock(&fs_attach_mutex)) != 0)
937 		return (error);
938 
939 	n_uname = req->lr_req.tattach.n_uname;
940 	if (n_uname != L9P_NONUNAME) {
941 		uid = (uid_t)n_uname;
942 		pwd = fs_getpwuid(sc, uid, &udata);
943 #if defined(L9P_DEBUG)
944 		if (pwd == NULL)
945 			L9P_LOG(L9P_DEBUG,
946 			    "Tattach: uid %ld: no such user", (long)uid);
947 #endif
948 	} else {
949 		uid = (uid_t)-1;
950 #if defined(WITH_CASPER)
951 		pwd = cap_getpwnam(sc->fs_cappwd, req->lr_req.tattach.uname);
952 #else
953 		pwd = getpwnam(req->lr_req.tattach.uname);
954 #endif
955 #if defined(L9P_DEBUG)
956 		if (pwd == NULL)
957 			L9P_LOG(L9P_DEBUG,
958 			    "Tattach: %s: no such user",
959 			    req->lr_req.tattach.uname);
960 #endif
961 	}
962 
963 	/*
964 	 * If caller didn't give a numeric UID, pick it up from pwd
965 	 * if possible.  If that doesn't work we can't continue.
966 	 *
967 	 * Note that pwd also supplies the group set.  This assumes
968 	 * the server has the right mapping; this needs improvement.
969 	 * We do at least support ai->ai_ngids==0 properly now though.
970 	 */
971 	if (uid == (uid_t)-1 && pwd != NULL)
972 		uid = pwd->pw_uid;
973 	if (uid == (uid_t)-1)
974 		error = EPERM;
975 	else {
976 		error = 0;
977 		if (fstat(sc->fs_rootfd, &st) != 0)
978 			error = errno;
979 		else if (!S_ISDIR(st.st_mode))
980 			error = ENOTDIR;
981 	}
982 	if (error) {
983 		(void) pthread_mutex_unlock(&fs_attach_mutex);
984 		L9P_LOG(L9P_DEBUG,
985 		    "Tattach: denying uid=%ld access to rootdir: %s",
986 		    (long)uid, strerror(error));
987 		/*
988 		 * Pass ENOENT and ENOTDIR through for diagnosis;
989 		 * others become EPERM.  This should not leak too
990 		 * much security.
991 		 */
992 		return (error == ENOENT || error == ENOTDIR ? error : EPERM);
993 	}
994 
995 	if (pwd != NULL) {
996 		/*
997 		 * This either succeeds and fills in ngroups and
998 		 * returns non-NULL, or fails and sets ngroups to 0
999 		 * and returns NULL.  Either way ngroups is correct.
1000 		 */
1001 		gids = l9p_getgrlist(pwd->pw_name, pwd->pw_gid, &ngroups);
1002 	} else {
1003 		gids = NULL;
1004 		ngroups = 0;
1005 	}
1006 
1007 	/*
1008 	 * Done with pwd and group related items that may use
1009 	 * non-reentrant C library routines; allow other threads in.
1010 	 */
1011 	(void) pthread_mutex_unlock(&fs_attach_mutex);
1012 
1013 	ai = malloc(sizeof(*ai) + (size_t)ngroups * sizeof(gid_t));
1014 	if (ai == NULL) {
1015 		free(gids);
1016 		return (ENOMEM);
1017 	}
1018 #ifdef __illumos__
1019 	error = pthread_mutex_init(&ai->ai_mtx, &fs_mutexattr);
1020 #else
1021 	error = pthread_mutex_init(&ai->ai_mtx, NULL);
1022 #endif
1023 	if (error) {
1024 		free(gids);
1025 		free(ai);
1026 		return (error);
1027 	}
1028 	ai->ai_refcnt = 0;
1029 	ai->ai_uid = uid;
1030 	ai->ai_flags = 0;	/* XXX for now */
1031 	ai->ai_ngids = ngroups;
1032 	memcpy(ai->ai_gids, gids, (size_t)ngroups * sizeof(gid_t));
1033 	free(gids);
1034 
1035 	file = open_fid(sc->fs_rootfd, ".", ai, true);
1036 	if (file == NULL) {
1037 		(void) pthread_mutex_destroy(&ai->ai_mtx);
1038 		free(ai);
1039 		return (ENOMEM);
1040 	}
1041 
1042 	req->lr_fid->lo_aux = file;
1043 	generate_qid(&st, &req->lr_resp.rattach.qid);
1044 	return (0);
1045 }
1046 
1047 static int
1048 fs_clunk(void *softc __unused, struct l9p_fid *fid)
1049 {
1050 	struct fs_fid *file;
1051 
1052 	file = fid->lo_aux;
1053 	assert(file != NULL);
1054 
1055 	if (file->ff_dir) {
1056 		closedir(file->ff_dir);
1057 		file->ff_dir = NULL;
1058 	} else if (file->ff_fd != -1) {
1059 		close(file->ff_fd);
1060 		file->ff_fd = -1;
1061 	}
1062 
1063 	return (0);
1064 }
1065 
1066 /*
1067  * Create ops.
1068  *
1069  * We are to create a new file under some existing path,
1070  * where the new file's name is in the Tcreate request and the
1071  * existing path is due to a fid-based file (req->lr_fid).
1072  *
1073  * One op (create regular file) sets file->fd, the rest do not.
1074  */
1075 static int
1076 fs_create(void *softc, struct l9p_request *req)
1077 {
1078 	struct l9p_fid *dir;
1079 	struct stat st;
1080 	uint32_t dmperm;
1081 	mode_t perm;
1082 	char *name;
1083 	int error;
1084 
1085 	dir = req->lr_fid;
1086 	name = req->lr_req.tcreate.name;
1087 	dmperm = req->lr_req.tcreate.perm;
1088 	perm = (mode_t)(dmperm & 0777);
1089 
1090 	if (dmperm & L9P_DMDIR)
1091 		error = fs_imkdir(softc, dir, name, true,
1092 		    perm, (gid_t)-1, &st);
1093 	else if (dmperm & L9P_DMSYMLINK)
1094 		error = fs_isymlink(softc, dir, name,
1095 		    req->lr_req.tcreate.extension, (gid_t)-1, &st);
1096 	else if (dmperm & L9P_DMNAMEDPIPE)
1097 		error = fs_imkfifo(softc, dir, name, true,
1098 		    perm, (gid_t)-1, &st);
1099 	else if (dmperm & L9P_DMSOCKET)
1100 		error = fs_imksocket(softc, dir, name, true,
1101 		    perm, (gid_t)-1, &st);
1102 	else if (dmperm & L9P_DMDEVICE) {
1103 		unsigned int major, minor;
1104 		char type;
1105 		dev_t dev;
1106 
1107 		/*
1108 		 * ??? Should this be testing < 3?  For now, allow a single
1109 		 * integer mode with minor==0 implied.
1110 		 */
1111 		minor = 0;
1112 		if (sscanf(req->lr_req.tcreate.extension, "%c %u %u",
1113 		    &type, &major, &minor) < 2) {
1114 			return (EINVAL);
1115 		}
1116 
1117 		switch (type) {
1118 		case 'b':
1119 			perm |= S_IFBLK;
1120 			break;
1121 		case 'c':
1122 			perm |= S_IFCHR;
1123 			break;
1124 		default:
1125 			return (EINVAL);
1126 		}
1127 		dev = makedev(major, minor);
1128 		error = fs_imknod(softc, dir, name, true, perm, dev,
1129 		    (gid_t)-1, &st);
1130 	} else {
1131 		enum l9p_omode p9;
1132 		int flags;
1133 
1134 		p9 = req->lr_req.tcreate.mode;
1135 		error = fs_oflags_dotu(p9, &flags);
1136 		if (error)
1137 			return (error);
1138 		error = fs_icreate(softc, dir, name, flags,
1139 		    true, perm, (gid_t)-1, &st);
1140 		req->lr_resp.rcreate.iounit = req->lr_conn->lc_max_io_size;
1141 	}
1142 
1143 	if (error == 0)
1144 		generate_qid(&st, &req->lr_resp.rcreate.qid);
1145 
1146 	return (error);
1147 }
1148 
/*
 * Plan 9 create-permission masking.
 *
 * https://swtch.com/plan9port/man/man9/open.html and
 * http://plan9.bell-labs.com/magic/man2html/5/open
 * specify that a newly created object gets
 *     perm & (~0666 | (dir.perm & 0666))
 * if it is a file, and
 *     perm & (~0777 | (dir.perm & 0777))
 * if it is a directory.  In other words the parent directory can
 * only remove permission bits that the request asked for, never
 * add any; bits outside the 0666/0777 window pass through untouched.
 *
 * This is arguably too strict and probably deserves a tunable.
 */
static inline mode_t
fs_p9perm(mode_t perm, mode_t dir_perm, bool isdir)
{
	const mode_t window = isdir ? 0777 : 0666;

	return (perm & (~window | (dir_perm & window)));
}
1172 
1173 /*
1174  * Internal form of create (plain file).
1175  *
1176  * Our caller takes care of splitting off all the special
1177  * types of create (mknod, etc), so this is purely for files.
1178  * We receive the fs_softc <softc>, the directory fid <dir>
1179  * in which the new file is to be created, the name of the
1180  * new file, a flag <isp9> indicating whether to do plan9 style
1181  * permissions or Linux style permissions, the permissions <perm>,
1182  * an effective group id <egid>, and a pointer to a stat structure
1183  * <st> to fill in describing the final result on success.
1184  *
1185  * On successful create, the fid switches to the newly created
1186  * file, which is now open; its associated file-name changes too.
1187  *
1188  * Note that the original (dir) fid is never currently open,
1189  * so there is nothing to close.
1190  */
1191 static int
1192 fs_icreate(void *softc, struct l9p_fid *dir, char *name, int flags,
1193     bool isp9, mode_t perm, gid_t egid, struct stat *st)
1194 {
1195 	struct fs_fid *file;
1196 	gid_t gid;
1197 	uid_t uid;
1198 	char newname[MAXPATHLEN];
1199 	int error, fd;
1200 
1201 	file = dir->lo_aux;
1202 
1203 	/*
1204 	 * Build full path name from directory + file name.  We'll
1205 	 * check permissions on the parent directory, then race to
1206 	 * create the file before anything bad happens like symlinks.
1207 	 *
1208 	 * (To close this race we need to use openat(), which is
1209 	 * left for a later version of this code.)
1210 	 */
1211 	error = fs_buildname(dir, name, newname, sizeof(newname));
1212 	if (error)
1213 		return (error);
1214 
1215 	/* In case of success, we will need a new file->ff_name. */
1216 	name = strdup(newname);
1217 	if (name == NULL)
1218 		return (ENOMEM);
1219 
1220 	/* Check create permission and compute new file ownership. */
1221 	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1222 	if (error) {
1223 		free(name);
1224 		return (error);
1225 	}
1226 
1227 	/* Adjust new-file permissions for Plan9 protocol. */
1228 	if (isp9)
1229 		perm = fs_p9perm(perm, st->st_mode, false);
1230 
1231 	/* Create is always exclusive so O_TRUNC is irrelevant. */
1232 	fd = openat(file->ff_dirfd, newname, flags | O_CREAT | O_EXCL, perm);
1233 	if (fd < 0) {
1234 		error = errno;
1235 		free(name);
1236 		return (error);
1237 	}
1238 
1239 	/* Fix permissions and owner. */
1240 	if (fchmod(fd, perm) != 0 ||
1241 	    fchown(fd, uid, gid) != 0 ||
1242 	    fstat(fd, st) != 0) {
1243 		error = errno;
1244 		(void) close(fd);
1245 		/* unlink(newname); ? */
1246 		free(name);
1247 		return (error);
1248 	}
1249 
1250 	/* It *was* a directory; now it's a file, and it's open. */
1251 	free(file->ff_name);
1252 	file->ff_name = name;
1253 	file->ff_fd = fd;
1254 	return (0);
1255 }
1256 
1257 /*
1258  * Internal form of open: stat file and verify permissions (from p9
1259  * argument), then open the file-or-directory, leaving the internal
1260  * fs_fid fields set up.  If we cannot open the file, return a
1261  * suitable error number, and leave everything unchanged.
1262  *
1263  * To mitigate the race between permissions testing and the actual
1264  * open, we can stat the file twice (once with lstat() before open,
1265  * then with fstat() after).  We assume O_NOFOLLOW is set in flags,
1266  * so if some other race-winner substitutes in a symlink we won't
1267  * open it here.  (However, embedded symlinks, if they occur, are
1268  * still an issue.  Ideally we would like to have an O_NEVERFOLLOW
1269  * that fails on embedded symlinks, and a way to pass this to
1270  * lstat() as well.)
1271  *
1272  * When we use opendir() we cannot pass O_NOFOLLOW, so we must rely
1273  * on substitution-detection via fstat().  To simplify the code we
1274  * just always re-check.
1275  *
1276  * (For a proper fix in the future, we can require openat(), keep
1277  * each parent directory open during walk etc, and allow only final
1278  * name components with O_NOFOLLOW.)
1279  *
1280  * On successful return, st has been filled in.
1281  */
1282 static int
1283 fs_iopen(void *softc, struct l9p_fid *fid, int flags, enum l9p_omode p9,
1284     gid_t egid __unused, struct stat *st)
1285 {
1286 	struct fs_softc *sc = softc;
1287 	struct fs_fid *file;
1288 	struct stat first;
1289 	int32_t op;
1290 	char *name;
1291 	int error;
1292 	int fd;
1293 	DIR *dirp;
1294 
1295 	/* Forbid write ops on read-only file system. */
1296 	if (sc->fs_readonly) {
1297 		if ((flags & O_TRUNC) != 0)
1298 			return (EROFS);
1299 		if ((flags & O_ACCMODE) != O_RDONLY)
1300 			return (EROFS);
1301 		if (p9 & L9P_ORCLOSE)
1302 			return (EROFS);
1303 	}
1304 
1305 	file = fid->lo_aux;
1306 	assert(file != NULL);
1307 	name = file->ff_name;
1308 
1309 	if (fstatat(file->ff_dirfd, name, &first, AT_SYMLINK_NOFOLLOW) != 0)
1310 		return (errno);
1311 	if (S_ISLNK(first.st_mode))
1312 		return (EPERM);
1313 
1314 	/* Can we rely on O_APPEND here?  Best not, can be cleared. */
1315 	switch (flags & O_ACCMODE) {
1316 	case O_RDONLY:
1317 		op = L9P_ACE_READ_DATA;
1318 		break;
1319 	case O_WRONLY:
1320 		op = L9P_ACE_WRITE_DATA;
1321 		break;
1322 	case O_RDWR:
1323 		op = L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA;
1324 		break;
1325 	default:
1326 		return (EINVAL);
1327 	}
1328 	fillacl(file);
1329 	error = check_access(op, NULL, NULL, file->ff_acl, &first,
1330 	    file->ff_ai, (gid_t)-1);
1331 	if (error)
1332 		return (error);
1333 
1334 	if (S_ISDIR(first.st_mode)) {
1335 		/* Forbid write or truncate on directory. */
1336 		if ((flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC))
1337 			return (EPERM);
1338 		fd = openat(file->ff_dirfd, name, O_DIRECTORY);
1339 		dirp = fdopendir(fd);
1340 		if (dirp == NULL)
1341 			return (EPERM);
1342 		fd = dirfd(dirp);
1343 	} else {
1344 		dirp = NULL;
1345 		fd = openat(file->ff_dirfd, name, flags);
1346 		if (fd < 0)
1347 			return (EPERM);
1348 	}
1349 
1350 	/*
1351 	 * We have a valid fd, and maybe non-null dirp.  Re-check
1352 	 * the file, and fail if st_dev or st_ino changed.
1353 	 */
1354 	if (fstat(fd, st) != 0 ||
1355 	    first.st_dev != st->st_dev ||
1356 	    first.st_ino != st->st_ino) {
1357 		if (dirp != NULL)
1358 			(void) closedir(dirp);
1359 		else
1360 			(void) close(fd);
1361 		return (EPERM);
1362 	}
1363 	if (dirp != NULL)
1364 		file->ff_dir = dirp;
1365 	else
1366 		file->ff_fd = fd;
1367 	return (0);
1368 }
1369 
1370 /*
1371  * Internal form of mkdir (common code for all forms).
1372  * We receive the fs_softc <softc>, the directory fid <dir>
1373  * in which the new entry is to be created, the name of the
1374  * new entry, a flag <isp9> indicating whether to do plan9 style
1375  * permissions or Linux style permissions, the permissions <perm>,
1376  * an effective group id <egid>, and a pointer to a stat structure
1377  * <st> to fill in describing the final result on success.
1378  *
1379  * See also fs_icreate() above.
1380  */
1381 static int
1382 fs_imkdir(void *softc, struct l9p_fid *dir, char *name,
1383     bool isp9, mode_t perm, gid_t egid, struct stat *st)
1384 {
1385 	struct fs_fid *ff;
1386 	gid_t gid;
1387 	uid_t uid;
1388 	char newname[MAXPATHLEN];
1389 	int error, fd;
1390 
1391 	ff = dir->lo_aux;
1392 	error = fs_buildname(dir, name, newname, sizeof(newname));
1393 	if (error)
1394 		return (error);
1395 
1396 	error = fs_nde(softc, dir, true, egid, st, &uid, &gid);
1397 	if (error)
1398 		return (error);
1399 
1400 	if (isp9)
1401 		perm = fs_p9perm(perm, st->st_mode, true);
1402 
1403 	if (mkdirat(ff->ff_dirfd, newname, perm) != 0)
1404 		return (errno);
1405 
1406 	fd = openat(ff->ff_dirfd, newname,
1407 	    O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
1408 	if (fd < 0 ||
1409 	    fchown(fd, uid, gid) != 0 ||
1410 	    fchmod(fd, perm) != 0 ||
1411 	    fstat(fd, st) != 0) {
1412 		error = errno;
1413 		/* rmdir(newname) ? */
1414 	}
1415 	if (fd >= 0)
1416 		(void) close(fd);
1417 
1418 	return (error);
1419 }
1420 
#ifdef __APPLE__
/*
 * Thin wrapper around the undocumented OS X __pthread_fchdir
 * syscall.  It would be nicer not to depend on it, but there does
 * not appear to be any other safe way to get the effect of mknodat
 * on this platform.  Dear Apple, please implement mknodat before
 * you remove this syscall.
 *
 * Callers pass a directory descriptor before a path-based call and
 * -1 afterwards to stop using the thread-local working directory.
 * The raw syscall result is returned unchanged.
 */
static int
fs_ifchdir_thread_local(int fd)
{
	int rv;

	/* syscall() is flagged as deprecated; silence just this use. */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
	rv = syscall(SYS___pthread_fchdir, fd);
#pragma clang diagnostic pop
	return (rv);
}
#endif
1435 
1436 /*
1437  * Internal form of mknod (special device).
1438  *
1439  * The device type (S_IFBLK, S_IFCHR) is included in the <mode> parameter.
1440  */
1441 static int
1442 fs_imknod(void *softc, struct l9p_fid *dir, char *name,
1443     bool isp9, mode_t mode, dev_t dev, gid_t egid, struct stat *st)
1444 {
1445 	struct fs_fid *ff;
1446 	mode_t perm;
1447 	gid_t gid;
1448 	uid_t uid;
1449 	char newname[MAXPATHLEN];
1450 	int error;
1451 
1452 	ff = dir->lo_aux;
1453 	error = fs_buildname(dir, name, newname, sizeof(newname));
1454 	if (error)
1455 		return (error);
1456 
1457 	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1458 	if (error)
1459 		return (error);
1460 
1461 	if (isp9) {
1462 		perm = fs_p9perm(mode & 0777, st->st_mode, false);
1463 		mode = (mode & ~0777) | perm;
1464 	} else {
1465 		perm = mode & 0777;
1466 	}
1467 
1468 #ifdef __APPLE__
1469 	if (fs_ifchdir_thread_local(ff->ff_dirfd) < 0) {
1470 		return -1;
1471 	}
1472 	error = mknod(newname, mode, dev);
1473 	int preserved_errno = errno;
1474 	/* Stop using the thread-local cwd */
1475 	fs_ifchdir_thread_local(-1);
1476 	if (error < 0) {
1477 		errno = preserved_errno;
1478 		return errno;
1479 	}
1480 #else
1481 	if (mknodat(ff->ff_dirfd, newname, mode, dev) != 0)
1482 		return (errno);
1483 #endif
1484 
1485 	/* We cannot open the new name; race to use l* syscalls. */
1486 	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1487 	    fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 ||
1488 	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1489 		error = errno;
1490 	else if ((st->st_mode & S_IFMT) != (mode & S_IFMT))
1491 		error = EPERM;		/* ??? lost a race anyway */
1492 
1493 	/* if (error) unlink(newname) ? */
1494 
1495 	return (error);
1496 }
1497 
1498 /*
1499  * Internal form of mkfifo.
1500  */
1501 static int
1502 fs_imkfifo(void *softc, struct l9p_fid *dir, char *name,
1503     bool isp9, mode_t perm, gid_t egid, struct stat *st)
1504 {
1505 	struct fs_fid *ff;
1506 	gid_t gid;
1507 	uid_t uid;
1508 	char newname[MAXPATHLEN];
1509 	int error;
1510 
1511 	ff = dir->lo_aux;
1512 	error = fs_buildname(dir, name, newname, sizeof(newname));
1513 	if (error)
1514 		return (error);
1515 
1516 	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1517 	if (error)
1518 		return (error);
1519 
1520 	if (isp9)
1521 		perm = fs_p9perm(perm, st->st_mode, false);
1522 
1523 	if (mkfifo(newname, perm) != 0)
1524 		return (errno);
1525 
1526 	/* We cannot open the new name; race to use l* syscalls. */
1527 	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1528 	    fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 ||
1529 	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1530 		error = errno;
1531 	else if (!S_ISFIFO(st->st_mode))
1532 		error = EPERM;		/* ??? lost a race anyway */
1533 
1534 	/* if (error) unlink(newname) ? */
1535 
1536 	return (error);
1537 }
1538 
1539 /*
1540  * Internal form of mksocket.
1541  *
1542  * This is a bit different because of the horrible socket naming
1543  * system (bind() with sockaddr_un sun_path).
1544  */
1545 static int
1546 fs_imksocket(void *softc, struct l9p_fid *dir, char *name,
1547     bool isp9, mode_t perm, gid_t egid, struct stat *st)
1548 {
1549 	struct fs_fid *ff;
1550 	struct sockaddr_un un;
1551 	char *path;
1552 	char newname[MAXPATHLEN];
1553 	gid_t gid;
1554 	uid_t uid;
1555 	int error = 0, s, fd, slen;
1556 
1557 	ff = dir->lo_aux;
1558 	error = fs_buildname(dir, name, newname, sizeof(newname));
1559 	if (error)
1560 		return (error);
1561 
1562 	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1563 	if (error)
1564 		return (error);
1565 
1566 	if (isp9)
1567 		perm = fs_p9perm(perm, st->st_mode, false);
1568 
1569 	s = socket(AF_UNIX, SOCK_STREAM, 0);
1570 	if (s < 0)
1571 		return (errno);
1572 
1573 	path = newname;
1574 	fd = -1;
1575 #ifdef HAVE_BINDAT
1576 	/* Try bindat() if needed. */
1577 	if (strlen(path) >= sizeof(un.sun_path)) {
1578 		fd = openat(ff->ff_dirfd, ff->ff_name,
1579 		    O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
1580 		if (fd >= 0)
1581 			path = name;
1582 	}
1583 #endif
1584 
1585 	/*
1586 	 * Can only create the socket if the path will fit.
1587 	 * Even if we are using bindat() there are limits
1588 	 * (the API for AF_UNIX sockets is ... not good).
1589 	 *
1590 	 * Note: in theory we can fill sun_path to the end
1591 	 * (omitting a terminating '\0') but in at least one
1592 	 * Unix-like system, this was known to behave oddly,
1593 	 * so we test for ">=" rather than just ">".
1594 	 */
1595 	if (strlen(path) >= sizeof(un.sun_path)) {
1596 		error = ENAMETOOLONG;
1597 		goto out;
1598 	}
1599 	un.sun_family = AF_UNIX;
1600 #ifndef __illumos__
1601 	slen = un.sun_len = sizeof(struct sockaddr_un);
1602 #else
1603 	slen = SUN_LEN(&un);
1604 #endif
1605 
1606 	strncpy(un.sun_path, path, sizeof(un.sun_path));
1607 
1608 #ifdef HAVE_BINDAT
1609 	if (fd >= 0) {
1610 		if (bindat(fd, s, (struct sockaddr *)&un, slen) < 0)
1611 			error = errno;
1612 		goto out;	/* done now, for good or ill */
1613 	}
1614 #endif
1615 
1616 	if (bind(s, (struct sockaddr *)&un, slen) < 0)
1617 		error = errno;
1618 out:
1619 
1620 	if (error == 0) {
1621 		/*
1622 		 * We believe we created the socket-inode.  Fix
1623 		 * permissions etc.  Note that we cannot use
1624 		 * fstat() on the socket descriptor: it succeeds,
1625 		 * but we get bogus data!
1626 		 */
1627 		if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1628 		    fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 ||
1629 		    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1630 			error = errno;
1631 		else if (!S_ISSOCK(st->st_mode))
1632 			error = EPERM;		/* ??? lost a race anyway */
1633 
1634 		/* if (error) unlink(newname) ? */
1635 	}
1636 
1637 	/*
1638 	 * It's not clear which error should override, although
1639 	 * ideally we should never see either close() call fail.
1640 	 * In any case we do want to try to close both fd and s,
1641 	 * always.  Let's set error only if it is not already set,
1642 	 * so that all exit paths can use the same code.
1643 	 */
1644 	if (fd >= 0 && close(fd) != 0)
1645 		if (error == 0)
1646 			error = errno;
1647 	if (close(s) != 0)
1648 		if (error == 0)
1649 			error = errno;
1650 
1651 	return (error);
1652 }
1653 
1654 /*
1655  * Internal form of symlink.
1656  *
1657  * Note that symlinks are presumed to carry no permission bits.
1658  * They do have owners, however (who may be charged for quotas).
1659  */
1660 static int
1661 fs_isymlink(void *softc, struct l9p_fid *dir, char *name,
1662     char *symtgt, gid_t egid, struct stat *st)
1663 {
1664 	struct fs_fid *ff;
1665 	gid_t gid;
1666 	uid_t uid;
1667 	char newname[MAXPATHLEN];
1668 	int error;
1669 
1670 	ff = dir->lo_aux;
1671 	error = fs_buildname(dir, name, newname, sizeof(newname));
1672 	if (error)
1673 		return (error);
1674 
1675 	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1676 	if (error)
1677 		return (error);
1678 
1679 	if (symlinkat(symtgt, ff->ff_dirfd, newname) != 0)
1680 		return (errno);
1681 
1682 	/* We cannot open the new name; race to use l* syscalls. */
1683 	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1684 	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1685 		error = errno;
1686 	else if (!S_ISLNK(st->st_mode))
1687 		error = EPERM;		/* ??? lost a race anyway */
1688 
1689 	/* if (error) unlink(newname) ? */
1690 
1691 	return (error);
1692 }
1693 
1694 static int
1695 fs_open(void *softc, struct l9p_request *req)
1696 {
1697 	struct l9p_fid *fid = req->lr_fid;
1698 	struct stat st;
1699 	enum l9p_omode p9;
1700 	int error, flags;
1701 
1702 	p9 = req->lr_req.topen.mode;
1703 	error = fs_oflags_dotu(p9, &flags);
1704 	if (error)
1705 		return (error);
1706 
1707 	error = fs_iopen(softc, fid, flags, p9, (gid_t)-1, &st);
1708 	if (error)
1709 		return (error);
1710 
1711 	generate_qid(&st, &req->lr_resp.ropen.qid);
1712 	req->lr_resp.ropen.iounit = req->lr_conn->lc_max_io_size;
1713 	return (0);
1714 }
1715 
1716 /*
1717  * Helper for directory read.  We want to run an lstat on each
1718  * file name within the directory.  This is a lot faster if we
1719  * have lstatat (or fstatat with AT_SYMLINK_NOFOLLOW), but not
1720  * all systems do, so hide the ifdef-ed code in an inline function.
1721  */
1722 static inline int
1723 fs_lstatat(struct fs_fid *file, char *name, struct stat *st)
1724 {
1725 
1726 	return (fstatat(dirfd(file->ff_dir), name, st, AT_SYMLINK_NOFOLLOW));
1727 }
1728 
1729 static int
1730 fs_read(void *softc, struct l9p_request *req)
1731 {
1732 	struct l9p_stat l9stat;
1733 	struct fs_softc *sc;
1734 	struct fs_fid *file;
1735 	bool dotu = req->lr_conn->lc_version >= L9P_2000U;
1736 	ssize_t ret;
1737 
1738 	sc = softc;
1739 	file = req->lr_fid->lo_aux;
1740 	assert(file != NULL);
1741 
1742 	if (file->ff_dir != NULL) {
1743 		struct dirent *d;
1744 		struct stat st;
1745 		struct l9p_message msg;
1746 		long o;
1747 		int err;
1748 
1749 		if ((err = pthread_mutex_lock(&file->ff_mtx)) != 0)
1750 			return (err);
1751 
1752 		/*
1753 		 * Must use telldir before readdir since seekdir
1754 		 * takes cookie values.  Unfortunately this wastes
1755 		 * a lot of time (and memory) building unneeded
1756 		 * cookies that can only be flushed by closing
1757 		 * the directory.
1758 		 *
1759 		 * NB: FreeBSD libc seekdir has SINGLEUSE defined,
1760 		 * so in fact, we can discard the cookies by
1761 		 * calling seekdir on them.  This clears up wasted
1762 		 * memory at the cost of even more wasted time...
1763 		 *
1764 		 * XXX: readdir/telldir/seekdir not thread safe
1765 		 */
1766 		l9p_init_msg(&msg, req, L9P_PACK);
1767 		for (;;) {
1768 			o = telldir(file->ff_dir);
1769 			d = readdir(file->ff_dir);
1770 			if (d == NULL)
1771 				break;
1772 			if (fs_lstatat(file, d->d_name, &st))
1773 				continue;
1774 			dostat(sc, &l9stat, d->d_name, &st, dotu);
1775 			if (l9p_pack_stat(&msg, req, &l9stat) != 0) {
1776 				seekdir(file->ff_dir, o);
1777 				break;
1778 			}
1779 #if defined(__FreeBSD__)
1780 			seekdir(file->ff_dir, o);
1781 			(void) readdir(file->ff_dir);
1782 #endif
1783 		}
1784 
1785 		(void) pthread_mutex_unlock(&file->ff_mtx);
1786 	} else {
1787 		size_t niov = l9p_truncate_iov(req->lr_data_iov,
1788                     req->lr_data_niov, req->lr_req.io.count);
1789 
1790 #if defined(__FreeBSD__) || defined(__illumos__)
1791 		ret = preadv(file->ff_fd, req->lr_data_iov, niov,
1792 		    req->lr_req.io.offset);
1793 #else
1794 		/* XXX: not thread safe, should really use aio_listio. */
1795 		if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
1796 			return (errno);
1797 
1798 		ret = (uint32_t)readv(file->ff_fd, req->lr_data_iov, (int)niov);
1799 #endif
1800 
1801 		if (ret < 0)
1802 			return (errno);
1803 
1804 		req->lr_resp.io.count = (uint32_t)ret;
1805 	}
1806 
1807 	return (0);
1808 }
1809 
1810 static int
1811 fs_remove(void *softc, struct l9p_fid *fid)
1812 {
1813 	struct fs_softc *sc = softc;
1814 	struct l9p_acl *parent_acl;
1815 	struct fs_fid *file;
1816 	struct stat pst, cst;
1817 	char dirname[MAXPATHLEN];
1818 	int error;
1819 
1820 	if (sc->fs_readonly)
1821 		return (EROFS);
1822 
1823 	error = fs_pdir(sc, fid, dirname, sizeof(dirname), &pst);
1824 	if (error)
1825 		return (error);
1826 
1827 	file = fid->lo_aux;
1828 	if (fstatat(file->ff_dirfd, file->ff_name, &cst, AT_SYMLINK_NOFOLLOW) != 0)
1829 		return (error);
1830 
1831 	parent_acl = getacl(file, -1, dirname);
1832 	fillacl(file);
1833 
1834 	error = check_access(L9P_ACOP_UNLINK,
1835 	    parent_acl, &pst, file->ff_acl, &cst, file->ff_ai, (gid_t)-1);
1836 	l9p_acl_free(parent_acl);
1837 	if (error)
1838 		return (error);
1839 
1840 	if (unlinkat(file->ff_dirfd, file->ff_name,
1841 	    S_ISDIR(cst.st_mode) ? AT_REMOVEDIR : 0) != 0) {
1842 		error = errno;
1843 		if (error == EEXIST && S_ISDIR(cst.st_mode))
1844 			error = ENOTEMPTY;
1845 	}
1846 
1847 	return (error);
1848 }
1849 
1850 static int
1851 fs_stat(void *softc, struct l9p_request *req)
1852 {
1853 	struct fs_softc *sc;
1854 	struct fs_fid *file;
1855 	struct stat st;
1856 	bool dotu = req->lr_conn->lc_version >= L9P_2000U;
1857 
1858 	sc = softc;
1859 	file = req->lr_fid->lo_aux;
1860 	assert(file);
1861 
1862 	if (fstatat(file->ff_dirfd, file->ff_name, &st,
1863 	    AT_SYMLINK_NOFOLLOW) != 0)
1864 		return (errno);
1865 
1866 	dostat(sc, &req->lr_resp.rstat.stat, file->ff_name, &st, dotu);
1867 	return (0);
1868 }
1869 
1870 static int
1871 fs_walk(void *softc, struct l9p_request *req)
1872 {
1873 	struct l9p_acl *acl;
1874 	struct fs_authinfo *ai;
1875 	struct fs_fid *file = req->lr_fid->lo_aux;
1876 	struct fs_fid *newfile;
1877 	struct stat st;
1878 	size_t clen, namelen, need;
1879 	char *comp, *succ, *next, *swtmp;
1880 	bool atroot;
1881 	bool dotdot;
1882 	int i, nwname;
1883 	int error = 0;
1884 	char namebufs[2][MAXPATHLEN];
1885 
1886 	/*
1887 	 * https://swtch.com/plan9port/man/man9/walk.html:
1888 	 *
1889 	 *    It is legal for nwname to be zero, in which case newfid
1890 	 *    will represent the same file as fid and the walk will
1891 	 *    usually succeed; this is equivalent to walking to dot.
1892 	 * [Aside: it's not clear if we should test S_ISDIR here.]
1893 	 *    ...
1894 	 *    The name ".." ... represents the parent directory.
1895 	 *    The name "." ... is not used in the protocol.
1896 	 *    ... A walk of the name ".." in the root directory
1897 	 *    of the server is equivalent to a walk with no name
1898 	 *    elements.
1899 	 *
1900 	 * Note that req.twalk.nwname never exceeds L9P_MAX_WELEM,
1901 	 * so it is safe to convert to plain int.
1902 	 *
1903 	 * We are to return an error only if the first walk fails,
1904 	 * else stop at the end of the names or on the first error.
1905 	 * The final fid is based on the last name successfully
1906 	 * walked.
1907 	 *
1908 	 * Note that we *do* get Twalk requests with nwname==0 on files.
1909 	 *
1910 	 * Set up "successful name" buffer pointer with base fid name,
1911 	 * initially.  We'll swap each new success into it as we go.
1912 	 *
1913 	 * Invariant: atroot and stat data correspond to current
1914 	 * (succ) path.
1915 	 */
1916 	succ = namebufs[0];
1917 	next = namebufs[1];
1918 	namelen = strlcpy(succ, file->ff_name, MAXPATHLEN);
1919 	if (namelen >= MAXPATHLEN)
1920 		return (ENAMETOOLONG);
1921 	if (fstatat(file->ff_dirfd, succ, &st, AT_SYMLINK_NOFOLLOW) < 0)
1922 		return (errno);
1923 	ai = file->ff_ai;
1924 	atroot = strlen(succ) == 0; /* XXX? */
1925 	fillacl(file);
1926 	acl = file->ff_acl;
1927 
1928 	nwname = (int)req->lr_req.twalk.nwname;
1929 
1930 	for (i = 0; i < nwname; i++) {
1931 		/*
1932 		 * Must have execute permission to search a directory.
1933 		 * Then, look up each component in its directory-so-far.
1934 		 * Check for ".." along the way, handlng specially
1935 		 * as needed.  Forbid "/" in name components.
1936 		 *
1937 		 */
1938 		if (!S_ISDIR(st.st_mode)) {
1939 			error = ENOTDIR;
1940 			goto out;
1941 		}
1942 		error = check_access(L9P_ACE_EXECUTE,
1943 		     NULL, NULL, acl, &st, ai, (gid_t)-1);
1944 		if (error) {
1945 			L9P_LOG(L9P_DEBUG,
1946 			    "Twalk: denying dir-walk on \"%s\" for uid %u",
1947 			    succ, (unsigned)ai->ai_uid);
1948 			error = EPERM;
1949 			goto out;
1950 		}
1951 		comp = req->lr_req.twalk.wname[i];
1952 		if (strchr(comp, '/') != NULL) {
1953 			error = EINVAL;
1954 			break;
1955 		}
1956 
1957 		clen = strlen(comp);
1958 		dotdot = false;
1959 
1960 		/*
1961 		 * Build next pathname (into "next").  If "..",
1962 		 * just strip one name component off the success
1963 		 * name so far.  Since we know this name fits, the
1964 		 * stripped down version also fits.  Otherwise,
1965 		 * the name is the base name plus '/' plus the
1966 		 * component name plus terminating '\0'; this may
1967 		 * or may not fit.
1968 		 */
1969 		if (comp[0] == '.') {
1970 			if (clen == 1) {
1971 				error = EINVAL;
1972 				break;
1973 			}
1974 			if (comp[1] == '.' && clen == 2)
1975 				dotdot = true;
1976 		}
1977 		if (dotdot) {
1978 			/*
1979 			 * It's not clear how ".." at root should
1980 			 * be handled when i > 0.  Obeying the man
1981 			 * page exactly, we reset i to 0 and stop,
1982 			 * declaring terminal success.
1983 			 *
1984 			 * Otherwise, we just climbed up one level
1985 			 * so adjust "atroot".
1986 			 */
1987 			if (atroot) {
1988 				i = 0;
1989 				break;
1990 			}
1991 			(void) r_dirname(succ, next, MAXPATHLEN);
1992 			namelen = strlen(next);
1993 			atroot = strlen(next) == 0; /* XXX? */
1994 		} else {
1995 			need = namelen + 1 + clen + 1;
1996 			if (need > MAXPATHLEN) {
1997 				error = ENAMETOOLONG;
1998 				break;
1999 			}
2000 			memcpy(next, succ, namelen);
2001 			next[namelen++] = '/';
2002 			memcpy(&next[namelen], comp, clen + 1);
2003 			namelen += clen;
2004 			/*
2005 			 * Since name is never ".", we are necessarily
2006 			 * descending below the root now.
2007 			 */
2008 			atroot = false;
2009 		}
2010 
2011 		if (fstatat(file->ff_dirfd, next, &st, AT_SYMLINK_NOFOLLOW) < 0) {
2012 			error = ENOENT;
2013 			break;
2014 		}
2015 
2016 		/*
2017 		 * Success: generate qid and swap this
2018 		 * successful name into place.  Update acl.
2019 		 */
2020 		generate_qid(&st, &req->lr_resp.rwalk.wqid[i]);
2021 		swtmp = succ;
2022 		succ = next;
2023 		next = swtmp;
2024 		if (acl != NULL && acl != file->ff_acl)
2025 			l9p_acl_free(acl);
2026 		acl = getacl(file, -1, next);
2027 	}
2028 
2029 	/*
2030 	 * Fail only if we failed on the first name.
2031 	 * Otherwise we succeeded on something, and "succ"
2032 	 * points to the last successful name in namebufs[].
2033 	 */
2034 	if (error) {
2035 		if (i == 0)
2036 			goto out;
2037 		error = 0;
2038 	}
2039 
2040 	newfile = open_fid(file->ff_dirfd, succ, ai, false);
2041 	if (newfile == NULL) {
2042 		error = ENOMEM;
2043 		goto out;
2044 	}
2045 	if (req->lr_newfid == req->lr_fid) {
2046 		/*
2047 		 * Before overwriting fid->lo_aux, free the old value.
2048 		 * Note that this doesn't free the l9p_fid data,
2049 		 * just the fs_fid data.  (But it does ditch ff_acl.)
2050 		 */
2051 		if (acl == file->ff_acl)
2052 			acl = NULL;
2053 		fs_freefid(softc, req->lr_fid);
2054 		file = NULL;
2055 	}
2056 	req->lr_newfid->lo_aux = newfile;
2057 	if (file != NULL && acl != file->ff_acl) {
2058 		newfile->ff_acl = acl;
2059 		acl = NULL;
2060 	}
2061 	req->lr_resp.rwalk.nwqid = (uint16_t)i;
2062 out:
2063 	if (file != NULL && acl != file->ff_acl)
2064 		l9p_acl_free(acl);
2065 	return (error);
2066 }
2067 
2068 static int
2069 fs_write(void *softc, struct l9p_request *req)
2070 {
2071 	struct fs_softc *sc = softc;
2072 	struct fs_fid *file;
2073 	ssize_t ret;
2074 
2075 	file = req->lr_fid->lo_aux;
2076 	assert(file != NULL);
2077 
2078 	if (sc->fs_readonly)
2079 		return (EROFS);
2080 
2081 	size_t niov = l9p_truncate_iov(req->lr_data_iov,
2082             req->lr_data_niov, req->lr_req.io.count);
2083 
2084 #if defined(__FreeBSD__) || defined(__illumos__)
2085 	ret = pwritev(file->ff_fd, req->lr_data_iov, niov,
2086 	    req->lr_req.io.offset);
2087 #else
2088 	/* XXX: not thread safe, should really use aio_listio. */
2089 	if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
2090 		return (errno);
2091 
2092 	ret = writev(file->ff_fd, req->lr_data_iov,
2093 	    (int)niov);
2094 #endif
2095 
2096 	if (ret < 0)
2097 		return (errno);
2098 
2099 	req->lr_resp.io.count = (uint32_t)ret;
2100 	return (0);
2101 }
2102 
2103 static int
2104 fs_wstat(void *softc, struct l9p_request *req)
2105 {
2106 	struct fs_softc *sc = softc;
2107 	struct l9p_stat *l9stat = &req->lr_req.twstat.stat;
2108 	struct l9p_fid *fid;
2109 	struct fs_fid *file;
2110 	int error = 0;
2111 
2112 	fid = req->lr_fid;
2113 	file = fid->lo_aux;
2114 	assert(file != NULL);
2115 
2116 	/*
2117 	 * XXX:
2118 	 *
2119 	 * stat(9P) sez:
2120 	 *
2121 	 * Either all the changes in wstat request happen, or none of them
2122 	 * does: if the request succeeds, all changes were made; if it fails,
2123 	 * none were.
2124 	 *
2125 	 * Atomicity is clearly missing in current implementation.
2126 	 */
2127 
2128 	if (sc->fs_readonly)
2129 		return (EROFS);
2130 
2131 	if (l9stat->atime != (uint32_t)~0) {
2132 		/* XXX: not implemented, ignore */
2133 	}
2134 
2135 	if (l9stat->mtime != (uint32_t)~0) {
2136 		/* XXX: not implemented, ignore */
2137 	}
2138 
2139 	if (l9stat->dev != (uint32_t)~0) {
2140 		error = EPERM;
2141 		goto out;
2142 	}
2143 
2144 	if (l9stat->length != (uint64_t)~0) {
2145 		if (file->ff_dir != NULL) {
2146 			error = EINVAL;
2147 			goto out;
2148 		}
2149 
2150 		if (truncate(file->ff_name, (off_t)l9stat->length) != 0) {
2151 			error = errno;
2152 			goto out;
2153 		}
2154 	}
2155 
2156 	if (req->lr_conn->lc_version >= L9P_2000U) {
2157 		if (fchownat(file->ff_dirfd, file->ff_name, l9stat->n_uid,
2158 		    l9stat->n_gid, AT_SYMLINK_NOFOLLOW) != 0) {
2159 			error = errno;
2160 			goto out;
2161 		}
2162 	}
2163 
2164 	if (l9stat->mode != (uint32_t)~0) {
2165 		if (fchmodat(file->ff_dirfd, file->ff_name,
2166 		    l9stat->mode & 0777, 0) != 0) {
2167 			error = errno;
2168 			goto out;
2169 		}
2170 	}
2171 
2172 	if (strlen(l9stat->name) > 0) {
2173 		struct l9p_acl *parent_acl;
2174 		struct stat st;
2175 		char *tmp;
2176 		char newname[MAXPATHLEN];
2177 
2178 		/*
2179 		 * Rename-within-directory: it's not deleting anything,
2180 		 * but we need write permission on the directory.  This
2181 		 * should suffice.
2182 		 */
2183 		error = fs_pdir(softc, fid, newname, sizeof(newname), &st);
2184 		if (error)
2185 			goto out;
2186 		parent_acl = getacl(file, -1, newname);
2187 		error = check_access(L9P_ACE_ADD_FILE,
2188 		    parent_acl, &st, NULL, NULL, file->ff_ai, (gid_t)-1);
2189 		l9p_acl_free(parent_acl);
2190 		if (error)
2191 			goto out;
2192 		error = fs_dpf(newname, l9stat->name, sizeof(newname));
2193 		if (error)
2194 			goto out;
2195 		tmp = strdup(newname);
2196 		if (tmp == NULL) {
2197 			error = ENOMEM;
2198 			goto out;
2199 		}
2200 		if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
2201 		    tmp) != 0) {
2202 			error = errno;
2203 			free(tmp);
2204 			goto out;
2205 		}
2206 		/* Successful rename, update file->ff_name.  ACL can stay. */
2207 		free(file->ff_name);
2208 		file->ff_name = tmp;
2209 	}
2210 out:
2211 	return (error);
2212 }
2213 
2214 static int
2215 fs_statfs(void *softc __unused, struct l9p_request *req)
2216 {
2217 	struct fs_fid *file;
2218 	struct stat st;
2219 #ifdef __illumos__
2220 	struct statvfs f;
2221 #else
2222 	struct statfs f;
2223 #endif
2224 	long name_max;
2225 	int error;
2226 	int fd;
2227 
2228 	file = req->lr_fid->lo_aux;
2229 	assert(file);
2230 
2231 	if (fstatat(file->ff_dirfd, file->ff_name, &st,
2232 	    AT_SYMLINK_NOFOLLOW) != 0)
2233 		return (errno);
2234 
2235 	/*
2236 	 * Not entirely clear what access to require; we'll go
2237 	 * for "read data".
2238 	 */
2239 	fillacl(file);
2240 	error = check_access(L9P_ACE_READ_DATA, NULL, NULL,
2241 	    file->ff_acl, &st, file->ff_ai, (gid_t)-1);
2242 	if (error)
2243 		return (error);
2244 
2245 	fd = openat(file->ff_dirfd, file->ff_name, 0);
2246 	if (fd < 0)
2247 		return (errno);
2248 
2249 #ifdef __illumos__
2250 	if (fstatvfs(fd, &f) != 0)
2251 		return (errno);
2252 #else
2253 	if (fstatfs(fd, &f) != 0)
2254 		return (errno);
2255 #endif
2256 
2257 	name_max = fpathconf(fd, _PC_NAME_MAX);
2258 	error = errno;
2259 	close(fd);
2260 
2261 	if (name_max == -1)
2262 		return (error);
2263 
2264 	dostatfs(&req->lr_resp.rstatfs.statfs, &f, name_max);
2265 
2266 	return (0);
2267 }
2268 
2269 static int
2270 fs_lopen(void *softc, struct l9p_request *req)
2271 {
2272 	struct l9p_fid *fid = req->lr_fid;
2273 	struct stat st;
2274 	enum l9p_omode p9;
2275 	gid_t gid;
2276 	int error, flags;
2277 
2278 	error = fs_oflags_dotl(req->lr_req.tlopen.flags, &flags, &p9);
2279 	if (error)
2280 		return (error);
2281 
2282 	gid = req->lr_req.tlopen.gid;
2283 	error = fs_iopen(softc, fid, flags, p9, gid, &st);
2284 	if (error)
2285 		return (error);
2286 
2287 	generate_qid(&st, &req->lr_resp.rlopen.qid);
2288 	req->lr_resp.rlopen.iounit = req->lr_conn->lc_max_io_size;
2289 	return (0);
2290 }
2291 
2292 static int
2293 fs_lcreate(void *softc, struct l9p_request *req)
2294 {
2295 	struct l9p_fid *dir;
2296 	struct stat st;
2297 	enum l9p_omode p9;
2298 	char *name;
2299 	mode_t perm;
2300 	gid_t gid;
2301 	int error, flags;
2302 
2303 	dir = req->lr_fid;
2304 	name = req->lr_req.tlcreate.name;
2305 
2306 	error = fs_oflags_dotl(req->lr_req.tlcreate.flags, &flags, &p9);
2307 	if (error)
2308 		return (error);
2309 
2310 	perm = (mode_t)req->lr_req.tlcreate.mode & 0777; /* ? set-id bits? */
2311 	gid = req->lr_req.tlcreate.gid;
2312 	error = fs_icreate(softc, dir, name, flags, false, perm, gid, &st);
2313 	if (error == 0)
2314 		generate_qid(&st, &req->lr_resp.rlcreate.qid);
2315 	req->lr_resp.rlcreate.iounit = req->lr_conn->lc_max_io_size;
2316 	return (error);
2317 }
2318 
2319 static int
2320 fs_symlink(void *softc, struct l9p_request *req)
2321 {
2322 	struct l9p_fid *dir;
2323 	struct stat st;
2324 	gid_t gid;
2325 	char *name, *symtgt;
2326 	int error;
2327 
2328 	dir = req->lr_fid;
2329 	name = req->lr_req.tsymlink.name;
2330 	symtgt = req->lr_req.tsymlink.symtgt;
2331 	gid = req->lr_req.tsymlink.gid;
2332 	error = fs_isymlink(softc, dir, name, symtgt, gid, &st);
2333 	if (error == 0)
2334 		generate_qid(&st, &req->lr_resp.rsymlink.qid);
2335 	return (error);
2336 }
2337 
2338 static int
2339 fs_mknod(void *softc, struct l9p_request *req)
2340 {
2341 	struct l9p_fid *dir;
2342 	struct stat st;
2343 	uint32_t mode, major, minor;
2344 	dev_t dev;
2345 	gid_t gid;
2346 	char *name;
2347 	int error;
2348 
2349 	dir = req->lr_fid;
2350 	name = req->lr_req.tmknod.name;
2351 	mode = req->lr_req.tmknod.mode;
2352 	gid = req->lr_req.tmknod.gid;
2353 
2354 	switch (mode & S_IFMT) {
2355 	case S_IFBLK:
2356 	case S_IFCHR:
2357 		mode = (mode & S_IFMT) | (mode & 0777);	/* ??? */
2358 		major = req->lr_req.tmknod.major;
2359 		minor = req->lr_req.tmknod.major;
2360 		dev = makedev(major, minor);
2361 		error = fs_imknod(softc, dir, name, false,
2362 		    (mode_t)mode, dev, gid, &st);
2363 		break;
2364 
2365 	case S_IFIFO:
2366 		error = fs_imkfifo(softc, dir, name, false,
2367 		    (mode_t)(mode & 0777), gid, &st);
2368 		break;
2369 
2370 	case S_IFSOCK:
2371 		error = fs_imksocket(softc, dir, name, false,
2372 		    (mode_t)(mode & 0777), gid, &st);
2373 		break;
2374 
2375 	default:
2376 		error = EINVAL;
2377 		break;
2378 	}
2379 	if (error == 0)
2380 		generate_qid(&st, &req->lr_resp.rmknod.qid);
2381 	return (error);
2382 }
2383 
2384 static int
2385 fs_rename(void *softc, struct l9p_request *req)
2386 {
2387 	struct fs_softc *sc = softc;
2388 	struct fs_authinfo *ai;
2389 	struct l9p_acl *oparent_acl;
2390 	struct l9p_fid *fid, *f2;
2391 	struct fs_fid *file, *f2ff;
2392 	struct stat cst, opst, npst;
2393 	int32_t op;
2394 	bool reparenting;
2395 	char *tmp;
2396 	char olddir[MAXPATHLEN], newname[MAXPATHLEN];
2397 	int error;
2398 
2399 	if (sc->fs_readonly)
2400 		return (EROFS);
2401 
2402 	/*
2403 	 * Note: lr_fid represents the file that is to be renamed,
2404 	 * so we must locate its parent directory and verify that
2405 	 * both this parent directory and the new directory f2 are
2406 	 * writable.  But if the new parent directory is the same
2407 	 * path as the old parent directory, our job is simpler.
2408 	 */
2409 	fid = req->lr_fid;
2410 	file = fid->lo_aux;
2411 	assert(file != NULL);
2412 	ai = file->ff_ai;
2413 
2414 	error = fs_pdir(sc, fid, olddir, sizeof(olddir), &opst);
2415 	if (error)
2416 		return (error);
2417 
2418 	f2 = req->lr_fid2;
2419 	f2ff = f2->lo_aux;
2420 	assert(f2ff != NULL);
2421 
2422 	reparenting = strcmp(olddir, f2ff->ff_name) != 0;
2423 
2424 	fillacl(file);
2425 	fillacl(f2ff);
2426 
2427 	if (fstatat(file->ff_dirfd, file->ff_name, &cst,
2428 	    AT_SYMLINK_NOFOLLOW) != 0)
2429 		return (errno);
2430 
2431 	/*
2432 	 * Are we moving from olddir?  If so, we're unlinking
2433 	 * from it, in terms of ACL access.
2434 	 */
2435 	if (reparenting) {
2436 		oparent_acl = getacl(file, -1, olddir);
2437 		error = check_access(L9P_ACOP_UNLINK,
2438 		    oparent_acl, &opst, file->ff_acl, &cst, ai, (gid_t)-1);
2439 		l9p_acl_free(oparent_acl);
2440 		if (error)
2441 			return (error);
2442 	}
2443 
2444 	/*
2445 	 * Now check that we're allowed to "create" a file or directory in
2446 	 * f2.  (Should we do this, too, only if reparenting?  Maybe check
2447 	 * for dir write permission if not reparenting -- but that's just
2448 	 * add-file/add-subdir, which means doing this always.)
2449 	 */
2450 	if (fstatat(f2ff->ff_dirfd, f2ff->ff_name, &npst,
2451 	    AT_SYMLINK_NOFOLLOW) != 0)
2452 		return (errno);
2453 
2454 	op = S_ISDIR(cst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
2455 	error = check_access(op, f2ff->ff_acl, &npst, NULL, NULL,
2456 	    ai, (gid_t)-1);
2457 	if (error)
2458 		return (error);
2459 
2460 	/*
2461 	 * Directories OK, file systems not R/O, etc; build final name.
2462 	 * f2ff->ff_name cannot exceed MAXPATHLEN, but out of general
2463 	 * paranoia, let's double check anyway.
2464 	 */
2465 	if (strlcpy(newname, f2ff->ff_name, sizeof(newname)) >= sizeof(newname))
2466 		return (ENAMETOOLONG);
2467 	error = fs_dpf(newname, req->lr_req.trename.name, sizeof(newname));
2468 	if (error)
2469 		return (error);
2470 	tmp = strdup(newname);
2471 	if (tmp == NULL)
2472 		return (ENOMEM);
2473 
2474 	if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, tmp) != 0) {
2475 		error = errno;
2476 		free(tmp);
2477 		return (error);
2478 	}
2479 
2480 	/* file has been renamed but old fid is not clunked */
2481 	free(file->ff_name);
2482 	file->ff_name = tmp;
2483 
2484 	dropacl(file);
2485 	return (0);
2486 }
2487 
2488 static int
2489 fs_readlink(void *softc __unused, struct l9p_request *req)
2490 {
2491 	struct fs_fid *file;
2492 	ssize_t linklen;
2493 	char buf[MAXPATHLEN];
2494 	int error = 0;
2495 
2496 	file = req->lr_fid->lo_aux;
2497 	assert(file);
2498 
2499 	linklen = readlinkat(file->ff_dirfd, file->ff_name, buf, sizeof(buf));
2500 	if (linklen < 0)
2501 		error = errno;
2502 	else if ((size_t)linklen >= sizeof(buf))
2503 		error = ENOMEM; /* todo: allocate dynamically */
2504 	else if ((req->lr_resp.rreadlink.target = strndup(buf,
2505 	    (size_t)linklen)) == NULL)
2506 		error = ENOMEM;
2507 	return (error);
2508 }
2509 
2510 static int
2511 fs_getattr(void *softc __unused, struct l9p_request *req)
2512 {
2513 	uint64_t mask, valid;
2514 	struct fs_fid *file;
2515 	struct stat st;
2516 	int error = 0;
2517 
2518 	file = req->lr_fid->lo_aux;
2519 	assert(file);
2520 
2521 	valid = 0;
2522 	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
2523 		error = errno;
2524 		goto out;
2525 	}
2526 	/* ?? Can we provide items not-requested? If so, can skip tests. */
2527 	mask = req->lr_req.tgetattr.request_mask;
2528 	if (mask & L9PL_GETATTR_MODE) {
2529 		/* It is not clear if we need any translations. */
2530 		req->lr_resp.rgetattr.mode = st.st_mode;
2531 		valid |= L9PL_GETATTR_MODE;
2532 	}
2533 	if (mask & L9PL_GETATTR_NLINK) {
2534 		req->lr_resp.rgetattr.nlink = st.st_nlink;
2535 		valid |= L9PL_GETATTR_NLINK;
2536 	}
2537 	if (mask & L9PL_GETATTR_UID) {
2538 		/* provide st_uid, or file->ff_uid? */
2539 		req->lr_resp.rgetattr.uid = st.st_uid;
2540 		valid |= L9PL_GETATTR_UID;
2541 	}
2542 	if (mask & L9PL_GETATTR_GID) {
2543 		/* provide st_gid, or file->ff_gid? */
2544 		req->lr_resp.rgetattr.gid = st.st_gid;
2545 		valid |= L9PL_GETATTR_GID;
2546 	}
2547 	if (mask & L9PL_GETATTR_RDEV) {
2548 		/* It is not clear if we need any translations. */
2549 		req->lr_resp.rgetattr.rdev = (uint64_t)st.st_rdev;
2550 		valid |= L9PL_GETATTR_RDEV;
2551 	}
2552 	if (mask & L9PL_GETATTR_ATIME) {
2553 		req->lr_resp.rgetattr.atime_sec =
2554 		    (uint64_t)STAT_ATIME(&st).tv_sec;
2555 		req->lr_resp.rgetattr.atime_nsec =
2556 		    (uint64_t)STAT_ATIME(&st).tv_nsec;
2557 		valid |= L9PL_GETATTR_ATIME;
2558 	}
2559 	if (mask & L9PL_GETATTR_MTIME) {
2560 		req->lr_resp.rgetattr.mtime_sec =
2561 		    (uint64_t)STAT_MTIME(&st).tv_sec;
2562 		req->lr_resp.rgetattr.mtime_nsec =
2563 		    (uint64_t)STAT_MTIME(&st).tv_nsec;
2564 		valid |= L9PL_GETATTR_MTIME;
2565 	}
2566 	if (mask & L9PL_GETATTR_CTIME) {
2567 		req->lr_resp.rgetattr.ctime_sec =
2568 		    (uint64_t)STAT_CTIME(&st).tv_sec;
2569 		req->lr_resp.rgetattr.ctime_nsec =
2570 		    (uint64_t)STAT_CTIME(&st).tv_nsec;
2571 		valid |= L9PL_GETATTR_CTIME;
2572 	}
2573 	if (mask & L9PL_GETATTR_BTIME) {
2574 #if defined(HAVE_BIRTHTIME)
2575 		req->lr_resp.rgetattr.btime_sec =
2576 		    (uint64_t)st.st_birthtim.tv_sec;
2577 		req->lr_resp.rgetattr.btime_nsec =
2578 		    (uint64_t)st.st_birthtim.tv_nsec;
2579 #elif defined(__illumos__)
2580 		getcrtime(softc, file->ff_dirfd, file->ff_name,
2581 		    &req->lr_resp.rgetattr.btime_sec,
2582 		    &req->lr_resp.rgetattr.btime_nsec);
2583 #else
2584 		req->lr_resp.rgetattr.btime_sec = 0;
2585 		req->lr_resp.rgetattr.btime_nsec = 0;
2586 #endif
2587 		valid |= L9PL_GETATTR_BTIME;
2588 	}
2589 	if (mask & L9PL_GETATTR_INO)
2590 		valid |= L9PL_GETATTR_INO;
2591 	if (mask & L9PL_GETATTR_SIZE) {
2592 		req->lr_resp.rgetattr.size = (uint64_t)st.st_size;
2593 		valid |= L9PL_GETATTR_SIZE;
2594 	}
2595 	if (mask & L9PL_GETATTR_BLOCKS) {
2596 		req->lr_resp.rgetattr.blksize = (uint64_t)st.st_blksize;
2597 		req->lr_resp.rgetattr.blocks = (uint64_t)st.st_blocks;
2598 		valid |= L9PL_GETATTR_BLOCKS;
2599 	}
2600 #ifndef __illumos__
2601 	if (mask & L9PL_GETATTR_GEN) {
2602 		req->lr_resp.rgetattr.gen = st.st_gen;
2603 		valid |= L9PL_GETATTR_GEN;
2604 	}
2605 #endif
2606 	/* don't know what to do with data version yet */
2607 
2608 	generate_qid(&st, &req->lr_resp.rgetattr.qid);
2609 out:
2610 	req->lr_resp.rgetattr.valid = valid;
2611 	return (error);
2612 }
2613 
2614 /*
2615  * Should combine some of this with wstat code.
2616  */
2617 static int
2618 fs_setattr(void *softc, struct l9p_request *req)
2619 {
2620 	uint64_t mask;
2621 	struct fs_softc *sc = softc;
2622 	struct timespec ts[2];
2623 	struct fs_fid *file;
2624 	struct stat st;
2625 	int error = 0;
2626 	uid_t uid, gid;
2627 
2628 	file = req->lr_fid->lo_aux;
2629 	assert(file);
2630 
2631 	if (sc->fs_readonly)
2632 		return (EROFS);
2633 
2634 	/*
2635 	 * As with WSTAT we have atomicity issues.
2636 	 */
2637 	mask = req->lr_req.tsetattr.valid;
2638 
2639 	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
2640 		error = errno;
2641 		goto out;
2642 	}
2643 
2644 	if ((mask & L9PL_SETATTR_SIZE) && S_ISDIR(st.st_mode)) {
2645 		error = EISDIR;
2646 		goto out;
2647 	}
2648 
2649 	if (mask & L9PL_SETATTR_MODE) {
2650 		if (fchmodat(file->ff_dirfd, file->ff_name,
2651 		    req->lr_req.tsetattr.mode & 0777,
2652 		    0)) {
2653 			error = errno;
2654 			goto out;
2655 		}
2656 	}
2657 
2658 	if (mask & (L9PL_SETATTR_UID | L9PL_SETATTR_GID)) {
2659 		uid = mask & L9PL_SETATTR_UID
2660 		    ? req->lr_req.tsetattr.uid
2661 		    : (uid_t)-1;
2662 
2663 		gid = mask & L9PL_SETATTR_GID
2664 		    ? req->lr_req.tsetattr.gid
2665 		    : (gid_t)-1;
2666 
2667 		if (fchownat(file->ff_dirfd, file->ff_name, uid, gid,
2668 		    AT_SYMLINK_NOFOLLOW)) {
2669 			error = errno;
2670 			goto out;
2671 		}
2672 	}
2673 
2674 	if (mask & L9PL_SETATTR_SIZE) {
2675 		/* Truncate follows symlinks, is this OK? */
2676 		int fd = openat(file->ff_dirfd, file->ff_name, O_RDWR);
2677 		if (ftruncate(fd, (off_t)req->lr_req.tsetattr.size)) {
2678 			error = errno;
2679 			(void) close(fd);
2680 			goto out;
2681 		}
2682 		(void) close(fd);
2683 	}
2684 
2685 	if (mask & (L9PL_SETATTR_ATIME | L9PL_SETATTR_MTIME)) {
2686 		ts[0].tv_sec = STAT_ATIME(&st).tv_sec;
2687 		ts[0].tv_nsec = STAT_ATIME(&st).tv_nsec;
2688 		ts[1].tv_sec = STAT_MTIME(&st).tv_sec;
2689 		ts[1].tv_nsec = STAT_MTIME(&st).tv_nsec;
2690 
2691 		if (mask & L9PL_SETATTR_ATIME) {
2692 			if (mask & L9PL_SETATTR_ATIME_SET) {
2693 				ts[0].tv_sec = req->lr_req.tsetattr.atime_sec;
2694 				ts[0].tv_nsec = req->lr_req.tsetattr.atime_nsec;
2695 			} else {
2696 				if (clock_gettime(CLOCK_REALTIME, &ts[0]) != 0) {
2697 					error = errno;
2698 					goto out;
2699 				}
2700 			}
2701 		}
2702 
2703 		if (mask & L9PL_SETATTR_MTIME) {
2704 			if (mask & L9PL_SETATTR_MTIME_SET) {
2705 				ts[1].tv_sec = req->lr_req.tsetattr.mtime_sec;
2706 				ts[1].tv_nsec = req->lr_req.tsetattr.mtime_nsec;
2707 			} else {
2708 				if (clock_gettime(CLOCK_REALTIME, &ts[1]) != 0) {
2709 					error = errno;
2710 					goto out;
2711 				}
2712 			}
2713 		}
2714 
2715 		if (utimensat(file->ff_dirfd, file->ff_name, ts,
2716 		    AT_SYMLINK_NOFOLLOW)) {
2717 			error = errno;
2718 			goto out;
2719 		}
2720 	}
2721 out:
2722 	return (error);
2723 }
2724 
2725 static int
2726 fs_xattrwalk(void *softc __unused, struct l9p_request *req __unused)
2727 {
2728 	return (EOPNOTSUPP);
2729 }
2730 
2731 static int
2732 fs_xattrcreate(void *softc __unused, struct l9p_request *req __unused)
2733 {
2734 	return (EOPNOTSUPP);
2735 }
2736 
2737 static int
2738 fs_readdir(void *softc __unused, struct l9p_request *req)
2739 {
2740 	struct l9p_message msg;
2741 	struct l9p_dirent de;
2742 	struct fs_fid *file;
2743 	struct dirent *dp;
2744 	struct stat st;
2745 	uint32_t count;
2746 	int error = 0;
2747 
2748 	file = req->lr_fid->lo_aux;
2749 	assert(file);
2750 
2751 	if (file->ff_dir == NULL)
2752 		return (ENOTDIR);
2753 
2754 	if ((error = pthread_mutex_lock(&file->ff_mtx)) != 0)
2755 		return (error);
2756 
2757 	/*
2758 	 * It's not clear whether we can use the same trick for
2759 	 * discarding offsets here as we do in fs_read.  It
2760 	 * probably should work, we'll have to see if some
2761 	 * client(s) use the zero-offset thing to rescan without
2762 	 * clunking the directory first.
2763 	 *
2764 	 * Probably the thing to do is switch to calling
2765 	 * getdirentries() / getdents() directly, instead of
2766 	 * going through libc.
2767 	 */
2768 	if (req->lr_req.io.offset == 0)
2769 		rewinddir(file->ff_dir);
2770 	else
2771 		seekdir(file->ff_dir, (long)req->lr_req.io.offset);
2772 
2773 	l9p_init_msg(&msg, req, L9P_PACK);
2774 	count = (uint32_t)msg.lm_size; /* in case we get no entries */
2775 	while ((dp = readdir(file->ff_dir)) != NULL) {
2776 		/*
2777 		 * Although "." is forbidden in naming and ".." is
2778 		 * special cased, testing shows that we must transmit
2779 		 * them through readdir.  (For ".." at root, we
2780 		 * should perhaps alter the inode number, but not
2781 		 * yet.)
2782 		 */
2783 
2784 		/*
2785 		 * TODO: we do a full lstat here; could use dp->d_*
2786 		 * to construct the qid more efficiently, as long
2787 		 * as dp->d_type != DT_UNKNOWN.
2788 		 */
2789 		if (fs_lstatat(file, dp->d_name, &st))
2790 			continue;
2791 
2792 		de.qid.type = 0;
2793 		generate_qid(&st, &de.qid);
2794 		de.offset = (uint64_t)telldir(file->ff_dir);
2795 #ifdef __illumos__
2796 		de.type = st.st_mode & S_IFMT;
2797 #else
2798 		de.type = dp->d_type;
2799 #endif
2800 		de.name = dp->d_name;
2801 
2802 		/* Update count only if we completely pack the dirent. */
2803 		if (l9p_pudirent(&msg, &de) < 0)
2804 			break;
2805 		count = (uint32_t)msg.lm_size;
2806 	}
2807 
2808 	(void) pthread_mutex_unlock(&file->ff_mtx);
2809 	req->lr_resp.io.count = count;
2810 	return (error);
2811 }
2812 
2813 static int
2814 fs_fsync(void *softc __unused, struct l9p_request *req)
2815 {
2816 	struct fs_fid *file;
2817 	int error = 0;
2818 
2819 	file = req->lr_fid->lo_aux;
2820 	assert(file);
2821 	if (fsync(file->ff_dir != NULL ? dirfd(file->ff_dir) : file->ff_fd))
2822 		error = errno;
2823 	return (error);
2824 }
2825 
2826 static int
2827 fs_lock(void *softc __unused, struct l9p_request *req)
2828 {
2829 
2830 	switch (req->lr_req.tlock.type) {
2831 	case L9PL_LOCK_TYPE_RDLOCK:
2832 	case L9PL_LOCK_TYPE_WRLOCK:
2833 	case L9PL_LOCK_TYPE_UNLOCK:
2834 		break;
2835 	default:
2836 		return (EINVAL);
2837 	}
2838 
2839 	req->lr_resp.rlock.status = L9PL_LOCK_SUCCESS;
2840 	return (0);
2841 }
2842 
2843 static int
2844 fs_getlock(void *softc __unused, struct l9p_request *req)
2845 {
2846 
2847 	/*
2848 	 * Client wants to see if a request to lock a region would
2849 	 * block.  This is, of course, not atomic anyway, so the
2850 	 * op is useless.  QEMU simply says "unlocked!", so we do
2851 	 * too.
2852 	 */
2853 	switch (req->lr_req.getlock.type) {
2854 	case L9PL_LOCK_TYPE_RDLOCK:
2855 	case L9PL_LOCK_TYPE_WRLOCK:
2856 	case L9PL_LOCK_TYPE_UNLOCK:
2857 		break;
2858 	default:
2859 		return (EINVAL);
2860 	}
2861 
2862 	req->lr_resp.getlock = req->lr_req.getlock;
2863 	req->lr_resp.getlock.type = L9PL_LOCK_TYPE_UNLOCK;
2864 	req->lr_resp.getlock.client_id = strdup("");  /* XXX what should go here? */
2865 	return (0);
2866 }
2867 
2868 static int
2869 fs_link(void *softc __unused, struct l9p_request *req)
2870 {
2871 	struct l9p_fid *dir;
2872 	struct fs_fid *file;
2873 	struct fs_fid *dirf;
2874 	struct stat fst, tdst;
2875 	int32_t op;
2876 	char *name;
2877 	char newname[MAXPATHLEN];
2878 	int error;
2879 
2880 	/* N.B.: lr_fid is the file to link, lr_fid2 is the target dir */
2881 	dir = req->lr_fid2;
2882 	dirf = dir->lo_aux;
2883 	assert(dirf != NULL);
2884 
2885 	name = req->lr_req.tlink.name;
2886 	error = fs_buildname(dir, name, newname, sizeof(newname));
2887 	if (error)
2888 		return (error);
2889 
2890 	file = req->lr_fid->lo_aux;
2891 	assert(file != NULL);
2892 
2893 	if (fstatat(dirf->ff_dirfd, dirf->ff_name, &tdst, AT_SYMLINK_NOFOLLOW) != 0 ||
2894 	    fstatat(file->ff_dirfd, file->ff_name, &fst, AT_SYMLINK_NOFOLLOW) != 0)
2895 		return (errno);
2896 	if (S_ISDIR(fst.st_mode))
2897 		return (EISDIR);
2898 	fillacl(dirf);
2899 	op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
2900 	error = check_access(op,
2901 	    dirf->ff_acl, &tdst, NULL, NULL, file->ff_ai, (gid_t)-1);
2902 	if (error)
2903 		return (error);
2904 
2905 	if (linkat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
2906 	    newname, 0) != 0)
2907 		error = errno;
2908 	else
2909 		dropacl(file);
2910 
2911 	return (error);
2912 }
2913 
2914 static int
2915 fs_mkdir(void *softc, struct l9p_request *req)
2916 {
2917 	struct l9p_fid *dir;
2918 	struct stat st;
2919 	mode_t perm;
2920 	gid_t gid;
2921 	char *name;
2922 	int error;
2923 
2924 	dir = req->lr_fid;
2925 	name = req->lr_req.tmkdir.name;
2926 	perm = (mode_t)req->lr_req.tmkdir.mode;
2927 	gid = req->lr_req.tmkdir.gid;
2928 
2929 	error = fs_imkdir(softc, dir, name, false, perm, gid, &st);
2930 	if (error == 0)
2931 		generate_qid(&st, &req->lr_resp.rmkdir.qid);
2932 	return (error);
2933 }
2934 
2935 static int
2936 fs_renameat(void *softc, struct l9p_request *req)
2937 {
2938 	struct fs_softc *sc = softc;
2939 	struct l9p_fid *olddir, *newdir;
2940 	struct l9p_acl *facl;
2941 	struct fs_fid *off, *nff;
2942 	struct stat odst, ndst, fst;
2943 	int32_t op;
2944 	bool reparenting;
2945 	char *onp, *nnp;
2946 	char onb[MAXPATHLEN], nnb[MAXPATHLEN];
2947 	int error;
2948 
2949 	if (sc->fs_readonly)
2950 		return (EROFS);
2951 
2952 	olddir = req->lr_fid;
2953 	newdir = req->lr_fid2;
2954 	assert(olddir != NULL && newdir != NULL);
2955 	off = olddir->lo_aux;
2956 	nff = newdir->lo_aux;
2957 	assert(off != NULL && nff != NULL);
2958 
2959 	onp = req->lr_req.trenameat.oldname;
2960 	nnp = req->lr_req.trenameat.newname;
2961 	error = fs_buildname(olddir, onp, onb, sizeof(onb));
2962 	if (error)
2963 		return (error);
2964 	error = fs_buildname(newdir, nnp, nnb, sizeof(nnb));
2965 	if (error)
2966 		return (error);
2967 	if (fstatat(off->ff_dirfd, onb, &fst, AT_SYMLINK_NOFOLLOW) != 0)
2968 		return (errno);
2969 
2970 	reparenting = olddir != newdir &&
2971 	    strcmp(off->ff_name, nff->ff_name) != 0;
2972 
2973 	if (fstatat(off->ff_dirfd, off->ff_name, &odst, AT_SYMLINK_NOFOLLOW) != 0)
2974 		return (errno);
2975 	if (!S_ISDIR(odst.st_mode))
2976 		return (ENOTDIR);
2977 	fillacl(off);
2978 
2979 	if (reparenting) {
2980 		if (fstatat(nff->ff_dirfd, nff->ff_name, &ndst, AT_SYMLINK_NOFOLLOW) != 0)
2981 			return (errno);
2982 		if (!S_ISDIR(ndst.st_mode))
2983 			return (ENOTDIR);
2984 		facl = getacl(off, -1, onb);
2985 		fillacl(nff);
2986 
2987 		error = check_access(L9P_ACOP_UNLINK,
2988 		    off->ff_acl, &odst, facl, &fst, off->ff_ai, (gid_t)-1);
2989 		l9p_acl_free(facl);
2990 		if (error)
2991 			return (error);
2992 		op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY :
2993 		    L9P_ACE_ADD_FILE;
2994 		error = check_access(op,
2995 		    nff->ff_acl, &ndst, NULL, NULL, nff->ff_ai, (gid_t)-1);
2996 		if (error)
2997 			return (error);
2998 	}
2999 
3000 	if (renameat(off->ff_dirfd, onb, nff->ff_dirfd, nnb))
3001 		error = errno;
3002 
3003 	return (error);
3004 }
3005 
3006 /*
3007  * Unlink file in given directory, or remove directory in given
3008  * directory, based on flags.
3009  */
3010 static int
3011 fs_unlinkat(void *softc, struct l9p_request *req)
3012 {
3013 	struct fs_softc *sc = softc;
3014 	struct l9p_acl *facl;
3015 	struct l9p_fid *dir;
3016 	struct fs_fid *dirff;
3017 	struct stat dirst, fst;
3018 	char *name;
3019 	char newname[MAXPATHLEN];
3020 	int error;
3021 
3022 	if (sc->fs_readonly)
3023 		return (EROFS);
3024 
3025 	dir = req->lr_fid;
3026 	dirff = dir->lo_aux;
3027 	assert(dirff != NULL);
3028 	name = req->lr_req.tunlinkat.name;
3029 	error = fs_buildname(dir, name, newname, sizeof(newname));
3030 	if (error)
3031 		return (error);
3032 	if (fstatat(dirff->ff_dirfd, newname, &fst, AT_SYMLINK_NOFOLLOW) != 0 ||
3033 	    fstatat(dirff->ff_dirfd, dirff->ff_name, &dirst, AT_SYMLINK_NOFOLLOW) != 0)
3034 		return (errno);
3035 	fillacl(dirff);
3036 	facl = getacl(dirff, -1, newname);
3037 	error = check_access(L9P_ACOP_UNLINK,
3038 	    dirff->ff_acl, &dirst, facl, &fst, dirff->ff_ai, (gid_t)-1);
3039 	l9p_acl_free(facl);
3040 	if (error)
3041 		return (error);
3042 
3043 	if (req->lr_req.tunlinkat.flags & L9PL_AT_REMOVEDIR) {
3044 		if (unlinkat(dirff->ff_dirfd, newname, AT_REMOVEDIR) != 0) {
3045 			error = errno;
3046 			if (error == EEXIST)
3047 				error = ENOTEMPTY;
3048 		}
3049 	} else {
3050 		if (unlinkat(dirff->ff_dirfd, newname, 0) != 0)
3051 			error = errno;
3052 	}
3053 	return (error);
3054 }
3055 
3056 static void
3057 fs_freefid(void *softc __unused, struct l9p_fid *fid)
3058 {
3059 	struct fs_fid *f = fid->lo_aux;
3060 	struct fs_authinfo *ai;
3061 	uint32_t newcount;
3062 
3063 	if (f == NULL) {
3064 		/* Nothing to do here */
3065 		return;
3066 	}
3067 
3068 	if (f->ff_fd != -1)
3069 		close(f->ff_fd);
3070 
3071 	if (f->ff_dir)
3072 		closedir(f->ff_dir);
3073 
3074 	(void) pthread_mutex_destroy(&f->ff_mtx);
3075 	free(f->ff_name);
3076 	ai = f->ff_ai;
3077 	l9p_acl_free(f->ff_acl);
3078 	free(f);
3079 	(void) pthread_mutex_lock(&ai->ai_mtx);
3080 	newcount = --ai->ai_refcnt;
3081 	(void) pthread_mutex_unlock(&ai->ai_mtx);
3082 	if (newcount == 0) {
3083 		/*
3084 		 * We *were* the last ref, no one can have gained a ref.
3085 		 */
3086 		L9P_LOG(L9P_DEBUG, "dropped last ref to authinfo %p",
3087 		    (void *)ai);
3088 		(void) pthread_mutex_destroy(&ai->ai_mtx);
3089 		free(ai);
3090 	} else {
3091 		L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
3092 		    (void *)ai, (u_long)newcount);
3093 	}
3094 }
3095 
3096 int
3097 l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro)
3098 {
3099 	struct l9p_backend *backend;
3100 	struct fs_softc *sc;
3101 	int error;
3102 #if defined(WITH_CASPER)
3103 	cap_channel_t *capcas;
3104 #endif
3105 
3106 	if (!fs_attach_mutex_inited) {
3107 #ifdef __illumos__
3108 		if ((error = pthread_mutexattr_init(&fs_mutexattr)) != 0) {
3109 			errno = error;
3110 			return (-1);
3111 		}
3112 		if ((error = pthread_mutexattr_settype(&fs_mutexattr,
3113 		    PTHREAD_MUTEX_ERRORCHECK)) != 0) {
3114 			errno = error;
3115 			return (-1);
3116 		}
3117 		error = pthread_mutex_init(&fs_attach_mutex, &fs_mutexattr);
3118 #else
3119 		error = pthread_mutex_init(&fs_attach_mutex, NULL);
3120 #endif
3121 		if (error) {
3122 			errno = error;
3123 			return (-1);
3124 		}
3125 		fs_attach_mutex_inited = true;
3126 	}
3127 
3128 	backend = l9p_malloc(sizeof(*backend));
3129 	backend->attach = fs_attach;
3130 	backend->clunk = fs_clunk;
3131 	backend->create = fs_create;
3132 	backend->open = fs_open;
3133 	backend->read = fs_read;
3134 	backend->remove = fs_remove;
3135 	backend->stat = fs_stat;
3136 	backend->walk = fs_walk;
3137 	backend->write = fs_write;
3138 	backend->wstat = fs_wstat;
3139 	backend->statfs = fs_statfs;
3140 	backend->lopen = fs_lopen;
3141 	backend->lcreate = fs_lcreate;
3142 	backend->symlink = fs_symlink;
3143 	backend->mknod = fs_mknod;
3144 	backend->rename = fs_rename;
3145 	backend->readlink = fs_readlink;
3146 	backend->getattr = fs_getattr;
3147 	backend->setattr = fs_setattr;
3148 	backend->xattrwalk = fs_xattrwalk;
3149 	backend->xattrcreate = fs_xattrcreate;
3150 	backend->readdir = fs_readdir;
3151 	backend->fsync = fs_fsync;
3152 	backend->lock = fs_lock;
3153 	backend->getlock = fs_getlock;
3154 	backend->link = fs_link;
3155 	backend->mkdir = fs_mkdir;
3156 	backend->renameat = fs_renameat;
3157 	backend->unlinkat = fs_unlinkat;
3158 	backend->freefid = fs_freefid;
3159 
3160 	sc = l9p_malloc(sizeof(*sc));
3161 	sc->fs_rootfd = rootfd;
3162 	sc->fs_readonly = ro;
3163 	backend->softc = sc;
3164 
3165 #if defined(__illumos__)
3166 	if (fpathconf(rootfd, _PC_XATTR_ENABLED) > 0)
3167 		sc->fs_hasxattr = 1;
3168 #endif
3169 
3170 #if defined(WITH_CASPER)
3171 	capcas = cap_init();
3172 	if (capcas == NULL)
3173 		return (-1);
3174 
3175 	sc->fs_cappwd = cap_service_open(capcas, "system.pwd");
3176 	if (sc->fs_cappwd == NULL)
3177 		return (-1);
3178 
3179 	sc->fs_capgrp = cap_service_open(capcas, "system.grp");
3180 	if (sc->fs_capgrp == NULL)
3181 		return (-1);
3182 
3183 	cap_setpassent(sc->fs_cappwd, 1);
3184 	cap_setgroupent(sc->fs_capgrp, 1);
3185 	cap_close(capcas);
3186 #elif defined(__illumos__)
3187 	setpwent();
3188 #else
3189 	setpassent(1);
3190 #endif
3191 
3192 	*backendp = backend;
3193 	return (0);
3194 }
3195 
3196 #ifdef __illumos__
3197 acl_t *
3198 acl_get_fd_np(int fd, int type)
3199 {
3200 	acl_t *acl;
3201 	int flag, ret;
3202 
3203 	flag = 0;
3204 	if (type == ACL_TYPE_NFS4)
3205 		flag = ACL_NO_TRIVIAL;
3206 
3207 	ret = facl_get(fd, flag, &acl);
3208 	if (ret != 0)
3209 		return (NULL);
3210 
3211 	return (acl);
3212 }
3213 
3214 static void
3215 getcrtime(struct fs_softc *sc, int dirfd, const char *fname, uint64_t *secp,
3216     uint64_t *nsp)
3217 {
3218 	nvlist_t *nvl = NULL;
3219 	uint64_t *vals = NULL;
3220 	uint_t nvals = 0;
3221 	int error;
3222 
3223 	*secp = 0;
3224 	*nsp = 0;
3225 
3226 	if (!sc->fs_hasxattr)
3227 		return;
3228 
3229 	if ((error = getattrat(dirfd, XATTR_VIEW_READWRITE, fname, &nvl)) != 0)
3230 		return;
3231 
3232 	if (nvlist_lookup_uint64_array(nvl, "crtime", &vals, &nvals) != 0)
3233 		goto done;
3234 
3235 	if (nvals != 2)
3236 		goto done;
3237 
3238 	*secp = vals[0];
3239 	*nsp = vals[1];
3240 
3241 done:
3242 	nvlist_free(nvl);
3243 }
3244 #endif
3245