xref: /freebsd/sys/fs/fuse/fuse_vnops.c (revision 036d2e814bf0f5d88ffb4b24c159320894541757)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Copyright (c) 2019 The FreeBSD Foundation
37  *
38  * Portions of this software were developed by BFF Storage Systems, LLC under
39  * sponsorship from the FreeBSD Foundation.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65 
66 #include <sys/param.h>
67 #include <sys/module.h>
68 #include <sys/systm.h>
69 #include <sys/errno.h>
70 #include <sys/kernel.h>
71 #include <sys/conf.h>
72 #include <sys/uio.h>
73 #include <sys/malloc.h>
74 #include <sys/queue.h>
75 #include <sys/limits.h>
76 #include <sys/lock.h>
77 #include <sys/rwlock.h>
78 #include <sys/sx.h>
79 #include <sys/proc.h>
80 #include <sys/mount.h>
81 #include <sys/vnode.h>
82 #include <sys/namei.h>
83 #include <sys/extattr.h>
84 #include <sys/stat.h>
85 #include <sys/unistd.h>
86 #include <sys/filedesc.h>
87 #include <sys/file.h>
88 #include <sys/fcntl.h>
89 #include <sys/dirent.h>
90 #include <sys/bio.h>
91 #include <sys/buf.h>
92 #include <sys/sysctl.h>
93 #include <sys/vmmeter.h>
94 
95 #include <vm/vm.h>
96 #include <vm/vm_extern.h>
97 #include <vm/pmap.h>
98 #include <vm/vm_map.h>
99 #include <vm/vm_page.h>
100 #include <vm/vm_param.h>
101 #include <vm/vm_object.h>
102 #include <vm/vm_pager.h>
103 #include <vm/vnode_pager.h>
104 #include <vm/vm_object.h>
105 
106 #include "fuse.h"
107 #include "fuse_file.h"
108 #include "fuse_internal.h"
109 #include "fuse_ipc.h"
110 #include "fuse_node.h"
111 #include "fuse_io.h"
112 
113 #include <sys/priv.h>
114 
115 /* Maximum number of hardlinks to a single FUSE file */
116 #define FUSE_LINK_MAX                      UINT32_MAX
117 
118 SDT_PROVIDER_DECLARE(fusefs);
119 /*
120  * Fuse trace probe:
121  * arg0: verbosity.  Higher numbers give more verbose messages
122  * arg1: Textual message
123  */
124 SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*");
125 
126 /* vnode ops */
127 static vop_access_t fuse_vnop_access;
128 static vop_advlock_t fuse_vnop_advlock;
129 static vop_bmap_t fuse_vnop_bmap;
130 static vop_close_t fuse_fifo_close;
131 static vop_close_t fuse_vnop_close;
132 static vop_create_t fuse_vnop_create;
133 static vop_deleteextattr_t fuse_vnop_deleteextattr;
134 static vop_fdatasync_t fuse_vnop_fdatasync;
135 static vop_fsync_t fuse_vnop_fsync;
136 static vop_getattr_t fuse_vnop_getattr;
137 static vop_getextattr_t fuse_vnop_getextattr;
138 static vop_inactive_t fuse_vnop_inactive;
139 static vop_link_t fuse_vnop_link;
140 static vop_listextattr_t fuse_vnop_listextattr;
141 static vop_lookup_t fuse_vnop_lookup;
142 static vop_mkdir_t fuse_vnop_mkdir;
143 static vop_mknod_t fuse_vnop_mknod;
144 static vop_open_t fuse_vnop_open;
145 static vop_pathconf_t fuse_vnop_pathconf;
146 static vop_read_t fuse_vnop_read;
147 static vop_readdir_t fuse_vnop_readdir;
148 static vop_readlink_t fuse_vnop_readlink;
149 static vop_reclaim_t fuse_vnop_reclaim;
150 static vop_remove_t fuse_vnop_remove;
151 static vop_rename_t fuse_vnop_rename;
152 static vop_rmdir_t fuse_vnop_rmdir;
153 static vop_setattr_t fuse_vnop_setattr;
154 static vop_setextattr_t fuse_vnop_setextattr;
155 static vop_strategy_t fuse_vnop_strategy;
156 static vop_symlink_t fuse_vnop_symlink;
157 static vop_write_t fuse_vnop_write;
158 static vop_getpages_t fuse_vnop_getpages;
159 static vop_print_t fuse_vnop_print;
160 static vop_vptofh_t fuse_vnop_vptofh;
161 
162 struct vop_vector fuse_fifoops = {
163 	.vop_default =		&fifo_specops,
164 	.vop_access =		fuse_vnop_access,
165 	.vop_close =		fuse_fifo_close,
166 	.vop_fsync =		fuse_vnop_fsync,
167 	.vop_getattr =		fuse_vnop_getattr,
168 	.vop_inactive =		fuse_vnop_inactive,
169 	.vop_pathconf =		fuse_vnop_pathconf,
170 	.vop_print =		fuse_vnop_print,
171 	.vop_read =		VOP_PANIC,
172 	.vop_reclaim =		fuse_vnop_reclaim,
173 	.vop_setattr =		fuse_vnop_setattr,
174 	.vop_write =		VOP_PANIC,
175 	.vop_vptofh =		fuse_vnop_vptofh,
176 };
177 
178 struct vop_vector fuse_vnops = {
179 	.vop_allocate =	VOP_EINVAL,
180 	.vop_default = &default_vnodeops,
181 	.vop_access = fuse_vnop_access,
182 	.vop_advlock = fuse_vnop_advlock,
183 	.vop_bmap = fuse_vnop_bmap,
184 	.vop_close = fuse_vnop_close,
185 	.vop_create = fuse_vnop_create,
186 	.vop_deleteextattr = fuse_vnop_deleteextattr,
187 	.vop_fsync = fuse_vnop_fsync,
188 	.vop_fdatasync = fuse_vnop_fdatasync,
189 	.vop_getattr = fuse_vnop_getattr,
190 	.vop_getextattr = fuse_vnop_getextattr,
191 	.vop_inactive = fuse_vnop_inactive,
192 	/*
193 	 * TODO: implement vop_ioctl after upgrading to protocol 7.16.
194 	 * FUSE_IOCTL was added in 7.11, but 32-bit compat is broken until
195 	 * 7.16.
196 	 */
197 	.vop_link = fuse_vnop_link,
198 	.vop_listextattr = fuse_vnop_listextattr,
199 	.vop_lookup = fuse_vnop_lookup,
200 	.vop_mkdir = fuse_vnop_mkdir,
201 	.vop_mknod = fuse_vnop_mknod,
202 	.vop_open = fuse_vnop_open,
203 	.vop_pathconf = fuse_vnop_pathconf,
204 	/*
205 	 * TODO: implement vop_poll after upgrading to protocol 7.21.
206 	 * FUSE_POLL was added in protocol 7.11, but it's kind of broken until
207 	 * 7.21, which adds the ability for the client to choose which poll
208 	 * events it wants, and for a client to deregister a file handle
209 	 */
210 	.vop_read = fuse_vnop_read,
211 	.vop_readdir = fuse_vnop_readdir,
212 	.vop_readlink = fuse_vnop_readlink,
213 	.vop_reclaim = fuse_vnop_reclaim,
214 	.vop_remove = fuse_vnop_remove,
215 	.vop_rename = fuse_vnop_rename,
216 	.vop_rmdir = fuse_vnop_rmdir,
217 	.vop_setattr = fuse_vnop_setattr,
218 	.vop_setextattr = fuse_vnop_setextattr,
219 	.vop_strategy = fuse_vnop_strategy,
220 	.vop_symlink = fuse_vnop_symlink,
221 	.vop_write = fuse_vnop_write,
222 	.vop_getpages = fuse_vnop_getpages,
223 	.vop_print = fuse_vnop_print,
224 	.vop_vptofh = fuse_vnop_vptofh,
225 };
226 
227 uma_zone_t fuse_pbuf_zone;
228 
229 /* Check permission for extattr operations, much like extattr_check_cred */
230 static int
231 fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred,
232 	struct thread *td, accmode_t accmode)
233 {
234 	struct mount *mp = vnode_mount(vp);
235 	struct fuse_data *data = fuse_get_mpdata(mp);
236 
237 	/*
238 	 * Kernel-invoked always succeeds.
239 	 */
240 	if (cred == NOCRED)
241 		return (0);
242 
243 	/*
244 	 * Do not allow privileged processes in jail to directly manipulate
245 	 * system attributes.
246 	 */
247 	switch (ns) {
248 	case EXTATTR_NAMESPACE_SYSTEM:
249 		if (data->dataflags & FSESS_DEFAULT_PERMISSIONS) {
250 			return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM));
251 		}
252 		/* FALLTHROUGH */
253 	case EXTATTR_NAMESPACE_USER:
254 		return (fuse_internal_access(vp, accmode, td, cred));
255 	default:
256 		return (EPERM);
257 	}
258 }
259 
260 /* Get a filehandle for a directory */
261 static int
262 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp,
263 	struct ucred *cred, pid_t pid)
264 {
265 	if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0)
266 		return 0;
267 	return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid);
268 }
269 
270 /* Send FUSE_FLUSH for this vnode */
271 static int
272 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
273 {
274 	struct fuse_flush_in *ffi;
275 	struct fuse_filehandle *fufh;
276 	struct fuse_dispatcher fdi;
277 	struct thread *td = curthread;
278 	struct mount *mp = vnode_mount(vp);
279 	int err;
280 
281 	if (!fsess_isimpl(vnode_mount(vp), FUSE_FLUSH))
282 		return 0;
283 
284 	err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
285 	if (err)
286 		return err;
287 
288 	fdisp_init(&fdi, sizeof(*ffi));
289 	fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred);
290 	ffi = fdi.indata;
291 	ffi->fh = fufh->fh_id;
292 	/*
293 	 * If the file has a POSIX lock then we're supposed to set lock_owner.
294 	 * If not, then lock_owner is undefined.  So we may as well always set
295 	 * it.
296 	 */
297 	ffi->lock_owner = td->td_proc->p_pid;
298 
299 	err = fdisp_wait_answ(&fdi);
300 	if (err == ENOSYS) {
301 		fsess_set_notimpl(mp, FUSE_FLUSH);
302 		err = 0;
303 	}
304 	fdisp_destroy(&fdi);
305 	return err;
306 }
307 
308 /* Close wrapper for fifos.  */
309 static int
310 fuse_fifo_close(struct vop_close_args *ap)
311 {
312 	return (fifo_specops.vop_close(ap));
313 }
314 
315 /*
    struct vop_access_args {
317 	struct vnode *a_vp;
318 #if VOP_ACCESS_TAKES_ACCMODE_T
319 	accmode_t a_accmode;
320 #else
321 	int a_mode;
322 #endif
323 	struct ucred *a_cred;
324 	struct thread *a_td;
325     };
326 */
327 static int
328 fuse_vnop_access(struct vop_access_args *ap)
329 {
330 	struct vnode *vp = ap->a_vp;
331 	int accmode = ap->a_accmode;
332 	struct ucred *cred = ap->a_cred;
333 
334 	struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
335 
336 	int err;
337 
338 	if (fuse_isdeadfs(vp)) {
339 		if (vnode_isvroot(vp)) {
340 			return 0;
341 		}
342 		return ENXIO;
343 	}
344 	if (!(data->dataflags & FSESS_INITED)) {
345 		if (vnode_isvroot(vp)) {
346 			if (priv_check_cred(cred, PRIV_VFS_ADMIN) ||
347 			    (fuse_match_cred(data->daemoncred, cred) == 0)) {
348 				return 0;
349 			}
350 		}
351 		return EBADF;
352 	}
353 	if (vnode_islnk(vp)) {
354 		return 0;
355 	}
356 
357 	err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred);
358 	return err;
359 }
360 
361 /*
362  * struct vop_advlock_args {
363  *	struct vop_generic_args a_gen;
364  *	struct vnode *a_vp;
365  *	void *a_id;
366  *	int a_op;
367  *	struct flock *a_fl;
368  *	int a_flags;
369  * }
370  */
371 static int
372 fuse_vnop_advlock(struct vop_advlock_args *ap)
373 {
374 	struct vnode *vp = ap->a_vp;
375 	struct flock *fl = ap->a_fl;
376 	struct thread *td = curthread;
377 	struct ucred *cred = td->td_ucred;
378 	pid_t pid = td->td_proc->p_pid;
379 	struct fuse_filehandle *fufh;
380 	struct fuse_dispatcher fdi;
381 	struct fuse_lk_in *fli;
382 	struct fuse_lk_out *flo;
383 	enum fuse_opcode op;
384 	int dataflags, err;
385 	int flags = ap->a_flags;
386 
387 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
388 
389 	if (fuse_isdeadfs(vp)) {
390 		return ENXIO;
391 	}
392 
393 	if (!(dataflags & FSESS_POSIX_LOCKS))
394 		return vop_stdadvlock(ap);
395 	/* FUSE doesn't properly support flock until protocol 7.17 */
396 	if (flags & F_FLOCK)
397 		return vop_stdadvlock(ap);
398 
399 	err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid);
400 	if (err)
401 		return err;
402 
403 	fdisp_init(&fdi, sizeof(*fli));
404 
405 	switch(ap->a_op) {
406 	case F_GETLK:
407 		op = FUSE_GETLK;
408 		break;
409 	case F_SETLK:
410 		op = FUSE_SETLK;
411 		break;
412 	case F_SETLKW:
413 		op = FUSE_SETLKW;
414 		break;
415 	default:
416 		return EINVAL;
417 	}
418 
419 	fdisp_make_vp(&fdi, op, vp, td, cred);
420 	fli = fdi.indata;
421 	fli->fh = fufh->fh_id;
422 	fli->owner = fl->l_pid;
423 	fli->lk.start = fl->l_start;
424 	if (fl->l_len != 0)
425 		fli->lk.end = fl->l_start + fl->l_len - 1;
426 	else
427 		fli->lk.end = INT64_MAX;
428 	fli->lk.type = fl->l_type;
429 	fli->lk.pid = fl->l_pid;
430 
431 	err = fdisp_wait_answ(&fdi);
432 	fdisp_destroy(&fdi);
433 
434 	if (err == 0 && op == FUSE_GETLK) {
435 		flo = fdi.answ;
436 		fl->l_type = flo->lk.type;
437 		fl->l_pid = flo->lk.pid;
438 		if (flo->lk.type != F_UNLCK) {
439 			fl->l_start = flo->lk.start;
440 			if (flo->lk.end == INT64_MAX)
441 				fl->l_len = 0;
442 			else
443 				fl->l_len = flo->lk.end - flo->lk.start + 1;
444 			fl->l_start = flo->lk.start;
445 		}
446 	}
447 
448 	return err;
449 }
450 
451 /* {
452 	struct vnode *a_vp;
453 	daddr_t a_bn;
454 	struct bufobj **a_bop;
455 	daddr_t *a_bnp;
456 	int *a_runp;
457 	int *a_runb;
458 } */
459 static int
460 fuse_vnop_bmap(struct vop_bmap_args *ap)
461 {
462 	struct vnode *vp = ap->a_vp;
463 	struct bufobj **bo = ap->a_bop;
464 	struct thread *td = curthread;
465 	struct mount *mp;
466 	struct fuse_dispatcher fdi;
467 	struct fuse_bmap_in *fbi;
468 	struct fuse_bmap_out *fbo;
469 	struct fuse_data *data;
470 	uint64_t biosize;
471 	off_t filesize;
472 	daddr_t lbn = ap->a_bn;
473 	daddr_t *pbn = ap->a_bnp;
474 	int *runp = ap->a_runp;
475 	int *runb = ap->a_runb;
476 	int error = 0;
477 	int maxrun;
478 
479 	if (fuse_isdeadfs(vp)) {
480 		return ENXIO;
481 	}
482 
483 	mp = vnode_mount(vp);
484 	data = fuse_get_mpdata(mp);
485 	biosize = fuse_iosize(vp);
486 	maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1,
487 		data->max_readahead_blocks);
488 
489 	if (bo != NULL)
490 		*bo = &vp->v_bufobj;
491 
492 	/*
493 	 * The FUSE_BMAP operation does not include the runp and runb
494 	 * variables, so we must guess.  Report nonzero contiguous runs so
495 	 * cluster_read will combine adjacent reads.  It's worthwhile to reduce
496 	 * upcalls even if we don't know the true physical layout of the file.
497 	 *
498 	 * FUSE file systems may opt out of read clustering in two ways:
499 	 * * mounting with -onoclusterr
500 	 * * Setting max_readahead <= maxbcachebuf during FUSE_INIT
501 	 */
502 	if (runb != NULL)
503 		*runb = MIN(lbn, maxrun);
504 	if (runp != NULL) {
505 		error = fuse_vnode_size(vp, &filesize, td->td_ucred, td);
506 		if (error == 0)
507 			*runp = MIN(MAX(0, filesize / (off_t)biosize - lbn - 1),
508 				    maxrun);
509 		else
510 			*runp = 0;
511 	}
512 
513 	if (fsess_isimpl(mp, FUSE_BMAP)) {
514 		fdisp_init(&fdi, sizeof(*fbi));
515 		fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred);
516 		fbi = fdi.indata;
517 		fbi->block = lbn;
518 		fbi->blocksize = biosize;
519 		error = fdisp_wait_answ(&fdi);
520 		if (error == ENOSYS) {
521 			fdisp_destroy(&fdi);
522 			fsess_set_notimpl(mp, FUSE_BMAP);
523 			error = 0;
524 		} else {
525 			fbo = fdi.answ;
526 			if (error == 0 && pbn != NULL)
527 				*pbn = fbo->block;
528 			fdisp_destroy(&fdi);
529 			return error;
530 		}
531 	}
532 
533 	/* If the daemon doesn't support BMAP, make up a sensible default */
534 	if (pbn != NULL)
535 		*pbn = lbn * btodb(biosize);
536 	return (error);
537 }
538 
539 /*
540     struct vop_close_args {
541 	struct vnode *a_vp;
542 	int  a_fflag;
543 	struct ucred *a_cred;
544 	struct thread *a_td;
545     };
546 */
547 static int
548 fuse_vnop_close(struct vop_close_args *ap)
549 {
550 	struct vnode *vp = ap->a_vp;
551 	struct ucred *cred = ap->a_cred;
552 	int fflag = ap->a_fflag;
553 	struct thread *td = ap->a_td;
554 	pid_t pid = td->td_proc->p_pid;
555 	int err = 0;
556 
557 	if (fuse_isdeadfs(vp))
558 		return 0;
559 	if (vnode_isdir(vp))
560 		return 0;
561 	if (fflag & IO_NDELAY)
562 		return 0;
563 
564 	err = fuse_flush(vp, cred, pid, fflag);
565 	/* TODO: close the file handle, if we're sure it's no longer used */
566 	if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
567 		fuse_vnode_savesize(vp, cred, td->td_proc->p_pid);
568 	}
569 	return err;
570 }
571 
572 static void
573 fdisp_make_mknod_for_fallback(
574 	struct fuse_dispatcher *fdip,
575 	struct componentname *cnp,
576 	struct vnode *dvp,
577 	uint64_t parentnid,
578 	struct thread *td,
579 	struct ucred *cred,
580 	mode_t mode,
581 	enum fuse_opcode *op)
582 {
583 	struct fuse_mknod_in *fmni;
584 
585 	fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1);
586 	*op = FUSE_MKNOD;
587 	fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred);
588 	fmni = fdip->indata;
589 	fmni->mode = mode;
590 	fmni->rdev = 0;
591 	memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr,
592 	    cnp->cn_namelen);
593 	((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0';
594 }
595 /*
    struct vop_create_args {
597 	struct vnode *a_dvp;
598 	struct vnode **a_vpp;
599 	struct componentname *a_cnp;
600 	struct vattr *a_vap;
601     };
602 */
603 static int
604 fuse_vnop_create(struct vop_create_args *ap)
605 {
606 	struct vnode *dvp = ap->a_dvp;
607 	struct vnode **vpp = ap->a_vpp;
608 	struct componentname *cnp = ap->a_cnp;
609 	struct vattr *vap = ap->a_vap;
610 	struct thread *td = cnp->cn_thread;
611 	struct ucred *cred = cnp->cn_cred;
612 
613 	struct fuse_data *data;
614 	struct fuse_create_in *fci;
615 	struct fuse_entry_out *feo;
616 	struct fuse_open_out *foo;
617 	struct fuse_dispatcher fdi, fdi2;
618 	struct fuse_dispatcher *fdip = &fdi;
619 	struct fuse_dispatcher *fdip2 = NULL;
620 
621 	int err;
622 
623 	struct mount *mp = vnode_mount(dvp);
624 	data = fuse_get_mpdata(mp);
625 	uint64_t parentnid = VTOFUD(dvp)->nid;
626 	mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
627 	enum fuse_opcode op;
628 	int flags;
629 
630 	if (fuse_isdeadfs(dvp))
631 		return ENXIO;
632 
633 	/* FUSE expects sockets to be created with FUSE_MKNOD */
634 	if (vap->va_type == VSOCK)
635 		return fuse_internal_mknod(dvp, vpp, cnp, vap);
636 
637 	/*
638 	 * VOP_CREATE doesn't tell us the open(2) flags, so we guess.  Only a
639 	 * writable mode makes sense, and we might as well include readability
640 	 * too.
641 	 */
642 	flags = O_RDWR;
643 
644 	bzero(&fdi, sizeof(fdi));
645 
646 	if (vap->va_type != VREG)
647 		return (EINVAL);
648 
649 	if (!fsess_isimpl(mp, FUSE_CREATE) || vap->va_type == VSOCK) {
650 		/* Fallback to FUSE_MKNOD/FUSE_OPEN */
651 		fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td,
652 			cred, mode, &op);
653 	} else {
654 		/* Use FUSE_CREATE */
655 		size_t insize;
656 
657 		op = FUSE_CREATE;
658 		fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1);
659 		fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred);
660 		fci = fdip->indata;
661 		fci->mode = mode;
662 		fci->flags = O_CREAT | flags;
663 		if (fuse_libabi_geq(data, 7, 12)) {
664 			insize = sizeof(*fci);
665 			fci->umask = td->td_proc->p_fd->fd_cmask;
666 		} else {
667 			insize = sizeof(struct fuse_open_in);
668 		}
669 
670 		memcpy((char *)fdip->indata + insize, cnp->cn_nameptr,
671 		    cnp->cn_namelen);
672 		((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0';
673 	}
674 
675 	err = fdisp_wait_answ(fdip);
676 
677 	if (err) {
678 		if (err == ENOSYS && op == FUSE_CREATE) {
679 			fsess_set_notimpl(mp, FUSE_CREATE);
680 			fdisp_destroy(fdip);
681 			fdisp_make_mknod_for_fallback(fdip, cnp, dvp,
682 				parentnid, td, cred, mode, &op);
683 			err = fdisp_wait_answ(fdip);
684 		}
685 		if (err)
686 			goto out;
687 	}
688 
689 	feo = fdip->answ;
690 
691 	if ((err = fuse_internal_checkentry(feo, vap->va_type))) {
692 		goto out;
693 	}
694 
695 	if (op == FUSE_CREATE) {
696 		foo = (struct fuse_open_out*)(feo + 1);
697 	} else {
698 		/* Issue a separate FUSE_OPEN */
699 		struct fuse_open_in *foi;
700 
701 		fdip2 = &fdi2;
702 		fdisp_init(fdip2, sizeof(*foi));
703 		fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td,
704 			cred);
705 		foi = fdip2->indata;
706 		foi->flags = flags;
707 		err = fdisp_wait_answ(fdip2);
708 		if (err)
709 			goto out;
710 		foo = fdip2->answ;
711 	}
712 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type);
713 	if (err) {
714 		struct fuse_release_in *fri;
715 		uint64_t nodeid = feo->nodeid;
716 		uint64_t fh_id = foo->fh;
717 
718 		fdisp_init(fdip, sizeof(*fri));
719 		fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred);
720 		fri = fdip->indata;
721 		fri->fh = fh_id;
722 		fri->flags = flags;
723 		fuse_insert_callback(fdip->tick, fuse_internal_forget_callback);
724 		fuse_insert_message(fdip->tick, false);
725 		goto out;
726 	}
727 	ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create");
728 	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
729 		feo->attr_valid_nsec, NULL);
730 
731 	fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo);
732 	fuse_vnode_open(*vpp, foo->open_flags, td);
733 	/*
734 	 * Purge the parent's attribute cache because the daemon should've
735 	 * updated its mtime and ctime
736 	 */
737 	fuse_vnode_clear_attr_cache(dvp);
738 	cache_purge_negative(dvp);
739 
740 out:
741 	if (fdip2)
742 		fdisp_destroy(fdip2);
743 	fdisp_destroy(fdip);
744 	return err;
745 }
746 
747 /*
    struct vop_fdatasync_args {
749 	struct vop_generic_args a_gen;
750 	struct vnode * a_vp;
751 	struct thread * a_td;
752     };
753 */
754 static int
755 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap)
756 {
757 	struct vnode *vp = ap->a_vp;
758 	struct thread *td = ap->a_td;
759 	int waitfor = MNT_WAIT;
760 
761 	int err = 0;
762 
763 	if (fuse_isdeadfs(vp)) {
764 		return 0;
765 	}
766 	if ((err = vop_stdfdatasync_buf(ap)))
767 		return err;
768 
769 	return fuse_internal_fsync(vp, td, waitfor, true);
770 }
771 
772 /*
    struct vop_fsync_args {
774 	struct vop_generic_args a_gen;
775 	struct vnode * a_vp;
776 	int  a_waitfor;
777 	struct thread * a_td;
778     };
779 */
780 static int
781 fuse_vnop_fsync(struct vop_fsync_args *ap)
782 {
783 	struct vnode *vp = ap->a_vp;
784 	struct thread *td = ap->a_td;
785 	int waitfor = ap->a_waitfor;
786 	int err = 0;
787 
788 	if (fuse_isdeadfs(vp)) {
789 		return 0;
790 	}
791 	if ((err = vop_stdfsync(ap)))
792 		return err;
793 
794 	return fuse_internal_fsync(vp, td, waitfor, false);
795 }
796 
797 /*
    struct vop_getattr_args {
799 	struct vnode *a_vp;
800 	struct vattr *a_vap;
801 	struct ucred *a_cred;
802 	struct thread *a_td;
803     };
804 */
805 static int
806 fuse_vnop_getattr(struct vop_getattr_args *ap)
807 {
808 	struct vnode *vp = ap->a_vp;
809 	struct vattr *vap = ap->a_vap;
810 	struct ucred *cred = ap->a_cred;
811 	struct thread *td = curthread;
812 
813 	int err = 0;
814 	int dataflags;
815 
816 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
817 
818 	/* Note that we are not bailing out on a dead file system just yet. */
819 
820 	if (!(dataflags & FSESS_INITED)) {
821 		if (!vnode_isvroot(vp)) {
822 			fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
823 			err = ENOTCONN;
824 			return err;
825 		} else {
826 			goto fake;
827 		}
828 	}
829 	err = fuse_internal_getattr(vp, vap, cred, td);
830 	if (err == ENOTCONN && vnode_isvroot(vp)) {
831 		/* see comment in fuse_vfsop_statfs() */
832 		goto fake;
833 	} else {
834 		return err;
835 	}
836 
837 fake:
838 	bzero(vap, sizeof(*vap));
839 	vap->va_type = vnode_vtype(vp);
840 
841 	return 0;
842 }
843 
844 /*
    struct vop_inactive_args {
846 	struct vnode *a_vp;
847 	struct thread *a_td;
848     };
849 */
850 static int
851 fuse_vnop_inactive(struct vop_inactive_args *ap)
852 {
853 	struct vnode *vp = ap->a_vp;
854 	struct thread *td = ap->a_td;
855 
856 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
857 	struct fuse_filehandle *fufh, *fufh_tmp;
858 
859 	int need_flush = 1;
860 
861 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
862 		if (need_flush && vp->v_type == VREG) {
863 			if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
864 				fuse_vnode_savesize(vp, NULL, 0);
865 			}
866 			if ((fvdat->flag & FN_REVOKED) != 0)
867 				fuse_io_invalbuf(vp, td);
868 			else
869 				fuse_io_flushbuf(vp, MNT_WAIT, td);
870 			need_flush = 0;
871 		}
872 		fuse_filehandle_close(vp, fufh, td, NULL);
873 	}
874 
875 	if ((fvdat->flag & FN_REVOKED) != 0)
876 		vrecycle(vp);
877 
878 	return 0;
879 }
880 
881 /*
    struct vop_link_args {
883 	struct vnode *a_tdvp;
884 	struct vnode *a_vp;
885 	struct componentname *a_cnp;
886     };
887 */
888 static int
889 fuse_vnop_link(struct vop_link_args *ap)
890 {
891 	struct vnode *vp = ap->a_vp;
892 	struct vnode *tdvp = ap->a_tdvp;
893 	struct componentname *cnp = ap->a_cnp;
894 
895 	struct vattr *vap = VTOVA(vp);
896 
897 	struct fuse_dispatcher fdi;
898 	struct fuse_entry_out *feo;
899 	struct fuse_link_in fli;
900 
901 	int err;
902 
903 	if (fuse_isdeadfs(vp)) {
904 		return ENXIO;
905 	}
906 	if (vnode_mount(tdvp) != vnode_mount(vp)) {
907 		return EXDEV;
908 	}
909 
910 	/*
911 	 * This is a seatbelt check to protect naive userspace filesystems from
912 	 * themselves and the limitations of the FUSE IPC protocol.  If a
913 	 * filesystem does not allow attribute caching, assume it is capable of
914 	 * validating that nlink does not overflow.
915 	 */
916 	if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
917 		return EMLINK;
918 	fli.oldnodeid = VTOI(vp);
919 
920 	fdisp_init(&fdi, 0);
921 	fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp,
922 	    FUSE_LINK, &fli, sizeof(fli), &fdi);
923 	if ((err = fdisp_wait_answ(&fdi))) {
924 		goto out;
925 	}
926 	feo = fdi.answ;
927 
928 	err = fuse_internal_checkentry(feo, vnode_vtype(vp));
929 	if (!err) {
930 		/*
931 		 * Purge the parent's attribute cache because the daemon
932 		 * should've updated its mtime and ctime
933 		 */
934 		fuse_vnode_clear_attr_cache(tdvp);
935 		fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid,
936 			feo->attr_valid_nsec, NULL);
937 	}
938 out:
939 	fdisp_destroy(&fdi);
940 	return err;
941 }
942 
943 struct fuse_lookup_alloc_arg {
944 	struct fuse_entry_out *feo;
945 	struct componentname *cnp;
946 	uint64_t nid;
947 	enum vtype vtyp;
948 };
949 
950 /* Callback for vn_get_ino */
951 static int
952 fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
953 {
954 	struct fuse_lookup_alloc_arg *flaa = arg;
955 
956 	return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp,
957 		flaa->vtyp);
958 }
959 
960 SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup,
961 	"int", "struct timespec*", "struct timespec*");
962 /*
    struct vop_lookup_args {
964 	struct vnodeop_desc *a_desc;
965 	struct vnode *a_dvp;
966 	struct vnode **a_vpp;
967 	struct componentname *a_cnp;
968     };
969 */
970 int
971 fuse_vnop_lookup(struct vop_lookup_args *ap)
972 {
973 	struct vnode *dvp = ap->a_dvp;
974 	struct vnode **vpp = ap->a_vpp;
975 	struct componentname *cnp = ap->a_cnp;
976 	struct thread *td = cnp->cn_thread;
977 	struct ucred *cred = cnp->cn_cred;
978 
979 	int nameiop = cnp->cn_nameiop;
980 	int flags = cnp->cn_flags;
981 	int wantparent = flags & (LOCKPARENT | WANTPARENT);
982 	int islastcn = flags & ISLASTCN;
983 	struct mount *mp = vnode_mount(dvp);
984 
985 	int err = 0;
986 	int lookup_err = 0;
987 	struct vnode *vp = NULL;
988 
989 	struct fuse_dispatcher fdi;
990 	bool did_lookup = false;
991 	struct fuse_entry_out *feo = NULL;
992 	enum vtype vtyp;	/* vnode type of target */
993 	off_t filesize;		/* filesize of target */
994 
995 	uint64_t nid;
996 
997 	if (fuse_isdeadfs(dvp)) {
998 		*vpp = NULL;
999 		return ENXIO;
1000 	}
1001 	if (!vnode_isdir(dvp))
1002 		return ENOTDIR;
1003 
1004 	if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP))
1005 		return EROFS;
1006 
1007 	if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
1008 		return err;
1009 
1010 	if (flags & ISDOTDOT) {
1011 		KASSERT(VTOFUD(dvp)->flag & FN_PARENT_NID,
1012 			("Looking up .. is TODO"));
1013 		nid = VTOFUD(dvp)->parent_nid;
1014 		if (nid == 0)
1015 			return ENOENT;
1016 		/* .. is obviously a directory */
1017 		vtyp = VDIR;
1018 		filesize = 0;
1019 	} else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') {
1020 		nid = VTOI(dvp);
1021 		/* . is obviously a directory */
1022 		vtyp = VDIR;
1023 		filesize = 0;
1024 	} else {
1025 		struct timespec now, timeout;
1026 
1027 		err = cache_lookup(dvp, vpp, cnp, &timeout, NULL);
1028 		getnanouptime(&now);
1029 		SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now);
1030 		switch (err) {
1031 		case -1:		/* positive match */
1032 			if (timespeccmp(&timeout, &now, >)) {
1033 				counter_u64_add(fuse_lookup_cache_hits, 1);
1034 			} else {
1035 				/* Cache timeout */
1036 				counter_u64_add(fuse_lookup_cache_misses, 1);
1037 				bintime_clear(
1038 					&VTOFUD(*vpp)->entry_cache_timeout);
1039 				cache_purge(*vpp);
1040 				if (dvp != *vpp)
1041 					vput(*vpp);
1042 				else
1043 					vrele(*vpp);
1044 				*vpp = NULL;
1045 				break;
1046 			}
1047 			return 0;
1048 
1049 		case 0:		/* no match in cache */
1050 			counter_u64_add(fuse_lookup_cache_misses, 1);
1051 			break;
1052 
1053 		case ENOENT:		/* negative match */
1054 			getnanouptime(&now);
1055 			if (timespeccmp(&timeout, &now, <=)) {
1056 				/* Cache timeout */
1057 				cache_purge_negative(dvp);
1058 				break;
1059 			}
1060 			/* fall through */
1061 		default:
1062 			return err;
1063 		}
1064 
1065 		nid = VTOI(dvp);
1066 		fdisp_init(&fdi, cnp->cn_namelen + 1);
1067 		fdisp_make(&fdi, FUSE_LOOKUP, mp, nid, td, cred);
1068 
1069 		memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
1070 		((char *)fdi.indata)[cnp->cn_namelen] = '\0';
1071 		lookup_err = fdisp_wait_answ(&fdi);
1072 		did_lookup = true;
1073 
1074 		if (!lookup_err) {
1075 			/* lookup call succeeded */
1076 			feo = (struct fuse_entry_out *)fdi.answ;
1077 			nid = feo->nodeid;
1078 			if (nid == 0) {
1079 				/* zero nodeid means ENOENT and cache it */
1080 				struct timespec timeout;
1081 
1082 				fdi.answ_stat = ENOENT;
1083 				lookup_err = ENOENT;
1084 				if (cnp->cn_flags & MAKEENTRY) {
1085 					fuse_validity_2_timespec(feo, &timeout);
1086 					cache_enter_time(dvp, *vpp, cnp,
1087 						&timeout, NULL);
1088 				}
1089 			} else if (nid == FUSE_ROOT_ID) {
1090 				lookup_err = EINVAL;
1091 			}
1092 			vtyp = IFTOVT(feo->attr.mode);
1093 			filesize = feo->attr.size;
1094 		}
1095 		if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) {
1096 			fdisp_destroy(&fdi);
1097 			return lookup_err;
1098 		}
1099 	}
1100 	/* lookup_err, if non-zero, must be ENOENT at this point */
1101 
1102 	if (lookup_err) {
1103 		/* Entry not found */
1104 		if ((nameiop == CREATE || nameiop == RENAME) && islastcn) {
1105 			err = fuse_internal_access(dvp, VWRITE, td, cred);
1106 			if (!err) {
1107 				/*
1108 				 * Set the SAVENAME flag to hold onto the
1109 				 * pathname for use later in VOP_CREATE or
1110 				 * VOP_RENAME.
1111 				 */
1112 				cnp->cn_flags |= SAVENAME;
1113 
1114 				err = EJUSTRETURN;
1115 			}
1116 		} else {
1117 			err = ENOENT;
1118 		}
1119 	} else {
1120 		/* Entry was found */
1121 		if (flags & ISDOTDOT) {
1122 			struct fuse_lookup_alloc_arg flaa;
1123 
1124 			flaa.nid = nid;
1125 			flaa.feo = feo;
1126 			flaa.cnp = cnp;
1127 			flaa.vtyp = vtyp;
1128 			err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0,
1129 				&vp);
1130 			*vpp = vp;
1131 		} else if (nid == VTOI(dvp)) {
1132 			vref(dvp);
1133 			*vpp = dvp;
1134 		} else {
1135 			struct fuse_vnode_data *fvdat;
1136 
1137 			err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp,
1138 			    &vp, cnp, vtyp);
1139 			if (err)
1140 				goto out;
1141 			*vpp = vp;
1142 
1143 			/*
1144 			 * In the case where we are looking up a FUSE node
1145 			 * represented by an existing cached vnode, and the
1146 			 * true size reported by FUSE_LOOKUP doesn't match
1147 			 * the vnode's cached size, then any cached writes
1148 			 * beyond the file's current size are lost.
1149 			 *
1150 			 * We can get here:
1151 			 * * following attribute cache expiration, or
1152 			 * * due a bug in the daemon, or
1153 			 */
1154 			fvdat = VTOFUD(vp);
1155 			if (vnode_isreg(vp) &&
1156 			    filesize != fvdat->cached_attrs.va_size &&
1157 			    fvdat->flag & FN_SIZECHANGE) {
1158 				/*
1159 				 * The FN_SIZECHANGE flag reflects a dirty
1160 				 * append.  If userspace lets us know our cache
1161 				 * is invalid, that write was lost.  (Dirty
1162 				 * writes that do not cause append are also
1163 				 * lost, but we don't detect them here.)
1164 				 *
1165 				 * XXX: Maybe disable WB caching on this mount.
1166 				 */
1167 				printf("%s: WB cache incoherent on %s!\n",
1168 				    __func__,
1169 				    vnode_mount(vp)->mnt_stat.f_mntonname);
1170 
1171 				fvdat->flag &= ~FN_SIZECHANGE;
1172 			}
1173 
1174 			MPASS(feo != NULL);
1175 			fuse_internal_cache_attrs(*vpp, &feo->attr,
1176 				feo->attr_valid, feo->attr_valid_nsec, NULL);
1177 			fuse_validity_2_bintime(feo->entry_valid,
1178 				feo->entry_valid_nsec,
1179 				&fvdat->entry_cache_timeout);
1180 
1181 			if ((nameiop == DELETE || nameiop == RENAME) &&
1182 				islastcn)
1183 			{
1184 				struct vattr dvattr;
1185 
1186 				err = fuse_internal_access(dvp, VWRITE, td,
1187 					cred);
1188 				if (err != 0)
1189 					goto out;
1190 				/*
1191 				 * if the parent's sticky bit is set, check
1192 				 * whether we're allowed to remove the file.
1193 				 * Need to figure out the vnode locking to make
1194 				 * this work.
1195 				 */
1196 				fuse_internal_getattr(dvp, &dvattr, cred, td);
1197 				if ((dvattr.va_mode & S_ISTXT) &&
1198 					fuse_internal_access(dvp, VADMIN, td,
1199 						cred) &&
1200 					fuse_internal_access(*vpp, VADMIN, td,
1201 						cred)) {
1202 					err = EPERM;
1203 					goto out;
1204 				}
1205 			}
1206 
1207 			if (islastcn && (
1208 				(nameiop == DELETE) ||
1209 				(nameiop == RENAME && wantparent))) {
1210 				cnp->cn_flags |= SAVENAME;
1211 			}
1212 
1213 		}
1214 	}
1215 out:
1216 	if (err) {
1217 		if (vp != NULL && dvp != vp)
1218 			vput(vp);
1219 		else if (vp != NULL)
1220 			vrele(vp);
1221 		*vpp = NULL;
1222 	}
1223 	if (did_lookup)
1224 		fdisp_destroy(&fdi);
1225 
1226 	return err;
1227 }
1228 
1229 /*
1230     struct vnop_mkdir_args {
1231 	struct vnode *a_dvp;
1232 	struct vnode **a_vpp;
1233 	struct componentname *a_cnp;
1234 	struct vattr *a_vap;
1235     };
1236 */
1237 static int
1238 fuse_vnop_mkdir(struct vop_mkdir_args *ap)
1239 {
1240 	struct vnode *dvp = ap->a_dvp;
1241 	struct vnode **vpp = ap->a_vpp;
1242 	struct componentname *cnp = ap->a_cnp;
1243 	struct vattr *vap = ap->a_vap;
1244 
1245 	struct fuse_mkdir_in fmdi;
1246 
1247 	if (fuse_isdeadfs(dvp)) {
1248 		return ENXIO;
1249 	}
1250 	fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
1251 	fmdi.umask = curthread->td_proc->p_fd->fd_cmask;
1252 
1253 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi,
1254 	    sizeof(fmdi), VDIR));
1255 }
1256 
1257 /*
1258     struct vnop_mknod_args {
1259 	struct vnode *a_dvp;
1260 	struct vnode **a_vpp;
1261 	struct componentname *a_cnp;
1262 	struct vattr *a_vap;
1263     };
1264 */
1265 static int
1266 fuse_vnop_mknod(struct vop_mknod_args *ap)
1267 {
1268 
1269 	struct vnode *dvp = ap->a_dvp;
1270 	struct vnode **vpp = ap->a_vpp;
1271 	struct componentname *cnp = ap->a_cnp;
1272 	struct vattr *vap = ap->a_vap;
1273 
1274 	if (fuse_isdeadfs(dvp))
1275 		return ENXIO;
1276 
1277 	return fuse_internal_mknod(dvp, vpp, cnp, vap);
1278 }
1279 
1280 /*
1281     struct vop_open_args {
1282 	struct vnode *a_vp;
1283 	int  a_mode;
1284 	struct ucred *a_cred;
1285 	struct thread *a_td;
1286 	int a_fdidx; / struct file *a_fp;
1287     };
1288 */
1289 static int
1290 fuse_vnop_open(struct vop_open_args *ap)
1291 {
1292 	struct vnode *vp = ap->a_vp;
1293 	int a_mode = ap->a_mode;
1294 	struct thread *td = ap->a_td;
1295 	struct ucred *cred = ap->a_cred;
1296 	pid_t pid = td->td_proc->p_pid;
1297 	struct fuse_vnode_data *fvdat;
1298 
1299 	if (fuse_isdeadfs(vp))
1300 		return ENXIO;
1301 	if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO)
1302 		return (EOPNOTSUPP);
1303 	if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0)
1304 		return EINVAL;
1305 
1306 	fvdat = VTOFUD(vp);
1307 
1308 	if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) {
1309 		fuse_vnode_open(vp, 0, td);
1310 		return 0;
1311 	}
1312 
1313 	return fuse_filehandle_open(vp, a_mode, NULL, td, cred);
1314 }
1315 
1316 static int
1317 fuse_vnop_pathconf(struct vop_pathconf_args *ap)
1318 {
1319 
1320 	switch (ap->a_name) {
1321 	case _PC_FILESIZEBITS:
1322 		*ap->a_retval = 64;
1323 		return (0);
1324 	case _PC_NAME_MAX:
1325 		*ap->a_retval = NAME_MAX;
1326 		return (0);
1327 	case _PC_LINK_MAX:
1328 		*ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX);
1329 		return (0);
1330 	case _PC_SYMLINK_MAX:
1331 		*ap->a_retval = MAXPATHLEN;
1332 		return (0);
1333 	case _PC_NO_TRUNC:
1334 		*ap->a_retval = 1;
1335 		return (0);
1336 	default:
1337 		return (vop_stdpathconf(ap));
1338 	}
1339 }
1340 
1341 /*
1342     struct vnop_read_args {
1343 	struct vnode *a_vp;
1344 	struct uio *a_uio;
1345 	int  a_ioflag;
1346 	struct ucred *a_cred;
1347     };
1348 */
1349 static int
1350 fuse_vnop_read(struct vop_read_args *ap)
1351 {
1352 	struct vnode *vp = ap->a_vp;
1353 	struct uio *uio = ap->a_uio;
1354 	int ioflag = ap->a_ioflag;
1355 	struct ucred *cred = ap->a_cred;
1356 	pid_t pid = curthread->td_proc->p_pid;
1357 
1358 	if (fuse_isdeadfs(vp)) {
1359 		return ENXIO;
1360 	}
1361 
1362 	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
1363 		ioflag |= IO_DIRECT;
1364 	}
1365 
1366 	return fuse_io_dispatch(vp, uio, ioflag, cred, pid);
1367 }
1368 
1369 /*
1370     struct vnop_readdir_args {
1371 	struct vnode *a_vp;
1372 	struct uio *a_uio;
1373 	struct ucred *a_cred;
1374 	int *a_eofflag;
1375 	int *a_ncookies;
1376 	u_long **a_cookies;
1377     };
1378 */
1379 static int
1380 fuse_vnop_readdir(struct vop_readdir_args *ap)
1381 {
1382 	struct vnode *vp = ap->a_vp;
1383 	struct uio *uio = ap->a_uio;
1384 	struct ucred *cred = ap->a_cred;
1385 	struct fuse_filehandle *fufh = NULL;
1386 	struct fuse_iov cookediov;
1387 	int err = 0;
1388 	u_long *cookies;
1389 	off_t startoff;
1390 	ssize_t tresid;
1391 	int ncookies;
1392 	bool closefufh = false;
1393 	pid_t pid = curthread->td_proc->p_pid;
1394 
1395 	if (ap->a_eofflag)
1396 		*ap->a_eofflag = 0;
1397 	if (fuse_isdeadfs(vp)) {
1398 		return ENXIO;
1399 	}
1400 	if (				/* XXXIP ((uio_iovcnt(uio) > 1)) || */
1401 	    (uio_resid(uio) < sizeof(struct dirent))) {
1402 		return EINVAL;
1403 	}
1404 
1405 	tresid = uio->uio_resid;
1406 	startoff = uio->uio_offset;
1407 	err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
1408 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
1409 		/*
1410 		 * nfsd will do VOP_READDIR without first doing VOP_OPEN.  We
1411 		 * must implicitly open the directory here
1412 		 */
1413 		err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred);
1414 		if (err == 0) {
1415 			/*
1416 			 * When a directory is opened, it must be read from
1417 			 * the beginning.  Hopefully, the "startoff" still
1418 			 * exists as an offset cookie for the directory.
1419 			 * If not, it will read the entire directory without
1420 			 * returning any entries and just return eof.
1421 			 */
1422 			uio->uio_offset = 0;
1423 		}
1424 		closefufh = true;
1425 	}
1426 	if (err)
1427 		return (err);
1428 	if (ap->a_ncookies != NULL) {
1429 		ncookies = uio->uio_resid /
1430 			(offsetof(struct dirent, d_name) + 4) + 1;
1431 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
1432 		*ap->a_ncookies = ncookies;
1433 		*ap->a_cookies = cookies;
1434 	} else {
1435 		ncookies = 0;
1436 		cookies = NULL;
1437 	}
1438 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1)
1439 	fiov_init(&cookediov, DIRCOOKEDSIZE);
1440 
1441 	err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov,
1442 		&ncookies, cookies);
1443 
1444 	fiov_teardown(&cookediov);
1445 	if (closefufh)
1446 		fuse_filehandle_close(vp, fufh, curthread, cred);
1447 
1448 	if (ap->a_ncookies != NULL) {
1449 		if (err == 0) {
1450 			*ap->a_ncookies -= ncookies;
1451 		} else {
1452 			free(*ap->a_cookies, M_TEMP);
1453 			*ap->a_ncookies = 0;
1454 			*ap->a_cookies = NULL;
1455 		}
1456 	}
1457 	if (err == 0 && tresid == uio->uio_resid)
1458 		*ap->a_eofflag = 1;
1459 
1460 	return err;
1461 }
1462 
1463 /*
1464     struct vnop_readlink_args {
1465 	struct vnode *a_vp;
1466 	struct uio *a_uio;
1467 	struct ucred *a_cred;
1468     };
1469 */
1470 static int
1471 fuse_vnop_readlink(struct vop_readlink_args *ap)
1472 {
1473 	struct vnode *vp = ap->a_vp;
1474 	struct uio *uio = ap->a_uio;
1475 	struct ucred *cred = ap->a_cred;
1476 
1477 	struct fuse_dispatcher fdi;
1478 	int err;
1479 
1480 	if (fuse_isdeadfs(vp)) {
1481 		return ENXIO;
1482 	}
1483 	if (!vnode_islnk(vp)) {
1484 		return EINVAL;
1485 	}
1486 	fdisp_init(&fdi, 0);
1487 	err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred);
1488 	if (err) {
1489 		goto out;
1490 	}
1491 	if (((char *)fdi.answ)[0] == '/' &&
1492 	    fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) {
1493 		char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname;
1494 
1495 		err = uiomove(mpth, strlen(mpth), uio);
1496 	}
1497 	if (!err) {
1498 		err = uiomove(fdi.answ, fdi.iosize, uio);
1499 	}
1500 out:
1501 	fdisp_destroy(&fdi);
1502 	return err;
1503 }
1504 
1505 /*
1506     struct vnop_reclaim_args {
1507 	struct vnode *a_vp;
1508 	struct thread *a_td;
1509     };
1510 */
1511 static int
1512 fuse_vnop_reclaim(struct vop_reclaim_args *ap)
1513 {
1514 	struct vnode *vp = ap->a_vp;
1515 	struct thread *td = ap->a_td;
1516 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
1517 	struct fuse_filehandle *fufh, *fufh_tmp;
1518 
1519 	if (!fvdat) {
1520 		panic("FUSE: no vnode data during recycling");
1521 	}
1522 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
1523 		printf("FUSE: vnode being reclaimed with open fufh "
1524 			"(type=%#x)", fufh->fufh_type);
1525 		fuse_filehandle_close(vp, fufh, td, NULL);
1526 	}
1527 
1528 	if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) {
1529 		fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp),
1530 		    fvdat->nlookup);
1531 	}
1532 	cache_purge(vp);
1533 	vfs_hash_remove(vp);
1534 	fuse_vnode_destroy(vp);
1535 
1536 	return 0;
1537 }
1538 
1539 /*
1540     struct vnop_remove_args {
1541 	struct vnode *a_dvp;
1542 	struct vnode *a_vp;
1543 	struct componentname *a_cnp;
1544     };
1545 */
1546 static int
1547 fuse_vnop_remove(struct vop_remove_args *ap)
1548 {
1549 	struct vnode *dvp = ap->a_dvp;
1550 	struct vnode *vp = ap->a_vp;
1551 	struct componentname *cnp = ap->a_cnp;
1552 
1553 	int err;
1554 
1555 	if (fuse_isdeadfs(vp)) {
1556 		return ENXIO;
1557 	}
1558 	if (vnode_isdir(vp)) {
1559 		return EPERM;
1560 	}
1561 
1562 	err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
1563 
1564 	return err;
1565 }
1566 
1567 /*
1568     struct vnop_rename_args {
1569 	struct vnode *a_fdvp;
1570 	struct vnode *a_fvp;
1571 	struct componentname *a_fcnp;
1572 	struct vnode *a_tdvp;
1573 	struct vnode *a_tvp;
1574 	struct componentname *a_tcnp;
1575     };
1576 */
1577 static int
1578 fuse_vnop_rename(struct vop_rename_args *ap)
1579 {
1580 	struct vnode *fdvp = ap->a_fdvp;
1581 	struct vnode *fvp = ap->a_fvp;
1582 	struct componentname *fcnp = ap->a_fcnp;
1583 	struct vnode *tdvp = ap->a_tdvp;
1584 	struct vnode *tvp = ap->a_tvp;
1585 	struct componentname *tcnp = ap->a_tcnp;
1586 	struct fuse_data *data;
1587 	bool newparent = fdvp != tdvp;
1588 	bool isdir = fvp->v_type == VDIR;
1589 	int err = 0;
1590 
1591 	if (fuse_isdeadfs(fdvp)) {
1592 		return ENXIO;
1593 	}
1594 	if (fvp->v_mount != tdvp->v_mount ||
1595 	    (tvp && fvp->v_mount != tvp->v_mount)) {
1596 		SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename");
1597 		err = EXDEV;
1598 		goto out;
1599 	}
1600 	cache_purge(fvp);
1601 
1602 	/*
1603 	 * FUSE library is expected to check if target directory is not
1604 	 * under the source directory in the file system tree.
1605 	 * Linux performs this check at VFS level.
1606 	 */
1607 	/*
1608 	 * If source is a directory, and it will get a new parent, user must
1609 	 * have write permission to it, so ".." can be modified.
1610 	 */
1611 	data = fuse_get_mpdata(vnode_mount(tdvp));
1612 	if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
1613 		err = fuse_internal_access(fvp, VWRITE,
1614 			tcnp->cn_thread, tcnp->cn_cred);
1615 		if (err)
1616 			goto out;
1617 	}
1618 	sx_xlock(&data->rename_lock);
1619 	err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
1620 	if (err == 0) {
1621 		if (tdvp != fdvp)
1622 			fuse_vnode_setparent(fvp, tdvp);
1623 		if (tvp != NULL)
1624 			fuse_vnode_setparent(tvp, NULL);
1625 	}
1626 	sx_unlock(&data->rename_lock);
1627 
1628 	if (tvp != NULL && tvp != fvp) {
1629 		cache_purge(tvp);
1630 	}
1631 	if (vnode_isdir(fvp)) {
1632 		if ((tvp != NULL) && vnode_isdir(tvp)) {
1633 			cache_purge(tdvp);
1634 		}
1635 		cache_purge(fdvp);
1636 	}
1637 out:
1638 	if (tdvp == tvp) {
1639 		vrele(tdvp);
1640 	} else {
1641 		vput(tdvp);
1642 	}
1643 	if (tvp != NULL) {
1644 		vput(tvp);
1645 	}
1646 	vrele(fdvp);
1647 	vrele(fvp);
1648 
1649 	return err;
1650 }
1651 
1652 /*
1653     struct vnop_rmdir_args {
1654 	    struct vnode *a_dvp;
1655 	    struct vnode *a_vp;
1656 	    struct componentname *a_cnp;
1657     } *ap;
1658 */
1659 static int
1660 fuse_vnop_rmdir(struct vop_rmdir_args *ap)
1661 {
1662 	struct vnode *dvp = ap->a_dvp;
1663 	struct vnode *vp = ap->a_vp;
1664 
1665 	int err;
1666 
1667 	if (fuse_isdeadfs(vp)) {
1668 		return ENXIO;
1669 	}
1670 	if (VTOFUD(vp) == VTOFUD(dvp)) {
1671 		return EINVAL;
1672 	}
1673 	err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
1674 
1675 	return err;
1676 }
1677 
1678 /*
1679     struct vnop_setattr_args {
1680 	struct vnode *a_vp;
1681 	struct vattr *a_vap;
1682 	struct ucred *a_cred;
1683 	struct thread *a_td;
1684     };
1685 */
1686 static int
1687 fuse_vnop_setattr(struct vop_setattr_args *ap)
1688 {
1689 	struct vnode *vp = ap->a_vp;
1690 	struct vattr *vap = ap->a_vap;
1691 	struct ucred *cred = ap->a_cred;
1692 	struct thread *td = curthread;
1693 	struct mount *mp;
1694 	struct fuse_data *data;
1695 	struct vattr old_va;
1696 	int dataflags;
1697 	int err = 0, err2;
1698 	accmode_t accmode = 0;
1699 	bool checkperm;
1700 	bool drop_suid = false;
1701 	gid_t cr_gid;
1702 
1703 	mp = vnode_mount(vp);
1704 	data = fuse_get_mpdata(mp);
1705 	dataflags = data->dataflags;
1706 	checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS;
1707 	if (cred->cr_ngroups > 0)
1708 		cr_gid = cred->cr_groups[0];
1709 	else
1710 		cr_gid = 0;
1711 
1712 	if (fuse_isdeadfs(vp)) {
1713 		return ENXIO;
1714 	}
1715 
1716 	if (vap->va_uid != (uid_t)VNOVAL) {
1717 		if (checkperm) {
1718 			/* Only root may change a file's owner */
1719 			err = priv_check_cred(cred, PRIV_VFS_CHOWN);
1720 			if (err) {
1721 				/* As a special case, allow the null chown */
1722 				err2 = fuse_internal_getattr(vp, &old_va, cred,
1723 					td);
1724 				if (err2)
1725 					return (err2);
1726 				if (vap->va_uid != old_va.va_uid)
1727 					return err;
1728 				else
1729 					accmode |= VADMIN;
1730 				drop_suid = true;
1731 			} else
1732 				accmode |= VADMIN;
1733 		} else
1734 			accmode |= VADMIN;
1735 	}
1736 	if (vap->va_gid != (gid_t)VNOVAL) {
1737 		if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN))
1738 			drop_suid = true;
1739 		if (checkperm && !groupmember(vap->va_gid, cred))
1740 		{
1741 			/*
1742 			 * Non-root users may only chgrp to one of their own
1743 			 * groups
1744 			 */
1745 			err = priv_check_cred(cred, PRIV_VFS_CHOWN);
1746 			if (err) {
1747 				/* As a special case, allow the null chgrp */
1748 				err2 = fuse_internal_getattr(vp, &old_va, cred,
1749 					td);
1750 				if (err2)
1751 					return (err2);
1752 				if (vap->va_gid != old_va.va_gid)
1753 					return err;
1754 				accmode |= VADMIN;
1755 			} else
1756 				accmode |= VADMIN;
1757 		} else
1758 			accmode |= VADMIN;
1759 	}
1760 	if (vap->va_size != VNOVAL) {
1761 		switch (vp->v_type) {
1762 		case VDIR:
1763 			return (EISDIR);
1764 		case VLNK:
1765 		case VREG:
1766 			if (vfs_isrdonly(mp))
1767 				return (EROFS);
1768 			break;
1769 		default:
1770 			/*
1771 			 * According to POSIX, the result is unspecified
1772 			 * for file types other than regular files,
1773 			 * directories and shared memory objects.  We
1774 			 * don't support shared memory objects in the file
1775 			 * system, and have dubious support for truncating
1776 			 * symlinks.  Just ignore the request in other cases.
1777 			 */
1778 			return (0);
1779 		}
1780 		/* Don't set accmode.  Permission to trunc is checked upstack */
1781 	}
1782 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1783 		if (vap->va_vaflags & VA_UTIMES_NULL)
1784 			accmode |= VWRITE;
1785 		else
1786 			accmode |= VADMIN;
1787 	}
1788 	if (drop_suid) {
1789 		if (vap->va_mode != (mode_t)VNOVAL)
1790 			vap->va_mode &= ~(S_ISUID | S_ISGID);
1791 		else {
1792 			err = fuse_internal_getattr(vp, &old_va, cred, td);
1793 			if (err)
1794 				return (err);
1795 			vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID);
1796 		}
1797 	}
1798 	if (vap->va_mode != (mode_t)VNOVAL) {
1799 		/* Only root may set the sticky bit on non-directories */
1800 		if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT)
1801 		    && priv_check_cred(cred, PRIV_VFS_STICKYFILE))
1802 			return EFTYPE;
1803 		if (checkperm && (vap->va_mode & S_ISGID)) {
1804 			err = fuse_internal_getattr(vp, &old_va, cred, td);
1805 			if (err)
1806 				return (err);
1807 			if (!groupmember(old_va.va_gid, cred)) {
1808 				err = priv_check_cred(cred, PRIV_VFS_SETGID);
1809 				if (err)
1810 					return (err);
1811 			}
1812 		}
1813 		accmode |= VADMIN;
1814 	}
1815 
1816 	if (vfs_isrdonly(mp))
1817 		return EROFS;
1818 
1819 	err = fuse_internal_access(vp, accmode, td, cred);
1820 	if (err)
1821 		return err;
1822 	else
1823 		return fuse_internal_setattr(vp, vap, td, cred);
1824 }
1825 
1826 /*
1827     struct vnop_strategy_args {
1828 	struct vnode *a_vp;
1829 	struct buf *a_bp;
1830     };
1831 */
1832 static int
1833 fuse_vnop_strategy(struct vop_strategy_args *ap)
1834 {
1835 	struct vnode *vp = ap->a_vp;
1836 	struct buf *bp = ap->a_bp;
1837 
1838 	if (!vp || fuse_isdeadfs(vp)) {
1839 		bp->b_ioflags |= BIO_ERROR;
1840 		bp->b_error = ENXIO;
1841 		bufdone(bp);
1842 		return 0;
1843 	}
1844 
1845 	/*
1846 	 * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags.
1847 	 * fuse_io_strategy sets bp's error fields
1848 	 */
1849 	(void)fuse_io_strategy(vp, bp);
1850 
1851 	return 0;
1852 }
1853 
1854 
1855 /*
1856     struct vnop_symlink_args {
1857 	struct vnode *a_dvp;
1858 	struct vnode **a_vpp;
1859 	struct componentname *a_cnp;
1860 	struct vattr *a_vap;
1861 	char *a_target;
1862     };
1863 */
1864 static int
1865 fuse_vnop_symlink(struct vop_symlink_args *ap)
1866 {
1867 	struct vnode *dvp = ap->a_dvp;
1868 	struct vnode **vpp = ap->a_vpp;
1869 	struct componentname *cnp = ap->a_cnp;
1870 	const char *target = ap->a_target;
1871 
1872 	struct fuse_dispatcher fdi;
1873 
1874 	int err;
1875 	size_t len;
1876 
1877 	if (fuse_isdeadfs(dvp)) {
1878 		return ENXIO;
1879 	}
1880 	/*
1881 	 * Unlike the other creator type calls, here we have to create a message
1882 	 * where the name of the new entry comes first, and the data describing
1883 	 * the entry comes second.
1884 	 * Hence we can't rely on our handy fuse_internal_newentry() routine,
1885 	 * but put together the message manually and just call the core part.
1886 	 */
1887 
1888 	len = strlen(target) + 1;
1889 	fdisp_init(&fdi, len + cnp->cn_namelen + 1);
1890 	fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL);
1891 
1892 	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
1893 	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
1894 	memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len);
1895 
1896 	err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi);
1897 	fdisp_destroy(&fdi);
1898 	return err;
1899 }
1900 
1901 /*
1902     struct vnop_write_args {
1903 	struct vnode *a_vp;
1904 	struct uio *a_uio;
1905 	int  a_ioflag;
1906 	struct ucred *a_cred;
1907     };
1908 */
1909 static int
1910 fuse_vnop_write(struct vop_write_args *ap)
1911 {
1912 	struct vnode *vp = ap->a_vp;
1913 	struct uio *uio = ap->a_uio;
1914 	int ioflag = ap->a_ioflag;
1915 	struct ucred *cred = ap->a_cred;
1916 	pid_t pid = curthread->td_proc->p_pid;
1917 
1918 	if (fuse_isdeadfs(vp)) {
1919 		return ENXIO;
1920 	}
1921 
1922 	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
1923 		ioflag |= IO_DIRECT;
1924 	}
1925 
1926 	return fuse_io_dispatch(vp, uio, ioflag, cred, pid);
1927 }
1928 
1929 static daddr_t
1930 fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
1931 {
1932 	const int biosize = fuse_iosize(vp);
1933 
1934 	return (off / biosize);
1935 }
1936 
1937 static int
1938 fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn)
1939 {
1940 	off_t filesize;
1941 	int blksz, err;
1942 	const int biosize = fuse_iosize(vp);
1943 
1944 	err = fuse_vnode_size(vp, &filesize, NULL, NULL);
1945 	KASSERT(err == 0, ("vfs_bio_getpages can't handle errors here"));
1946 	if (err)
1947 		return biosize;
1948 
1949 	if ((off_t)lbn * biosize >= filesize) {
1950 		blksz = 0;
1951 	} else if ((off_t)(lbn + 1) * biosize > filesize) {
1952 		blksz = filesize - (off_t)lbn *biosize;
1953 	} else {
1954 		blksz = biosize;
1955 	}
1956 	return (blksz);
1957 }
1958 
/*
    struct vnop_getpages_args {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int *a_rbehind;
	int *a_rahead;
    };
*/
1967 static int
1968 fuse_vnop_getpages(struct vop_getpages_args *ap)
1969 {
1970 	struct vnode *vp = ap->a_vp;
1971 
1972 	if (!fsess_opt_mmap(vnode_mount(vp))) {
1973 		SDT_PROBE2(fusefs, , vnops, trace, 1,
1974 			"called on non-cacheable vnode??\n");
1975 		return (VM_PAGER_ERROR);
1976 	}
1977 
1978 	return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
1979 	    ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz));
1980 }
1981 
/*
 * Character placed between the namespace prefix and the attribute name
 * when building or parsing FUSE extended attribute names.
 */
static const char extattr_namespace_separator = '.';
1983 
1984 /*
1985     struct vop_getextattr_args {
1986 	struct vop_generic_args a_gen;
1987 	struct vnode *a_vp;
1988 	int a_attrnamespace;
1989 	const char *a_name;
1990 	struct uio *a_uio;
1991 	size_t *a_size;
1992 	struct ucred *a_cred;
1993 	struct thread *a_td;
1994     };
1995 */
1996 static int
1997 fuse_vnop_getextattr(struct vop_getextattr_args *ap)
1998 {
1999 	struct vnode *vp = ap->a_vp;
2000 	struct uio *uio = ap->a_uio;
2001 	struct fuse_dispatcher fdi;
2002 	struct fuse_getxattr_in *get_xattr_in;
2003 	struct fuse_getxattr_out *get_xattr_out;
2004 	struct mount *mp = vnode_mount(vp);
2005 	struct thread *td = ap->a_td;
2006 	struct ucred *cred = ap->a_cred;
2007 	char *prefix;
2008 	char *attr_str;
2009 	size_t len;
2010 	int err;
2011 
2012 	if (fuse_isdeadfs(vp))
2013 		return (ENXIO);
2014 
2015 	if (!fsess_isimpl(mp, FUSE_GETXATTR))
2016 		return EOPNOTSUPP;
2017 
2018 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
2019 	if (err)
2020 		return err;
2021 
2022 	/* Default to looking for user attributes. */
2023 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2024 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2025 	else
2026 		prefix = EXTATTR_NAMESPACE_USER_STRING;
2027 
2028 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2029 	    strlen(ap->a_name) + 1;
2030 
2031 	fdisp_init(&fdi, len + sizeof(*get_xattr_in));
2032 	fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred);
2033 
2034 	get_xattr_in = fdi.indata;
2035 	/*
2036 	 * Check to see whether we're querying the available size or
2037 	 * issuing the actual request.  If we pass in 0, we get back struct
2038 	 * fuse_getxattr_out.  If we pass in a non-zero size, we get back
2039 	 * that much data, without the struct fuse_getxattr_out header.
2040 	 */
2041 	if (uio == NULL)
2042 		get_xattr_in->size = 0;
2043 	else
2044 		get_xattr_in->size = uio->uio_resid;
2045 
2046 	attr_str = (char *)fdi.indata + sizeof(*get_xattr_in);
2047 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2048 	    ap->a_name);
2049 
2050 	err = fdisp_wait_answ(&fdi);
2051 	if (err != 0) {
2052 		if (err == ENOSYS) {
2053 			fsess_set_notimpl(mp, FUSE_GETXATTR);
2054 			err = EOPNOTSUPP;
2055 		}
2056 		goto out;
2057 	}
2058 
2059 	get_xattr_out = fdi.answ;
2060 
2061 	if (ap->a_size != NULL)
2062 		*ap->a_size = get_xattr_out->size;
2063 
2064 	if (uio != NULL)
2065 		err = uiomove(fdi.answ, fdi.iosize, uio);
2066 
2067 out:
2068 	fdisp_destroy(&fdi);
2069 	return (err);
2070 }
2071 
2072 /*
2073     struct vop_setextattr_args {
2074 	struct vop_generic_args a_gen;
2075 	struct vnode *a_vp;
2076 	int a_attrnamespace;
2077 	const char *a_name;
2078 	struct uio *a_uio;
2079 	struct ucred *a_cred;
2080 	struct thread *a_td;
2081     };
2082 */
2083 static int
2084 fuse_vnop_setextattr(struct vop_setextattr_args *ap)
2085 {
2086 	struct vnode *vp = ap->a_vp;
2087 	struct uio *uio = ap->a_uio;
2088 	struct fuse_dispatcher fdi;
2089 	struct fuse_setxattr_in *set_xattr_in;
2090 	struct mount *mp = vnode_mount(vp);
2091 	struct thread *td = ap->a_td;
2092 	struct ucred *cred = ap->a_cred;
2093 	char *prefix;
2094 	size_t len;
2095 	char *attr_str;
2096 	int err;
2097 
2098 	if (fuse_isdeadfs(vp))
2099 		return (ENXIO);
2100 
2101 	if (!fsess_isimpl(mp, FUSE_SETXATTR))
2102 		return EOPNOTSUPP;
2103 
2104 	if (vfs_isrdonly(mp))
2105 		return EROFS;
2106 
2107 	/* Deleting xattrs must use VOP_DELETEEXTATTR instead */
2108 	if (ap->a_uio == NULL) {
2109 		/*
2110 		 * If we got here as fallback from VOP_DELETEEXTATTR, then
2111 		 * return EOPNOTSUPP.
2112 		 */
2113 		if (!fsess_isimpl(mp, FUSE_REMOVEXATTR))
2114 			return (EOPNOTSUPP);
2115 		else
2116 			return (EINVAL);
2117 	}
2118 
2119 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
2120 		VWRITE);
2121 	if (err)
2122 		return err;
2123 
2124 	/* Default to looking for user attributes. */
2125 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2126 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2127 	else
2128 		prefix = EXTATTR_NAMESPACE_USER_STRING;
2129 
2130 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2131 	    strlen(ap->a_name) + 1;
2132 
2133 	fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid);
2134 	fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred);
2135 
2136 	set_xattr_in = fdi.indata;
2137 	set_xattr_in->size = uio->uio_resid;
2138 
2139 	attr_str = (char *)fdi.indata + sizeof(*set_xattr_in);
2140 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2141 	    ap->a_name);
2142 
2143 	err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len,
2144 	    uio->uio_resid, uio);
2145 	if (err != 0) {
2146 		goto out;
2147 	}
2148 
2149 	err = fdisp_wait_answ(&fdi);
2150 
2151 	if (err == ENOSYS) {
2152 		fsess_set_notimpl(mp, FUSE_SETXATTR);
2153 		err = EOPNOTSUPP;
2154 	}
2155 	if (err == ERESTART) {
2156 		/* Can't restart after calling uiomove */
2157 		err = EINTR;
2158 	}
2159 
2160 out:
2161 	fdisp_destroy(&fdi);
2162 	return (err);
2163 }
2164 
2165 /*
2166  * The Linux / FUSE extended attribute list is simply a collection of
2167  * NUL-terminated strings.  The FreeBSD extended attribute list is a single
2168  * byte length followed by a non-NUL terminated string.  So, this allows
2169  * conversion of the Linux / FUSE format to the FreeBSD format in place.
2170  * Linux attribute names are reported with the namespace as a prefix (e.g.
2171  * "user.attribute_name"), but in FreeBSD they are reported without the
2172  * namespace prefix (e.g. "attribute_name").  So, we're going from:
2173  *
2174  * user.attr_name1\0user.attr_name2\0
2175  *
2176  * to:
2177  *
2178  * <num>attr_name1<num>attr_name2
2179  *
2180  * Where "<num>" is a single byte number of characters in the attribute name.
2181  *
2182  * Args:
2183  * prefix - exattr namespace prefix string
2184  * list, list_len - input list with namespace prefixes
2185  * bsd_list, bsd_list_len - output list compatible with bsd vfs
2186  */
2187 static int
2188 fuse_xattrlist_convert(char *prefix, const char *list, int list_len,
2189     char *bsd_list, int *bsd_list_len)
2190 {
2191 	int len, pos, dist_to_next, prefix_len;
2192 
2193 	pos = 0;
2194 	*bsd_list_len = 0;
2195 	prefix_len = strlen(prefix);
2196 
2197 	while (pos < list_len && list[pos] != '\0') {
2198 		dist_to_next = strlen(&list[pos]) + 1;
2199 		if (bcmp(&list[pos], prefix, prefix_len) == 0 &&
2200 		    list[pos + prefix_len] == extattr_namespace_separator) {
2201 			len = dist_to_next -
2202 			    (prefix_len + sizeof(extattr_namespace_separator)) - 1;
2203 			if (len >= EXTATTR_MAXNAMELEN)
2204 				return (ENAMETOOLONG);
2205 
2206 			bsd_list[*bsd_list_len] = len;
2207 			memcpy(&bsd_list[*bsd_list_len + 1],
2208 			    &list[pos + prefix_len +
2209 			    sizeof(extattr_namespace_separator)], len);
2210 
2211 			*bsd_list_len += len + 1;
2212 		}
2213 
2214 		pos += dist_to_next;
2215 	}
2216 
2217 	return (0);
2218 }
2219 
2220 /*
2221  * List extended attributes
2222  *
2223  * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which
2224  * has a number of differences compared to its FreeBSD equivalent,
2225  * extattr_list_file:
2226  *
2227  * - FUSE_LISTXATTR returns all extended attributes across all namespaces,
2228  *   whereas listxattr(2) only returns attributes for a single namespace
2229  * - FUSE_LISTXATTR prepends each attribute name with "namespace."
2230  * - If the provided buffer is not large enough to hold the result,
2231  *   FUSE_LISTXATTR should return ERANGE, whereas listxattr is expected to
2232  *   return as many results as will fit.
2233  */
2234 /*
2235     struct vop_listextattr_args {
2236 	struct vop_generic_args a_gen;
2237 	struct vnode *a_vp;
2238 	int a_attrnamespace;
2239 	struct uio *a_uio;
2240 	size_t *a_size;
2241 	struct ucred *a_cred;
2242 	struct thread *a_td;
2243     };
2244 */
2245 static int
2246 fuse_vnop_listextattr(struct vop_listextattr_args *ap)
2247 {
2248 	struct vnode *vp = ap->a_vp;
2249 	struct uio *uio = ap->a_uio;
2250 	struct fuse_dispatcher fdi;
2251 	struct fuse_listxattr_in *list_xattr_in;
2252 	struct fuse_listxattr_out *list_xattr_out;
2253 	struct mount *mp = vnode_mount(vp);
2254 	struct thread *td = ap->a_td;
2255 	struct ucred *cred = ap->a_cred;
2256 	char *prefix;
2257 	char *bsd_list = NULL;
2258 	char *linux_list;
2259 	int bsd_list_len;
2260 	int linux_list_len;
2261 	int err;
2262 
2263 	if (fuse_isdeadfs(vp))
2264 		return (ENXIO);
2265 
2266 	if (!fsess_isimpl(mp, FUSE_LISTXATTR))
2267 		return EOPNOTSUPP;
2268 
2269 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
2270 	if (err)
2271 		return err;
2272 
2273 	/*
2274 	 * Add space for a NUL and the period separator if enabled.
2275 	 * Default to looking for user attributes.
2276 	 */
2277 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2278 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2279 	else
2280 		prefix = EXTATTR_NAMESPACE_USER_STRING;
2281 
2282 	fdisp_init(&fdi, sizeof(*list_xattr_in));
2283 	fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
2284 
2285 	/*
2286 	 * Retrieve Linux / FUSE compatible list size.
2287 	 */
2288 	list_xattr_in = fdi.indata;
2289 	list_xattr_in->size = 0;
2290 
2291 	err = fdisp_wait_answ(&fdi);
2292 	if (err != 0) {
2293 		if (err == ENOSYS) {
2294 			fsess_set_notimpl(mp, FUSE_LISTXATTR);
2295 			err = EOPNOTSUPP;
2296 		}
2297 		goto out;
2298 	}
2299 
2300 	list_xattr_out = fdi.answ;
2301 	linux_list_len = list_xattr_out->size;
2302 	if (linux_list_len == 0) {
2303 		if (ap->a_size != NULL)
2304 			*ap->a_size = linux_list_len;
2305 		goto out;
2306 	}
2307 
2308 	/*
2309 	 * Retrieve Linux / FUSE compatible list values.
2310 	 */
2311 	fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
2312 	list_xattr_in = fdi.indata;
2313 	list_xattr_in->size = linux_list_len;
2314 
2315 	err = fdisp_wait_answ(&fdi);
2316 	if (err == ERANGE) {
2317 		/*
2318 		 * Race detected.  The attribute list must've grown since the
2319 		 * first FUSE_LISTXATTR call.  Start over.  Go all the way back
2320 		 * to userland so we can process signals, if necessary, before
2321 		 * restarting.
2322 		 */
2323 		err = ERESTART;
2324 		goto out;
2325 	} else if (err != 0)
2326 		goto out;
2327 
2328 	linux_list = fdi.answ;
2329 	/* FUSE doesn't allow the server to return more data than requested */
2330 	if (fdi.iosize > linux_list_len) {
2331 		printf("WARNING: FUSE protocol violation.  Server returned "
2332 			"more extended attribute data than requested; "
2333 			"should've returned ERANGE instead");
2334 	} else {
2335 		/* But returning less data is fine */
2336 		linux_list_len = fdi.iosize;
2337 	}
2338 
2339 	/*
2340 	 * Retrieve the BSD compatible list values.
2341 	 * The Linux / FUSE attribute list format isn't the same
2342 	 * as FreeBSD's format. So we need to transform it into
2343 	 * FreeBSD's format before giving it to the user.
2344 	 */
2345 	bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK);
2346 	err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len,
2347 	    bsd_list, &bsd_list_len);
2348 	if (err != 0)
2349 		goto out;
2350 
2351 	if (ap->a_size != NULL)
2352 		*ap->a_size = bsd_list_len;
2353 
2354 	if (uio != NULL)
2355 		err = uiomove(bsd_list, bsd_list_len, uio);
2356 
2357 out:
2358 	free(bsd_list, M_TEMP);
2359 	fdisp_destroy(&fdi);
2360 	return (err);
2361 }
2362 
2363 /*
2364     struct vop_deleteextattr_args {
2365 	struct vop_generic_args a_gen;
2366 	struct vnode *a_vp;
2367 	int a_attrnamespace;
2368 	const char *a_name;
2369 	struct ucred *a_cred;
2370 	struct thread *a_td;
2371     };
2372 */
2373 static int
2374 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
2375 {
2376 	struct vnode *vp = ap->a_vp;
2377 	struct fuse_dispatcher fdi;
2378 	struct mount *mp = vnode_mount(vp);
2379 	struct thread *td = ap->a_td;
2380 	struct ucred *cred = ap->a_cred;
2381 	char *prefix;
2382 	size_t len;
2383 	char *attr_str;
2384 	int err;
2385 
2386 	if (fuse_isdeadfs(vp))
2387 		return (ENXIO);
2388 
2389 	if (!fsess_isimpl(mp, FUSE_REMOVEXATTR))
2390 		return EOPNOTSUPP;
2391 
2392 	if (vfs_isrdonly(mp))
2393 		return EROFS;
2394 
2395 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
2396 		VWRITE);
2397 	if (err)
2398 		return err;
2399 
2400 	/* Default to looking for user attributes. */
2401 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2402 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2403 	else
2404 		prefix = EXTATTR_NAMESPACE_USER_STRING;
2405 
2406 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2407 	    strlen(ap->a_name) + 1;
2408 
2409 	fdisp_init(&fdi, len);
2410 	fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred);
2411 
2412 	attr_str = fdi.indata;
2413 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2414 	    ap->a_name);
2415 
2416 	err = fdisp_wait_answ(&fdi);
2417 	if (err == ENOSYS) {
2418 		fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
2419 		err = EOPNOTSUPP;
2420 	}
2421 
2422 	fdisp_destroy(&fdi);
2423 	return (err);
2424 }
2425 
2426 /*
2427     struct vnop_print_args {
2428 	struct vnode *a_vp;
2429     };
2430 */
2431 static int
2432 fuse_vnop_print(struct vop_print_args *ap)
2433 {
2434 	struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp);
2435 
2436 	printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n",
2437 	    (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid,
2438 	    (uintmax_t)fvdat->nlookup,
2439 	    fvdat->flag);
2440 
2441 	return 0;
2442 }
2443 
2444 /*
2445  * Get an NFS filehandle for a FUSE file.
2446  *
2447  * This will only work for FUSE file systems that guarantee the uniqueness of
2448  * nodeid:generation, which most don't.
2449  */
2450 /*
2451 vop_vptofh {
2452 	IN struct vnode *a_vp;
2453 	IN struct fid *a_fhp;
2454 };
2455 */
2456 static int
2457 fuse_vnop_vptofh(struct vop_vptofh_args *ap)
2458 {
2459 	struct vnode *vp = ap->a_vp;
2460 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
2461 	struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp);
2462 	_Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid),
2463 		"FUSE fid type is too big");
2464 	struct mount *mp = vnode_mount(vp);
2465 	struct fuse_data *data = fuse_get_mpdata(mp);
2466 	struct vattr va;
2467 	int err;
2468 
2469 	if (!(data->dataflags & FSESS_EXPORT_SUPPORT))
2470 		return EOPNOTSUPP;
2471 
2472 	err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread);
2473 	if (err)
2474 		return err;
2475 
2476 	/*ip = VTOI(ap->a_vp);*/
2477 	/*ufhp = (struct ufid *)ap->a_fhp;*/
2478 	fhp->len = sizeof(struct fuse_fid);
2479 	fhp->nid = fvdat->nid;
2480 	if (fvdat->generation <= UINT32_MAX)
2481 		fhp->gen = fvdat->generation;
2482 	else
2483 		return EOVERFLOW;
2484 	return (0);
2485 }
2486 
2487 
2488