xref: /freebsd/sys/fs/fuse/fuse_vnops.c (revision d93b4d32034df7cd70e80b496e8fe8c1bc57c629)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Copyright (c) 2019 The FreeBSD Foundation
37  *
38  * Portions of this software were developed by BFF Storage Systems, LLC under
39  * sponsorship from the FreeBSD Foundation.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65 
66 #include <sys/param.h>
67 #include <sys/module.h>
68 #include <sys/systm.h>
69 #include <sys/errno.h>
70 #include <sys/kernel.h>
71 #include <sys/conf.h>
72 #include <sys/filio.h>
73 #include <sys/uio.h>
74 #include <sys/malloc.h>
75 #include <sys/queue.h>
76 #include <sys/limits.h>
77 #include <sys/lock.h>
78 #include <sys/rwlock.h>
79 #include <sys/sx.h>
80 #include <sys/proc.h>
81 #include <sys/mount.h>
82 #include <sys/vnode.h>
83 #include <sys/namei.h>
84 #include <sys/extattr.h>
85 #include <sys/stat.h>
86 #include <sys/unistd.h>
87 #include <sys/filedesc.h>
88 #include <sys/file.h>
89 #include <sys/fcntl.h>
90 #include <sys/dirent.h>
91 #include <sys/bio.h>
92 #include <sys/buf.h>
93 #include <sys/sysctl.h>
94 #include <sys/vmmeter.h>
95 
96 #include <vm/vm.h>
97 #include <vm/vm_extern.h>
98 #include <vm/pmap.h>
99 #include <vm/vm_map.h>
100 #include <vm/vm_page.h>
101 #include <vm/vm_param.h>
102 #include <vm/vm_object.h>
103 #include <vm/vm_pager.h>
104 #include <vm/vnode_pager.h>
105 #include <vm/vm_object.h>
106 
107 #include "fuse.h"
108 #include "fuse_file.h"
109 #include "fuse_internal.h"
110 #include "fuse_ipc.h"
111 #include "fuse_node.h"
112 #include "fuse_io.h"
113 
114 #include <sys/priv.h>
115 
116 /* Maximum number of hardlinks to a single FUSE file */
117 #define FUSE_LINK_MAX                      UINT32_MAX
118 
119 SDT_PROVIDER_DECLARE(fusefs);
120 /*
121  * Fuse trace probe:
122  * arg0: verbosity.  Higher numbers give more verbose messages
123  * arg1: Textual message
124  */
125 SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*");
126 
127 /* vnode ops */
128 static vop_access_t fuse_vnop_access;
129 static vop_advlock_t fuse_vnop_advlock;
130 static vop_bmap_t fuse_vnop_bmap;
131 static vop_close_t fuse_fifo_close;
132 static vop_close_t fuse_vnop_close;
133 static vop_copy_file_range_t fuse_vnop_copy_file_range;
134 static vop_create_t fuse_vnop_create;
135 static vop_deleteextattr_t fuse_vnop_deleteextattr;
136 static vop_fdatasync_t fuse_vnop_fdatasync;
137 static vop_fsync_t fuse_vnop_fsync;
138 static vop_getattr_t fuse_vnop_getattr;
139 static vop_getextattr_t fuse_vnop_getextattr;
140 static vop_inactive_t fuse_vnop_inactive;
141 static vop_ioctl_t fuse_vnop_ioctl;
142 static vop_link_t fuse_vnop_link;
143 static vop_listextattr_t fuse_vnop_listextattr;
144 static vop_lookup_t fuse_vnop_lookup;
145 static vop_mkdir_t fuse_vnop_mkdir;
146 static vop_mknod_t fuse_vnop_mknod;
147 static vop_open_t fuse_vnop_open;
148 static vop_pathconf_t fuse_vnop_pathconf;
149 static vop_read_t fuse_vnop_read;
150 static vop_readdir_t fuse_vnop_readdir;
151 static vop_readlink_t fuse_vnop_readlink;
152 static vop_reclaim_t fuse_vnop_reclaim;
153 static vop_remove_t fuse_vnop_remove;
154 static vop_rename_t fuse_vnop_rename;
155 static vop_rmdir_t fuse_vnop_rmdir;
156 static vop_setattr_t fuse_vnop_setattr;
157 static vop_setextattr_t fuse_vnop_setextattr;
158 static vop_strategy_t fuse_vnop_strategy;
159 static vop_symlink_t fuse_vnop_symlink;
160 static vop_write_t fuse_vnop_write;
161 static vop_getpages_t fuse_vnop_getpages;
162 static vop_print_t fuse_vnop_print;
163 static vop_vptofh_t fuse_vnop_vptofh;
164 
165 struct vop_vector fuse_fifoops = {
166 	.vop_default =		&fifo_specops,
167 	.vop_access =		fuse_vnop_access,
168 	.vop_close =		fuse_fifo_close,
169 	.vop_fsync =		fuse_vnop_fsync,
170 	.vop_getattr =		fuse_vnop_getattr,
171 	.vop_inactive =		fuse_vnop_inactive,
172 	.vop_pathconf =		fuse_vnop_pathconf,
173 	.vop_print =		fuse_vnop_print,
174 	.vop_read =		VOP_PANIC,
175 	.vop_reclaim =		fuse_vnop_reclaim,
176 	.vop_setattr =		fuse_vnop_setattr,
177 	.vop_write =		VOP_PANIC,
178 	.vop_vptofh =		fuse_vnop_vptofh,
179 };
180 VFS_VOP_VECTOR_REGISTER(fuse_fifoops);
181 
182 struct vop_vector fuse_vnops = {
183 	.vop_allocate =	VOP_EINVAL,
184 	.vop_default = &default_vnodeops,
185 	.vop_access = fuse_vnop_access,
186 	.vop_advlock = fuse_vnop_advlock,
187 	.vop_bmap = fuse_vnop_bmap,
188 	.vop_close = fuse_vnop_close,
189 	.vop_copy_file_range = fuse_vnop_copy_file_range,
190 	.vop_create = fuse_vnop_create,
191 	.vop_deleteextattr = fuse_vnop_deleteextattr,
192 	.vop_fsync = fuse_vnop_fsync,
193 	.vop_fdatasync = fuse_vnop_fdatasync,
194 	.vop_getattr = fuse_vnop_getattr,
195 	.vop_getextattr = fuse_vnop_getextattr,
196 	.vop_inactive = fuse_vnop_inactive,
197 	.vop_ioctl = fuse_vnop_ioctl,
198 	.vop_link = fuse_vnop_link,
199 	.vop_listextattr = fuse_vnop_listextattr,
200 	.vop_lookup = fuse_vnop_lookup,
201 	.vop_mkdir = fuse_vnop_mkdir,
202 	.vop_mknod = fuse_vnop_mknod,
203 	.vop_open = fuse_vnop_open,
204 	.vop_pathconf = fuse_vnop_pathconf,
205 	/*
206 	 * TODO: implement vop_poll after upgrading to protocol 7.21.
207 	 * FUSE_POLL was added in protocol 7.11, but it's kind of broken until
208 	 * 7.21, which adds the ability for the client to choose which poll
209 	 * events it wants, and for a client to deregister a file handle
210 	 */
211 	.vop_read = fuse_vnop_read,
212 	.vop_readdir = fuse_vnop_readdir,
213 	.vop_readlink = fuse_vnop_readlink,
214 	.vop_reclaim = fuse_vnop_reclaim,
215 	.vop_remove = fuse_vnop_remove,
216 	.vop_rename = fuse_vnop_rename,
217 	.vop_rmdir = fuse_vnop_rmdir,
218 	.vop_setattr = fuse_vnop_setattr,
219 	.vop_setextattr = fuse_vnop_setextattr,
220 	.vop_strategy = fuse_vnop_strategy,
221 	.vop_symlink = fuse_vnop_symlink,
222 	.vop_write = fuse_vnop_write,
223 	.vop_getpages = fuse_vnop_getpages,
224 	.vop_print = fuse_vnop_print,
225 	.vop_vptofh = fuse_vnop_vptofh,
226 };
227 VFS_VOP_VECTOR_REGISTER(fuse_vnops);
228 
229 uma_zone_t fuse_pbuf_zone;
230 
231 /* Check permission for extattr operations, much like extattr_check_cred */
232 static int
233 fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred,
234 	struct thread *td, accmode_t accmode)
235 {
236 	struct mount *mp = vnode_mount(vp);
237 	struct fuse_data *data = fuse_get_mpdata(mp);
238 	int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS;
239 
240 	/*
241 	 * Kernel-invoked always succeeds.
242 	 */
243 	if (cred == NOCRED)
244 		return (0);
245 
246 	/*
247 	 * Do not allow privileged processes in jail to directly manipulate
248 	 * system attributes.
249 	 */
250 	switch (ns) {
251 	case EXTATTR_NAMESPACE_SYSTEM:
252 		if (default_permissions) {
253 			return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM));
254 		}
255 		return (0);
256 	case EXTATTR_NAMESPACE_USER:
257 		if (default_permissions) {
258 			return (fuse_internal_access(vp, accmode, td, cred));
259 		}
260 		return (0);
261 	default:
262 		return (EPERM);
263 	}
264 }
265 
266 /* Get a filehandle for a directory */
267 static int
268 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp,
269 	struct ucred *cred, pid_t pid)
270 {
271 	if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0)
272 		return 0;
273 	return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid);
274 }
275 
276 /* Send FUSE_FLUSH for this vnode */
277 static int
278 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
279 {
280 	struct fuse_flush_in *ffi;
281 	struct fuse_filehandle *fufh;
282 	struct fuse_dispatcher fdi;
283 	struct thread *td = curthread;
284 	struct mount *mp = vnode_mount(vp);
285 	int err;
286 
287 	if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH))
288 		return 0;
289 
290 	err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
291 	if (err)
292 		return err;
293 
294 	fdisp_init(&fdi, sizeof(*ffi));
295 	fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred);
296 	ffi = fdi.indata;
297 	ffi->fh = fufh->fh_id;
298 	/*
299 	 * If the file has a POSIX lock then we're supposed to set lock_owner.
300 	 * If not, then lock_owner is undefined.  So we may as well always set
301 	 * it.
302 	 */
303 	ffi->lock_owner = td->td_proc->p_pid;
304 
305 	err = fdisp_wait_answ(&fdi);
306 	if (err == ENOSYS) {
307 		fsess_set_notimpl(mp, FUSE_FLUSH);
308 		err = 0;
309 	}
310 	fdisp_destroy(&fdi);
311 	return err;
312 }
313 
314 /* Close wrapper for fifos.  */
315 static int
316 fuse_fifo_close(struct vop_close_args *ap)
317 {
318 	return (fifo_specops.vop_close(ap));
319 }
320 
321 /* Invalidate a range of cached data, whether dirty of not */
322 static int
323 fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end)
324 {
325 	struct buf *bp;
326 	daddr_t left_lbn, end_lbn, right_lbn;
327 	off_t new_filesize;
328 	int iosize, left_on, right_on, right_blksize;
329 
330 	iosize = fuse_iosize(vp);
331 	left_lbn = start / iosize;
332 	end_lbn = howmany(end, iosize);
333 	left_on = start & (iosize - 1);
334 	if (left_on != 0) {
335 		bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0);
336 		if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) {
337 			/*
338 			 * Flush the dirty buffer, because we don't have a
339 			 * byte-granular way to record which parts of the
340 			 * buffer are valid.
341 			 */
342 			bwrite(bp);
343 			if (bp->b_error)
344 				return (bp->b_error);
345 		} else {
346 			brelse(bp);
347 		}
348 	}
349 	right_on = end & (iosize - 1);
350 	if (right_on != 0) {
351 		right_lbn = end / iosize;
352 		new_filesize = MAX(filesize, end);
353 		right_blksize = MIN(iosize, new_filesize - iosize * right_lbn);
354 		bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0);
355 		if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) {
356 			/*
357 			 * Flush the dirty buffer, because we don't have a
358 			 * byte-granular way to record which parts of the
359 			 * buffer are valid.
360 			 */
361 			bwrite(bp);
362 			if (bp->b_error)
363 				return (bp->b_error);
364 		} else {
365 			brelse(bp);
366 		}
367 	}
368 
369 	v_inval_buf_range(vp, left_lbn, end_lbn, iosize);
370 	return (0);
371 }
372 
373 
374 /* Send FUSE_LSEEK for this node */
375 static int
376 fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred,
377 	pid_t pid, off_t *offp, int whence)
378 {
379 	struct fuse_dispatcher fdi;
380 	struct fuse_filehandle *fufh;
381 	struct fuse_lseek_in *flsi;
382 	struct fuse_lseek_out *flso;
383 	struct mount *mp = vnode_mount(vp);
384 	int err;
385 
386 	ASSERT_VOP_LOCKED(vp, __func__);
387 
388 	err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid);
389 	if (err)
390 		return (err);
391 	fdisp_init(&fdi, sizeof(*flsi));
392 	fdisp_make_vp(&fdi, FUSE_LSEEK, vp, td, cred);
393 	flsi = fdi.indata;
394 	flsi->fh = fufh->fh_id;
395 	flsi->offset = *offp;
396 	flsi->whence = whence;
397 	err = fdisp_wait_answ(&fdi);
398 	if (err == ENOSYS) {
399 		fsess_set_notimpl(mp, FUSE_LSEEK);
400 	} else if (err == 0) {
401 		fsess_set_impl(mp, FUSE_LSEEK);
402 		flso = fdi.answ;
403 		*offp = flso->offset;
404 	}
405 	fdisp_destroy(&fdi);
406 
407 	return (err);
408 }
409 
410 /*
411     struct vnop_access_args {
412 	struct vnode *a_vp;
413 #if VOP_ACCESS_TAKES_ACCMODE_T
414 	accmode_t a_accmode;
415 #else
416 	int a_mode;
417 #endif
418 	struct ucred *a_cred;
419 	struct thread *a_td;
420     };
421 */
422 static int
423 fuse_vnop_access(struct vop_access_args *ap)
424 {
425 	struct vnode *vp = ap->a_vp;
426 	int accmode = ap->a_accmode;
427 	struct ucred *cred = ap->a_cred;
428 
429 	struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
430 
431 	int err;
432 
433 	if (fuse_isdeadfs(vp)) {
434 		if (vnode_isvroot(vp)) {
435 			return 0;
436 		}
437 		return ENXIO;
438 	}
439 	if (!(data->dataflags & FSESS_INITED)) {
440 		if (vnode_isvroot(vp)) {
441 			if (priv_check_cred(cred, PRIV_VFS_ADMIN) ||
442 			    (fuse_match_cred(data->daemoncred, cred) == 0)) {
443 				return 0;
444 			}
445 		}
446 		return EBADF;
447 	}
448 	if (vnode_islnk(vp)) {
449 		return 0;
450 	}
451 
452 	err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred);
453 	return err;
454 }
455 
456 /*
457  * struct vop_advlock_args {
458  *	struct vop_generic_args a_gen;
459  *	struct vnode *a_vp;
460  *	void *a_id;
461  *	int a_op;
462  *	struct flock *a_fl;
463  *	int a_flags;
464  * }
465  */
466 static int
467 fuse_vnop_advlock(struct vop_advlock_args *ap)
468 {
469 	struct vnode *vp = ap->a_vp;
470 	struct flock *fl = ap->a_fl;
471 	struct thread *td = curthread;
472 	struct ucred *cred = td->td_ucred;
473 	pid_t pid = td->td_proc->p_pid;
474 	struct fuse_filehandle *fufh;
475 	struct fuse_dispatcher fdi;
476 	struct fuse_lk_in *fli;
477 	struct fuse_lk_out *flo;
478 	enum fuse_opcode op;
479 	int dataflags, err;
480 	int flags = ap->a_flags;
481 
482 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
483 
484 	if (fuse_isdeadfs(vp)) {
485 		return ENXIO;
486 	}
487 
488 	switch(ap->a_op) {
489 	case F_GETLK:
490 		op = FUSE_GETLK;
491 		break;
492 	case F_SETLK:
493 		if (flags & F_WAIT)
494 			op = FUSE_SETLKW;
495 		else
496 			op = FUSE_SETLK;
497 		break;
498 	case F_UNLCK:
499 		op = FUSE_SETLK;
500 		break;
501 	default:
502 		return EINVAL;
503 	}
504 
505 	if (!(dataflags & FSESS_POSIX_LOCKS))
506 		return vop_stdadvlock(ap);
507 	/* FUSE doesn't properly support flock until protocol 7.17 */
508 	if (flags & F_FLOCK)
509 		return vop_stdadvlock(ap);
510 
511 	vn_lock(vp, LK_SHARED | LK_RETRY);
512 
513 	err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid);
514 	if (err)
515 		goto out;
516 
517 	fdisp_init(&fdi, sizeof(*fli));
518 
519 	fdisp_make_vp(&fdi, op, vp, td, cred);
520 	fli = fdi.indata;
521 	fli->fh = fufh->fh_id;
522 	fli->owner = td->td_proc->p_pid;
523 	fli->lk.start = fl->l_start;
524 	if (fl->l_len != 0)
525 		fli->lk.end = fl->l_start + fl->l_len - 1;
526 	else
527 		fli->lk.end = INT64_MAX;
528 	fli->lk.type = fl->l_type;
529 	fli->lk.pid = td->td_proc->p_pid;
530 
531 	err = fdisp_wait_answ(&fdi);
532 	fdisp_destroy(&fdi);
533 
534 	if (err == 0 && op == FUSE_GETLK) {
535 		flo = fdi.answ;
536 		fl->l_type = flo->lk.type;
537 		fl->l_pid = flo->lk.pid;
538 		if (flo->lk.type != F_UNLCK) {
539 			fl->l_start = flo->lk.start;
540 			if (flo->lk.end == INT64_MAX)
541 				fl->l_len = 0;
542 			else
543 				fl->l_len = flo->lk.end - flo->lk.start + 1;
544 			fl->l_start = flo->lk.start;
545 		}
546 	}
547 
548 out:
549 	VOP_UNLOCK(vp);
550 	return err;
551 }
552 
553 /* {
554 	struct vnode *a_vp;
555 	daddr_t a_bn;
556 	struct bufobj **a_bop;
557 	daddr_t *a_bnp;
558 	int *a_runp;
559 	int *a_runb;
560 } */
561 static int
562 fuse_vnop_bmap(struct vop_bmap_args *ap)
563 {
564 	struct vnode *vp = ap->a_vp;
565 	struct bufobj **bo = ap->a_bop;
566 	struct thread *td = curthread;
567 	struct mount *mp;
568 	struct fuse_dispatcher fdi;
569 	struct fuse_bmap_in *fbi;
570 	struct fuse_bmap_out *fbo;
571 	struct fuse_data *data;
572 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
573 	uint64_t biosize;
574 	off_t fsize;
575 	daddr_t lbn = ap->a_bn;
576 	daddr_t *pbn = ap->a_bnp;
577 	int *runp = ap->a_runp;
578 	int *runb = ap->a_runb;
579 	int error = 0;
580 	int maxrun;
581 
582 	if (fuse_isdeadfs(vp)) {
583 		return ENXIO;
584 	}
585 
586 	mp = vnode_mount(vp);
587 	data = fuse_get_mpdata(mp);
588 	biosize = fuse_iosize(vp);
589 	maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1,
590 		data->max_readahead_blocks);
591 
592 	if (bo != NULL)
593 		*bo = &vp->v_bufobj;
594 
595 	/*
596 	 * The FUSE_BMAP operation does not include the runp and runb
597 	 * variables, so we must guess.  Report nonzero contiguous runs so
598 	 * cluster_read will combine adjacent reads.  It's worthwhile to reduce
599 	 * upcalls even if we don't know the true physical layout of the file.
600 	 *
601 	 * FUSE file systems may opt out of read clustering in two ways:
602 	 * * mounting with -onoclusterr
603 	 * * Setting max_readahead <= maxbcachebuf during FUSE_INIT
604 	 */
605 	if (runb != NULL)
606 		*runb = MIN(lbn, maxrun);
607 	if (runp != NULL && maxrun == 0)
608 		*runp = 0;
609 	else if (runp != NULL) {
610 		/*
611 		 * If the file's size is cached, use that value to calculate
612 		 * runp, even if the cache is expired.  runp is only advisory,
613 		 * and the risk of getting it wrong is not worth the cost of
614 		 * another upcall.
615 		 */
616 		if (fvdat->cached_attrs.va_size != VNOVAL)
617 			fsize = fvdat->cached_attrs.va_size;
618 		else
619 			error = fuse_vnode_size(vp, &fsize, td->td_ucred, td);
620 		if (error == 0)
621 			*runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1),
622 				    maxrun);
623 		else
624 			*runp = 0;
625 	}
626 
627 	if (fsess_maybe_impl(mp, FUSE_BMAP)) {
628 		fdisp_init(&fdi, sizeof(*fbi));
629 		fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred);
630 		fbi = fdi.indata;
631 		fbi->block = lbn;
632 		fbi->blocksize = biosize;
633 		error = fdisp_wait_answ(&fdi);
634 		if (error == ENOSYS) {
635 			fdisp_destroy(&fdi);
636 			fsess_set_notimpl(mp, FUSE_BMAP);
637 			error = 0;
638 		} else {
639 			fbo = fdi.answ;
640 			if (error == 0 && pbn != NULL)
641 				*pbn = fbo->block;
642 			fdisp_destroy(&fdi);
643 			return error;
644 		}
645 	}
646 
647 	/* If the daemon doesn't support BMAP, make up a sensible default */
648 	if (pbn != NULL)
649 		*pbn = lbn * btodb(biosize);
650 	return (error);
651 }
652 
653 /*
654     struct vop_close_args {
655 	struct vnode *a_vp;
656 	int  a_fflag;
657 	struct ucred *a_cred;
658 	struct thread *a_td;
659     };
660 */
661 static int
662 fuse_vnop_close(struct vop_close_args *ap)
663 {
664 	struct vnode *vp = ap->a_vp;
665 	struct ucred *cred = ap->a_cred;
666 	int fflag = ap->a_fflag;
667 	struct thread *td = ap->a_td;
668 	pid_t pid = td->td_proc->p_pid;
669 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
670 	int err = 0;
671 
672 	if (fuse_isdeadfs(vp))
673 		return 0;
674 	if (vnode_isdir(vp))
675 		return 0;
676 	if (fflag & IO_NDELAY)
677 		return 0;
678 
679 	err = fuse_flush(vp, cred, pid, fflag);
680 	if (err == 0 && (fvdat->flag & FN_ATIMECHANGE)) {
681 		struct vattr vap;
682 
683 		VATTR_NULL(&vap);
684 		vap.va_atime = fvdat->cached_attrs.va_atime;
685 		err = fuse_internal_setattr(vp, &vap, td, NULL);
686 	}
687 	/* TODO: close the file handle, if we're sure it's no longer used */
688 	if ((fvdat->flag & FN_SIZECHANGE) != 0) {
689 		fuse_vnode_savesize(vp, cred, td->td_proc->p_pid);
690 	}
691 	return err;
692 }
693 
694 /*
695    struct vop_copy_file_range_args {
696 	struct vop_generic_args a_gen;
697 	struct vnode *a_invp;
698 	off_t *a_inoffp;
699 	struct vnode *a_outvp;
700 	off_t *a_outoffp;
701 	size_t *a_lenp;
702 	unsigned int a_flags;
703 	struct ucred *a_incred;
704 	struct ucred *a_outcred;
705 	struct thread *a_fsizetd;
706 }
707  */
708 static int
709 fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
710 {
711 	struct vnode *invp = ap->a_invp;
712 	struct vnode *outvp = ap->a_outvp;
713 	struct mount *mp = vnode_mount(invp);
714 	struct fuse_vnode_data *outfvdat = VTOFUD(outvp);
715 	struct fuse_dispatcher fdi;
716 	struct fuse_filehandle *infufh, *outfufh;
717 	struct fuse_copy_file_range_in *fcfri;
718 	struct ucred *incred = ap->a_incred;
719 	struct ucred *outcred = ap->a_outcred;
720 	struct fuse_write_out *fwo;
721 	struct thread *td;
722 	struct uio io;
723 	off_t outfilesize;
724 	pid_t pid;
725 	int err;
726 
727 	if (mp != vnode_mount(outvp))
728 		goto fallback;
729 
730 	if (incred->cr_uid != outcred->cr_uid)
731 		goto fallback;
732 
733 	if (incred->cr_groups[0] != outcred->cr_groups[0])
734 		goto fallback;
735 
736 	if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE))
737 		goto fallback;
738 
739 	if (ap->a_fsizetd == NULL)
740 		td = curthread;
741 	else
742 		td = ap->a_fsizetd;
743 	pid = td->td_proc->p_pid;
744 
745 	/* Lock both vnodes, avoiding risk of deadlock. */
746 	do {
747 		err = vn_lock(outvp, LK_EXCLUSIVE);
748 		if (invp == outvp)
749 			break;
750 		if (err == 0) {
751 			err = vn_lock(invp, LK_SHARED | LK_NOWAIT);
752 			if (err == 0)
753 				break;
754 			VOP_UNLOCK(outvp);
755 			err = vn_lock(invp, LK_SHARED);
756 			if (err == 0)
757 				VOP_UNLOCK(invp);
758 		}
759 	} while (err == 0);
760 	if (err != 0)
761 		return (err);
762 
763 	err = fuse_filehandle_getrw(invp, FREAD, &infufh, incred, pid);
764 	if (err)
765 		goto unlock;
766 
767 	err = fuse_filehandle_getrw(outvp, FWRITE, &outfufh, outcred, pid);
768 	if (err)
769 		goto unlock;
770 
771 	if (ap->a_fsizetd) {
772 		io.uio_offset = *ap->a_outoffp;
773 		io.uio_resid = *ap->a_lenp;
774 		err = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd);
775 		if (err)
776 			goto unlock;
777 	}
778 
779 	err = fuse_vnode_size(outvp, &outfilesize, outcred, curthread);
780 	if (err)
781 		goto unlock;
782 
783 	err = fuse_inval_buf_range(outvp, outfilesize, *ap->a_outoffp,
784 		*ap->a_outoffp + *ap->a_lenp);
785 	if (err)
786 		goto unlock;
787 
788 	fdisp_init(&fdi, sizeof(*fcfri));
789 	fdisp_make_vp(&fdi, FUSE_COPY_FILE_RANGE, invp, td, incred);
790 	fcfri = fdi.indata;
791 	fcfri->fh_in = infufh->fh_id;
792 	fcfri->off_in = *ap->a_inoffp;
793 	fcfri->nodeid_out = VTOI(outvp);
794 	fcfri->fh_out = outfufh->fh_id;
795 	fcfri->off_out = *ap->a_outoffp;
796 	fcfri->len = *ap->a_lenp;
797 	fcfri->flags = 0;
798 
799 	err = fdisp_wait_answ(&fdi);
800 	if (err == 0) {
801 		fwo = fdi.answ;
802 		*ap->a_lenp = fwo->size;
803 		*ap->a_inoffp += fwo->size;
804 		*ap->a_outoffp += fwo->size;
805 		fuse_internal_clear_suid_on_write(outvp, outcred, td);
806 		if (*ap->a_outoffp > outfvdat->cached_attrs.va_size)
807 			fuse_vnode_setsize(outvp, *ap->a_outoffp, false);
808 	}
809 	fdisp_destroy(&fdi);
810 
811 unlock:
812 	if (invp != outvp)
813 		VOP_UNLOCK(invp);
814 	VOP_UNLOCK(outvp);
815 
816 	if (err == ENOSYS) {
817 		fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE);
818 fallback:
819 		err = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
820 		    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
821 		    ap->a_incred, ap->a_outcred, ap->a_fsizetd);
822 	}
823 
824 	return (err);
825 }
826 
827 static void
828 fdisp_make_mknod_for_fallback(
829 	struct fuse_dispatcher *fdip,
830 	struct componentname *cnp,
831 	struct vnode *dvp,
832 	uint64_t parentnid,
833 	struct thread *td,
834 	struct ucred *cred,
835 	mode_t mode,
836 	enum fuse_opcode *op)
837 {
838 	struct fuse_mknod_in *fmni;
839 
840 	fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1);
841 	*op = FUSE_MKNOD;
842 	fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred);
843 	fmni = fdip->indata;
844 	fmni->mode = mode;
845 	fmni->rdev = 0;
846 	memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr,
847 	    cnp->cn_namelen);
848 	((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0';
849 }
850 /*
851     struct vnop_create_args {
852 	struct vnode *a_dvp;
853 	struct vnode **a_vpp;
854 	struct componentname *a_cnp;
855 	struct vattr *a_vap;
856     };
857 */
858 static int
859 fuse_vnop_create(struct vop_create_args *ap)
860 {
861 	struct vnode *dvp = ap->a_dvp;
862 	struct vnode **vpp = ap->a_vpp;
863 	struct componentname *cnp = ap->a_cnp;
864 	struct vattr *vap = ap->a_vap;
865 	struct thread *td = curthread;
866 	struct ucred *cred = cnp->cn_cred;
867 
868 	struct fuse_data *data;
869 	struct fuse_create_in *fci;
870 	struct fuse_entry_out *feo;
871 	struct fuse_open_out *foo;
872 	struct fuse_dispatcher fdi, fdi2;
873 	struct fuse_dispatcher *fdip = &fdi;
874 	struct fuse_dispatcher *fdip2 = NULL;
875 
876 	int err;
877 
878 	struct mount *mp = vnode_mount(dvp);
879 	data = fuse_get_mpdata(mp);
880 	uint64_t parentnid = VTOFUD(dvp)->nid;
881 	mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
882 	enum fuse_opcode op;
883 	int flags;
884 
885 	if (fuse_isdeadfs(dvp))
886 		return ENXIO;
887 
888 	/* FUSE expects sockets to be created with FUSE_MKNOD */
889 	if (vap->va_type == VSOCK)
890 		return fuse_internal_mknod(dvp, vpp, cnp, vap);
891 
892 	/*
893 	 * VOP_CREATE doesn't tell us the open(2) flags, so we guess.  Only a
894 	 * writable mode makes sense, and we might as well include readability
895 	 * too.
896 	 */
897 	flags = O_RDWR;
898 
899 	bzero(&fdi, sizeof(fdi));
900 
901 	if (vap->va_type != VREG)
902 		return (EINVAL);
903 
904 	if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) {
905 		/* Fallback to FUSE_MKNOD/FUSE_OPEN */
906 		fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td,
907 			cred, mode, &op);
908 	} else {
909 		/* Use FUSE_CREATE */
910 		size_t insize;
911 
912 		op = FUSE_CREATE;
913 		fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1);
914 		fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred);
915 		fci = fdip->indata;
916 		fci->mode = mode;
917 		fci->flags = O_CREAT | flags;
918 		if (fuse_libabi_geq(data, 7, 12)) {
919 			insize = sizeof(*fci);
920 			fci->umask = td->td_proc->p_pd->pd_cmask;
921 		} else {
922 			insize = sizeof(struct fuse_open_in);
923 		}
924 
925 		memcpy((char *)fdip->indata + insize, cnp->cn_nameptr,
926 		    cnp->cn_namelen);
927 		((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0';
928 	}
929 
930 	err = fdisp_wait_answ(fdip);
931 
932 	if (err) {
933 		if (err == ENOSYS && op == FUSE_CREATE) {
934 			fsess_set_notimpl(mp, FUSE_CREATE);
935 			fdisp_destroy(fdip);
936 			fdisp_make_mknod_for_fallback(fdip, cnp, dvp,
937 				parentnid, td, cred, mode, &op);
938 			err = fdisp_wait_answ(fdip);
939 		}
940 		if (err)
941 			goto out;
942 	}
943 
944 	feo = fdip->answ;
945 
946 	if ((err = fuse_internal_checkentry(feo, vap->va_type))) {
947 		goto out;
948 	}
949 
950 	if (op == FUSE_CREATE) {
951 		foo = (struct fuse_open_out*)(feo + 1);
952 	} else {
953 		/* Issue a separate FUSE_OPEN */
954 		struct fuse_open_in *foi;
955 
956 		fdip2 = &fdi2;
957 		fdisp_init(fdip2, sizeof(*foi));
958 		fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td,
959 			cred);
960 		foi = fdip2->indata;
961 		foi->flags = flags;
962 		err = fdisp_wait_answ(fdip2);
963 		if (err)
964 			goto out;
965 		foo = fdip2->answ;
966 	}
967 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type);
968 	if (err) {
969 		struct fuse_release_in *fri;
970 		uint64_t nodeid = feo->nodeid;
971 		uint64_t fh_id = foo->fh;
972 
973 		fdisp_init(fdip, sizeof(*fri));
974 		fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred);
975 		fri = fdip->indata;
976 		fri->fh = fh_id;
977 		fri->flags = flags;
978 		fuse_insert_callback(fdip->tick, fuse_internal_forget_callback);
979 		fuse_insert_message(fdip->tick, false);
980 		goto out;
981 	}
982 	ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create");
983 	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
984 		feo->attr_valid_nsec, NULL, true);
985 
986 	fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo);
987 	fuse_vnode_open(*vpp, foo->open_flags, td);
988 	/*
989 	 * Purge the parent's attribute cache because the daemon should've
990 	 * updated its mtime and ctime
991 	 */
992 	fuse_vnode_clear_attr_cache(dvp);
993 	cache_purge_negative(dvp);
994 
995 out:
996 	if (fdip2)
997 		fdisp_destroy(fdip2);
998 	fdisp_destroy(fdip);
999 	return err;
1000 }
1001 
1002 /*
1003     struct vnop_fdatasync_args {
1004 	struct vop_generic_args a_gen;
1005 	struct vnode * a_vp;
1006 	struct thread * a_td;
1007     };
1008 */
1009 static int
1010 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap)
1011 {
1012 	struct vnode *vp = ap->a_vp;
1013 	struct thread *td = ap->a_td;
1014 	int waitfor = MNT_WAIT;
1015 
1016 	int err = 0;
1017 
1018 	if (fuse_isdeadfs(vp)) {
1019 		return 0;
1020 	}
1021 	if ((err = vop_stdfdatasync_buf(ap)))
1022 		return err;
1023 
1024 	return fuse_internal_fsync(vp, td, waitfor, true);
1025 }
1026 
1027 /*
1028     struct vnop_fsync_args {
1029 	struct vop_generic_args a_gen;
1030 	struct vnode * a_vp;
1031 	int  a_waitfor;
1032 	struct thread * a_td;
1033     };
1034 */
1035 static int
1036 fuse_vnop_fsync(struct vop_fsync_args *ap)
1037 {
1038 	struct vnode *vp = ap->a_vp;
1039 	struct thread *td = ap->a_td;
1040 	int waitfor = ap->a_waitfor;
1041 	int err = 0;
1042 
1043 	if (fuse_isdeadfs(vp)) {
1044 		return 0;
1045 	}
1046 	if ((err = vop_stdfsync(ap)))
1047 		return err;
1048 
1049 	return fuse_internal_fsync(vp, td, waitfor, false);
1050 }
1051 
1052 /*
1053     struct vnop_getattr_args {
1054 	struct vnode *a_vp;
1055 	struct vattr *a_vap;
1056 	struct ucred *a_cred;
1057 	struct thread *a_td;
1058     };
1059 */
1060 static int
1061 fuse_vnop_getattr(struct vop_getattr_args *ap)
1062 {
1063 	struct vnode *vp = ap->a_vp;
1064 	struct vattr *vap = ap->a_vap;
1065 	struct ucred *cred = ap->a_cred;
1066 	struct thread *td = curthread;
1067 
1068 	int err = 0;
1069 	int dataflags;
1070 
1071 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
1072 
1073 	/* Note that we are not bailing out on a dead file system just yet. */
1074 
1075 	if (!(dataflags & FSESS_INITED)) {
1076 		if (!vnode_isvroot(vp)) {
1077 			fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
1078 			err = ENOTCONN;
1079 			return err;
1080 		} else {
1081 			goto fake;
1082 		}
1083 	}
1084 	err = fuse_internal_getattr(vp, vap, cred, td);
1085 	if (err == ENOTCONN && vnode_isvroot(vp)) {
1086 		/* see comment in fuse_vfsop_statfs() */
1087 		goto fake;
1088 	} else {
1089 		return err;
1090 	}
1091 
1092 fake:
1093 	bzero(vap, sizeof(*vap));
1094 	vap->va_type = vnode_vtype(vp);
1095 
1096 	return 0;
1097 }
1098 
1099 /*
1100     struct vnop_inactive_args {
1101 	struct vnode *a_vp;
1102     };
1103 */
1104 static int
1105 fuse_vnop_inactive(struct vop_inactive_args *ap)
1106 {
1107 	struct vnode *vp = ap->a_vp;
1108 	struct thread *td = curthread;
1109 
1110 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
1111 	struct fuse_filehandle *fufh, *fufh_tmp;
1112 
1113 	int need_flush = 1;
1114 
1115 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
1116 		if (need_flush && vp->v_type == VREG) {
1117 			if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
1118 				fuse_vnode_savesize(vp, NULL, 0);
1119 			}
1120 			if ((fvdat->flag & FN_REVOKED) != 0)
1121 				fuse_io_invalbuf(vp, td);
1122 			else
1123 				fuse_io_flushbuf(vp, MNT_WAIT, td);
1124 			need_flush = 0;
1125 		}
1126 		fuse_filehandle_close(vp, fufh, td, NULL);
1127 	}
1128 
1129 	if ((fvdat->flag & FN_REVOKED) != 0)
1130 		vrecycle(vp);
1131 
1132 	return 0;
1133 }
1134 
1135 /*
1136     struct vnop_ioctl_args {
1137 	struct vnode *a_vp;
1138 	u_long a_command;
1139 	caddr_t a_data;
1140 	int a_fflag;
1141 	struct ucred *a_cred;
1142 	struct thread *a_td;
1143     };
1144 */
1145 static int
1146 fuse_vnop_ioctl(struct vop_ioctl_args *ap)
1147 {
1148 	struct vnode *vp = ap->a_vp;
1149 	struct mount *mp = vnode_mount(vp);
1150 	struct ucred *cred = ap->a_cred;
1151 	off_t *offp;
1152 	pid_t pid = ap->a_td->td_proc->p_pid;
1153 	int err;
1154 
1155 	switch (ap->a_command) {
1156 	case FIOSEEKDATA:
1157 	case FIOSEEKHOLE:
1158 		/* Call FUSE_LSEEK, if we can, or fall back to vop_stdioctl */
1159 		if (fsess_maybe_impl(mp, FUSE_LSEEK)) {
1160 			int whence;
1161 
1162 			offp = ap->a_data;
1163 			if (ap->a_command == FIOSEEKDATA)
1164 				whence = SEEK_DATA;
1165 			else
1166 				whence = SEEK_HOLE;
1167 
1168 			vn_lock(vp, LK_SHARED | LK_RETRY);
1169 			err = fuse_vnop_do_lseek(vp, ap->a_td, cred, pid, offp,
1170 			    whence);
1171 			VOP_UNLOCK(vp);
1172 		}
1173 		if (fsess_not_impl(mp, FUSE_LSEEK))
1174 			err = vop_stdioctl(ap);
1175 		break;
1176 	default:
1177 		/* TODO: implement FUSE_IOCTL */
1178 		err = ENOTTY;
1179 		break;
1180 	}
1181 	return (err);
1182 }
1183 
1184 
1185 /*
1186     struct vnop_link_args {
1187 	struct vnode *a_tdvp;
1188 	struct vnode *a_vp;
1189 	struct componentname *a_cnp;
1190     };
1191 */
1192 static int
1193 fuse_vnop_link(struct vop_link_args *ap)
1194 {
1195 	struct vnode *vp = ap->a_vp;
1196 	struct vnode *tdvp = ap->a_tdvp;
1197 	struct componentname *cnp = ap->a_cnp;
1198 
1199 	struct vattr *vap = VTOVA(vp);
1200 
1201 	struct fuse_dispatcher fdi;
1202 	struct fuse_entry_out *feo;
1203 	struct fuse_link_in fli;
1204 
1205 	int err;
1206 
1207 	if (fuse_isdeadfs(vp)) {
1208 		return ENXIO;
1209 	}
1210 	if (vnode_mount(tdvp) != vnode_mount(vp)) {
1211 		return EXDEV;
1212 	}
1213 
1214 	/*
1215 	 * This is a seatbelt check to protect naive userspace filesystems from
1216 	 * themselves and the limitations of the FUSE IPC protocol.  If a
1217 	 * filesystem does not allow attribute caching, assume it is capable of
1218 	 * validating that nlink does not overflow.
1219 	 */
1220 	if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
1221 		return EMLINK;
1222 	fli.oldnodeid = VTOI(vp);
1223 
1224 	fdisp_init(&fdi, 0);
1225 	fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp,
1226 	    FUSE_LINK, &fli, sizeof(fli), &fdi);
1227 	if ((err = fdisp_wait_answ(&fdi))) {
1228 		goto out;
1229 	}
1230 	feo = fdi.answ;
1231 
1232 	err = fuse_internal_checkentry(feo, vnode_vtype(vp));
1233 	if (!err) {
1234 		/*
1235 		 * Purge the parent's attribute cache because the daemon
1236 		 * should've updated its mtime and ctime
1237 		 */
1238 		fuse_vnode_clear_attr_cache(tdvp);
1239 		fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid,
1240 			feo->attr_valid_nsec, NULL, true);
1241 	}
1242 out:
1243 	fdisp_destroy(&fdi);
1244 	return err;
1245 }
1246 
1247 struct fuse_lookup_alloc_arg {
1248 	struct fuse_entry_out *feo;
1249 	struct componentname *cnp;
1250 	uint64_t nid;
1251 	enum vtype vtyp;
1252 };
1253 
1254 /* Callback for vn_get_ino */
1255 static int
1256 fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
1257 {
1258 	struct fuse_lookup_alloc_arg *flaa = arg;
1259 
1260 	return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp,
1261 		flaa->vtyp);
1262 }
1263 
1264 SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup,
1265 	"int", "struct timespec*", "struct timespec*");
1266 /*
1267     struct vnop_lookup_args {
1268 	struct vnodeop_desc *a_desc;
1269 	struct vnode *a_dvp;
1270 	struct vnode **a_vpp;
1271 	struct componentname *a_cnp;
1272     };
1273 */
1274 int
1275 fuse_vnop_lookup(struct vop_lookup_args *ap)
1276 {
1277 	struct vnode *dvp = ap->a_dvp;
1278 	struct vnode **vpp = ap->a_vpp;
1279 	struct componentname *cnp = ap->a_cnp;
1280 	struct thread *td = curthread;
1281 	struct ucred *cred = cnp->cn_cred;
1282 
1283 	int nameiop = cnp->cn_nameiop;
1284 	int flags = cnp->cn_flags;
1285 	int wantparent = flags & (LOCKPARENT | WANTPARENT);
1286 	int islastcn = flags & ISLASTCN;
1287 	struct mount *mp = vnode_mount(dvp);
1288 	struct fuse_data *data = fuse_get_mpdata(mp);
1289 	int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS;
1290 
1291 	int err = 0;
1292 	int lookup_err = 0;
1293 	struct vnode *vp = NULL;
1294 
1295 	struct fuse_dispatcher fdi;
1296 	bool did_lookup = false;
1297 	struct fuse_entry_out *feo = NULL;
1298 	enum vtype vtyp;	/* vnode type of target */
1299 	off_t filesize;		/* filesize of target */
1300 
1301 	uint64_t nid;
1302 
1303 	if (fuse_isdeadfs(dvp)) {
1304 		*vpp = NULL;
1305 		return ENXIO;
1306 	}
1307 	if (!vnode_isdir(dvp))
1308 		return ENOTDIR;
1309 
1310 	if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP))
1311 		return EROFS;
1312 
1313 	if ((cnp->cn_flags & NOEXECCHECK) != 0)
1314 		cnp->cn_flags &= ~NOEXECCHECK;
1315 	else if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
1316 		return err;
1317 
1318 	if (flags & ISDOTDOT) {
1319 		KASSERT(VTOFUD(dvp)->flag & FN_PARENT_NID,
1320 			("Looking up .. is TODO"));
1321 		nid = VTOFUD(dvp)->parent_nid;
1322 		if (nid == 0)
1323 			return ENOENT;
1324 		/* .. is obviously a directory */
1325 		vtyp = VDIR;
1326 		filesize = 0;
1327 	} else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') {
1328 		nid = VTOI(dvp);
1329 		/* . is obviously a directory */
1330 		vtyp = VDIR;
1331 		filesize = 0;
1332 	} else {
1333 		struct timespec now, timeout;
1334 		int ncpticks; /* here to accomodate for API contract */
1335 
1336 		err = cache_lookup(dvp, vpp, cnp, &timeout, &ncpticks);
1337 		getnanouptime(&now);
1338 		SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now);
1339 		switch (err) {
1340 		case -1:		/* positive match */
1341 			if (timespeccmp(&timeout, &now, >)) {
1342 				counter_u64_add(fuse_lookup_cache_hits, 1);
1343 			} else {
1344 				/* Cache timeout */
1345 				counter_u64_add(fuse_lookup_cache_misses, 1);
1346 				bintime_clear(
1347 					&VTOFUD(*vpp)->entry_cache_timeout);
1348 				cache_purge(*vpp);
1349 				if (dvp != *vpp)
1350 					vput(*vpp);
1351 				else
1352 					vrele(*vpp);
1353 				*vpp = NULL;
1354 				break;
1355 			}
1356 			return 0;
1357 
1358 		case 0:		/* no match in cache */
1359 			counter_u64_add(fuse_lookup_cache_misses, 1);
1360 			break;
1361 
1362 		case ENOENT:		/* negative match */
1363 			if (timespeccmp(&timeout, &now, <=)) {
1364 				/* Cache timeout */
1365 				cache_purge_negative(dvp);
1366 				break;
1367 			}
1368 			/* fall through */
1369 		default:
1370 			return err;
1371 		}
1372 
1373 		nid = VTOI(dvp);
1374 		fdisp_init(&fdi, cnp->cn_namelen + 1);
1375 		fdisp_make(&fdi, FUSE_LOOKUP, mp, nid, td, cred);
1376 
1377 		memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
1378 		((char *)fdi.indata)[cnp->cn_namelen] = '\0';
1379 		lookup_err = fdisp_wait_answ(&fdi);
1380 		did_lookup = true;
1381 
1382 		if (!lookup_err) {
1383 			/* lookup call succeeded */
1384 			feo = (struct fuse_entry_out *)fdi.answ;
1385 			nid = feo->nodeid;
1386 			if (nid == 0) {
1387 				/* zero nodeid means ENOENT and cache it */
1388 				struct timespec timeout;
1389 
1390 				fdi.answ_stat = ENOENT;
1391 				lookup_err = ENOENT;
1392 				if (cnp->cn_flags & MAKEENTRY) {
1393 					fuse_validity_2_timespec(feo, &timeout);
1394 					cache_enter_time(dvp, *vpp, cnp,
1395 						&timeout, NULL);
1396 				}
1397 			} else if (nid == FUSE_ROOT_ID) {
1398 				lookup_err = EINVAL;
1399 			}
1400 			vtyp = IFTOVT(feo->attr.mode);
1401 			filesize = feo->attr.size;
1402 		}
1403 		if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) {
1404 			fdisp_destroy(&fdi);
1405 			return lookup_err;
1406 		}
1407 	}
1408 	/* lookup_err, if non-zero, must be ENOENT at this point */
1409 
1410 	if (lookup_err) {
1411 		/* Entry not found */
1412 		if ((nameiop == CREATE || nameiop == RENAME) && islastcn) {
1413 			if (default_permissions)
1414 				err = fuse_internal_access(dvp, VWRITE, td,
1415 				    cred);
1416 			else
1417 				err = 0;
1418 			if (!err) {
1419 				/*
1420 				 * Set the SAVENAME flag to hold onto the
1421 				 * pathname for use later in VOP_CREATE or
1422 				 * VOP_RENAME.
1423 				 */
1424 				cnp->cn_flags |= SAVENAME;
1425 
1426 				err = EJUSTRETURN;
1427 			}
1428 		} else {
1429 			err = ENOENT;
1430 		}
1431 	} else {
1432 		/* Entry was found */
1433 		if (flags & ISDOTDOT) {
1434 			struct fuse_lookup_alloc_arg flaa;
1435 
1436 			flaa.nid = nid;
1437 			flaa.feo = feo;
1438 			flaa.cnp = cnp;
1439 			flaa.vtyp = vtyp;
1440 			err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0,
1441 				&vp);
1442 			*vpp = vp;
1443 		} else if (nid == VTOI(dvp)) {
1444 			vref(dvp);
1445 			*vpp = dvp;
1446 		} else {
1447 			struct fuse_vnode_data *fvdat;
1448 
1449 			err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp,
1450 			    &vp, cnp, vtyp);
1451 			if (err)
1452 				goto out;
1453 			*vpp = vp;
1454 			fvdat = VTOFUD(vp);
1455 
1456 			MPASS(feo != NULL);
1457 			fuse_internal_cache_attrs(*vpp, &feo->attr,
1458 				feo->attr_valid, feo->attr_valid_nsec, NULL, true);
1459 			fuse_validity_2_bintime(feo->entry_valid,
1460 				feo->entry_valid_nsec,
1461 				&fvdat->entry_cache_timeout);
1462 
1463 			if ((nameiop == DELETE || nameiop == RENAME) &&
1464 				islastcn && default_permissions)
1465 			{
1466 				struct vattr dvattr;
1467 
1468 				err = fuse_internal_access(dvp, VWRITE, td,
1469 					cred);
1470 				if (err != 0)
1471 					goto out;
1472 				/*
1473 				 * if the parent's sticky bit is set, check
1474 				 * whether we're allowed to remove the file.
1475 				 * Need to figure out the vnode locking to make
1476 				 * this work.
1477 				 */
1478 				fuse_internal_getattr(dvp, &dvattr, cred, td);
1479 				if ((dvattr.va_mode & S_ISTXT) &&
1480 					fuse_internal_access(dvp, VADMIN, td,
1481 						cred) &&
1482 					fuse_internal_access(*vpp, VADMIN, td,
1483 						cred)) {
1484 					err = EPERM;
1485 					goto out;
1486 				}
1487 			}
1488 
1489 			if (islastcn && (
1490 				(nameiop == DELETE) ||
1491 				(nameiop == RENAME && wantparent))) {
1492 				cnp->cn_flags |= SAVENAME;
1493 			}
1494 		}
1495 	}
1496 out:
1497 	if (err) {
1498 		if (vp != NULL && dvp != vp)
1499 			vput(vp);
1500 		else if (vp != NULL)
1501 			vrele(vp);
1502 		*vpp = NULL;
1503 	}
1504 	if (did_lookup)
1505 		fdisp_destroy(&fdi);
1506 
1507 	return err;
1508 }
1509 
1510 /*
1511     struct vnop_mkdir_args {
1512 	struct vnode *a_dvp;
1513 	struct vnode **a_vpp;
1514 	struct componentname *a_cnp;
1515 	struct vattr *a_vap;
1516     };
1517 */
1518 static int
1519 fuse_vnop_mkdir(struct vop_mkdir_args *ap)
1520 {
1521 	struct vnode *dvp = ap->a_dvp;
1522 	struct vnode **vpp = ap->a_vpp;
1523 	struct componentname *cnp = ap->a_cnp;
1524 	struct vattr *vap = ap->a_vap;
1525 
1526 	struct fuse_mkdir_in fmdi;
1527 
1528 	if (fuse_isdeadfs(dvp)) {
1529 		return ENXIO;
1530 	}
1531 	fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
1532 	fmdi.umask = curthread->td_proc->p_pd->pd_cmask;
1533 
1534 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi,
1535 	    sizeof(fmdi), VDIR));
1536 }
1537 
1538 /*
1539     struct vnop_mknod_args {
1540 	struct vnode *a_dvp;
1541 	struct vnode **a_vpp;
1542 	struct componentname *a_cnp;
1543 	struct vattr *a_vap;
1544     };
1545 */
1546 static int
1547 fuse_vnop_mknod(struct vop_mknod_args *ap)
1548 {
1549 
1550 	struct vnode *dvp = ap->a_dvp;
1551 	struct vnode **vpp = ap->a_vpp;
1552 	struct componentname *cnp = ap->a_cnp;
1553 	struct vattr *vap = ap->a_vap;
1554 
1555 	if (fuse_isdeadfs(dvp))
1556 		return ENXIO;
1557 
1558 	return fuse_internal_mknod(dvp, vpp, cnp, vap);
1559 }
1560 
1561 /*
1562     struct vop_open_args {
1563 	struct vnode *a_vp;
1564 	int  a_mode;
1565 	struct ucred *a_cred;
1566 	struct thread *a_td;
1567 	int a_fdidx; / struct file *a_fp;
1568     };
1569 */
1570 static int
1571 fuse_vnop_open(struct vop_open_args *ap)
1572 {
1573 	struct vnode *vp = ap->a_vp;
1574 	int a_mode = ap->a_mode;
1575 	struct thread *td = ap->a_td;
1576 	struct ucred *cred = ap->a_cred;
1577 	pid_t pid = td->td_proc->p_pid;
1578 	struct fuse_vnode_data *fvdat;
1579 
1580 	if (fuse_isdeadfs(vp))
1581 		return ENXIO;
1582 	if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO)
1583 		return (EOPNOTSUPP);
1584 	if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0)
1585 		return EINVAL;
1586 
1587 	fvdat = VTOFUD(vp);
1588 
1589 	if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) {
1590 		fuse_vnode_open(vp, 0, td);
1591 		return 0;
1592 	}
1593 
1594 	return fuse_filehandle_open(vp, a_mode, NULL, td, cred);
1595 }
1596 
1597 static int
1598 fuse_vnop_pathconf(struct vop_pathconf_args *ap)
1599 {
1600 	struct vnode *vp = ap->a_vp;
1601 	struct mount *mp;
1602 
1603 	switch (ap->a_name) {
1604 	case _PC_FILESIZEBITS:
1605 		*ap->a_retval = 64;
1606 		return (0);
1607 	case _PC_NAME_MAX:
1608 		*ap->a_retval = NAME_MAX;
1609 		return (0);
1610 	case _PC_LINK_MAX:
1611 		*ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX);
1612 		return (0);
1613 	case _PC_SYMLINK_MAX:
1614 		*ap->a_retval = MAXPATHLEN;
1615 		return (0);
1616 	case _PC_NO_TRUNC:
1617 		*ap->a_retval = 1;
1618 		return (0);
1619 	case _PC_MIN_HOLE_SIZE:
1620 		/*
1621 		 * The FUSE protocol provides no mechanism for a server to
1622 		 * report _PC_MIN_HOLE_SIZE.  It's a protocol bug.  Instead,
1623 		 * return EINVAL if the server does not support FUSE_LSEEK, or
1624 		 * 1 if it does.
1625 		 */
1626 		mp = vnode_mount(vp);
1627 		if (!fsess_is_impl(mp, FUSE_LSEEK) &&
1628 		    !fsess_not_impl(mp, FUSE_LSEEK)) {
1629 			off_t offset = 0;
1630 
1631 			/* Issue a FUSE_LSEEK to find out if it's implemented */
1632 			fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred,
1633 			    curthread->td_proc->p_pid, &offset, SEEK_DATA);
1634 		}
1635 
1636 		if (fsess_is_impl(mp, FUSE_LSEEK)) {
1637 			*ap->a_retval = 1;
1638 			return (0);
1639 		} else {
1640 			/*
1641 			 * Probably FUSE_LSEEK is not implemented.  It might
1642 			 * be, if the FUSE_LSEEK above returned an error like
1643 			 * EACCES, but in that case we can't tell, so it's
1644 			 * safest to report EINVAL anyway.
1645 			 */
1646 			return (EINVAL);
1647 		}
1648 	default:
1649 		return (vop_stdpathconf(ap));
1650 	}
1651 }
1652 
1653 SDT_PROBE_DEFINE3(fusefs, , vnops, filehandles_closed, "struct vnode*",
1654     "struct uio*", "struct ucred*");
1655 /*
1656     struct vnop_read_args {
1657 	struct vnode *a_vp;
1658 	struct uio *a_uio;
1659 	int  a_ioflag;
1660 	struct ucred *a_cred;
1661     };
1662 */
1663 static int
1664 fuse_vnop_read(struct vop_read_args *ap)
1665 {
1666 	struct vnode *vp = ap->a_vp;
1667 	struct uio *uio = ap->a_uio;
1668 	int ioflag = ap->a_ioflag;
1669 	struct ucred *cred = ap->a_cred;
1670 	pid_t pid = curthread->td_proc->p_pid;
1671 	struct fuse_filehandle *fufh;
1672 	int err;
1673 	bool closefufh = false, directio;
1674 
1675 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
1676 
1677 	if (fuse_isdeadfs(vp)) {
1678 		return ENXIO;
1679 	}
1680 
1681 	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
1682 		ioflag |= IO_DIRECT;
1683 	}
1684 
1685 	err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid);
1686 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
1687 		/*
1688 		 * nfsd will do I/O without first doing VOP_OPEN.  We
1689 		 * must implicitly open the file here
1690 		 */
1691 		err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred);
1692 		closefufh = true;
1693 	}
1694 	if (err) {
1695 		SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred);
1696 		return err;
1697 	}
1698 
1699 	/*
1700          * Ideally, when the daemon asks for direct io at open time, the
1701          * standard file flag should be set according to this, so that would
1702          * just change the default mode, which later on could be changed via
1703          * fcntl(2).
1704          * But this doesn't work, the O_DIRECT flag gets cleared at some point
1705          * (don't know where). So to make any use of the Fuse direct_io option,
1706          * we hardwire it into the file's private data (similarly to Linux,
1707          * btw.).
1708          */
1709 	directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp));
1710 
1711 	fuse_vnode_update(vp, FN_ATIMECHANGE);
1712 	if (directio) {
1713 		SDT_PROBE2(fusefs, , vnops, trace, 1, "direct read of vnode");
1714 		err = fuse_read_directbackend(vp, uio, cred, fufh);
1715 	} else {
1716 		SDT_PROBE2(fusefs, , vnops, trace, 1, "buffered read of vnode");
1717 		err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid);
1718 	}
1719 
1720 	if (closefufh)
1721 		fuse_filehandle_close(vp, fufh, curthread, cred);
1722 
1723 	return (err);
1724 }
1725 
1726 /*
1727     struct vnop_readdir_args {
1728 	struct vnode *a_vp;
1729 	struct uio *a_uio;
1730 	struct ucred *a_cred;
1731 	int *a_eofflag;
1732 	int *a_ncookies;
1733 	uint64_t **a_cookies;
1734     };
1735 */
1736 static int
1737 fuse_vnop_readdir(struct vop_readdir_args *ap)
1738 {
1739 	struct vnode *vp = ap->a_vp;
1740 	struct uio *uio = ap->a_uio;
1741 	struct ucred *cred = ap->a_cred;
1742 	struct fuse_filehandle *fufh = NULL;
1743 	struct fuse_iov cookediov;
1744 	int err = 0;
1745 	uint64_t *cookies;
1746 	off_t startoff;
1747 	ssize_t tresid;
1748 	int ncookies;
1749 	bool closefufh = false;
1750 	pid_t pid = curthread->td_proc->p_pid;
1751 
1752 	if (ap->a_eofflag)
1753 		*ap->a_eofflag = 0;
1754 	if (fuse_isdeadfs(vp)) {
1755 		return ENXIO;
1756 	}
1757 	if (				/* XXXIP ((uio_iovcnt(uio) > 1)) || */
1758 	    (uio_resid(uio) < sizeof(struct dirent))) {
1759 		return EINVAL;
1760 	}
1761 
1762 	tresid = uio->uio_resid;
1763 	startoff = uio->uio_offset;
1764 	err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
1765 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
1766 		/*
1767 		 * nfsd will do VOP_READDIR without first doing VOP_OPEN.  We
1768 		 * must implicitly open the directory here
1769 		 */
1770 		err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred);
1771 		if (err == 0) {
1772 			/*
1773 			 * When a directory is opened, it must be read from
1774 			 * the beginning.  Hopefully, the "startoff" still
1775 			 * exists as an offset cookie for the directory.
1776 			 * If not, it will read the entire directory without
1777 			 * returning any entries and just return eof.
1778 			 */
1779 			uio->uio_offset = 0;
1780 		}
1781 		closefufh = true;
1782 	}
1783 	if (err)
1784 		return (err);
1785 	if (ap->a_ncookies != NULL) {
1786 		ncookies = uio->uio_resid /
1787 			(offsetof(struct dirent, d_name) + 4) + 1;
1788 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
1789 		*ap->a_ncookies = ncookies;
1790 		*ap->a_cookies = cookies;
1791 	} else {
1792 		ncookies = 0;
1793 		cookies = NULL;
1794 	}
1795 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1)
1796 	fiov_init(&cookediov, DIRCOOKEDSIZE);
1797 
1798 	err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov,
1799 		&ncookies, cookies);
1800 
1801 	fiov_teardown(&cookediov);
1802 	if (closefufh)
1803 		fuse_filehandle_close(vp, fufh, curthread, cred);
1804 
1805 	if (ap->a_ncookies != NULL) {
1806 		if (err == 0) {
1807 			*ap->a_ncookies -= ncookies;
1808 		} else {
1809 			free(*ap->a_cookies, M_TEMP);
1810 			*ap->a_ncookies = 0;
1811 			*ap->a_cookies = NULL;
1812 		}
1813 	}
1814 	if (err == 0 && tresid == uio->uio_resid)
1815 		*ap->a_eofflag = 1;
1816 
1817 	return err;
1818 }
1819 
1820 /*
1821     struct vnop_readlink_args {
1822 	struct vnode *a_vp;
1823 	struct uio *a_uio;
1824 	struct ucred *a_cred;
1825     };
1826 */
1827 static int
1828 fuse_vnop_readlink(struct vop_readlink_args *ap)
1829 {
1830 	struct vnode *vp = ap->a_vp;
1831 	struct uio *uio = ap->a_uio;
1832 	struct ucred *cred = ap->a_cred;
1833 
1834 	struct fuse_dispatcher fdi;
1835 	int err;
1836 
1837 	if (fuse_isdeadfs(vp)) {
1838 		return ENXIO;
1839 	}
1840 	if (!vnode_islnk(vp)) {
1841 		return EINVAL;
1842 	}
1843 	fdisp_init(&fdi, 0);
1844 	err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred);
1845 	if (err) {
1846 		goto out;
1847 	}
1848 	if (((char *)fdi.answ)[0] == '/' &&
1849 	    fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) {
1850 		char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname;
1851 
1852 		err = uiomove(mpth, strlen(mpth), uio);
1853 	}
1854 	if (!err) {
1855 		err = uiomove(fdi.answ, fdi.iosize, uio);
1856 	}
1857 out:
1858 	fdisp_destroy(&fdi);
1859 	return err;
1860 }
1861 
1862 /*
1863     struct vnop_reclaim_args {
1864 	struct vnode *a_vp;
1865     };
1866 */
1867 static int
1868 fuse_vnop_reclaim(struct vop_reclaim_args *ap)
1869 {
1870 	struct vnode *vp = ap->a_vp;
1871 	struct thread *td = curthread;
1872 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
1873 	struct fuse_filehandle *fufh, *fufh_tmp;
1874 
1875 	if (!fvdat) {
1876 		panic("FUSE: no vnode data during recycling");
1877 	}
1878 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
1879 		printf("FUSE: vnode being reclaimed with open fufh "
1880 			"(type=%#x)", fufh->fufh_type);
1881 		fuse_filehandle_close(vp, fufh, td, NULL);
1882 	}
1883 
1884 	if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) {
1885 		fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp),
1886 		    fvdat->nlookup);
1887 	}
1888 	cache_purge(vp);
1889 	vfs_hash_remove(vp);
1890 	fuse_vnode_destroy(vp);
1891 
1892 	return 0;
1893 }
1894 
1895 /*
1896     struct vnop_remove_args {
1897 	struct vnode *a_dvp;
1898 	struct vnode *a_vp;
1899 	struct componentname *a_cnp;
1900     };
1901 */
1902 static int
1903 fuse_vnop_remove(struct vop_remove_args *ap)
1904 {
1905 	struct vnode *dvp = ap->a_dvp;
1906 	struct vnode *vp = ap->a_vp;
1907 	struct componentname *cnp = ap->a_cnp;
1908 
1909 	int err;
1910 
1911 	if (fuse_isdeadfs(vp)) {
1912 		return ENXIO;
1913 	}
1914 	if (vnode_isdir(vp)) {
1915 		return EPERM;
1916 	}
1917 
1918 	err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
1919 
1920 	return err;
1921 }
1922 
1923 /*
1924     struct vnop_rename_args {
1925 	struct vnode *a_fdvp;
1926 	struct vnode *a_fvp;
1927 	struct componentname *a_fcnp;
1928 	struct vnode *a_tdvp;
1929 	struct vnode *a_tvp;
1930 	struct componentname *a_tcnp;
1931     };
1932 */
1933 static int
1934 fuse_vnop_rename(struct vop_rename_args *ap)
1935 {
1936 	struct vnode *fdvp = ap->a_fdvp;
1937 	struct vnode *fvp = ap->a_fvp;
1938 	struct componentname *fcnp = ap->a_fcnp;
1939 	struct vnode *tdvp = ap->a_tdvp;
1940 	struct vnode *tvp = ap->a_tvp;
1941 	struct componentname *tcnp = ap->a_tcnp;
1942 	struct fuse_data *data;
1943 	bool newparent = fdvp != tdvp;
1944 	bool isdir = fvp->v_type == VDIR;
1945 	int err = 0;
1946 
1947 	if (fuse_isdeadfs(fdvp)) {
1948 		return ENXIO;
1949 	}
1950 	if (fvp->v_mount != tdvp->v_mount ||
1951 	    (tvp && fvp->v_mount != tvp->v_mount)) {
1952 		SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename");
1953 		err = EXDEV;
1954 		goto out;
1955 	}
1956 	cache_purge(fvp);
1957 
1958 	/*
1959 	 * FUSE library is expected to check if target directory is not
1960 	 * under the source directory in the file system tree.
1961 	 * Linux performs this check at VFS level.
1962 	 */
1963 	/*
1964 	 * If source is a directory, and it will get a new parent, user must
1965 	 * have write permission to it, so ".." can be modified.
1966 	 */
1967 	data = fuse_get_mpdata(vnode_mount(tdvp));
1968 	if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
1969 		err = fuse_internal_access(fvp, VWRITE,
1970 			curthread, tcnp->cn_cred);
1971 		if (err)
1972 			goto out;
1973 	}
1974 	sx_xlock(&data->rename_lock);
1975 	err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
1976 	if (err == 0) {
1977 		if (tdvp != fdvp)
1978 			fuse_vnode_setparent(fvp, tdvp);
1979 		if (tvp != NULL)
1980 			fuse_vnode_setparent(tvp, NULL);
1981 	}
1982 	sx_unlock(&data->rename_lock);
1983 
1984 	if (tvp != NULL && tvp != fvp) {
1985 		cache_purge(tvp);
1986 	}
1987 	if (vnode_isdir(fvp)) {
1988 		if ((tvp != NULL) && vnode_isdir(tvp)) {
1989 			cache_purge(tdvp);
1990 		}
1991 		cache_purge(fdvp);
1992 	}
1993 out:
1994 	if (tdvp == tvp) {
1995 		vrele(tdvp);
1996 	} else {
1997 		vput(tdvp);
1998 	}
1999 	if (tvp != NULL) {
2000 		vput(tvp);
2001 	}
2002 	vrele(fdvp);
2003 	vrele(fvp);
2004 
2005 	return err;
2006 }
2007 
2008 /*
2009     struct vnop_rmdir_args {
2010 	    struct vnode *a_dvp;
2011 	    struct vnode *a_vp;
2012 	    struct componentname *a_cnp;
2013     } *ap;
2014 */
2015 static int
2016 fuse_vnop_rmdir(struct vop_rmdir_args *ap)
2017 {
2018 	struct vnode *dvp = ap->a_dvp;
2019 	struct vnode *vp = ap->a_vp;
2020 
2021 	int err;
2022 
2023 	if (fuse_isdeadfs(vp)) {
2024 		return ENXIO;
2025 	}
2026 	if (VTOFUD(vp) == VTOFUD(dvp)) {
2027 		return EINVAL;
2028 	}
2029 	err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
2030 
2031 	return err;
2032 }
2033 
2034 /*
2035     struct vnop_setattr_args {
2036 	struct vnode *a_vp;
2037 	struct vattr *a_vap;
2038 	struct ucred *a_cred;
2039 	struct thread *a_td;
2040     };
2041 */
2042 static int
2043 fuse_vnop_setattr(struct vop_setattr_args *ap)
2044 {
2045 	struct vnode *vp = ap->a_vp;
2046 	struct vattr *vap = ap->a_vap;
2047 	struct ucred *cred = ap->a_cred;
2048 	struct thread *td = curthread;
2049 	struct mount *mp;
2050 	struct fuse_data *data;
2051 	struct vattr old_va;
2052 	int dataflags;
2053 	int err = 0, err2;
2054 	accmode_t accmode = 0;
2055 	bool checkperm;
2056 	bool drop_suid = false;
2057 	gid_t cr_gid;
2058 
2059 	mp = vnode_mount(vp);
2060 	data = fuse_get_mpdata(mp);
2061 	dataflags = data->dataflags;
2062 	checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS;
2063 	if (cred->cr_ngroups > 0)
2064 		cr_gid = cred->cr_groups[0];
2065 	else
2066 		cr_gid = 0;
2067 
2068 	if (fuse_isdeadfs(vp)) {
2069 		return ENXIO;
2070 	}
2071 
2072 	if (vap->va_uid != (uid_t)VNOVAL) {
2073 		if (checkperm) {
2074 			/* Only root may change a file's owner */
2075 			err = priv_check_cred(cred, PRIV_VFS_CHOWN);
2076 			if (err) {
2077 				/* As a special case, allow the null chown */
2078 				err2 = fuse_internal_getattr(vp, &old_va, cred,
2079 					td);
2080 				if (err2)
2081 					return (err2);
2082 				if (vap->va_uid != old_va.va_uid)
2083 					return err;
2084 				else
2085 					accmode |= VADMIN;
2086 				drop_suid = true;
2087 			} else
2088 				accmode |= VADMIN;
2089 		} else
2090 			accmode |= VADMIN;
2091 	}
2092 	if (vap->va_gid != (gid_t)VNOVAL) {
2093 		if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN))
2094 			drop_suid = true;
2095 		if (checkperm && !groupmember(vap->va_gid, cred))
2096 		{
2097 			/*
2098 			 * Non-root users may only chgrp to one of their own
2099 			 * groups
2100 			 */
2101 			err = priv_check_cred(cred, PRIV_VFS_CHOWN);
2102 			if (err) {
2103 				/* As a special case, allow the null chgrp */
2104 				err2 = fuse_internal_getattr(vp, &old_va, cred,
2105 					td);
2106 				if (err2)
2107 					return (err2);
2108 				if (vap->va_gid != old_va.va_gid)
2109 					return err;
2110 				accmode |= VADMIN;
2111 			} else
2112 				accmode |= VADMIN;
2113 		} else
2114 			accmode |= VADMIN;
2115 	}
2116 	if (vap->va_size != VNOVAL) {
2117 		switch (vp->v_type) {
2118 		case VDIR:
2119 			return (EISDIR);
2120 		case VLNK:
2121 		case VREG:
2122 			if (vfs_isrdonly(mp))
2123 				return (EROFS);
2124 			break;
2125 		default:
2126 			/*
2127 			 * According to POSIX, the result is unspecified
2128 			 * for file types other than regular files,
2129 			 * directories and shared memory objects.  We
2130 			 * don't support shared memory objects in the file
2131 			 * system, and have dubious support for truncating
2132 			 * symlinks.  Just ignore the request in other cases.
2133 			 */
2134 			return (0);
2135 		}
2136 		/* Don't set accmode.  Permission to trunc is checked upstack */
2137 	}
2138 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
2139 		if (vap->va_vaflags & VA_UTIMES_NULL)
2140 			accmode |= VWRITE;
2141 		else
2142 			accmode |= VADMIN;
2143 	}
2144 	if (drop_suid) {
2145 		if (vap->va_mode != (mode_t)VNOVAL)
2146 			vap->va_mode &= ~(S_ISUID | S_ISGID);
2147 		else {
2148 			err = fuse_internal_getattr(vp, &old_va, cred, td);
2149 			if (err)
2150 				return (err);
2151 			vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID);
2152 		}
2153 	}
2154 	if (vap->va_mode != (mode_t)VNOVAL) {
2155 		/* Only root may set the sticky bit on non-directories */
2156 		if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT)
2157 		    && priv_check_cred(cred, PRIV_VFS_STICKYFILE))
2158 			return EFTYPE;
2159 		if (checkperm && (vap->va_mode & S_ISGID)) {
2160 			err = fuse_internal_getattr(vp, &old_va, cred, td);
2161 			if (err)
2162 				return (err);
2163 			if (!groupmember(old_va.va_gid, cred)) {
2164 				err = priv_check_cred(cred, PRIV_VFS_SETGID);
2165 				if (err)
2166 					return (err);
2167 			}
2168 		}
2169 		accmode |= VADMIN;
2170 	}
2171 
2172 	if (vfs_isrdonly(mp))
2173 		return EROFS;
2174 
2175 	if (checkperm) {
2176 		err = fuse_internal_access(vp, accmode, td, cred);
2177 	} else {
2178 		err = 0;
2179 	}
2180 	if (err)
2181 		return err;
2182 	else
2183 		return fuse_internal_setattr(vp, vap, td, cred);
2184 }
2185 
2186 /*
2187     struct vnop_strategy_args {
2188 	struct vnode *a_vp;
2189 	struct buf *a_bp;
2190     };
2191 */
2192 static int
2193 fuse_vnop_strategy(struct vop_strategy_args *ap)
2194 {
2195 	struct vnode *vp = ap->a_vp;
2196 	struct buf *bp = ap->a_bp;
2197 
2198 	if (!vp || fuse_isdeadfs(vp)) {
2199 		bp->b_ioflags |= BIO_ERROR;
2200 		bp->b_error = ENXIO;
2201 		bufdone(bp);
2202 		return 0;
2203 	}
2204 
2205 	/*
2206 	 * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags.
2207 	 * fuse_io_strategy sets bp's error fields
2208 	 */
2209 	(void)fuse_io_strategy(vp, bp);
2210 
2211 	return 0;
2212 }
2213 
2214 /*
2215     struct vnop_symlink_args {
2216 	struct vnode *a_dvp;
2217 	struct vnode **a_vpp;
2218 	struct componentname *a_cnp;
2219 	struct vattr *a_vap;
2220 	char *a_target;
2221     };
2222 */
2223 static int
2224 fuse_vnop_symlink(struct vop_symlink_args *ap)
2225 {
2226 	struct vnode *dvp = ap->a_dvp;
2227 	struct vnode **vpp = ap->a_vpp;
2228 	struct componentname *cnp = ap->a_cnp;
2229 	const char *target = ap->a_target;
2230 
2231 	struct fuse_dispatcher fdi;
2232 
2233 	int err;
2234 	size_t len;
2235 
2236 	if (fuse_isdeadfs(dvp)) {
2237 		return ENXIO;
2238 	}
2239 	/*
2240 	 * Unlike the other creator type calls, here we have to create a message
2241 	 * where the name of the new entry comes first, and the data describing
2242 	 * the entry comes second.
2243 	 * Hence we can't rely on our handy fuse_internal_newentry() routine,
2244 	 * but put together the message manually and just call the core part.
2245 	 */
2246 
2247 	len = strlen(target) + 1;
2248 	fdisp_init(&fdi, len + cnp->cn_namelen + 1);
2249 	fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL);
2250 
2251 	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
2252 	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
2253 	memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len);
2254 
2255 	err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi);
2256 	fdisp_destroy(&fdi);
2257 	return err;
2258 }
2259 
2260 /*
2261     struct vnop_write_args {
2262 	struct vnode *a_vp;
2263 	struct uio *a_uio;
2264 	int  a_ioflag;
2265 	struct ucred *a_cred;
2266     };
2267 */
2268 static int
2269 fuse_vnop_write(struct vop_write_args *ap)
2270 {
2271 	struct vnode *vp = ap->a_vp;
2272 	struct uio *uio = ap->a_uio;
2273 	int ioflag = ap->a_ioflag;
2274 	struct ucred *cred = ap->a_cred;
2275 	pid_t pid = curthread->td_proc->p_pid;
2276 	struct fuse_filehandle *fufh;
2277 	int err;
2278 	bool closefufh = false, directio;
2279 
2280 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
2281 
2282 	if (fuse_isdeadfs(vp)) {
2283 		return ENXIO;
2284 	}
2285 
2286 	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
2287 		ioflag |= IO_DIRECT;
2288 	}
2289 
2290 	err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
2291 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
2292 		/*
2293 		 * nfsd will do I/O without first doing VOP_OPEN.  We
2294 		 * must implicitly open the file here
2295 		 */
2296 		err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred);
2297 		closefufh = true;
2298 	}
2299 	if (err) {
2300 		SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred);
2301 		return err;
2302 	}
2303 
2304 	/*
2305          * Ideally, when the daemon asks for direct io at open time, the
2306          * standard file flag should be set according to this, so that would
2307          * just change the default mode, which later on could be changed via
2308          * fcntl(2).
2309          * But this doesn't work, the O_DIRECT flag gets cleared at some point
2310          * (don't know where). So to make any use of the Fuse direct_io option,
2311          * we hardwire it into the file's private data (similarly to Linux,
2312          * btw.).
2313          */
2314 	directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp));
2315 
2316 	fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE);
2317 	if (directio) {
2318 		off_t start, end, filesize;
2319 		bool pages = (ioflag & IO_VMIO) != 0;
2320 
2321 		SDT_PROBE2(fusefs, , vnops, trace, 1, "direct write of vnode");
2322 
2323 		err = fuse_vnode_size(vp, &filesize, cred, curthread);
2324 		if (err)
2325 			goto out;
2326 
2327 		start = uio->uio_offset;
2328 		end = start + uio->uio_resid;
2329 		if (!pages) {
2330 			err = fuse_inval_buf_range(vp, filesize, start,
2331 			    end);
2332 			if (err)
2333 				goto out;
2334 		}
2335 		err = fuse_write_directbackend(vp, uio, cred, fufh,
2336 			filesize, ioflag, pages);
2337 	} else {
2338 		SDT_PROBE2(fusefs, , vnops, trace, 1,
2339 			"buffered write of vnode");
2340 		if (!fsess_opt_writeback(vnode_mount(vp)))
2341 			ioflag |= IO_SYNC;
2342 		err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid);
2343 	}
2344 	fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td);
2345 
2346 out:
2347 	if (closefufh)
2348 		fuse_filehandle_close(vp, fufh, curthread, cred);
2349 
2350 	return (err);
2351 }
2352 
2353 static daddr_t
2354 fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
2355 {
2356 	const int biosize = fuse_iosize(vp);
2357 
2358 	return (off / biosize);
2359 }
2360 
2361 static int
2362 fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *blksz)
2363 {
2364 	off_t filesize;
2365 	int err;
2366 	const int biosize = fuse_iosize(vp);
2367 
2368 	err = fuse_vnode_size(vp, &filesize, NULL, NULL);
2369 	if (err) {
2370 		/* This will turn into a SIGBUS */
2371 		return (EIO);
2372 	} else if ((off_t)lbn * biosize >= filesize) {
2373 		*blksz = 0;
2374 	} else if ((off_t)(lbn + 1) * biosize > filesize) {
2375 		*blksz = filesize - (off_t)lbn *biosize;
2376 	} else {
2377 		*blksz = biosize;
2378 	}
2379 	return (0);
2380 }
2381 
2382 /*
2383     struct vnop_getpages_args {
2384 	struct vnode *a_vp;
2385 	vm_page_t *a_m;
2386 	int a_count;
2387 	int a_reqpage;
2388     };
2389 */
2390 static int
2391 fuse_vnop_getpages(struct vop_getpages_args *ap)
2392 {
2393 	struct vnode *vp = ap->a_vp;
2394 
2395 	if (!fsess_opt_mmap(vnode_mount(vp))) {
2396 		SDT_PROBE2(fusefs, , vnops, trace, 1,
2397 			"called on non-cacheable vnode??\n");
2398 		return (VM_PAGER_ERROR);
2399 	}
2400 
2401 	return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
2402 	    ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz));
2403 }
2404 
/*
 * Character placed between the namespace prefix and the attribute name when
 * building or parsing FUSE extended attribute names ("<namespace>.<name>").
 */
static const char extattr_namespace_separator = '.';
2406 
2407 /*
2408     struct vop_getextattr_args {
2409 	struct vop_generic_args a_gen;
2410 	struct vnode *a_vp;
2411 	int a_attrnamespace;
2412 	const char *a_name;
2413 	struct uio *a_uio;
2414 	size_t *a_size;
2415 	struct ucred *a_cred;
2416 	struct thread *a_td;
2417     };
2418 */
2419 static int
2420 fuse_vnop_getextattr(struct vop_getextattr_args *ap)
2421 {
2422 	struct vnode *vp = ap->a_vp;
2423 	struct uio *uio = ap->a_uio;
2424 	struct fuse_dispatcher fdi;
2425 	struct fuse_getxattr_in *get_xattr_in;
2426 	struct fuse_getxattr_out *get_xattr_out;
2427 	struct mount *mp = vnode_mount(vp);
2428 	struct thread *td = ap->a_td;
2429 	struct ucred *cred = ap->a_cred;
2430 	char *prefix;
2431 	char *attr_str;
2432 	size_t len;
2433 	int err;
2434 
2435 	if (fuse_isdeadfs(vp))
2436 		return (ENXIO);
2437 
2438 	if (fsess_not_impl(mp, FUSE_GETXATTR))
2439 		return EOPNOTSUPP;
2440 
2441 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
2442 	if (err)
2443 		return err;
2444 
2445 	/* Default to looking for user attributes. */
2446 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2447 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2448 	else
2449 		prefix = EXTATTR_NAMESPACE_USER_STRING;
2450 
2451 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2452 	    strlen(ap->a_name) + 1;
2453 
2454 	fdisp_init(&fdi, len + sizeof(*get_xattr_in));
2455 	fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred);
2456 
2457 	get_xattr_in = fdi.indata;
2458 	/*
2459 	 * Check to see whether we're querying the available size or
2460 	 * issuing the actual request.  If we pass in 0, we get back struct
2461 	 * fuse_getxattr_out.  If we pass in a non-zero size, we get back
2462 	 * that much data, without the struct fuse_getxattr_out header.
2463 	 */
2464 	if (uio == NULL)
2465 		get_xattr_in->size = 0;
2466 	else
2467 		get_xattr_in->size = uio->uio_resid;
2468 
2469 	attr_str = (char *)fdi.indata + sizeof(*get_xattr_in);
2470 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2471 	    ap->a_name);
2472 
2473 	err = fdisp_wait_answ(&fdi);
2474 	if (err != 0) {
2475 		if (err == ENOSYS) {
2476 			fsess_set_notimpl(mp, FUSE_GETXATTR);
2477 			err = EOPNOTSUPP;
2478 		}
2479 		goto out;
2480 	}
2481 
2482 	get_xattr_out = fdi.answ;
2483 
2484 	if (ap->a_size != NULL)
2485 		*ap->a_size = get_xattr_out->size;
2486 
2487 	if (uio != NULL)
2488 		err = uiomove(fdi.answ, fdi.iosize, uio);
2489 
2490 out:
2491 	fdisp_destroy(&fdi);
2492 	return (err);
2493 }
2494 
2495 /*
2496     struct vop_setextattr_args {
2497 	struct vop_generic_args a_gen;
2498 	struct vnode *a_vp;
2499 	int a_attrnamespace;
2500 	const char *a_name;
2501 	struct uio *a_uio;
2502 	struct ucred *a_cred;
2503 	struct thread *a_td;
2504     };
2505 */
2506 static int
2507 fuse_vnop_setextattr(struct vop_setextattr_args *ap)
2508 {
2509 	struct vnode *vp = ap->a_vp;
2510 	struct uio *uio = ap->a_uio;
2511 	struct fuse_dispatcher fdi;
2512 	struct fuse_setxattr_in *set_xattr_in;
2513 	struct mount *mp = vnode_mount(vp);
2514 	struct thread *td = ap->a_td;
2515 	struct ucred *cred = ap->a_cred;
2516 	char *prefix;
2517 	size_t len;
2518 	char *attr_str;
2519 	int err;
2520 
2521 	if (fuse_isdeadfs(vp))
2522 		return (ENXIO);
2523 
2524 	if (fsess_not_impl(mp, FUSE_SETXATTR))
2525 		return EOPNOTSUPP;
2526 
2527 	if (vfs_isrdonly(mp))
2528 		return EROFS;
2529 
2530 	/* Deleting xattrs must use VOP_DELETEEXTATTR instead */
2531 	if (ap->a_uio == NULL) {
2532 		/*
2533 		 * If we got here as fallback from VOP_DELETEEXTATTR, then
2534 		 * return EOPNOTSUPP.
2535 		 */
2536 		if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
2537 			return (EOPNOTSUPP);
2538 		else
2539 			return (EINVAL);
2540 	}
2541 
2542 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
2543 		VWRITE);
2544 	if (err)
2545 		return err;
2546 
2547 	/* Default to looking for user attributes. */
2548 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2549 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2550 	else
2551 		prefix = EXTATTR_NAMESPACE_USER_STRING;
2552 
2553 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2554 	    strlen(ap->a_name) + 1;
2555 
2556 	fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid);
2557 	fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred);
2558 
2559 	set_xattr_in = fdi.indata;
2560 	set_xattr_in->size = uio->uio_resid;
2561 
2562 	attr_str = (char *)fdi.indata + sizeof(*set_xattr_in);
2563 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2564 	    ap->a_name);
2565 
2566 	err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len,
2567 	    uio->uio_resid, uio);
2568 	if (err != 0) {
2569 		goto out;
2570 	}
2571 
2572 	err = fdisp_wait_answ(&fdi);
2573 
2574 	if (err == ENOSYS) {
2575 		fsess_set_notimpl(mp, FUSE_SETXATTR);
2576 		err = EOPNOTSUPP;
2577 	}
2578 	if (err == ERESTART) {
2579 		/* Can't restart after calling uiomove */
2580 		err = EINTR;
2581 	}
2582 
2583 out:
2584 	fdisp_destroy(&fdi);
2585 	return (err);
2586 }
2587 
2588 /*
2589  * The Linux / FUSE extended attribute list is simply a collection of
2590  * NUL-terminated strings.  The FreeBSD extended attribute list is a single
2591  * byte length followed by a non-NUL terminated string.  So, this allows
2592  * conversion of the Linux / FUSE format to the FreeBSD format in place.
2593  * Linux attribute names are reported with the namespace as a prefix (e.g.
2594  * "user.attribute_name"), but in FreeBSD they are reported without the
2595  * namespace prefix (e.g. "attribute_name").  So, we're going from:
2596  *
2597  * user.attr_name1\0user.attr_name2\0
2598  *
2599  * to:
2600  *
2601  * <num>attr_name1<num>attr_name2
2602  *
2603  * Where "<num>" is a single byte number of characters in the attribute name.
2604  *
2605  * Args:
2606  * prefix - exattr namespace prefix string
2607  * list, list_len - input list with namespace prefixes
2608  * bsd_list, bsd_list_len - output list compatible with bsd vfs
2609  */
2610 static int
2611 fuse_xattrlist_convert(char *prefix, const char *list, int list_len,
2612     char *bsd_list, int *bsd_list_len)
2613 {
2614 	int len, pos, dist_to_next, prefix_len;
2615 
2616 	pos = 0;
2617 	*bsd_list_len = 0;
2618 	prefix_len = strlen(prefix);
2619 
2620 	while (pos < list_len && list[pos] != '\0') {
2621 		dist_to_next = strlen(&list[pos]) + 1;
2622 		if (bcmp(&list[pos], prefix, prefix_len) == 0 &&
2623 		    list[pos + prefix_len] == extattr_namespace_separator) {
2624 			len = dist_to_next -
2625 			    (prefix_len + sizeof(extattr_namespace_separator)) - 1;
2626 			if (len >= EXTATTR_MAXNAMELEN)
2627 				return (ENAMETOOLONG);
2628 
2629 			bsd_list[*bsd_list_len] = len;
2630 			memcpy(&bsd_list[*bsd_list_len + 1],
2631 			    &list[pos + prefix_len +
2632 			    sizeof(extattr_namespace_separator)], len);
2633 
2634 			*bsd_list_len += len + 1;
2635 		}
2636 
2637 		pos += dist_to_next;
2638 	}
2639 
2640 	return (0);
2641 }
2642 
2643 /*
2644  * List extended attributes
2645  *
2646  * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which
2647  * has a number of differences compared to its FreeBSD equivalent,
2648  * extattr_list_file:
2649  *
 * - FUSE_LISTXATTR returns all extended attributes across all namespaces,
 *   whereas extattr_list_file(2) only returns attributes for a single
 *   namespace
 * - FUSE_LISTXATTR prepends each attribute name with "namespace."
 * - If the provided buffer is not large enough to hold the result,
 *   FUSE_LISTXATTR should return ERANGE, whereas extattr_list_file(2) is
 *   expected to return as many results as will fit.
2656  */
2657 /*
2658     struct vop_listextattr_args {
2659 	struct vop_generic_args a_gen;
2660 	struct vnode *a_vp;
2661 	int a_attrnamespace;
2662 	struct uio *a_uio;
2663 	size_t *a_size;
2664 	struct ucred *a_cred;
2665 	struct thread *a_td;
2666     };
2667 */
2668 static int
2669 fuse_vnop_listextattr(struct vop_listextattr_args *ap)
2670 {
2671 	struct vnode *vp = ap->a_vp;
2672 	struct uio *uio = ap->a_uio;
2673 	struct fuse_dispatcher fdi;
2674 	struct fuse_listxattr_in *list_xattr_in;
2675 	struct fuse_listxattr_out *list_xattr_out;
2676 	struct mount *mp = vnode_mount(vp);
2677 	struct thread *td = ap->a_td;
2678 	struct ucred *cred = ap->a_cred;
2679 	char *prefix;
2680 	char *bsd_list = NULL;
2681 	char *linux_list;
2682 	int bsd_list_len;
2683 	int linux_list_len;
2684 	int err;
2685 
2686 	if (fuse_isdeadfs(vp))
2687 		return (ENXIO);
2688 
2689 	if (fsess_not_impl(mp, FUSE_LISTXATTR))
2690 		return EOPNOTSUPP;
2691 
2692 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
2693 	if (err)
2694 		return err;
2695 
2696 	/*
2697 	 * Add space for a NUL and the period separator if enabled.
2698 	 * Default to looking for user attributes.
2699 	 */
2700 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2701 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2702 	else
2703 		prefix = EXTATTR_NAMESPACE_USER_STRING;
2704 
2705 	fdisp_init(&fdi, sizeof(*list_xattr_in));
2706 	fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
2707 
2708 	/*
2709 	 * Retrieve Linux / FUSE compatible list size.
2710 	 */
2711 	list_xattr_in = fdi.indata;
2712 	list_xattr_in->size = 0;
2713 
2714 	err = fdisp_wait_answ(&fdi);
2715 	if (err != 0) {
2716 		if (err == ENOSYS) {
2717 			fsess_set_notimpl(mp, FUSE_LISTXATTR);
2718 			err = EOPNOTSUPP;
2719 		}
2720 		goto out;
2721 	}
2722 
2723 	list_xattr_out = fdi.answ;
2724 	linux_list_len = list_xattr_out->size;
2725 	if (linux_list_len == 0) {
2726 		if (ap->a_size != NULL)
2727 			*ap->a_size = linux_list_len;
2728 		goto out;
2729 	}
2730 
2731 	/*
2732 	 * Retrieve Linux / FUSE compatible list values.
2733 	 */
2734 	fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
2735 	list_xattr_in = fdi.indata;
2736 	list_xattr_in->size = linux_list_len;
2737 
2738 	err = fdisp_wait_answ(&fdi);
2739 	if (err == ERANGE) {
2740 		/*
2741 		 * Race detected.  The attribute list must've grown since the
2742 		 * first FUSE_LISTXATTR call.  Start over.  Go all the way back
2743 		 * to userland so we can process signals, if necessary, before
2744 		 * restarting.
2745 		 */
2746 		err = ERESTART;
2747 		goto out;
2748 	} else if (err != 0)
2749 		goto out;
2750 
2751 	linux_list = fdi.answ;
2752 	/* FUSE doesn't allow the server to return more data than requested */
2753 	if (fdi.iosize > linux_list_len) {
2754 		struct fuse_data *data = fuse_get_mpdata(mp);
2755 
2756 		fuse_warn(data, FSESS_WARN_LSEXTATTR_LONG,
2757 			"server returned "
2758 			"more extended attribute data than requested; "
2759 			"should've returned ERANGE instead.");
2760 	} else {
2761 		/* But returning less data is fine */
2762 		linux_list_len = fdi.iosize;
2763 	}
2764 
2765 	/*
2766 	 * Retrieve the BSD compatible list values.
2767 	 * The Linux / FUSE attribute list format isn't the same
2768 	 * as FreeBSD's format. So we need to transform it into
2769 	 * FreeBSD's format before giving it to the user.
2770 	 */
2771 	bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK);
2772 	err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len,
2773 	    bsd_list, &bsd_list_len);
2774 	if (err != 0)
2775 		goto out;
2776 
2777 	if (ap->a_size != NULL)
2778 		*ap->a_size = bsd_list_len;
2779 
2780 	if (uio != NULL)
2781 		err = uiomove(bsd_list, bsd_list_len, uio);
2782 
2783 out:
2784 	free(bsd_list, M_TEMP);
2785 	fdisp_destroy(&fdi);
2786 	return (err);
2787 }
2788 
2789 /*
2790     struct vop_deleteextattr_args {
2791 	struct vop_generic_args a_gen;
2792 	struct vnode *a_vp;
2793 	int a_attrnamespace;
2794 	const char *a_name;
2795 	struct ucred *a_cred;
2796 	struct thread *a_td;
2797     };
2798 */
2799 static int
2800 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
2801 {
2802 	struct vnode *vp = ap->a_vp;
2803 	struct fuse_dispatcher fdi;
2804 	struct mount *mp = vnode_mount(vp);
2805 	struct thread *td = ap->a_td;
2806 	struct ucred *cred = ap->a_cred;
2807 	char *prefix;
2808 	size_t len;
2809 	char *attr_str;
2810 	int err;
2811 
2812 	if (fuse_isdeadfs(vp))
2813 		return (ENXIO);
2814 
2815 	if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
2816 		return EOPNOTSUPP;
2817 
2818 	if (vfs_isrdonly(mp))
2819 		return EROFS;
2820 
2821 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
2822 		VWRITE);
2823 	if (err)
2824 		return err;
2825 
2826 	/* Default to looking for user attributes. */
2827 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2828 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2829 	else
2830 		prefix = EXTATTR_NAMESPACE_USER_STRING;
2831 
2832 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2833 	    strlen(ap->a_name) + 1;
2834 
2835 	fdisp_init(&fdi, len);
2836 	fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred);
2837 
2838 	attr_str = fdi.indata;
2839 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2840 	    ap->a_name);
2841 
2842 	err = fdisp_wait_answ(&fdi);
2843 	if (err == ENOSYS) {
2844 		fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
2845 		err = EOPNOTSUPP;
2846 	}
2847 
2848 	fdisp_destroy(&fdi);
2849 	return (err);
2850 }
2851 
2852 /*
2853     struct vnop_print_args {
2854 	struct vnode *a_vp;
2855     };
2856 */
2857 static int
2858 fuse_vnop_print(struct vop_print_args *ap)
2859 {
2860 	struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp);
2861 
2862 	printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n",
2863 	    (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid,
2864 	    (uintmax_t)fvdat->nlookup,
2865 	    fvdat->flag);
2866 
2867 	return 0;
2868 }
2869 
2870 /*
2871  * Get an NFS filehandle for a FUSE file.
2872  *
2873  * This will only work for FUSE file systems that guarantee the uniqueness of
2874  * nodeid:generation, which most don't.
2875  */
2876 /*
2877 vop_vptofh {
2878 	IN struct vnode *a_vp;
2879 	IN struct fid *a_fhp;
2880 };
2881 */
2882 static int
2883 fuse_vnop_vptofh(struct vop_vptofh_args *ap)
2884 {
2885 	struct vnode *vp = ap->a_vp;
2886 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
2887 	struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp);
2888 	_Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid),
2889 		"FUSE fid type is too big");
2890 	struct mount *mp = vnode_mount(vp);
2891 	struct fuse_data *data = fuse_get_mpdata(mp);
2892 	struct vattr va;
2893 	int err;
2894 
2895 	if (!(data->dataflags & FSESS_EXPORT_SUPPORT))
2896 		return EOPNOTSUPP;
2897 
2898 	err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread);
2899 	if (err)
2900 		return err;
2901 
2902 	/*ip = VTOI(ap->a_vp);*/
2903 	/*ufhp = (struct ufid *)ap->a_fhp;*/
2904 	fhp->len = sizeof(struct fuse_fid);
2905 	fhp->nid = fvdat->nid;
2906 	if (fvdat->generation <= UINT32_MAX)
2907 		fhp->gen = fvdat->generation;
2908 	else
2909 		return EOVERFLOW;
2910 	return (0);
2911 }
2912