1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Google Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Copyright (C) 2005 Csaba Henk. 34 * All rights reserved. 35 * 36 * Copyright (c) 2019 The FreeBSD Foundation 37 * 38 * Portions of this software were developed by BFF Storage Systems, LLC under 39 * sponsorship from the FreeBSD Foundation. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 */ 62 63 #include <sys/param.h> 64 #include <sys/module.h> 65 #include <sys/systm.h> 66 #include <sys/errno.h> 67 #include <sys/kernel.h> 68 #include <sys/conf.h> 69 #include <sys/filio.h> 70 #include <sys/uio.h> 71 #include <sys/malloc.h> 72 #include <sys/queue.h> 73 #include <sys/limits.h> 74 #include <sys/lock.h> 75 #include <sys/rwlock.h> 76 #include <sys/sx.h> 77 #include <sys/proc.h> 78 #include <sys/mount.h> 79 #include <sys/vnode.h> 80 #include <sys/namei.h> 81 #include <sys/extattr.h> 82 #include <sys/stat.h> 83 #include <sys/unistd.h> 84 #include <sys/filedesc.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/dirent.h> 88 #include <sys/bio.h> 89 #include <sys/buf.h> 90 #include <sys/sysctl.h> 91 #include <sys/vmmeter.h> 92 93 #include <vm/vm.h> 94 #include <vm/vm_extern.h> 95 #include <vm/pmap.h> 96 #include <vm/vm_map.h> 97 #include <vm/vm_page.h> 98 #include <vm/vm_param.h> 99 #include <vm/vm_object.h> 100 #include <vm/vm_pager.h> 101 #include <vm/vnode_pager.h> 102 #include <vm/vm_object.h> 103 104 #include "fuse.h" 105 #include "fuse_file.h" 106 #include "fuse_internal.h" 107 #include "fuse_ipc.h" 108 #include "fuse_node.h" 109 #include "fuse_io.h" 110 111 #include <sys/priv.h> 112 113 /* Maximum number of hardlinks to a single FUSE file */ 114 #define FUSE_LINK_MAX UINT32_MAX 115 116 SDT_PROVIDER_DECLARE(fusefs); 117 /* 118 * Fuse trace probe: 119 * arg0: verbosity. Higher numbers give more verbose messages 120 * arg1: Textual message 121 */ 122 SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*"); 123 124 /* vnode ops */ 125 static vop_access_t fuse_vnop_access; 126 static vop_advlock_t fuse_vnop_advlock; 127 static vop_allocate_t fuse_vnop_allocate; 128 static vop_bmap_t fuse_vnop_bmap; 129 static vop_close_t fuse_fifo_close; 130 static vop_close_t fuse_vnop_close; 131 static vop_copy_file_range_t fuse_vnop_copy_file_range; 132 static vop_create_t fuse_vnop_create; 133 static vop_deallocate_t fuse_vnop_deallocate; 134 static vop_deleteextattr_t fuse_vnop_deleteextattr; 135 static vop_fdatasync_t fuse_vnop_fdatasync; 136 static vop_fsync_t fuse_vnop_fsync; 137 static vop_getattr_t fuse_vnop_getattr; 138 static vop_getextattr_t fuse_vnop_getextattr; 139 static vop_inactive_t fuse_vnop_inactive; 140 static vop_ioctl_t fuse_vnop_ioctl; 141 static vop_link_t fuse_vnop_link; 142 static vop_listextattr_t fuse_vnop_listextattr; 143 static vop_lookup_t fuse_vnop_lookup; 144 static vop_mkdir_t fuse_vnop_mkdir; 145 static vop_mknod_t fuse_vnop_mknod; 146 static vop_open_t fuse_vnop_open; 147 static vop_pathconf_t fuse_vnop_pathconf; 148 static vop_read_t fuse_vnop_read; 149 static vop_readdir_t fuse_vnop_readdir; 150 static vop_readlink_t fuse_vnop_readlink; 151 static vop_reclaim_t fuse_vnop_reclaim; 152 static vop_remove_t fuse_vnop_remove; 153 static vop_rename_t fuse_vnop_rename; 154 static vop_rmdir_t fuse_vnop_rmdir; 155 static vop_setattr_t fuse_vnop_setattr; 156 static vop_setextattr_t fuse_vnop_setextattr; 157 static vop_strategy_t fuse_vnop_strategy; 158 static vop_symlink_t fuse_vnop_symlink; 159 static vop_write_t fuse_vnop_write; 160 static vop_getpages_t fuse_vnop_getpages; 161 static vop_print_t fuse_vnop_print; 162 static vop_vptofh_t fuse_vnop_vptofh; 163 164 struct vop_vector fuse_fifoops = { 165 .vop_default = &fifo_specops, 166 .vop_access = fuse_vnop_access, 167 .vop_close = fuse_fifo_close, 168 .vop_fsync = fuse_vnop_fsync, 169 .vop_getattr = fuse_vnop_getattr, 170 .vop_inactive = fuse_vnop_inactive, 171 .vop_pathconf = fuse_vnop_pathconf, 172 .vop_print = fuse_vnop_print, 173 .vop_read = VOP_PANIC, 174 .vop_reclaim = fuse_vnop_reclaim, 175 .vop_setattr = fuse_vnop_setattr, 176 .vop_write = VOP_PANIC, 177 .vop_vptofh = fuse_vnop_vptofh, 178 }; 179 VFS_VOP_VECTOR_REGISTER(fuse_fifoops); 180 181 struct vop_vector fuse_vnops = { 182 .vop_allocate = fuse_vnop_allocate, 183 .vop_default = &default_vnodeops, 184 .vop_access = fuse_vnop_access, 185 .vop_advlock = fuse_vnop_advlock, 186 .vop_bmap = fuse_vnop_bmap, 187 .vop_close = fuse_vnop_close, 188 .vop_copy_file_range = fuse_vnop_copy_file_range, 189 .vop_create = fuse_vnop_create, 190 .vop_deallocate = fuse_vnop_deallocate, 191 .vop_deleteextattr = fuse_vnop_deleteextattr, 192 .vop_fsync = fuse_vnop_fsync, 193 .vop_fdatasync = fuse_vnop_fdatasync, 194 .vop_getattr = fuse_vnop_getattr, 195 .vop_getextattr = fuse_vnop_getextattr, 196 .vop_inactive = fuse_vnop_inactive, 197 .vop_ioctl = fuse_vnop_ioctl, 198 .vop_link = fuse_vnop_link, 199 .vop_listextattr = fuse_vnop_listextattr, 200 .vop_lookup = fuse_vnop_lookup, 201 .vop_mkdir = fuse_vnop_mkdir, 202 .vop_mknod = fuse_vnop_mknod, 203 .vop_open = fuse_vnop_open, 204 .vop_pathconf = fuse_vnop_pathconf, 205 /* 206 * TODO: implement vop_poll after upgrading to protocol 7.21. 207 * FUSE_POLL was added in protocol 7.11, but it's kind of broken until 208 * 7.21, which adds the ability for the client to choose which poll 209 * events it wants, and for a client to deregister a file handle 210 */ 211 .vop_read = fuse_vnop_read, 212 .vop_readdir = fuse_vnop_readdir, 213 .vop_readlink = fuse_vnop_readlink, 214 .vop_reclaim = fuse_vnop_reclaim, 215 .vop_remove = fuse_vnop_remove, 216 .vop_rename = fuse_vnop_rename, 217 .vop_rmdir = fuse_vnop_rmdir, 218 .vop_setattr = fuse_vnop_setattr, 219 .vop_setextattr = fuse_vnop_setextattr, 220 .vop_strategy = fuse_vnop_strategy, 221 .vop_symlink = fuse_vnop_symlink, 222 .vop_write = fuse_vnop_write, 223 .vop_getpages = fuse_vnop_getpages, 224 .vop_print = fuse_vnop_print, 225 .vop_vptofh = fuse_vnop_vptofh, 226 }; 227 VFS_VOP_VECTOR_REGISTER(fuse_vnops); 228 229 /* Check permission for extattr operations, much like extattr_check_cred */ 230 static int 231 fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred, 232 struct thread *td, accmode_t accmode) 233 { 234 struct mount *mp = vnode_mount(vp); 235 struct fuse_data *data = fuse_get_mpdata(mp); 236 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; 237 238 /* 239 * Kernel-invoked always succeeds. 240 */ 241 if (cred == NOCRED) 242 return (0); 243 244 /* 245 * Do not allow privileged processes in jail to directly manipulate 246 * system attributes. 247 */ 248 switch (ns) { 249 case EXTATTR_NAMESPACE_SYSTEM: 250 if (default_permissions) { 251 return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); 252 } 253 return (0); 254 case EXTATTR_NAMESPACE_USER: 255 if (default_permissions) { 256 return (fuse_internal_access(vp, accmode, td, cred)); 257 } 258 return (0); 259 default: 260 return (EPERM); 261 } 262 } 263 264 /* Get a filehandle for a directory */ 265 static int 266 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp, 267 struct ucred *cred, pid_t pid) 268 { 269 if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0) 270 return 0; 271 return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid); 272 } 273 274 /* Send FUSE_FLUSH for this vnode */ 275 static int 276 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) 277 { 278 struct fuse_flush_in *ffi; 279 struct fuse_filehandle *fufh; 280 struct fuse_dispatcher fdi; 281 struct thread *td = curthread; 282 struct mount *mp = vnode_mount(vp); 283 int err; 284 285 if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH)) 286 return 0; 287 288 err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); 289 if (err) 290 return err; 291 292 fdisp_init(&fdi, sizeof(*ffi)); 293 fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred); 294 ffi = fdi.indata; 295 ffi->fh = fufh->fh_id; 296 /* 297 * If the file has a POSIX lock then we're supposed to set lock_owner. 298 * If not, then lock_owner is undefined. So we may as well always set 299 * it. 300 */ 301 ffi->lock_owner = td->td_proc->p_pid; 302 303 err = fdisp_wait_answ(&fdi); 304 if (err == ENOSYS) { 305 fsess_set_notimpl(mp, FUSE_FLUSH); 306 err = 0; 307 } 308 fdisp_destroy(&fdi); 309 return err; 310 } 311 312 /* Close wrapper for fifos. */ 313 static int 314 fuse_fifo_close(struct vop_close_args *ap) 315 { 316 return (fifo_specops.vop_close(ap)); 317 } 318 319 /* Invalidate a range of cached data, whether dirty of not */ 320 static int 321 fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end) 322 { 323 struct buf *bp; 324 daddr_t left_lbn, end_lbn, right_lbn; 325 off_t new_filesize; 326 int iosize, left_on, right_on, right_blksize; 327 328 iosize = fuse_iosize(vp); 329 left_lbn = start / iosize; 330 end_lbn = howmany(end, iosize); 331 left_on = start & (iosize - 1); 332 if (left_on != 0) { 333 bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0); 334 if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) { 335 /* 336 * Flush the dirty buffer, because we don't have a 337 * byte-granular way to record which parts of the 338 * buffer are valid. 339 */ 340 bwrite(bp); 341 if (bp->b_error) 342 return (bp->b_error); 343 } else { 344 brelse(bp); 345 } 346 } 347 right_on = end & (iosize - 1); 348 if (right_on != 0) { 349 right_lbn = end / iosize; 350 new_filesize = MAX(filesize, end); 351 right_blksize = MIN(iosize, new_filesize - iosize * right_lbn); 352 bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0); 353 if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) { 354 /* 355 * Flush the dirty buffer, because we don't have a 356 * byte-granular way to record which parts of the 357 * buffer are valid. 358 */ 359 bwrite(bp); 360 if (bp->b_error) 361 return (bp->b_error); 362 } else { 363 brelse(bp); 364 } 365 } 366 367 v_inval_buf_range(vp, left_lbn, end_lbn, iosize); 368 return (0); 369 } 370 371 372 /* Send FUSE_LSEEK for this node */ 373 static int 374 fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred, 375 pid_t pid, off_t *offp, int whence) 376 { 377 struct fuse_dispatcher fdi; 378 struct fuse_filehandle *fufh; 379 struct fuse_lseek_in *flsi; 380 struct fuse_lseek_out *flso; 381 struct mount *mp = vnode_mount(vp); 382 int err; 383 384 ASSERT_VOP_LOCKED(vp, __func__); 385 386 err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid); 387 if (err) 388 return (err); 389 fdisp_init(&fdi, sizeof(*flsi)); 390 fdisp_make_vp(&fdi, FUSE_LSEEK, vp, td, cred); 391 flsi = fdi.indata; 392 flsi->fh = fufh->fh_id; 393 flsi->offset = *offp; 394 flsi->whence = whence; 395 err = fdisp_wait_answ(&fdi); 396 if (err == ENOSYS) { 397 fsess_set_notimpl(mp, FUSE_LSEEK); 398 } else if (err == 0) { 399 fsess_set_impl(mp, FUSE_LSEEK); 400 flso = fdi.answ; 401 *offp = flso->offset; 402 } 403 fdisp_destroy(&fdi); 404 405 return (err); 406 } 407 408 /* 409 struct vnop_access_args { 410 struct vnode *a_vp; 411 #if VOP_ACCESS_TAKES_ACCMODE_T 412 accmode_t a_accmode; 413 #else 414 int a_mode; 415 #endif 416 struct ucred *a_cred; 417 struct thread *a_td; 418 }; 419 */ 420 static int 421 fuse_vnop_access(struct vop_access_args *ap) 422 { 423 struct vnode *vp = ap->a_vp; 424 int accmode = ap->a_accmode; 425 struct ucred *cred = ap->a_cred; 426 427 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 428 429 int err; 430 431 if (fuse_isdeadfs(vp)) { 432 if (vnode_isvroot(vp)) { 433 return 0; 434 } 435 return ENXIO; 436 } 437 if (!(data->dataflags & FSESS_INITED)) { 438 if (vnode_isvroot(vp)) { 439 if (priv_check_cred(cred, PRIV_VFS_ADMIN) || 440 (fuse_match_cred(data->daemoncred, cred) == 0)) { 441 return 0; 442 } 443 } 444 return EBADF; 445 } 446 if (vnode_islnk(vp)) { 447 return 0; 448 } 449 450 err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred); 451 return err; 452 } 453 454 /* 455 * struct vop_advlock_args { 456 * struct vop_generic_args a_gen; 457 * struct vnode *a_vp; 458 * void *a_id; 459 * int a_op; 460 * struct flock *a_fl; 461 * int a_flags; 462 * } 463 */ 464 static int 465 fuse_vnop_advlock(struct vop_advlock_args *ap) 466 { 467 struct vnode *vp = ap->a_vp; 468 struct flock *fl = ap->a_fl; 469 struct thread *td = curthread; 470 struct ucred *cred = td->td_ucred; 471 pid_t pid = td->td_proc->p_pid; 472 struct fuse_filehandle *fufh; 473 struct fuse_dispatcher fdi; 474 struct fuse_lk_in *fli; 475 struct fuse_lk_out *flo; 476 struct vattr vattr; 477 enum fuse_opcode op; 478 off_t size, start; 479 int dataflags, err; 480 int flags = ap->a_flags; 481 482 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; 483 484 if (fuse_isdeadfs(vp)) { 485 return ENXIO; 486 } 487 488 switch(ap->a_op) { 489 case F_GETLK: 490 op = FUSE_GETLK; 491 break; 492 case F_SETLK: 493 if (flags & F_WAIT) 494 op = FUSE_SETLKW; 495 else 496 op = FUSE_SETLK; 497 break; 498 case F_UNLCK: 499 op = FUSE_SETLK; 500 break; 501 default: 502 return EINVAL; 503 } 504 505 if (!(dataflags & FSESS_POSIX_LOCKS)) 506 return vop_stdadvlock(ap); 507 /* FUSE doesn't properly support flock until protocol 7.17 */ 508 if (flags & F_FLOCK) 509 return vop_stdadvlock(ap); 510 511 vn_lock(vp, LK_SHARED | LK_RETRY); 512 513 switch (fl->l_whence) { 514 case SEEK_SET: 515 case SEEK_CUR: 516 /* 517 * Caller is responsible for adding any necessary offset 518 * when SEEK_CUR is used. 519 */ 520 start = fl->l_start; 521 break; 522 523 case SEEK_END: 524 err = fuse_internal_getattr(vp, &vattr, cred, td); 525 if (err) 526 goto out; 527 size = vattr.va_size; 528 if (size > OFF_MAX || 529 (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) { 530 err = EOVERFLOW; 531 goto out; 532 } 533 start = size + fl->l_start; 534 break; 535 536 default: 537 return (EINVAL); 538 } 539 540 err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); 541 if (err) 542 goto out; 543 544 fdisp_init(&fdi, sizeof(*fli)); 545 546 fdisp_make_vp(&fdi, op, vp, td, cred); 547 fli = fdi.indata; 548 fli->fh = fufh->fh_id; 549 fli->owner = td->td_proc->p_pid; 550 fli->lk.start = start; 551 if (fl->l_len != 0) 552 fli->lk.end = start + fl->l_len - 1; 553 else 554 fli->lk.end = INT64_MAX; 555 fli->lk.type = fl->l_type; 556 fli->lk.pid = td->td_proc->p_pid; 557 558 err = fdisp_wait_answ(&fdi); 559 fdisp_destroy(&fdi); 560 561 if (err == 0 && op == FUSE_GETLK) { 562 flo = fdi.answ; 563 fl->l_type = flo->lk.type; 564 fl->l_whence = SEEK_SET; 565 if (flo->lk.type != F_UNLCK) { 566 fl->l_pid = flo->lk.pid; 567 fl->l_start = flo->lk.start; 568 if (flo->lk.end == INT64_MAX) 569 fl->l_len = 0; 570 else 571 fl->l_len = flo->lk.end - flo->lk.start + 1; 572 fl->l_start = flo->lk.start; 573 } 574 } 575 576 out: 577 VOP_UNLOCK(vp); 578 return err; 579 } 580 581 static int 582 fuse_vnop_allocate(struct vop_allocate_args *ap) 583 { 584 struct vnode *vp = ap->a_vp; 585 off_t *len = ap->a_len; 586 off_t *offset = ap->a_offset; 587 struct ucred *cred = ap->a_cred; 588 struct fuse_filehandle *fufh; 589 struct mount *mp = vnode_mount(vp); 590 struct fuse_dispatcher fdi; 591 struct fuse_fallocate_in *ffi; 592 struct uio io; 593 pid_t pid = curthread->td_proc->p_pid; 594 struct fuse_vnode_data *fvdat = VTOFUD(vp); 595 off_t filesize; 596 int err; 597 598 if (fuse_isdeadfs(vp)) 599 return (ENXIO); 600 601 switch (vp->v_type) { 602 case VFIFO: 603 return (ESPIPE); 604 case VLNK: 605 case VREG: 606 if (vfs_isrdonly(mp)) 607 return (EROFS); 608 break; 609 default: 610 return (ENODEV); 611 } 612 613 if (vfs_isrdonly(mp)) 614 return (EROFS); 615 616 if (fsess_not_impl(mp, FUSE_FALLOCATE)) 617 return (EINVAL); 618 619 io.uio_offset = *offset; 620 io.uio_resid = *len; 621 err = vn_rlimit_fsize(vp, &io, curthread); 622 if (err) 623 return (err); 624 625 err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 626 if (err) 627 return (err); 628 629 fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); 630 631 err = fuse_vnode_size(vp, &filesize, cred, curthread); 632 if (err) 633 return (err); 634 fuse_inval_buf_range(vp, filesize, *offset, *offset + *len); 635 636 fdisp_init(&fdi, sizeof(*ffi)); 637 fdisp_make_vp(&fdi, FUSE_FALLOCATE, vp, curthread, cred); 638 ffi = fdi.indata; 639 ffi->fh = fufh->fh_id; 640 ffi->offset = *offset; 641 ffi->length = *len; 642 ffi->mode = 0; 643 err = fdisp_wait_answ(&fdi); 644 645 if (err == ENOSYS) { 646 fsess_set_notimpl(mp, FUSE_FALLOCATE); 647 err = EINVAL; 648 } else if (err == EOPNOTSUPP) { 649 /* 650 * The file system server does not support FUSE_FALLOCATE with 651 * the supplied mode for this particular file. 652 */ 653 err = EINVAL; 654 } else if (!err) { 655 *offset += *len; 656 *len = 0; 657 fuse_vnode_undirty_cached_timestamps(vp, false); 658 fuse_internal_clear_suid_on_write(vp, cred, curthread); 659 if (*offset > fvdat->cached_attrs.va_size) { 660 fuse_vnode_setsize(vp, *offset, false); 661 getnanouptime(&fvdat->last_local_modify); 662 } 663 } 664 665 fdisp_destroy(&fdi); 666 return (err); 667 } 668 669 /* { 670 struct vnode *a_vp; 671 daddr_t a_bn; 672 struct bufobj **a_bop; 673 daddr_t *a_bnp; 674 int *a_runp; 675 int *a_runb; 676 } */ 677 static int 678 fuse_vnop_bmap(struct vop_bmap_args *ap) 679 { 680 struct vnode *vp = ap->a_vp; 681 struct bufobj **bo = ap->a_bop; 682 struct thread *td = curthread; 683 struct mount *mp; 684 struct fuse_dispatcher fdi; 685 struct fuse_bmap_in *fbi; 686 struct fuse_bmap_out *fbo; 687 struct fuse_data *data; 688 struct fuse_vnode_data *fvdat = VTOFUD(vp); 689 uint64_t biosize; 690 off_t fsize; 691 daddr_t lbn = ap->a_bn; 692 daddr_t *pbn = ap->a_bnp; 693 int *runp = ap->a_runp; 694 int *runb = ap->a_runb; 695 int error = 0; 696 int maxrun; 697 698 if (fuse_isdeadfs(vp)) { 699 return ENXIO; 700 } 701 702 mp = vnode_mount(vp); 703 data = fuse_get_mpdata(mp); 704 biosize = fuse_iosize(vp); 705 maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1, 706 data->max_readahead_blocks); 707 708 if (bo != NULL) 709 *bo = &vp->v_bufobj; 710 711 /* 712 * The FUSE_BMAP operation does not include the runp and runb 713 * variables, so we must guess. Report nonzero contiguous runs so 714 * cluster_read will combine adjacent reads. It's worthwhile to reduce 715 * upcalls even if we don't know the true physical layout of the file. 716 * 717 * FUSE file systems may opt out of read clustering in two ways: 718 * * mounting with -onoclusterr 719 * * Setting max_readahead <= maxbcachebuf during FUSE_INIT 720 */ 721 if (runb != NULL) 722 *runb = MIN(lbn, maxrun); 723 if (runp != NULL && maxrun == 0) 724 *runp = 0; 725 else if (runp != NULL) { 726 /* 727 * If the file's size is cached, use that value to calculate 728 * runp, even if the cache is expired. runp is only advisory, 729 * and the risk of getting it wrong is not worth the cost of 730 * another upcall. 731 */ 732 if (fvdat->cached_attrs.va_size != VNOVAL) 733 fsize = fvdat->cached_attrs.va_size; 734 else 735 error = fuse_vnode_size(vp, &fsize, td->td_ucred, td); 736 if (error == 0) 737 *runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1), 738 maxrun); 739 else 740 *runp = 0; 741 } 742 743 if (fsess_maybe_impl(mp, FUSE_BMAP)) { 744 fdisp_init(&fdi, sizeof(*fbi)); 745 fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred); 746 fbi = fdi.indata; 747 fbi->block = lbn; 748 fbi->blocksize = biosize; 749 error = fdisp_wait_answ(&fdi); 750 if (error == ENOSYS) { 751 fdisp_destroy(&fdi); 752 fsess_set_notimpl(mp, FUSE_BMAP); 753 error = 0; 754 } else { 755 fbo = fdi.answ; 756 if (error == 0 && pbn != NULL) 757 *pbn = fbo->block; 758 fdisp_destroy(&fdi); 759 return error; 760 } 761 } 762 763 /* If the daemon doesn't support BMAP, make up a sensible default */ 764 if (pbn != NULL) 765 *pbn = lbn * btodb(biosize); 766 return (error); 767 } 768 769 /* 770 struct vop_close_args { 771 struct vnode *a_vp; 772 int a_fflag; 773 struct ucred *a_cred; 774 struct thread *a_td; 775 }; 776 */ 777 static int 778 fuse_vnop_close(struct vop_close_args *ap) 779 { 780 struct vnode *vp = ap->a_vp; 781 struct mount *mp = vnode_mount(vp); 782 struct ucred *cred = ap->a_cred; 783 int fflag = ap->a_fflag; 784 struct thread *td = ap->a_td; 785 pid_t pid = td->td_proc->p_pid; 786 struct fuse_vnode_data *fvdat = VTOFUD(vp); 787 int err = 0; 788 789 if (fuse_isdeadfs(vp)) 790 return 0; 791 if (vnode_isdir(vp)) 792 return 0; 793 if (fflag & IO_NDELAY) 794 return 0; 795 796 err = fuse_flush(vp, cred, pid, fflag); 797 if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) { 798 struct vattr vap; 799 struct fuse_data *data; 800 int dataflags; 801 int access_e = 0; 802 803 data = fuse_get_mpdata(mp); 804 dataflags = data->dataflags; 805 if (dataflags & FSESS_DEFAULT_PERMISSIONS) { 806 struct vattr va; 807 808 fuse_internal_getattr(vp, &va, cred, td); 809 access_e = vaccess(vp->v_type, va.va_mode, va.va_uid, 810 va.va_gid, VWRITE, cred); 811 } 812 if (access_e == 0) { 813 VATTR_NULL(&vap); 814 vap.va_atime = fvdat->cached_attrs.va_atime; 815 /* 816 * Ignore errors setting when setting atime. That 817 * should not cause close(2) to fail. 818 */ 819 fuse_internal_setattr(vp, &vap, td, NULL); 820 } 821 } 822 /* TODO: close the file handle, if we're sure it's no longer used */ 823 if ((fvdat->flag & FN_SIZECHANGE) != 0) { 824 fuse_vnode_savesize(vp, cred, td->td_proc->p_pid); 825 } 826 return err; 827 } 828 829 /* 830 struct vop_copy_file_range_args { 831 struct vop_generic_args a_gen; 832 struct vnode *a_invp; 833 off_t *a_inoffp; 834 struct vnode *a_outvp; 835 off_t *a_outoffp; 836 size_t *a_lenp; 837 unsigned int a_flags; 838 struct ucred *a_incred; 839 struct ucred *a_outcred; 840 struct thread *a_fsizetd; 841 } 842 */ 843 static int 844 fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) 845 { 846 struct vnode *invp = ap->a_invp; 847 struct vnode *outvp = ap->a_outvp; 848 struct mount *mp = vnode_mount(invp); 849 struct fuse_vnode_data *outfvdat = VTOFUD(outvp); 850 struct fuse_dispatcher fdi; 851 struct fuse_filehandle *infufh, *outfufh; 852 struct fuse_copy_file_range_in *fcfri; 853 struct ucred *incred = ap->a_incred; 854 struct ucred *outcred = ap->a_outcred; 855 struct fuse_write_out *fwo; 856 struct thread *td; 857 struct uio io; 858 off_t outfilesize; 859 ssize_t r = 0; 860 pid_t pid; 861 int err; 862 863 err = ENOSYS; 864 if (mp == NULL || mp != vnode_mount(outvp)) 865 goto fallback; 866 867 if (incred->cr_uid != outcred->cr_uid) 868 goto fallback; 869 870 if (incred->cr_groups[0] != outcred->cr_groups[0]) 871 goto fallback; 872 873 /* Caller busied mp, mnt_data can be safely accessed. */ 874 if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE)) 875 goto fallback; 876 877 if (ap->a_fsizetd == NULL) 878 td = curthread; 879 else 880 td = ap->a_fsizetd; 881 pid = td->td_proc->p_pid; 882 883 vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE); 884 if (invp->v_data == NULL || outvp->v_data == NULL) { 885 err = EBADF; 886 goto unlock; 887 } 888 889 err = fuse_filehandle_getrw(invp, FREAD, &infufh, incred, pid); 890 if (err) 891 goto unlock; 892 893 err = fuse_filehandle_getrw(outvp, FWRITE, &outfufh, outcred, pid); 894 if (err) 895 goto unlock; 896 897 io.uio_resid = *ap->a_lenp; 898 if (ap->a_fsizetd) { 899 io.uio_offset = *ap->a_outoffp; 900 err = vn_rlimit_fsizex(outvp, &io, 0, &r, ap->a_fsizetd); 901 if (err != 0) 902 goto unlock; 903 } 904 905 err = fuse_vnode_size(outvp, &outfilesize, outcred, curthread); 906 if (err) 907 goto unlock; 908 909 vnode_pager_clean_sync(invp); 910 err = fuse_inval_buf_range(outvp, outfilesize, *ap->a_outoffp, 911 *ap->a_outoffp + io.uio_resid); 912 if (err) 913 goto unlock; 914 915 fdisp_init(&fdi, sizeof(*fcfri)); 916 fdisp_make_vp(&fdi, FUSE_COPY_FILE_RANGE, invp, td, incred); 917 fcfri = fdi.indata; 918 fcfri->fh_in = infufh->fh_id; 919 fcfri->off_in = *ap->a_inoffp; 920 fcfri->nodeid_out = VTOI(outvp); 921 fcfri->fh_out = outfufh->fh_id; 922 fcfri->off_out = *ap->a_outoffp; 923 fcfri->len = io.uio_resid; 924 fcfri->flags = 0; 925 926 err = fdisp_wait_answ(&fdi); 927 if (err == 0) { 928 fwo = fdi.answ; 929 *ap->a_lenp = fwo->size; 930 *ap->a_inoffp += fwo->size; 931 *ap->a_outoffp += fwo->size; 932 fuse_internal_clear_suid_on_write(outvp, outcred, td); 933 if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) { 934 fuse_vnode_setsize(outvp, *ap->a_outoffp, false); 935 getnanouptime(&outfvdat->last_local_modify); 936 } 937 fuse_vnode_update(invp, FN_ATIMECHANGE); 938 fuse_vnode_update(outvp, FN_MTIMECHANGE | FN_CTIMECHANGE); 939 } 940 fdisp_destroy(&fdi); 941 942 unlock: 943 if (invp != outvp) 944 VOP_UNLOCK(invp); 945 VOP_UNLOCK(outvp); 946 947 if (err == ENOSYS) 948 fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE); 949 fallback: 950 951 /* 952 * No need to call vn_rlimit_fsizex_res before return, since the uio is 953 * local. 954 */ 955 return (err); 956 } 957 958 static void 959 fdisp_make_mknod_for_fallback( 960 struct fuse_dispatcher *fdip, 961 struct componentname *cnp, 962 struct vnode *dvp, 963 uint64_t parentnid, 964 struct thread *td, 965 struct ucred *cred, 966 mode_t mode, 967 enum fuse_opcode *op) 968 { 969 struct fuse_mknod_in *fmni; 970 971 fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1); 972 *op = FUSE_MKNOD; 973 fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred); 974 fmni = fdip->indata; 975 fmni->mode = mode; 976 fmni->rdev = 0; 977 memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr, 978 cnp->cn_namelen); 979 ((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0'; 980 } 981 /* 982 struct vnop_create_args { 983 struct vnode *a_dvp; 984 struct vnode **a_vpp; 985 struct componentname *a_cnp; 986 struct vattr *a_vap; 987 }; 988 */ 989 static int 990 fuse_vnop_create(struct vop_create_args *ap) 991 { 992 struct vnode *dvp = ap->a_dvp; 993 struct vnode **vpp = ap->a_vpp; 994 struct componentname *cnp = ap->a_cnp; 995 struct vattr *vap = ap->a_vap; 996 struct thread *td = curthread; 997 struct ucred *cred = cnp->cn_cred; 998 999 struct fuse_data *data; 1000 struct fuse_create_in *fci; 1001 struct fuse_entry_out *feo; 1002 struct fuse_open_out *foo; 1003 struct fuse_dispatcher fdi, fdi2; 1004 struct fuse_dispatcher *fdip = &fdi; 1005 struct fuse_dispatcher *fdip2 = NULL; 1006 1007 int err; 1008 1009 struct mount *mp = vnode_mount(dvp); 1010 data = fuse_get_mpdata(mp); 1011 uint64_t parentnid = VTOFUD(dvp)->nid; 1012 mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode); 1013 enum fuse_opcode op; 1014 int flags; 1015 1016 if (fuse_isdeadfs(dvp)) 1017 return ENXIO; 1018 1019 /* FUSE expects sockets to be created with FUSE_MKNOD */ 1020 if (vap->va_type == VSOCK) 1021 return fuse_internal_mknod(dvp, vpp, cnp, vap); 1022 1023 /* 1024 * VOP_CREATE doesn't tell us the open(2) flags, so we guess. Only a 1025 * writable mode makes sense, and we might as well include readability 1026 * too. 1027 */ 1028 flags = O_RDWR; 1029 1030 bzero(&fdi, sizeof(fdi)); 1031 1032 if (vap->va_type != VREG) 1033 return (EINVAL); 1034 1035 if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { 1036 /* Fallback to FUSE_MKNOD/FUSE_OPEN */ 1037 fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td, 1038 cred, mode, &op); 1039 } else { 1040 /* Use FUSE_CREATE */ 1041 size_t insize; 1042 1043 op = FUSE_CREATE; 1044 fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1); 1045 fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred); 1046 fci = fdip->indata; 1047 fci->mode = mode; 1048 fci->flags = O_CREAT | flags; 1049 if (fuse_libabi_geq(data, 7, 12)) { 1050 insize = sizeof(*fci); 1051 fci->umask = td->td_proc->p_pd->pd_cmask; 1052 } else { 1053 insize = sizeof(struct fuse_open_in); 1054 } 1055 1056 memcpy((char *)fdip->indata + insize, cnp->cn_nameptr, 1057 cnp->cn_namelen); 1058 ((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0'; 1059 } 1060 1061 err = fdisp_wait_answ(fdip); 1062 1063 if (err) { 1064 if (err == ENOSYS && op == FUSE_CREATE) { 1065 fsess_set_notimpl(mp, FUSE_CREATE); 1066 fdisp_destroy(fdip); 1067 fdisp_make_mknod_for_fallback(fdip, cnp, dvp, 1068 parentnid, td, cred, mode, &op); 1069 err = fdisp_wait_answ(fdip); 1070 } 1071 if (err) 1072 goto out; 1073 } 1074 1075 feo = fdip->answ; 1076 1077 if ((err = fuse_internal_checkentry(feo, vap->va_type))) { 1078 goto out; 1079 } 1080 1081 if (op == FUSE_CREATE) { 1082 if (fuse_libabi_geq(data, 7, 9)) 1083 foo = (struct fuse_open_out*)(feo + 1); 1084 else 1085 foo = (struct fuse_open_out*)((char*)feo + 1086 FUSE_COMPAT_ENTRY_OUT_SIZE); 1087 } else { 1088 /* Issue a separate FUSE_OPEN */ 1089 struct fuse_open_in *foi; 1090 1091 fdip2 = &fdi2; 1092 fdisp_init(fdip2, sizeof(*foi)); 1093 fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td, 1094 cred); 1095 foi = fdip2->indata; 1096 foi->flags = flags; 1097 err = fdisp_wait_answ(fdip2); 1098 if (err) 1099 goto out; 1100 foo = fdip2->answ; 1101 } 1102 err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type); 1103 if (err) { 1104 struct fuse_release_in *fri; 1105 uint64_t nodeid = feo->nodeid; 1106 uint64_t fh_id = foo->fh; 1107 1108 fdisp_destroy(fdip); 1109 fdisp_init(fdip, sizeof(*fri)); 1110 fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred); 1111 fri = fdip->indata; 1112 fri->fh = fh_id; 1113 fri->flags = flags; 1114 fuse_insert_callback(fdip->tick, fuse_internal_forget_callback); 1115 fuse_insert_message(fdip->tick, false); 1116 goto out; 1117 } 1118 ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create"); 1119 fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, 1120 feo->attr_valid_nsec, NULL, true); 1121 1122 fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo); 1123 fuse_vnode_open(*vpp, foo->open_flags, td); 1124 /* 1125 * Purge the parent's attribute cache because the daemon should've 1126 * updated its mtime and ctime 1127 */ 1128 fuse_vnode_clear_attr_cache(dvp); 1129 cache_purge_negative(dvp); 1130 1131 out: 1132 if (fdip2) 1133 fdisp_destroy(fdip2); 1134 fdisp_destroy(fdip); 1135 return err; 1136 } 1137 1138 /* 1139 struct vnop_fdatasync_args { 1140 struct vop_generic_args a_gen; 1141 struct vnode * a_vp; 1142 struct thread * a_td; 1143 }; 1144 */ 1145 static int 1146 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap) 1147 { 1148 struct vnode *vp = ap->a_vp; 1149 struct thread *td = ap->a_td; 1150 int waitfor = MNT_WAIT; 1151 1152 int err = 0; 1153 1154 if (fuse_isdeadfs(vp)) { 1155 return 0; 1156 } 1157 if ((err = vop_stdfdatasync_buf(ap))) 1158 return err; 1159 1160 return fuse_internal_fsync(vp, td, waitfor, true); 1161 } 1162 1163 /* 1164 struct vnop_fsync_args { 1165 struct vop_generic_args a_gen; 1166 struct vnode * a_vp; 1167 int a_waitfor; 1168 struct thread * a_td; 1169 }; 1170 */ 1171 static int 1172 fuse_vnop_fsync(struct vop_fsync_args *ap) 1173 { 1174 struct vnode *vp = ap->a_vp; 1175 struct thread *td = ap->a_td; 1176 int waitfor = ap->a_waitfor; 1177 int err = 0; 1178 1179 if (fuse_isdeadfs(vp)) { 1180 return 0; 1181 } 1182 if ((err = vop_stdfsync(ap))) 1183 return err; 1184 1185 return fuse_internal_fsync(vp, td, waitfor, false); 1186 } 1187 1188 /* 1189 struct vnop_getattr_args { 1190 struct vnode *a_vp; 1191 struct vattr *a_vap; 1192 struct ucred *a_cred; 1193 struct thread *a_td; 1194 }; 1195 */ 1196 static int 1197 fuse_vnop_getattr(struct vop_getattr_args *ap) 1198 { 1199 struct vnode *vp = ap->a_vp; 1200 struct vattr *vap = ap->a_vap; 1201 struct ucred *cred = ap->a_cred; 1202 struct thread *td = curthread; 1203 1204 int err = 0; 1205 int dataflags; 1206 1207 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; 1208 1209 /* Note that we are not bailing out on a dead file system just yet. */ 1210 1211 if (!(dataflags & FSESS_INITED)) { 1212 if (!vnode_isvroot(vp)) { 1213 fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); 1214 err = ENOTCONN; 1215 return err; 1216 } else { 1217 goto fake; 1218 } 1219 } 1220 err = fuse_internal_getattr(vp, vap, cred, td); 1221 if (err == ENOTCONN && vnode_isvroot(vp)) { 1222 /* see comment in fuse_vfsop_statfs() */ 1223 goto fake; 1224 } else { 1225 return err; 1226 } 1227 1228 fake: 1229 bzero(vap, sizeof(*vap)); 1230 vap->va_type = vnode_vtype(vp); 1231 1232 return 0; 1233 } 1234 1235 /* 1236 struct vnop_inactive_args { 1237 struct vnode *a_vp; 1238 }; 1239 */ 1240 static int 1241 fuse_vnop_inactive(struct vop_inactive_args *ap) 1242 { 1243 struct vnode *vp = ap->a_vp; 1244 struct thread *td = curthread; 1245 1246 struct fuse_vnode_data *fvdat = VTOFUD(vp); 1247 struct fuse_filehandle *fufh, *fufh_tmp; 1248 1249 int need_flush = 1; 1250 1251 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { 1252 if (need_flush && vp->v_type == VREG) { 1253 if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { 1254 fuse_vnode_savesize(vp, NULL, 0); 1255 } 1256 if ((fvdat->flag & FN_REVOKED) != 0) 1257 fuse_io_invalbuf(vp, td); 1258 else 1259 fuse_io_flushbuf(vp, MNT_WAIT, td); 1260 need_flush = 0; 1261 } 1262 fuse_filehandle_close(vp, fufh, td, NULL); 1263 } 1264 1265 if ((fvdat->flag & FN_REVOKED) != 0) 1266 vrecycle(vp); 1267 1268 return 0; 1269 } 1270 1271 /* 1272 struct vnop_ioctl_args { 1273 struct vnode *a_vp; 1274 u_long a_command; 1275 caddr_t a_data; 1276 int a_fflag; 1277 struct ucred *a_cred; 1278 struct thread *a_td; 1279 }; 1280 */ 1281 static int 1282 fuse_vnop_ioctl(struct vop_ioctl_args *ap) 1283 { 1284 struct vnode *vp = ap->a_vp; 1285 struct mount *mp = vnode_mount(vp); 1286 struct ucred *cred = ap->a_cred; 1287 off_t *offp; 1288 pid_t pid = ap->a_td->td_proc->p_pid; 1289 int err; 1290 1291 switch (ap->a_command) { 1292 case FIOSEEKDATA: 1293 case FIOSEEKHOLE: 1294 /* Call FUSE_LSEEK, if we can, or fall back to vop_stdioctl */ 1295 if (fsess_maybe_impl(mp, FUSE_LSEEK)) { 1296 int whence; 1297 1298 offp = ap->a_data; 1299 if (ap->a_command == FIOSEEKDATA) 1300 whence = SEEK_DATA; 1301 else 1302 whence = SEEK_HOLE; 1303 1304 vn_lock(vp, LK_SHARED | LK_RETRY); 1305 err = fuse_vnop_do_lseek(vp, ap->a_td, cred, pid, offp, 1306 whence); 1307 VOP_UNLOCK(vp); 1308 } 1309 if (fsess_not_impl(mp, FUSE_LSEEK)) 1310 err = vop_stdioctl(ap); 1311 break; 1312 default: 1313 /* TODO: implement FUSE_IOCTL */ 1314 err = ENOTTY; 1315 break; 1316 } 1317 return (err); 1318 } 1319 1320 1321 /* 1322 struct vnop_link_args { 1323 struct vnode *a_tdvp; 1324 struct vnode *a_vp; 1325 struct componentname *a_cnp; 1326 }; 1327 */ 1328 static int 1329 fuse_vnop_link(struct vop_link_args *ap) 1330 { 1331 struct vnode *vp = ap->a_vp; 1332 struct vnode *tdvp = ap->a_tdvp; 1333 struct componentname *cnp = ap->a_cnp; 1334 1335 struct vattr *vap = VTOVA(vp); 1336 1337 struct fuse_dispatcher fdi; 1338 struct fuse_entry_out *feo; 1339 struct fuse_link_in fli; 1340 1341 int err; 1342 1343 if (fuse_isdeadfs(vp)) { 1344 return ENXIO; 1345 } 1346 if (vnode_mount(tdvp) != vnode_mount(vp)) { 1347 return EXDEV; 1348 } 1349 1350 /* 1351 * This is a seatbelt check to protect naive userspace filesystems from 1352 * themselves and the limitations of the FUSE IPC protocol. If a 1353 * filesystem does not allow attribute caching, assume it is capable of 1354 * validating that nlink does not overflow. 1355 */ 1356 if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) 1357 return EMLINK; 1358 fli.oldnodeid = VTOI(vp); 1359 1360 fdisp_init(&fdi, 0); 1361 fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp, 1362 FUSE_LINK, &fli, sizeof(fli), &fdi); 1363 if ((err = fdisp_wait_answ(&fdi))) { 1364 goto out; 1365 } 1366 feo = fdi.answ; 1367 1368 if (fli.oldnodeid != feo->nodeid) { 1369 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 1370 fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, 1371 "Assigned wrong inode for a hard link."); 1372 fuse_vnode_clear_attr_cache(vp); 1373 fuse_vnode_clear_attr_cache(tdvp); 1374 err = EIO; 1375 goto out; 1376 } 1377 1378 err = fuse_internal_checkentry(feo, vnode_vtype(vp)); 1379 if (!err) { 1380 /* 1381 * Purge the parent's attribute cache because the daemon 1382 * should've updated its mtime and ctime 1383 */ 1384 fuse_vnode_clear_attr_cache(tdvp); 1385 fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid, 1386 feo->attr_valid_nsec, NULL, true); 1387 } 1388 out: 1389 fdisp_destroy(&fdi); 1390 return err; 1391 } 1392 1393 struct fuse_lookup_alloc_arg { 1394 struct fuse_entry_out *feo; 1395 struct componentname *cnp; 1396 uint64_t nid; 1397 __enum_uint8(vtype) vtyp; 1398 }; 1399 1400 /* Callback for vn_get_ino */ 1401 static int 1402 fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1403 { 1404 struct fuse_lookup_alloc_arg *flaa = arg; 1405 1406 return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp, 1407 flaa->vtyp); 1408 } 1409 1410 SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup, 1411 "int", "struct timespec*", "struct timespec*"); 1412 /* 1413 struct vnop_lookup_args { 1414 struct vnodeop_desc *a_desc; 1415 struct vnode *a_dvp; 1416 struct vnode **a_vpp; 1417 struct componentname *a_cnp; 1418 }; 1419 */ 1420 int 1421 fuse_vnop_lookup(struct vop_lookup_args *ap) 1422 { 1423 struct vnode *dvp = ap->a_dvp; 1424 struct vnode **vpp = ap->a_vpp; 1425 struct componentname *cnp = ap->a_cnp; 1426 struct thread *td = curthread; 1427 struct ucred *cred = cnp->cn_cred; 1428 struct timespec now; 1429 1430 int nameiop = cnp->cn_nameiop; 1431 int flags = cnp->cn_flags; 1432 int islastcn = flags & ISLASTCN; 1433 struct mount *mp = vnode_mount(dvp); 1434 struct fuse_data *data = fuse_get_mpdata(mp); 1435 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; 1436 bool is_dot; 1437 1438 int err = 0; 1439 int lookup_err = 0; 1440 struct vnode *vp = NULL; 1441 1442 struct fuse_dispatcher fdi; 1443 bool did_lookup = false; 1444 struct fuse_entry_out *feo = NULL; 1445 __enum_uint8(vtype) vtyp; /* vnode type of target */ 1446 1447 uint64_t nid; 1448 1449 if (fuse_isdeadfs(dvp)) { 1450 *vpp = NULL; 1451 return ENXIO; 1452 } 1453 if (!vnode_isdir(dvp)) 1454 return ENOTDIR; 1455 1456 if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) 1457 return EROFS; 1458 1459 if ((cnp->cn_flags & NOEXECCHECK) != 0) 1460 cnp->cn_flags &= ~NOEXECCHECK; 1461 else if ((err = fuse_internal_access(dvp, VEXEC, td, cred))) 1462 return err; 1463 1464 is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.'; 1465 if ((flags & ISDOTDOT) && !(data->dataflags & FSESS_EXPORT_SUPPORT)) 1466 { 1467 if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) { 1468 /* 1469 * Since the file system doesn't support ".." lookups, 1470 * we have no way to find this entry. 1471 */ 1472 return ESTALE; 1473 } 1474 nid = VTOFUD(dvp)->parent_nid; 1475 if (nid == 0) 1476 return ENOENT; 1477 /* .. is obviously a directory */ 1478 vtyp = VDIR; 1479 } else if (is_dot) { 1480 nid = VTOI(dvp); 1481 /* . is obviously a directory */ 1482 vtyp = VDIR; 1483 } else { 1484 struct timespec timeout; 1485 int ncpticks; /* here to accommodate for API contract */ 1486 1487 err = cache_lookup(dvp, vpp, cnp, &timeout, &ncpticks); 1488 getnanouptime(&now); 1489 SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now); 1490 switch (err) { 1491 case -1: /* positive match */ 1492 if (timespeccmp(&timeout, &now, >)) { 1493 counter_u64_add(fuse_lookup_cache_hits, 1); 1494 } else { 1495 /* Cache timeout */ 1496 counter_u64_add(fuse_lookup_cache_misses, 1); 1497 bintime_clear( 1498 &VTOFUD(*vpp)->entry_cache_timeout); 1499 cache_purge(*vpp); 1500 if (dvp != *vpp) 1501 vput(*vpp); 1502 else 1503 vrele(*vpp); 1504 *vpp = NULL; 1505 break; 1506 } 1507 return 0; 1508 1509 case 0: /* no match in cache */ 1510 counter_u64_add(fuse_lookup_cache_misses, 1); 1511 break; 1512 1513 case ENOENT: /* negative match */ 1514 if (timespeccmp(&timeout, &now, <=)) { 1515 /* Cache timeout */ 1516 cache_purge_negative(dvp); 1517 break; 1518 } 1519 /* fall through */ 1520 default: 1521 return err; 1522 } 1523 1524 fdisp_init(&fdi, cnp->cn_namelen + 1); 1525 fdisp_make(&fdi, FUSE_LOOKUP, mp, VTOI(dvp), td, cred); 1526 1527 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); 1528 ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; 1529 lookup_err = fdisp_wait_answ(&fdi); 1530 did_lookup = true; 1531 1532 if (!lookup_err) { 1533 /* lookup call succeeded */ 1534 feo = (struct fuse_entry_out *)fdi.answ; 1535 nid = feo->nodeid; 1536 if (nid == 0) { 1537 /* zero nodeid means ENOENT and cache it */ 1538 struct timespec timeout; 1539 1540 fdi.answ_stat = ENOENT; 1541 lookup_err = ENOENT; 1542 if (cnp->cn_flags & MAKEENTRY) { 1543 fuse_validity_2_timespec(feo, &timeout); 1544 /* Use the same entry_time for .. as for 1545 * the file itself. That doesn't honor 1546 * exactly what the fuse server tells 1547 * us, but to do otherwise would require 1548 * another cache lookup at this point. 1549 */ 1550 struct timespec *dtsp = NULL; 1551 cache_enter_time(dvp, *vpp, cnp, 1552 &timeout, dtsp); 1553 } 1554 } 1555 vtyp = IFTOVT(feo->attr.mode); 1556 } 1557 if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) { 1558 fdisp_destroy(&fdi); 1559 return lookup_err; 1560 } 1561 } 1562 /* lookup_err, if non-zero, must be ENOENT at this point */ 1563 1564 if (lookup_err) { 1565 /* Entry not found */ 1566 if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { 1567 if (default_permissions) 1568 err = fuse_internal_access(dvp, VWRITE, td, 1569 cred); 1570 else 1571 err = 0; 1572 if (!err) { 1573 err = EJUSTRETURN; 1574 } 1575 } else { 1576 err = ENOENT; 1577 } 1578 } else { 1579 /* Entry was found */ 1580 if (flags & ISDOTDOT) { 1581 struct fuse_lookup_alloc_arg flaa; 1582 1583 flaa.nid = nid; 1584 flaa.feo = feo; 1585 flaa.cnp = cnp; 1586 flaa.vtyp = vtyp; 1587 err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0, 1588 &vp); 1589 *vpp = vp; 1590 } else if (nid == VTOI(dvp)) { 1591 if (is_dot) { 1592 vref(dvp); 1593 *vpp = dvp; 1594 } else { 1595 fuse_warn(fuse_get_mpdata(mp), 1596 FSESS_WARN_ILLEGAL_INODE, 1597 "Assigned same inode to both parent and " 1598 "child."); 1599 err = EIO; 1600 } 1601 1602 } else { 1603 struct fuse_vnode_data *fvdat; 1604 1605 err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, 1606 &vp, cnp, vtyp); 1607 if (err) 1608 goto out; 1609 *vpp = vp; 1610 fvdat = VTOFUD(vp); 1611 1612 MPASS(feo != NULL); 1613 if (timespeccmp(&now, &fvdat->last_local_modify, >)) { 1614 /* 1615 * Attributes from the server are definitely 1616 * newer than the last attributes we sent to 1617 * the server, so cache them. 1618 */ 1619 fuse_internal_cache_attrs(*vpp, &feo->attr, 1620 feo->attr_valid, feo->attr_valid_nsec, 1621 NULL, true); 1622 } 1623 fuse_validity_2_bintime(feo->entry_valid, 1624 feo->entry_valid_nsec, 1625 &fvdat->entry_cache_timeout); 1626 1627 if ((nameiop == DELETE || nameiop == RENAME) && 1628 islastcn && default_permissions) 1629 { 1630 struct vattr dvattr; 1631 1632 err = fuse_internal_access(dvp, VWRITE, td, 1633 cred); 1634 if (err != 0) 1635 goto out; 1636 /* 1637 * if the parent's sticky bit is set, check 1638 * whether we're allowed to remove the file. 1639 * Need to figure out the vnode locking to make 1640 * this work. 1641 */ 1642 fuse_internal_getattr(dvp, &dvattr, cred, td); 1643 if ((dvattr.va_mode & S_ISTXT) && 1644 fuse_internal_access(dvp, VADMIN, td, 1645 cred) && 1646 fuse_internal_access(*vpp, VADMIN, td, 1647 cred)) { 1648 err = EPERM; 1649 goto out; 1650 } 1651 } 1652 } 1653 } 1654 out: 1655 if (err) { 1656 if (vp != NULL && dvp != vp) 1657 vput(vp); 1658 else if (vp != NULL) 1659 vrele(vp); 1660 *vpp = NULL; 1661 } 1662 if (did_lookup) 1663 fdisp_destroy(&fdi); 1664 1665 return err; 1666 } 1667 1668 /* 1669 struct vnop_mkdir_args { 1670 struct vnode *a_dvp; 1671 struct vnode **a_vpp; 1672 struct componentname *a_cnp; 1673 struct vattr *a_vap; 1674 }; 1675 */ 1676 static int 1677 fuse_vnop_mkdir(struct vop_mkdir_args *ap) 1678 { 1679 struct vnode *dvp = ap->a_dvp; 1680 struct vnode **vpp = ap->a_vpp; 1681 struct componentname *cnp = ap->a_cnp; 1682 struct vattr *vap = ap->a_vap; 1683 1684 struct fuse_mkdir_in fmdi; 1685 1686 if (fuse_isdeadfs(dvp)) { 1687 return ENXIO; 1688 } 1689 fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); 1690 fmdi.umask = curthread->td_proc->p_pd->pd_cmask; 1691 1692 return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi, 1693 sizeof(fmdi), VDIR)); 1694 } 1695 1696 /* 1697 struct vnop_mknod_args { 1698 struct vnode *a_dvp; 1699 struct vnode **a_vpp; 1700 struct componentname *a_cnp; 1701 struct vattr *a_vap; 1702 }; 1703 */ 1704 static int 1705 fuse_vnop_mknod(struct vop_mknod_args *ap) 1706 { 1707 1708 struct vnode *dvp = ap->a_dvp; 1709 struct vnode **vpp = ap->a_vpp; 1710 struct componentname *cnp = ap->a_cnp; 1711 struct vattr *vap = ap->a_vap; 1712 1713 if (fuse_isdeadfs(dvp)) 1714 return ENXIO; 1715 1716 return fuse_internal_mknod(dvp, vpp, cnp, vap); 1717 } 1718 1719 /* 1720 struct vop_open_args { 1721 struct vnode *a_vp; 1722 int a_mode; 1723 struct ucred *a_cred; 1724 struct thread *a_td; 1725 int a_fdidx; / struct file *a_fp; 1726 }; 1727 */ 1728 static int 1729 fuse_vnop_open(struct vop_open_args *ap) 1730 { 1731 struct vnode *vp = ap->a_vp; 1732 int a_mode = ap->a_mode; 1733 struct thread *td = ap->a_td; 1734 struct ucred *cred = ap->a_cred; 1735 pid_t pid = td->td_proc->p_pid; 1736 1737 if (fuse_isdeadfs(vp)) 1738 return ENXIO; 1739 if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) 1740 return (EOPNOTSUPP); 1741 if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) 1742 return EINVAL; 1743 1744 if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { 1745 fuse_vnode_open(vp, 0, td); 1746 return 0; 1747 } 1748 1749 return fuse_filehandle_open(vp, a_mode, NULL, td, cred); 1750 } 1751 1752 static int 1753 fuse_vnop_pathconf(struct vop_pathconf_args *ap) 1754 { 1755 struct vnode *vp = ap->a_vp; 1756 struct mount *mp; 1757 1758 switch (ap->a_name) { 1759 case _PC_FILESIZEBITS: 1760 *ap->a_retval = 64; 1761 return (0); 1762 case _PC_NAME_MAX: 1763 *ap->a_retval = NAME_MAX; 1764 return (0); 1765 case _PC_LINK_MAX: 1766 *ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX); 1767 return (0); 1768 case _PC_SYMLINK_MAX: 1769 *ap->a_retval = MAXPATHLEN; 1770 return (0); 1771 case _PC_NO_TRUNC: 1772 *ap->a_retval = 1; 1773 return (0); 1774 case _PC_MIN_HOLE_SIZE: 1775 /* 1776 * The FUSE protocol provides no mechanism for a server to 1777 * report _PC_MIN_HOLE_SIZE. It's a protocol bug. Instead, 1778 * return EINVAL if the server does not support FUSE_LSEEK, or 1779 * 1 if it does. 1780 */ 1781 mp = vnode_mount(vp); 1782 if (!fsess_is_impl(mp, FUSE_LSEEK) && 1783 !fsess_not_impl(mp, FUSE_LSEEK)) { 1784 off_t offset = 0; 1785 1786 /* Issue a FUSE_LSEEK to find out if it's implemented */ 1787 fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred, 1788 curthread->td_proc->p_pid, &offset, SEEK_DATA); 1789 } 1790 1791 if (fsess_is_impl(mp, FUSE_LSEEK)) { 1792 *ap->a_retval = 1; 1793 return (0); 1794 } else { 1795 /* 1796 * Probably FUSE_LSEEK is not implemented. It might 1797 * be, if the FUSE_LSEEK above returned an error like 1798 * EACCES, but in that case we can't tell, so it's 1799 * safest to report EINVAL anyway. 1800 */ 1801 return (EINVAL); 1802 } 1803 default: 1804 return (vop_stdpathconf(ap)); 1805 } 1806 } 1807 1808 SDT_PROBE_DEFINE3(fusefs, , vnops, filehandles_closed, "struct vnode*", 1809 "struct uio*", "struct ucred*"); 1810 /* 1811 struct vnop_read_args { 1812 struct vnode *a_vp; 1813 struct uio *a_uio; 1814 int a_ioflag; 1815 struct ucred *a_cred; 1816 }; 1817 */ 1818 static int 1819 fuse_vnop_read(struct vop_read_args *ap) 1820 { 1821 struct vnode *vp = ap->a_vp; 1822 struct uio *uio = ap->a_uio; 1823 int ioflag = ap->a_ioflag; 1824 struct ucred *cred = ap->a_cred; 1825 pid_t pid = curthread->td_proc->p_pid; 1826 struct fuse_filehandle *fufh; 1827 int err; 1828 bool closefufh = false, directio; 1829 1830 MPASS(vp->v_type == VREG || vp->v_type == VDIR); 1831 1832 if (fuse_isdeadfs(vp)) { 1833 return ENXIO; 1834 } 1835 1836 if (VTOFUD(vp)->flag & FN_DIRECTIO) { 1837 ioflag |= IO_DIRECT; 1838 } 1839 1840 err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid); 1841 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 1842 /* 1843 * nfsd will do I/O without first doing VOP_OPEN. We 1844 * must implicitly open the file here 1845 */ 1846 err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); 1847 closefufh = true; 1848 } 1849 if (err) { 1850 SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred); 1851 return err; 1852 } 1853 1854 /* 1855 * Ideally, when the daemon asks for direct io at open time, the 1856 * standard file flag should be set according to this, so that would 1857 * just change the default mode, which later on could be changed via 1858 * fcntl(2). 1859 * But this doesn't work, the O_DIRECT flag gets cleared at some point 1860 * (don't know where). So to make any use of the Fuse direct_io option, 1861 * we hardwire it into the file's private data (similarly to Linux, 1862 * btw.). 1863 */ 1864 directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); 1865 1866 fuse_vnode_update(vp, FN_ATIMECHANGE); 1867 if (directio) { 1868 SDT_PROBE2(fusefs, , vnops, trace, 1, "direct read of vnode"); 1869 err = fuse_read_directbackend(vp, uio, cred, fufh); 1870 } else { 1871 SDT_PROBE2(fusefs, , vnops, trace, 1, "buffered read of vnode"); 1872 err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid); 1873 } 1874 1875 if (closefufh) 1876 fuse_filehandle_close(vp, fufh, curthread, cred); 1877 1878 return (err); 1879 } 1880 1881 /* 1882 struct vnop_readdir_args { 1883 struct vnode *a_vp; 1884 struct uio *a_uio; 1885 struct ucred *a_cred; 1886 int *a_eofflag; 1887 int *a_ncookies; 1888 uint64_t **a_cookies; 1889 }; 1890 */ 1891 static int 1892 fuse_vnop_readdir(struct vop_readdir_args *ap) 1893 { 1894 struct vnode *vp = ap->a_vp; 1895 struct uio *uio = ap->a_uio; 1896 struct ucred *cred = ap->a_cred; 1897 struct fuse_filehandle *fufh = NULL; 1898 struct mount *mp = vnode_mount(vp); 1899 struct fuse_iov cookediov; 1900 int err = 0; 1901 uint64_t *cookies; 1902 ssize_t tresid; 1903 int ncookies; 1904 bool closefufh = false; 1905 pid_t pid = curthread->td_proc->p_pid; 1906 1907 if (ap->a_eofflag) 1908 *ap->a_eofflag = 0; 1909 if (fuse_isdeadfs(vp)) { 1910 return ENXIO; 1911 } 1912 if ( /* XXXIP ((uio_iovcnt(uio) > 1)) || */ 1913 (uio_resid(uio) < sizeof(struct dirent))) { 1914 return EINVAL; 1915 } 1916 1917 tresid = uio->uio_resid; 1918 err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); 1919 if (err == EBADF && mp->mnt_flag & MNT_EXPORTED) { 1920 KASSERT(fuse_get_mpdata(mp)->dataflags 1921 & FSESS_NO_OPENDIR_SUPPORT, 1922 ("FUSE file systems that don't set " 1923 "FUSE_NO_OPENDIR_SUPPORT should not be exported")); 1924 /* 1925 * nfsd will do VOP_READDIR without first doing VOP_OPEN. We 1926 * must implicitly open the directory here. 1927 */ 1928 err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); 1929 closefufh = true; 1930 } 1931 if (err) 1932 return (err); 1933 if (ap->a_ncookies != NULL) { 1934 ncookies = uio->uio_resid / 1935 (offsetof(struct dirent, d_name) + 4) + 1; 1936 cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); 1937 *ap->a_ncookies = ncookies; 1938 *ap->a_cookies = cookies; 1939 } else { 1940 ncookies = 0; 1941 cookies = NULL; 1942 } 1943 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1) 1944 fiov_init(&cookediov, DIRCOOKEDSIZE); 1945 1946 err = fuse_internal_readdir(vp, uio, fufh, &cookediov, 1947 &ncookies, cookies); 1948 1949 fiov_teardown(&cookediov); 1950 if (closefufh) 1951 fuse_filehandle_close(vp, fufh, curthread, cred); 1952 1953 if (ap->a_ncookies != NULL) { 1954 if (err == 0) { 1955 *ap->a_ncookies -= ncookies; 1956 } else { 1957 free(*ap->a_cookies, M_TEMP); 1958 *ap->a_ncookies = 0; 1959 *ap->a_cookies = NULL; 1960 } 1961 } 1962 if (err == 0 && tresid == uio->uio_resid) 1963 *ap->a_eofflag = 1; 1964 1965 return err; 1966 } 1967 1968 /* 1969 struct vnop_readlink_args { 1970 struct vnode *a_vp; 1971 struct uio *a_uio; 1972 struct ucred *a_cred; 1973 }; 1974 */ 1975 static int 1976 fuse_vnop_readlink(struct vop_readlink_args *ap) 1977 { 1978 struct vnode *vp = ap->a_vp; 1979 struct uio *uio = ap->a_uio; 1980 struct ucred *cred = ap->a_cred; 1981 1982 struct fuse_dispatcher fdi; 1983 int err; 1984 1985 if (fuse_isdeadfs(vp)) { 1986 return ENXIO; 1987 } 1988 if (!vnode_islnk(vp)) { 1989 return EINVAL; 1990 } 1991 fdisp_init(&fdi, 0); 1992 err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred); 1993 if (err) { 1994 goto out; 1995 } 1996 if (strnlen(fdi.answ, fdi.iosize) + 1 < fdi.iosize) { 1997 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 1998 fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, 1999 "Returned an embedded NUL from FUSE_READLINK."); 2000 err = EIO; 2001 goto out; 2002 } 2003 if (((char *)fdi.answ)[0] == '/' && 2004 fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) { 2005 char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname; 2006 2007 err = uiomove(mpth, strlen(mpth), uio); 2008 } 2009 if (!err) { 2010 err = uiomove(fdi.answ, fdi.iosize, uio); 2011 } 2012 out: 2013 fdisp_destroy(&fdi); 2014 return err; 2015 } 2016 2017 /* 2018 struct vnop_reclaim_args { 2019 struct vnode *a_vp; 2020 }; 2021 */ 2022 static int 2023 fuse_vnop_reclaim(struct vop_reclaim_args *ap) 2024 { 2025 struct vnode *vp = ap->a_vp; 2026 struct thread *td = curthread; 2027 struct fuse_vnode_data *fvdat = VTOFUD(vp); 2028 struct fuse_filehandle *fufh, *fufh_tmp; 2029 2030 if (!fvdat) { 2031 panic("FUSE: no vnode data during recycling"); 2032 } 2033 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { 2034 printf("FUSE: vnode being reclaimed with open fufh " 2035 "(type=%#x)", fufh->fufh_type); 2036 fuse_filehandle_close(vp, fufh, td, NULL); 2037 } 2038 2039 if (VTOI(vp) == 1) { 2040 /* 2041 * Don't send FUSE_FORGET for the root inode, because 2042 * we never send FUSE_LOOKUP for it (see 2043 * fuse_vfsop_root) and we don't want the server to see 2044 * mismatched lookup counts. 2045 */ 2046 struct fuse_data *data; 2047 struct vnode *vroot; 2048 2049 data = fuse_get_mpdata(vnode_mount(vp)); 2050 FUSE_LOCK(); 2051 vroot = data->vroot; 2052 data->vroot = NULL; 2053 FUSE_UNLOCK(); 2054 if (vroot) 2055 vrele(vroot); 2056 } else if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) { 2057 fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp), 2058 fvdat->nlookup); 2059 } 2060 cache_purge(vp); 2061 vfs_hash_remove(vp); 2062 fuse_vnode_destroy(vp); 2063 2064 return 0; 2065 } 2066 2067 /* 2068 struct vnop_remove_args { 2069 struct vnode *a_dvp; 2070 struct vnode *a_vp; 2071 struct componentname *a_cnp; 2072 }; 2073 */ 2074 static int 2075 fuse_vnop_remove(struct vop_remove_args *ap) 2076 { 2077 struct vnode *dvp = ap->a_dvp; 2078 struct vnode *vp = ap->a_vp; 2079 struct componentname *cnp = ap->a_cnp; 2080 2081 int err; 2082 2083 if (fuse_isdeadfs(vp)) { 2084 return ENXIO; 2085 } 2086 if (vnode_isdir(vp)) { 2087 return EPERM; 2088 } 2089 2090 err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); 2091 2092 return err; 2093 } 2094 2095 /* 2096 struct vnop_rename_args { 2097 struct vnode *a_fdvp; 2098 struct vnode *a_fvp; 2099 struct componentname *a_fcnp; 2100 struct vnode *a_tdvp; 2101 struct vnode *a_tvp; 2102 struct componentname *a_tcnp; 2103 }; 2104 */ 2105 static int 2106 fuse_vnop_rename(struct vop_rename_args *ap) 2107 { 2108 struct vnode *fdvp = ap->a_fdvp; 2109 struct vnode *fvp = ap->a_fvp; 2110 struct componentname *fcnp = ap->a_fcnp; 2111 struct vnode *tdvp = ap->a_tdvp; 2112 struct vnode *tvp = ap->a_tvp; 2113 struct componentname *tcnp = ap->a_tcnp; 2114 struct fuse_data *data; 2115 bool newparent = fdvp != tdvp; 2116 bool isdir = fvp->v_type == VDIR; 2117 int err = 0; 2118 2119 if (fuse_isdeadfs(fdvp)) { 2120 return ENXIO; 2121 } 2122 if (fvp->v_mount != tdvp->v_mount || 2123 (tvp && fvp->v_mount != tvp->v_mount)) { 2124 SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename"); 2125 err = EXDEV; 2126 goto out; 2127 } 2128 cache_purge(fvp); 2129 2130 /* 2131 * FUSE library is expected to check if target directory is not 2132 * under the source directory in the file system tree. 2133 * Linux performs this check at VFS level. 2134 */ 2135 /* 2136 * If source is a directory, and it will get a new parent, user must 2137 * have write permission to it, so ".." can be modified. 2138 */ 2139 data = fuse_get_mpdata(vnode_mount(tdvp)); 2140 if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) { 2141 err = fuse_internal_access(fvp, VWRITE, 2142 curthread, tcnp->cn_cred); 2143 if (err) 2144 goto out; 2145 } 2146 sx_xlock(&data->rename_lock); 2147 err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp); 2148 if (err == 0) { 2149 if (tdvp != fdvp) 2150 fuse_vnode_setparent(fvp, tdvp); 2151 if (tvp != NULL) 2152 fuse_vnode_setparent(tvp, NULL); 2153 } 2154 sx_unlock(&data->rename_lock); 2155 2156 if (tvp != NULL && tvp != fvp) { 2157 cache_purge(tvp); 2158 } 2159 if (vnode_isdir(fvp)) { 2160 if (((tvp != NULL) && vnode_isdir(tvp)) || vnode_isdir(fvp)) { 2161 cache_purge(tdvp); 2162 } 2163 cache_purge(fdvp); 2164 } 2165 out: 2166 if (tdvp == tvp) { 2167 vrele(tdvp); 2168 } else { 2169 vput(tdvp); 2170 } 2171 if (tvp != NULL) { 2172 vput(tvp); 2173 } 2174 vrele(fdvp); 2175 vrele(fvp); 2176 2177 return err; 2178 } 2179 2180 /* 2181 struct vnop_rmdir_args { 2182 struct vnode *a_dvp; 2183 struct vnode *a_vp; 2184 struct componentname *a_cnp; 2185 } *ap; 2186 */ 2187 static int 2188 fuse_vnop_rmdir(struct vop_rmdir_args *ap) 2189 { 2190 struct vnode *dvp = ap->a_dvp; 2191 struct vnode *vp = ap->a_vp; 2192 2193 int err; 2194 2195 if (fuse_isdeadfs(vp)) { 2196 return ENXIO; 2197 } 2198 if (VTOFUD(vp) == VTOFUD(dvp)) { 2199 return EINVAL; 2200 } 2201 err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); 2202 2203 return err; 2204 } 2205 2206 /* 2207 struct vnop_setattr_args { 2208 struct vnode *a_vp; 2209 struct vattr *a_vap; 2210 struct ucred *a_cred; 2211 struct thread *a_td; 2212 }; 2213 */ 2214 static int 2215 fuse_vnop_setattr(struct vop_setattr_args *ap) 2216 { 2217 struct vnode *vp = ap->a_vp; 2218 struct vattr *vap = ap->a_vap; 2219 struct ucred *cred = ap->a_cred; 2220 struct thread *td = curthread; 2221 struct mount *mp; 2222 struct fuse_data *data; 2223 struct vattr old_va; 2224 int dataflags; 2225 int err = 0, err2; 2226 accmode_t accmode = 0; 2227 bool checkperm; 2228 bool drop_suid = false; 2229 2230 mp = vnode_mount(vp); 2231 data = fuse_get_mpdata(mp); 2232 dataflags = data->dataflags; 2233 checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; 2234 2235 if (fuse_isdeadfs(vp)) { 2236 return ENXIO; 2237 } 2238 2239 if (vap->va_uid != (uid_t)VNOVAL) { 2240 if (checkperm) { 2241 /* Only root may change a file's owner */ 2242 err = priv_check_cred(cred, PRIV_VFS_CHOWN); 2243 if (err) { 2244 /* As a special case, allow the null chown */ 2245 err2 = fuse_internal_getattr(vp, &old_va, cred, 2246 td); 2247 if (err2) 2248 return (err2); 2249 if (vap->va_uid != old_va.va_uid) 2250 return err; 2251 else 2252 accmode |= VADMIN; 2253 drop_suid = true; 2254 } else 2255 accmode |= VADMIN; 2256 } else 2257 accmode |= VADMIN; 2258 } 2259 if (vap->va_gid != (gid_t)VNOVAL) { 2260 if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN)) 2261 drop_suid = true; 2262 if (checkperm && !groupmember(vap->va_gid, cred)) 2263 { 2264 /* 2265 * Non-root users may only chgrp to one of their own 2266 * groups 2267 */ 2268 err = priv_check_cred(cred, PRIV_VFS_CHOWN); 2269 if (err) { 2270 /* As a special case, allow the null chgrp */ 2271 err2 = fuse_internal_getattr(vp, &old_va, cred, 2272 td); 2273 if (err2) 2274 return (err2); 2275 if (vap->va_gid != old_va.va_gid) 2276 return err; 2277 accmode |= VADMIN; 2278 } else 2279 accmode |= VADMIN; 2280 } else 2281 accmode |= VADMIN; 2282 } 2283 if (vap->va_size != VNOVAL) { 2284 switch (vp->v_type) { 2285 case VDIR: 2286 return (EISDIR); 2287 case VLNK: 2288 case VREG: 2289 if (vfs_isrdonly(mp)) 2290 return (EROFS); 2291 err = vn_rlimit_trunc(vap->va_size, td); 2292 if (err) 2293 return (err); 2294 break; 2295 default: 2296 /* 2297 * According to POSIX, the result is unspecified 2298 * for file types other than regular files, 2299 * directories and shared memory objects. We 2300 * don't support shared memory objects in the file 2301 * system, and have dubious support for truncating 2302 * symlinks. Just ignore the request in other cases. 2303 */ 2304 return (0); 2305 } 2306 /* Don't set accmode. Permission to trunc is checked upstack */ 2307 } 2308 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 2309 if (vap->va_vaflags & VA_UTIMES_NULL) 2310 accmode |= VWRITE; 2311 else 2312 accmode |= VADMIN; 2313 } 2314 if (drop_suid) { 2315 if (vap->va_mode != (mode_t)VNOVAL) 2316 vap->va_mode &= ~(S_ISUID | S_ISGID); 2317 else { 2318 err = fuse_internal_getattr(vp, &old_va, cred, td); 2319 if (err) 2320 return (err); 2321 vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID); 2322 } 2323 } 2324 if (vap->va_mode != (mode_t)VNOVAL) { 2325 /* Only root may set the sticky bit on non-directories */ 2326 if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT) 2327 && priv_check_cred(cred, PRIV_VFS_STICKYFILE)) 2328 return EFTYPE; 2329 if (checkperm && (vap->va_mode & S_ISGID)) { 2330 err = fuse_internal_getattr(vp, &old_va, cred, td); 2331 if (err) 2332 return (err); 2333 if (!groupmember(old_va.va_gid, cred)) { 2334 err = priv_check_cred(cred, PRIV_VFS_SETGID); 2335 if (err) 2336 return (err); 2337 } 2338 } 2339 accmode |= VADMIN; 2340 } 2341 2342 if (vfs_isrdonly(mp)) 2343 return EROFS; 2344 2345 if (checkperm) { 2346 err = fuse_internal_access(vp, accmode, td, cred); 2347 } else { 2348 err = 0; 2349 } 2350 if (err) 2351 return err; 2352 else 2353 return fuse_internal_setattr(vp, vap, td, cred); 2354 } 2355 2356 /* 2357 struct vnop_strategy_args { 2358 struct vnode *a_vp; 2359 struct buf *a_bp; 2360 }; 2361 */ 2362 static int 2363 fuse_vnop_strategy(struct vop_strategy_args *ap) 2364 { 2365 struct vnode *vp = ap->a_vp; 2366 struct buf *bp = ap->a_bp; 2367 2368 if (!vp || fuse_isdeadfs(vp)) { 2369 bp->b_ioflags |= BIO_ERROR; 2370 bp->b_error = ENXIO; 2371 bufdone(bp); 2372 return 0; 2373 } 2374 2375 /* 2376 * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags. 2377 * fuse_io_strategy sets bp's error fields 2378 */ 2379 (void)fuse_io_strategy(vp, bp); 2380 2381 return 0; 2382 } 2383 2384 /* 2385 struct vnop_symlink_args { 2386 struct vnode *a_dvp; 2387 struct vnode **a_vpp; 2388 struct componentname *a_cnp; 2389 struct vattr *a_vap; 2390 char *a_target; 2391 }; 2392 */ 2393 static int 2394 fuse_vnop_symlink(struct vop_symlink_args *ap) 2395 { 2396 struct vnode *dvp = ap->a_dvp; 2397 struct vnode **vpp = ap->a_vpp; 2398 struct componentname *cnp = ap->a_cnp; 2399 const char *target = ap->a_target; 2400 2401 struct fuse_dispatcher fdi; 2402 2403 int err; 2404 size_t len; 2405 2406 if (fuse_isdeadfs(dvp)) { 2407 return ENXIO; 2408 } 2409 /* 2410 * Unlike the other creator type calls, here we have to create a message 2411 * where the name of the new entry comes first, and the data describing 2412 * the entry comes second. 2413 * Hence we can't rely on our handy fuse_internal_newentry() routine, 2414 * but put together the message manually and just call the core part. 2415 */ 2416 2417 len = strlen(target) + 1; 2418 fdisp_init(&fdi, len + cnp->cn_namelen + 1); 2419 fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL); 2420 2421 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); 2422 ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; 2423 memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len); 2424 2425 err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi); 2426 fdisp_destroy(&fdi); 2427 return err; 2428 } 2429 2430 /* 2431 struct vnop_write_args { 2432 struct vnode *a_vp; 2433 struct uio *a_uio; 2434 int a_ioflag; 2435 struct ucred *a_cred; 2436 }; 2437 */ 2438 static int 2439 fuse_vnop_write(struct vop_write_args *ap) 2440 { 2441 struct vnode *vp = ap->a_vp; 2442 struct uio *uio = ap->a_uio; 2443 int ioflag = ap->a_ioflag; 2444 struct ucred *cred = ap->a_cred; 2445 pid_t pid = curthread->td_proc->p_pid; 2446 struct fuse_filehandle *fufh; 2447 int err; 2448 bool closefufh = false, directio; 2449 2450 MPASS(vp->v_type == VREG || vp->v_type == VDIR); 2451 2452 if (fuse_isdeadfs(vp)) { 2453 return ENXIO; 2454 } 2455 2456 if (VTOFUD(vp)->flag & FN_DIRECTIO) { 2457 ioflag |= IO_DIRECT; 2458 } 2459 2460 err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 2461 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 2462 /* 2463 * nfsd will do I/O without first doing VOP_OPEN. We 2464 * must implicitly open the file here 2465 */ 2466 err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred); 2467 closefufh = true; 2468 } 2469 if (err) { 2470 SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred); 2471 return err; 2472 } 2473 2474 /* 2475 * Ideally, when the daemon asks for direct io at open time, the 2476 * standard file flag should be set according to this, so that would 2477 * just change the default mode, which later on could be changed via 2478 * fcntl(2). 2479 * But this doesn't work, the O_DIRECT flag gets cleared at some point 2480 * (don't know where). So to make any use of the Fuse direct_io option, 2481 * we hardwire it into the file's private data (similarly to Linux, 2482 * btw.). 2483 */ 2484 directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); 2485 2486 fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); 2487 if (directio) { 2488 off_t start, end, filesize; 2489 bool pages = (ioflag & IO_VMIO) != 0; 2490 2491 SDT_PROBE2(fusefs, , vnops, trace, 1, "direct write of vnode"); 2492 2493 err = fuse_vnode_size(vp, &filesize, cred, curthread); 2494 if (err) 2495 goto out; 2496 2497 start = uio->uio_offset; 2498 end = start + uio->uio_resid; 2499 if (!pages) { 2500 err = fuse_inval_buf_range(vp, filesize, start, 2501 end); 2502 if (err) 2503 goto out; 2504 } 2505 err = fuse_write_directbackend(vp, uio, cred, fufh, 2506 filesize, ioflag, pages); 2507 } else { 2508 SDT_PROBE2(fusefs, , vnops, trace, 1, 2509 "buffered write of vnode"); 2510 if (!fsess_opt_writeback(vnode_mount(vp))) 2511 ioflag |= IO_SYNC; 2512 err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid); 2513 } 2514 fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td); 2515 2516 out: 2517 if (closefufh) 2518 fuse_filehandle_close(vp, fufh, curthread, cred); 2519 2520 return (err); 2521 } 2522 2523 static daddr_t 2524 fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) 2525 { 2526 const int biosize = fuse_iosize(vp); 2527 2528 return (off / biosize); 2529 } 2530 2531 static int 2532 fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *blksz) 2533 { 2534 off_t filesize; 2535 int err; 2536 const int biosize = fuse_iosize(vp); 2537 2538 err = fuse_vnode_size(vp, &filesize, NULL, NULL); 2539 if (err) { 2540 /* This will turn into a SIGBUS */ 2541 return (EIO); 2542 } else if ((off_t)lbn * biosize >= filesize) { 2543 *blksz = 0; 2544 } else if ((off_t)(lbn + 1) * biosize > filesize) { 2545 *blksz = filesize - (off_t)lbn *biosize; 2546 } else { 2547 *blksz = biosize; 2548 } 2549 return (0); 2550 } 2551 2552 /* 2553 struct vnop_getpages_args { 2554 struct vnode *a_vp; 2555 vm_page_t *a_m; 2556 int a_count; 2557 int a_reqpage; 2558 }; 2559 */ 2560 static int 2561 fuse_vnop_getpages(struct vop_getpages_args *ap) 2562 { 2563 struct vnode *vp = ap->a_vp; 2564 2565 if (!fsess_opt_mmap(vnode_mount(vp))) { 2566 SDT_PROBE2(fusefs, , vnops, trace, 1, 2567 "called on non-cacheable vnode??\n"); 2568 return (VM_PAGER_ERROR); 2569 } 2570 2571 return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, 2572 ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz)); 2573 } 2574 2575 static const char extattr_namespace_separator = '.'; 2576 2577 /* 2578 struct vop_getextattr_args { 2579 struct vop_generic_args a_gen; 2580 struct vnode *a_vp; 2581 int a_attrnamespace; 2582 const char *a_name; 2583 struct uio *a_uio; 2584 size_t *a_size; 2585 struct ucred *a_cred; 2586 struct thread *a_td; 2587 }; 2588 */ 2589 static int 2590 fuse_vnop_getextattr(struct vop_getextattr_args *ap) 2591 { 2592 struct vnode *vp = ap->a_vp; 2593 struct uio *uio = ap->a_uio; 2594 struct fuse_dispatcher fdi; 2595 struct fuse_getxattr_in *get_xattr_in; 2596 struct fuse_getxattr_out *get_xattr_out; 2597 struct mount *mp = vnode_mount(vp); 2598 struct thread *td = ap->a_td; 2599 struct ucred *cred = ap->a_cred; 2600 char *prefix; 2601 char *attr_str; 2602 size_t len; 2603 int err; 2604 2605 if (fuse_isdeadfs(vp)) 2606 return (ENXIO); 2607 2608 if (fsess_not_impl(mp, FUSE_GETXATTR)) 2609 return EOPNOTSUPP; 2610 2611 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); 2612 if (err) 2613 return err; 2614 2615 /* Default to looking for user attributes. */ 2616 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2617 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2618 else 2619 prefix = EXTATTR_NAMESPACE_USER_STRING; 2620 2621 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 2622 strlen(ap->a_name) + 1; 2623 2624 fdisp_init(&fdi, len + sizeof(*get_xattr_in)); 2625 fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred); 2626 2627 get_xattr_in = fdi.indata; 2628 /* 2629 * Check to see whether we're querying the available size or 2630 * issuing the actual request. If we pass in 0, we get back struct 2631 * fuse_getxattr_out. If we pass in a non-zero size, we get back 2632 * that much data, without the struct fuse_getxattr_out header. 2633 */ 2634 if (uio == NULL) 2635 get_xattr_in->size = 0; 2636 else 2637 get_xattr_in->size = uio->uio_resid; 2638 2639 attr_str = (char *)fdi.indata + sizeof(*get_xattr_in); 2640 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 2641 ap->a_name); 2642 2643 err = fdisp_wait_answ(&fdi); 2644 if (err != 0) { 2645 if (err == ENOSYS) { 2646 fsess_set_notimpl(mp, FUSE_GETXATTR); 2647 err = EOPNOTSUPP; 2648 } 2649 goto out; 2650 } 2651 2652 get_xattr_out = fdi.answ; 2653 2654 if (ap->a_size != NULL) 2655 *ap->a_size = get_xattr_out->size; 2656 2657 if (uio != NULL) 2658 err = uiomove(fdi.answ, fdi.iosize, uio); 2659 2660 out: 2661 fdisp_destroy(&fdi); 2662 return (err); 2663 } 2664 2665 /* 2666 struct vop_setextattr_args { 2667 struct vop_generic_args a_gen; 2668 struct vnode *a_vp; 2669 int a_attrnamespace; 2670 const char *a_name; 2671 struct uio *a_uio; 2672 struct ucred *a_cred; 2673 struct thread *a_td; 2674 }; 2675 */ 2676 static int 2677 fuse_vnop_setextattr(struct vop_setextattr_args *ap) 2678 { 2679 struct vnode *vp = ap->a_vp; 2680 struct uio *uio = ap->a_uio; 2681 struct fuse_dispatcher fdi; 2682 struct fuse_setxattr_in *set_xattr_in; 2683 struct mount *mp = vnode_mount(vp); 2684 struct thread *td = ap->a_td; 2685 struct ucred *cred = ap->a_cred; 2686 char *prefix; 2687 size_t len; 2688 char *attr_str; 2689 int err; 2690 2691 if (fuse_isdeadfs(vp)) 2692 return (ENXIO); 2693 2694 if (fsess_not_impl(mp, FUSE_SETXATTR)) 2695 return EOPNOTSUPP; 2696 2697 if (vfs_isrdonly(mp)) 2698 return EROFS; 2699 2700 /* Deleting xattrs must use VOP_DELETEEXTATTR instead */ 2701 if (ap->a_uio == NULL) { 2702 /* 2703 * If we got here as fallback from VOP_DELETEEXTATTR, then 2704 * return EOPNOTSUPP. 2705 */ 2706 if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) 2707 return (EOPNOTSUPP); 2708 else 2709 return (EINVAL); 2710 } 2711 2712 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, 2713 VWRITE); 2714 if (err) 2715 return err; 2716 2717 /* Default to looking for user attributes. */ 2718 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2719 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2720 else 2721 prefix = EXTATTR_NAMESPACE_USER_STRING; 2722 2723 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 2724 strlen(ap->a_name) + 1; 2725 2726 fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid); 2727 fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred); 2728 2729 set_xattr_in = fdi.indata; 2730 set_xattr_in->size = uio->uio_resid; 2731 2732 attr_str = (char *)fdi.indata + sizeof(*set_xattr_in); 2733 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 2734 ap->a_name); 2735 2736 err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len, 2737 uio->uio_resid, uio); 2738 if (err != 0) { 2739 goto out; 2740 } 2741 2742 err = fdisp_wait_answ(&fdi); 2743 2744 if (err == ENOSYS) { 2745 fsess_set_notimpl(mp, FUSE_SETXATTR); 2746 err = EOPNOTSUPP; 2747 } 2748 if (err == ERESTART) { 2749 /* Can't restart after calling uiomove */ 2750 err = EINTR; 2751 } 2752 2753 out: 2754 fdisp_destroy(&fdi); 2755 return (err); 2756 } 2757 2758 /* 2759 * The Linux / FUSE extended attribute list is simply a collection of 2760 * NUL-terminated strings. The FreeBSD extended attribute list is a single 2761 * byte length followed by a non-NUL terminated string. So, this allows 2762 * conversion of the Linux / FUSE format to the FreeBSD format in place. 2763 * Linux attribute names are reported with the namespace as a prefix (e.g. 2764 * "user.attribute_name"), but in FreeBSD they are reported without the 2765 * namespace prefix (e.g. "attribute_name"). So, we're going from: 2766 * 2767 * user.attr_name1\0user.attr_name2\0 2768 * 2769 * to: 2770 * 2771 * <num>attr_name1<num>attr_name2 2772 * 2773 * Where "<num>" is a single byte number of characters in the attribute name. 2774 * 2775 * Args: 2776 * prefix - exattr namespace prefix string 2777 * list, list_len - input list with namespace prefixes 2778 * bsd_list, bsd_list_len - output list compatible with bsd vfs 2779 */ 2780 static int 2781 fuse_xattrlist_convert(char *prefix, const char *list, int list_len, 2782 char *bsd_list, int *bsd_list_len) 2783 { 2784 int len, pos, dist_to_next, prefix_len; 2785 2786 pos = 0; 2787 *bsd_list_len = 0; 2788 prefix_len = strlen(prefix); 2789 2790 while (pos < list_len && list[pos] != '\0') { 2791 dist_to_next = strlen(&list[pos]) + 1; 2792 if (bcmp(&list[pos], prefix, prefix_len) == 0 && 2793 list[pos + prefix_len] == extattr_namespace_separator) { 2794 len = dist_to_next - 2795 (prefix_len + sizeof(extattr_namespace_separator)) - 1; 2796 if (len >= EXTATTR_MAXNAMELEN) 2797 return (ENAMETOOLONG); 2798 2799 bsd_list[*bsd_list_len] = len; 2800 memcpy(&bsd_list[*bsd_list_len + 1], 2801 &list[pos + prefix_len + 2802 sizeof(extattr_namespace_separator)], len); 2803 2804 *bsd_list_len += len + 1; 2805 } 2806 2807 pos += dist_to_next; 2808 } 2809 2810 return (0); 2811 } 2812 2813 /* 2814 * List extended attributes 2815 * 2816 * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which 2817 * has a number of differences compared to its FreeBSD equivalent, 2818 * extattr_list_file: 2819 * 2820 * - FUSE_LISTXATTR returns all extended attributes across all namespaces, 2821 * whereas listxattr(2) only returns attributes for a single namespace 2822 * - FUSE_LISTXATTR prepends each attribute name with "namespace." 2823 * - If the provided buffer is not large enough to hold the result, 2824 * FUSE_LISTXATTR should return ERANGE, whereas listxattr is expected to 2825 * return as many results as will fit. 2826 */ 2827 /* 2828 struct vop_listextattr_args { 2829 struct vop_generic_args a_gen; 2830 struct vnode *a_vp; 2831 int a_attrnamespace; 2832 struct uio *a_uio; 2833 size_t *a_size; 2834 struct ucred *a_cred; 2835 struct thread *a_td; 2836 }; 2837 */ 2838 static int 2839 fuse_vnop_listextattr(struct vop_listextattr_args *ap) 2840 { 2841 struct vnode *vp = ap->a_vp; 2842 struct uio *uio = ap->a_uio; 2843 struct fuse_dispatcher fdi; 2844 struct fuse_listxattr_in *list_xattr_in; 2845 struct fuse_listxattr_out *list_xattr_out; 2846 struct mount *mp = vnode_mount(vp); 2847 struct thread *td = ap->a_td; 2848 struct ucred *cred = ap->a_cred; 2849 char *prefix; 2850 char *bsd_list = NULL; 2851 char *linux_list; 2852 int bsd_list_len; 2853 int linux_list_len; 2854 int err; 2855 2856 if (fuse_isdeadfs(vp)) 2857 return (ENXIO); 2858 2859 if (fsess_not_impl(mp, FUSE_LISTXATTR)) 2860 return EOPNOTSUPP; 2861 2862 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); 2863 if (err) 2864 return err; 2865 2866 /* 2867 * Add space for a NUL and the period separator if enabled. 2868 * Default to looking for user attributes. 2869 */ 2870 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2871 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2872 else 2873 prefix = EXTATTR_NAMESPACE_USER_STRING; 2874 2875 fdisp_init(&fdi, sizeof(*list_xattr_in)); 2876 fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); 2877 2878 /* 2879 * Retrieve Linux / FUSE compatible list size. 2880 */ 2881 list_xattr_in = fdi.indata; 2882 list_xattr_in->size = 0; 2883 2884 err = fdisp_wait_answ(&fdi); 2885 if (err != 0) { 2886 if (err == ENOSYS) { 2887 fsess_set_notimpl(mp, FUSE_LISTXATTR); 2888 err = EOPNOTSUPP; 2889 } 2890 goto out; 2891 } 2892 2893 list_xattr_out = fdi.answ; 2894 linux_list_len = list_xattr_out->size; 2895 if (linux_list_len == 0) { 2896 if (ap->a_size != NULL) 2897 *ap->a_size = linux_list_len; 2898 goto out; 2899 } 2900 2901 /* 2902 * Retrieve Linux / FUSE compatible list values. 2903 */ 2904 fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); 2905 list_xattr_in = fdi.indata; 2906 list_xattr_in->size = linux_list_len; 2907 2908 err = fdisp_wait_answ(&fdi); 2909 if (err == ERANGE) { 2910 /* 2911 * Race detected. The attribute list must've grown since the 2912 * first FUSE_LISTXATTR call. Start over. Go all the way back 2913 * to userland so we can process signals, if necessary, before 2914 * restarting. 2915 */ 2916 err = ERESTART; 2917 goto out; 2918 } else if (err != 0) 2919 goto out; 2920 2921 linux_list = fdi.answ; 2922 /* FUSE doesn't allow the server to return more data than requested */ 2923 if (fdi.iosize > linux_list_len) { 2924 struct fuse_data *data = fuse_get_mpdata(mp); 2925 2926 fuse_warn(data, FSESS_WARN_LSEXTATTR_LONG, 2927 "server returned " 2928 "more extended attribute data than requested; " 2929 "should've returned ERANGE instead."); 2930 } else { 2931 /* But returning less data is fine */ 2932 linux_list_len = fdi.iosize; 2933 } 2934 2935 /* 2936 * Retrieve the BSD compatible list values. 2937 * The Linux / FUSE attribute list format isn't the same 2938 * as FreeBSD's format. So we need to transform it into 2939 * FreeBSD's format before giving it to the user. 2940 */ 2941 bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK); 2942 err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len, 2943 bsd_list, &bsd_list_len); 2944 if (err != 0) 2945 goto out; 2946 2947 if (ap->a_size != NULL) 2948 *ap->a_size = bsd_list_len; 2949 2950 if (uio != NULL) 2951 err = uiomove(bsd_list, bsd_list_len, uio); 2952 2953 out: 2954 free(bsd_list, M_TEMP); 2955 fdisp_destroy(&fdi); 2956 return (err); 2957 } 2958 2959 /* 2960 struct vop_deallocate_args { 2961 struct vop_generic_args a_gen; 2962 struct vnode *a_vp; 2963 off_t *a_offset; 2964 off_t *a_len; 2965 int a_flags; 2966 int a_ioflag; 2967 struct ucred *a_cred; 2968 }; 2969 */ 2970 static int 2971 fuse_vnop_deallocate(struct vop_deallocate_args *ap) 2972 { 2973 struct vnode *vp = ap->a_vp; 2974 struct mount *mp = vnode_mount(vp); 2975 struct fuse_filehandle *fufh; 2976 struct fuse_dispatcher fdi; 2977 struct fuse_fallocate_in *ffi; 2978 struct ucred *cred = ap->a_cred; 2979 pid_t pid = curthread->td_proc->p_pid; 2980 off_t *len = ap->a_len; 2981 off_t *offset = ap->a_offset; 2982 int ioflag = ap->a_ioflag; 2983 off_t filesize; 2984 int err; 2985 bool closefufh = false; 2986 2987 if (fuse_isdeadfs(vp)) 2988 return (ENXIO); 2989 2990 if (vfs_isrdonly(mp)) 2991 return (EROFS); 2992 2993 if (fsess_not_impl(mp, FUSE_FALLOCATE)) 2994 goto fallback; 2995 2996 err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 2997 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 2998 /* 2999 * nfsd will do I/O without first doing VOP_OPEN. We 3000 * must implicitly open the file here 3001 */ 3002 err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred); 3003 closefufh = true; 3004 } 3005 if (err) 3006 return (err); 3007 3008 fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); 3009 3010 err = fuse_vnode_size(vp, &filesize, cred, curthread); 3011 if (err) 3012 goto out; 3013 fuse_inval_buf_range(vp, filesize, *offset, *offset + *len); 3014 3015 fdisp_init(&fdi, sizeof(*ffi)); 3016 fdisp_make_vp(&fdi, FUSE_FALLOCATE, vp, curthread, cred); 3017 ffi = fdi.indata; 3018 ffi->fh = fufh->fh_id; 3019 ffi->offset = *offset; 3020 ffi->length = *len; 3021 /* 3022 * FreeBSD's fspacectl is equivalent to Linux's fallocate with 3023 * mode == FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE 3024 */ 3025 ffi->mode = FUSE_FALLOC_FL_PUNCH_HOLE | FUSE_FALLOC_FL_KEEP_SIZE; 3026 err = fdisp_wait_answ(&fdi); 3027 3028 if (err == ENOSYS) { 3029 fdisp_destroy(&fdi); 3030 fsess_set_notimpl(mp, FUSE_FALLOCATE); 3031 goto fallback; 3032 } else if (err == EOPNOTSUPP) { 3033 /* 3034 * The file system server does not support FUSE_FALLOCATE with 3035 * the supplied mode for this particular file. 3036 */ 3037 fdisp_destroy(&fdi); 3038 goto fallback; 3039 } else if (!err) { 3040 /* 3041 * Clip the returned offset to EoF. Do it here rather than 3042 * before FUSE_FALLOCATE just in case the kernel's cached file 3043 * size is out of date. Unfortunately, FUSE does not return 3044 * any information about filesize from that operation. 3045 */ 3046 *offset = MIN(*offset + *len, filesize); 3047 *len = 0; 3048 fuse_vnode_undirty_cached_timestamps(vp, false); 3049 fuse_internal_clear_suid_on_write(vp, cred, curthread); 3050 3051 if (ioflag & IO_SYNC) 3052 err = fuse_internal_fsync(vp, curthread, MNT_WAIT, 3053 false); 3054 } 3055 3056 out: 3057 fdisp_destroy(&fdi); 3058 if (closefufh) 3059 fuse_filehandle_close(vp, fufh, curthread, cred); 3060 3061 return (err); 3062 3063 fallback: 3064 if (closefufh) 3065 fuse_filehandle_close(vp, fufh, curthread, cred); 3066 3067 return (vop_stddeallocate(ap)); 3068 } 3069 3070 /* 3071 struct vop_deleteextattr_args { 3072 struct vop_generic_args a_gen; 3073 struct vnode *a_vp; 3074 int a_attrnamespace; 3075 const char *a_name; 3076 struct ucred *a_cred; 3077 struct thread *a_td; 3078 }; 3079 */ 3080 static int 3081 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) 3082 { 3083 struct vnode *vp = ap->a_vp; 3084 struct fuse_dispatcher fdi; 3085 struct mount *mp = vnode_mount(vp); 3086 struct thread *td = ap->a_td; 3087 struct ucred *cred = ap->a_cred; 3088 char *prefix; 3089 size_t len; 3090 char *attr_str; 3091 int err; 3092 3093 if (fuse_isdeadfs(vp)) 3094 return (ENXIO); 3095 3096 if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) 3097 return EOPNOTSUPP; 3098 3099 if (vfs_isrdonly(mp)) 3100 return EROFS; 3101 3102 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, 3103 VWRITE); 3104 if (err) 3105 return err; 3106 3107 /* Default to looking for user attributes. */ 3108 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 3109 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 3110 else 3111 prefix = EXTATTR_NAMESPACE_USER_STRING; 3112 3113 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 3114 strlen(ap->a_name) + 1; 3115 3116 fdisp_init(&fdi, len); 3117 fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred); 3118 3119 attr_str = fdi.indata; 3120 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 3121 ap->a_name); 3122 3123 err = fdisp_wait_answ(&fdi); 3124 if (err == ENOSYS) { 3125 fsess_set_notimpl(mp, FUSE_REMOVEXATTR); 3126 err = EOPNOTSUPP; 3127 } 3128 3129 fdisp_destroy(&fdi); 3130 return (err); 3131 } 3132 3133 /* 3134 struct vnop_print_args { 3135 struct vnode *a_vp; 3136 }; 3137 */ 3138 static int 3139 fuse_vnop_print(struct vop_print_args *ap) 3140 { 3141 struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp); 3142 3143 printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n", 3144 (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid, 3145 (uintmax_t)fvdat->nlookup, 3146 fvdat->flag); 3147 3148 return 0; 3149 } 3150 3151 /* 3152 * Get an NFS filehandle for a FUSE file. 3153 * 3154 * This will only work for FUSE file systems that guarantee the uniqueness of 3155 * nodeid:generation, which most don't. 3156 */ 3157 /* 3158 vop_vptofh { 3159 IN struct vnode *a_vp; 3160 IN struct fid *a_fhp; 3161 }; 3162 */ 3163 static int 3164 fuse_vnop_vptofh(struct vop_vptofh_args *ap) 3165 { 3166 struct vnode *vp = ap->a_vp; 3167 struct fuse_vnode_data *fvdat = VTOFUD(vp); 3168 struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp); 3169 _Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid), 3170 "FUSE fid type is too big"); 3171 struct mount *mp = vnode_mount(vp); 3172 struct fuse_data *data = fuse_get_mpdata(mp); 3173 struct vattr va; 3174 int err; 3175 3176 if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) { 3177 /* NFS requires lookups for "." and ".." */ 3178 SDT_PROBE2(fusefs, , vnops, trace, 1, 3179 "VOP_VPTOFH without FUSE_EXPORT_SUPPORT"); 3180 return EOPNOTSUPP; 3181 } 3182 if ((mp->mnt_flag & MNT_EXPORTED) && 3183 !(data->dataflags & FSESS_NO_OPENDIR_SUPPORT)) 3184 { 3185 /* 3186 * NFS is stateless, so nfsd must reopen a directory on every 3187 * call to VOP_READDIR, passing in the d_off field from the 3188 * final dirent of the previous invocation. But without 3189 * FUSE_NO_OPENDIR_SUPPORT, the FUSE protocol does not 3190 * guarantee that d_off will be valid after a directory is 3191 * closed and reopened. So prohibit exporting FUSE file 3192 * systems that don't set that flag. 3193 * 3194 * But userspace NFS servers don't have this problem. 3195 */ 3196 SDT_PROBE2(fusefs, , vnops, trace, 1, 3197 "VOP_VPTOFH without FUSE_NO_OPENDIR_SUPPORT"); 3198 return EOPNOTSUPP; 3199 } 3200 3201 err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); 3202 if (err) 3203 return err; 3204 3205 /*ip = VTOI(ap->a_vp);*/ 3206 /*ufhp = (struct ufid *)ap->a_fhp;*/ 3207 fhp->len = sizeof(struct fuse_fid); 3208 fhp->nid = fvdat->nid; 3209 if (fvdat->generation <= UINT32_MAX) 3210 fhp->gen = fvdat->generation; 3211 else 3212 return EOVERFLOW; 3213 return (0); 3214 } 3215