1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Google Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Copyright (C) 2005 Csaba Henk. 34 * All rights reserved. 35 * 36 * Copyright (c) 2019 The FreeBSD Foundation 37 * 38 * Portions of this software were developed by BFF Storage Systems, LLC under 39 * sponsorship from the FreeBSD Foundation. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 */ 62 63 #include <sys/param.h> 64 #include <sys/module.h> 65 #include <sys/systm.h> 66 #include <sys/errno.h> 67 #include <sys/kernel.h> 68 #include <sys/conf.h> 69 #include <sys/filio.h> 70 #include <sys/uio.h> 71 #include <sys/malloc.h> 72 #include <sys/queue.h> 73 #include <sys/limits.h> 74 #include <sys/lock.h> 75 #include <sys/rwlock.h> 76 #include <sys/sx.h> 77 #include <sys/proc.h> 78 #include <sys/mount.h> 79 #include <sys/vnode.h> 80 #include <sys/namei.h> 81 #include <sys/extattr.h> 82 #include <sys/stat.h> 83 #include <sys/unistd.h> 84 #include <sys/filedesc.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/dirent.h> 88 #include <sys/bio.h> 89 #include <sys/buf.h> 90 #include <sys/sysctl.h> 91 #include <sys/vmmeter.h> 92 93 #include <vm/vm.h> 94 #include <vm/vm_extern.h> 95 #include <vm/pmap.h> 96 #include <vm/vm_map.h> 97 #include <vm/vm_page.h> 98 #include <vm/vm_param.h> 99 #include <vm/vm_object.h> 100 #include <vm/vm_pager.h> 101 #include <vm/vnode_pager.h> 102 #include <vm/vm_object.h> 103 104 #include "fuse.h" 105 #include "fuse_file.h" 106 #include "fuse_internal.h" 107 #include "fuse_ipc.h" 108 #include "fuse_node.h" 109 #include "fuse_io.h" 110 111 #include <sys/priv.h> 112 113 /* Maximum number of hardlinks to a single FUSE file */ 114 #define FUSE_LINK_MAX UINT32_MAX 115 116 SDT_PROVIDER_DECLARE(fusefs); 117 /* 118 * Fuse trace probe: 119 * arg0: verbosity. Higher numbers give more verbose messages 120 * arg1: Textual message 121 */ 122 SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*"); 123 124 /* vnode ops */ 125 static vop_access_t fuse_vnop_access; 126 static vop_advlock_t fuse_vnop_advlock; 127 static vop_allocate_t fuse_vnop_allocate; 128 static vop_bmap_t fuse_vnop_bmap; 129 static vop_close_t fuse_fifo_close; 130 static vop_close_t fuse_vnop_close; 131 static vop_copy_file_range_t fuse_vnop_copy_file_range; 132 static vop_create_t fuse_vnop_create; 133 static vop_deallocate_t fuse_vnop_deallocate; 134 static vop_deleteextattr_t fuse_vnop_deleteextattr; 135 static vop_fdatasync_t fuse_vnop_fdatasync; 136 static vop_fsync_t fuse_vnop_fsync; 137 static vop_getattr_t fuse_vnop_getattr; 138 static vop_getextattr_t fuse_vnop_getextattr; 139 static vop_inactive_t fuse_vnop_inactive; 140 static vop_ioctl_t fuse_vnop_ioctl; 141 static vop_link_t fuse_vnop_link; 142 static vop_listextattr_t fuse_vnop_listextattr; 143 static vop_lookup_t fuse_vnop_lookup; 144 static vop_mkdir_t fuse_vnop_mkdir; 145 static vop_mknod_t fuse_vnop_mknod; 146 static vop_open_t fuse_vnop_open; 147 static vop_pathconf_t fuse_vnop_pathconf; 148 static vop_read_t fuse_vnop_read; 149 static vop_readdir_t fuse_vnop_readdir; 150 static vop_readlink_t fuse_vnop_readlink; 151 static vop_reclaim_t fuse_vnop_reclaim; 152 static vop_remove_t fuse_vnop_remove; 153 static vop_rename_t fuse_vnop_rename; 154 static vop_rmdir_t fuse_vnop_rmdir; 155 static vop_setattr_t fuse_vnop_setattr; 156 static vop_setextattr_t fuse_vnop_setextattr; 157 static vop_strategy_t fuse_vnop_strategy; 158 static vop_symlink_t fuse_vnop_symlink; 159 static vop_write_t fuse_vnop_write; 160 static vop_getpages_t fuse_vnop_getpages; 161 static vop_print_t fuse_vnop_print; 162 static vop_vptofh_t fuse_vnop_vptofh; 163 164 struct vop_vector fuse_fifoops = { 165 .vop_default = &fifo_specops, 166 .vop_access = fuse_vnop_access, 167 .vop_close = fuse_fifo_close, 168 .vop_fsync = fuse_vnop_fsync, 169 .vop_getattr = fuse_vnop_getattr, 170 .vop_inactive = fuse_vnop_inactive, 171 .vop_pathconf = fuse_vnop_pathconf, 172 .vop_print = fuse_vnop_print, 173 .vop_read = VOP_PANIC, 174 .vop_reclaim = fuse_vnop_reclaim, 175 .vop_setattr = fuse_vnop_setattr, 176 .vop_write = VOP_PANIC, 177 .vop_vptofh = fuse_vnop_vptofh, 178 }; 179 VFS_VOP_VECTOR_REGISTER(fuse_fifoops); 180 181 struct vop_vector fuse_vnops = { 182 .vop_allocate = fuse_vnop_allocate, 183 .vop_default = &default_vnodeops, 184 .vop_access = fuse_vnop_access, 185 .vop_advlock = fuse_vnop_advlock, 186 .vop_bmap = fuse_vnop_bmap, 187 .vop_close = fuse_vnop_close, 188 .vop_copy_file_range = fuse_vnop_copy_file_range, 189 .vop_create = fuse_vnop_create, 190 .vop_deallocate = fuse_vnop_deallocate, 191 .vop_deleteextattr = fuse_vnop_deleteextattr, 192 .vop_fsync = fuse_vnop_fsync, 193 .vop_fdatasync = fuse_vnop_fdatasync, 194 .vop_getattr = fuse_vnop_getattr, 195 .vop_getextattr = fuse_vnop_getextattr, 196 .vop_inactive = fuse_vnop_inactive, 197 .vop_ioctl = fuse_vnop_ioctl, 198 .vop_link = fuse_vnop_link, 199 .vop_listextattr = fuse_vnop_listextattr, 200 .vop_lookup = fuse_vnop_lookup, 201 .vop_mkdir = fuse_vnop_mkdir, 202 .vop_mknod = fuse_vnop_mknod, 203 .vop_open = fuse_vnop_open, 204 .vop_pathconf = fuse_vnop_pathconf, 205 /* 206 * TODO: implement vop_poll after upgrading to protocol 7.21. 207 * FUSE_POLL was added in protocol 7.11, but it's kind of broken until 208 * 7.21, which adds the ability for the client to choose which poll 209 * events it wants, and for a client to deregister a file handle 210 */ 211 .vop_read = fuse_vnop_read, 212 .vop_readdir = fuse_vnop_readdir, 213 .vop_readlink = fuse_vnop_readlink, 214 .vop_reclaim = fuse_vnop_reclaim, 215 .vop_remove = fuse_vnop_remove, 216 .vop_rename = fuse_vnop_rename, 217 .vop_rmdir = fuse_vnop_rmdir, 218 .vop_setattr = fuse_vnop_setattr, 219 .vop_setextattr = fuse_vnop_setextattr, 220 .vop_strategy = fuse_vnop_strategy, 221 .vop_symlink = fuse_vnop_symlink, 222 .vop_write = fuse_vnop_write, 223 .vop_getpages = fuse_vnop_getpages, 224 .vop_print = fuse_vnop_print, 225 .vop_vptofh = fuse_vnop_vptofh, 226 }; 227 VFS_VOP_VECTOR_REGISTER(fuse_vnops); 228 229 /* Check permission for extattr operations, much like extattr_check_cred */ 230 static int 231 fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred, 232 struct thread *td, accmode_t accmode) 233 { 234 struct mount *mp = vnode_mount(vp); 235 struct fuse_data *data = fuse_get_mpdata(mp); 236 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; 237 238 /* 239 * Kernel-invoked always succeeds. 240 */ 241 if (cred == NOCRED) 242 return (0); 243 244 /* 245 * Do not allow privileged processes in jail to directly manipulate 246 * system attributes. 247 */ 248 switch (ns) { 249 case EXTATTR_NAMESPACE_SYSTEM: 250 if (default_permissions) { 251 return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); 252 } 253 return (0); 254 case EXTATTR_NAMESPACE_USER: 255 if (default_permissions) { 256 return (fuse_internal_access(vp, accmode, td, cred)); 257 } 258 return (0); 259 default: 260 return (EPERM); 261 } 262 } 263 264 /* Get a filehandle for a directory */ 265 static int 266 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp, 267 struct ucred *cred, pid_t pid) 268 { 269 if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0) 270 return 0; 271 return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid); 272 } 273 274 /* Send FUSE_FLUSH for this vnode */ 275 static int 276 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) 277 { 278 struct fuse_flush_in *ffi; 279 struct fuse_filehandle *fufh; 280 struct fuse_dispatcher fdi; 281 struct thread *td = curthread; 282 struct mount *mp = vnode_mount(vp); 283 int err; 284 285 if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH)) 286 return 0; 287 288 err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); 289 if (err) 290 return err; 291 292 fdisp_init(&fdi, sizeof(*ffi)); 293 fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred); 294 ffi = fdi.indata; 295 ffi->fh = fufh->fh_id; 296 /* 297 * If the file has a POSIX lock then we're supposed to set lock_owner. 298 * If not, then lock_owner is undefined. So we may as well always set 299 * it. 300 */ 301 ffi->lock_owner = td->td_proc->p_pid; 302 303 err = fdisp_wait_answ(&fdi); 304 if (err == ENOSYS) { 305 fsess_set_notimpl(mp, FUSE_FLUSH); 306 err = 0; 307 } 308 fdisp_destroy(&fdi); 309 return err; 310 } 311 312 /* Close wrapper for fifos. */ 313 static int 314 fuse_fifo_close(struct vop_close_args *ap) 315 { 316 return (fifo_specops.vop_close(ap)); 317 } 318 319 /* Invalidate a range of cached data, whether dirty of not */ 320 static int 321 fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end) 322 { 323 struct buf *bp; 324 daddr_t left_lbn, end_lbn, right_lbn; 325 off_t new_filesize; 326 int iosize, left_on, right_on, right_blksize; 327 328 iosize = fuse_iosize(vp); 329 left_lbn = start / iosize; 330 end_lbn = howmany(end, iosize); 331 left_on = start & (iosize - 1); 332 if (left_on != 0) { 333 bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0); 334 if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) { 335 /* 336 * Flush the dirty buffer, because we don't have a 337 * byte-granular way to record which parts of the 338 * buffer are valid. 339 */ 340 bwrite(bp); 341 if (bp->b_error) 342 return (bp->b_error); 343 } else { 344 brelse(bp); 345 } 346 } 347 right_on = end & (iosize - 1); 348 if (right_on != 0) { 349 right_lbn = end / iosize; 350 new_filesize = MAX(filesize, end); 351 right_blksize = MIN(iosize, new_filesize - iosize * right_lbn); 352 bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0); 353 if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) { 354 /* 355 * Flush the dirty buffer, because we don't have a 356 * byte-granular way to record which parts of the 357 * buffer are valid. 358 */ 359 bwrite(bp); 360 if (bp->b_error) 361 return (bp->b_error); 362 } else { 363 brelse(bp); 364 } 365 } 366 367 v_inval_buf_range(vp, left_lbn, end_lbn, iosize); 368 return (0); 369 } 370 371 372 /* Send FUSE_LSEEK for this node */ 373 static int 374 fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred, 375 pid_t pid, off_t *offp, int whence) 376 { 377 struct fuse_dispatcher fdi; 378 struct fuse_filehandle *fufh; 379 struct fuse_lseek_in *flsi; 380 struct fuse_lseek_out *flso; 381 struct mount *mp = vnode_mount(vp); 382 int err; 383 384 ASSERT_VOP_LOCKED(vp, __func__); 385 386 err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid); 387 if (err) 388 return (err); 389 fdisp_init(&fdi, sizeof(*flsi)); 390 fdisp_make_vp(&fdi, FUSE_LSEEK, vp, td, cred); 391 flsi = fdi.indata; 392 flsi->fh = fufh->fh_id; 393 flsi->offset = *offp; 394 flsi->whence = whence; 395 err = fdisp_wait_answ(&fdi); 396 if (err == ENOSYS) { 397 fsess_set_notimpl(mp, FUSE_LSEEK); 398 } else if (err == 0) { 399 fsess_set_impl(mp, FUSE_LSEEK); 400 flso = fdi.answ; 401 *offp = flso->offset; 402 } 403 fdisp_destroy(&fdi); 404 405 return (err); 406 } 407 408 /* 409 struct vnop_access_args { 410 struct vnode *a_vp; 411 #if VOP_ACCESS_TAKES_ACCMODE_T 412 accmode_t a_accmode; 413 #else 414 int a_mode; 415 #endif 416 struct ucred *a_cred; 417 struct thread *a_td; 418 }; 419 */ 420 static int 421 fuse_vnop_access(struct vop_access_args *ap) 422 { 423 struct vnode *vp = ap->a_vp; 424 int accmode = ap->a_accmode; 425 struct ucred *cred = ap->a_cred; 426 427 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 428 429 int err; 430 431 if (fuse_isdeadfs(vp)) { 432 if (vnode_isvroot(vp)) { 433 return 0; 434 } 435 return ENXIO; 436 } 437 if (!(data->dataflags & FSESS_INITED)) { 438 if (vnode_isvroot(vp)) { 439 if (priv_check_cred(cred, PRIV_VFS_ADMIN) || 440 (fuse_match_cred(data->daemoncred, cred) == 0)) { 441 return 0; 442 } 443 } 444 return EBADF; 445 } 446 if (vnode_islnk(vp)) { 447 return 0; 448 } 449 450 err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred); 451 return err; 452 } 453 454 /* 455 * struct vop_advlock_args { 456 * struct vop_generic_args a_gen; 457 * struct vnode *a_vp; 458 * void *a_id; 459 * int a_op; 460 * struct flock *a_fl; 461 * int a_flags; 462 * } 463 */ 464 static int 465 fuse_vnop_advlock(struct vop_advlock_args *ap) 466 { 467 struct vnode *vp = ap->a_vp; 468 struct flock *fl = ap->a_fl; 469 struct thread *td = curthread; 470 struct ucred *cred = td->td_ucred; 471 pid_t pid = td->td_proc->p_pid; 472 struct fuse_filehandle *fufh; 473 struct fuse_dispatcher fdi; 474 struct fuse_lk_in *fli; 475 struct fuse_lk_out *flo; 476 struct vattr vattr; 477 enum fuse_opcode op; 478 off_t size, start; 479 int dataflags, err; 480 int flags = ap->a_flags; 481 482 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; 483 484 if (fuse_isdeadfs(vp)) { 485 return ENXIO; 486 } 487 488 switch(ap->a_op) { 489 case F_GETLK: 490 op = FUSE_GETLK; 491 break; 492 case F_SETLK: 493 if (flags & F_WAIT) 494 op = FUSE_SETLKW; 495 else 496 op = FUSE_SETLK; 497 break; 498 case F_UNLCK: 499 op = FUSE_SETLK; 500 break; 501 default: 502 return EINVAL; 503 } 504 505 if (!(dataflags & FSESS_POSIX_LOCKS)) 506 return vop_stdadvlock(ap); 507 /* FUSE doesn't properly support flock until protocol 7.17 */ 508 if (flags & F_FLOCK) 509 return vop_stdadvlock(ap); 510 511 vn_lock(vp, LK_SHARED | LK_RETRY); 512 513 switch (fl->l_whence) { 514 case SEEK_SET: 515 case SEEK_CUR: 516 /* 517 * Caller is responsible for adding any necessary offset 518 * when SEEK_CUR is used. 519 */ 520 start = fl->l_start; 521 break; 522 523 case SEEK_END: 524 err = fuse_internal_getattr(vp, &vattr, cred, td); 525 if (err) 526 goto out; 527 size = vattr.va_size; 528 if (size > OFF_MAX || 529 (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) { 530 err = EOVERFLOW; 531 goto out; 532 } 533 start = size + fl->l_start; 534 break; 535 536 default: 537 return (EINVAL); 538 } 539 540 err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); 541 if (err) 542 goto out; 543 544 fdisp_init(&fdi, sizeof(*fli)); 545 546 fdisp_make_vp(&fdi, op, vp, td, cred); 547 fli = fdi.indata; 548 fli->fh = fufh->fh_id; 549 fli->owner = td->td_proc->p_pid; 550 fli->lk.start = start; 551 if (fl->l_len != 0) 552 fli->lk.end = start + fl->l_len - 1; 553 else 554 fli->lk.end = INT64_MAX; 555 fli->lk.type = fl->l_type; 556 fli->lk.pid = td->td_proc->p_pid; 557 558 err = fdisp_wait_answ(&fdi); 559 fdisp_destroy(&fdi); 560 561 if (err == 0 && op == FUSE_GETLK) { 562 flo = fdi.answ; 563 fl->l_type = flo->lk.type; 564 fl->l_whence = SEEK_SET; 565 if (flo->lk.type != F_UNLCK) { 566 fl->l_pid = flo->lk.pid; 567 fl->l_start = flo->lk.start; 568 if (flo->lk.end == INT64_MAX) 569 fl->l_len = 0; 570 else 571 fl->l_len = flo->lk.end - flo->lk.start + 1; 572 fl->l_start = flo->lk.start; 573 } 574 } 575 576 out: 577 VOP_UNLOCK(vp); 578 return err; 579 } 580 581 static int 582 fuse_vnop_allocate(struct vop_allocate_args *ap) 583 { 584 struct vnode *vp = ap->a_vp; 585 off_t *len = ap->a_len; 586 off_t *offset = ap->a_offset; 587 struct ucred *cred = ap->a_cred; 588 struct fuse_filehandle *fufh; 589 struct mount *mp = vnode_mount(vp); 590 struct fuse_dispatcher fdi; 591 struct fuse_fallocate_in *ffi; 592 struct uio io; 593 pid_t pid = curthread->td_proc->p_pid; 594 struct fuse_vnode_data *fvdat = VTOFUD(vp); 595 off_t filesize; 596 int err; 597 598 if (fuse_isdeadfs(vp)) 599 return (ENXIO); 600 601 switch (vp->v_type) { 602 case VFIFO: 603 return (ESPIPE); 604 case VLNK: 605 case VREG: 606 if (vfs_isrdonly(mp)) 607 return (EROFS); 608 break; 609 default: 610 return (ENODEV); 611 } 612 613 if (vfs_isrdonly(mp)) 614 return (EROFS); 615 616 if (fsess_not_impl(mp, FUSE_FALLOCATE)) 617 return (EINVAL); 618 619 io.uio_offset = *offset; 620 io.uio_resid = *len; 621 err = vn_rlimit_fsize(vp, &io, curthread); 622 if (err) 623 return (err); 624 625 err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 626 if (err) 627 return (err); 628 629 fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); 630 631 err = fuse_vnode_size(vp, &filesize, cred, curthread); 632 if (err) 633 return (err); 634 fuse_inval_buf_range(vp, filesize, *offset, *offset + *len); 635 636 fdisp_init(&fdi, sizeof(*ffi)); 637 fdisp_make_vp(&fdi, FUSE_FALLOCATE, vp, curthread, cred); 638 ffi = fdi.indata; 639 ffi->fh = fufh->fh_id; 640 ffi->offset = *offset; 641 ffi->length = *len; 642 ffi->mode = 0; 643 err = fdisp_wait_answ(&fdi); 644 645 if (err == ENOSYS) { 646 fsess_set_notimpl(mp, FUSE_FALLOCATE); 647 err = EINVAL; 648 } else if (err == EOPNOTSUPP) { 649 /* 650 * The file system server does not support FUSE_FALLOCATE with 651 * the supplied mode for this particular file. 652 */ 653 err = EINVAL; 654 } else if (!err) { 655 *offset += *len; 656 *len = 0; 657 fuse_vnode_undirty_cached_timestamps(vp, false); 658 fuse_internal_clear_suid_on_write(vp, cred, curthread); 659 if (*offset > fvdat->cached_attrs.va_size) { 660 fuse_vnode_setsize(vp, *offset, false); 661 getnanouptime(&fvdat->last_local_modify); 662 } 663 } 664 665 fdisp_destroy(&fdi); 666 return (err); 667 } 668 669 /* { 670 struct vnode *a_vp; 671 daddr_t a_bn; 672 struct bufobj **a_bop; 673 daddr_t *a_bnp; 674 int *a_runp; 675 int *a_runb; 676 } */ 677 static int 678 fuse_vnop_bmap(struct vop_bmap_args *ap) 679 { 680 struct vnode *vp = ap->a_vp; 681 struct bufobj **bo = ap->a_bop; 682 struct thread *td = curthread; 683 struct mount *mp; 684 struct fuse_dispatcher fdi; 685 struct fuse_bmap_in *fbi; 686 struct fuse_bmap_out *fbo; 687 struct fuse_data *data; 688 struct fuse_vnode_data *fvdat = VTOFUD(vp); 689 uint64_t biosize; 690 off_t fsize; 691 daddr_t lbn = ap->a_bn; 692 daddr_t *pbn = ap->a_bnp; 693 int *runp = ap->a_runp; 694 int *runb = ap->a_runb; 695 int error = 0; 696 int maxrun; 697 698 if (fuse_isdeadfs(vp)) { 699 return ENXIO; 700 } 701 702 mp = vnode_mount(vp); 703 data = fuse_get_mpdata(mp); 704 biosize = fuse_iosize(vp); 705 maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1, 706 data->max_readahead_blocks); 707 708 if (bo != NULL) 709 *bo = &vp->v_bufobj; 710 711 /* 712 * The FUSE_BMAP operation does not include the runp and runb 713 * variables, so we must guess. Report nonzero contiguous runs so 714 * cluster_read will combine adjacent reads. It's worthwhile to reduce 715 * upcalls even if we don't know the true physical layout of the file. 716 * 717 * FUSE file systems may opt out of read clustering in two ways: 718 * * mounting with -onoclusterr 719 * * Setting max_readahead <= maxbcachebuf during FUSE_INIT 720 */ 721 if (runb != NULL) 722 *runb = MIN(lbn, maxrun); 723 if (runp != NULL && maxrun == 0) 724 *runp = 0; 725 else if (runp != NULL) { 726 /* 727 * If the file's size is cached, use that value to calculate 728 * runp, even if the cache is expired. runp is only advisory, 729 * and the risk of getting it wrong is not worth the cost of 730 * another upcall. 731 */ 732 if (fvdat->cached_attrs.va_size != VNOVAL) 733 fsize = fvdat->cached_attrs.va_size; 734 else 735 error = fuse_vnode_size(vp, &fsize, td->td_ucred, td); 736 if (error == 0) 737 *runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1), 738 maxrun); 739 else 740 *runp = 0; 741 } 742 743 if (fsess_maybe_impl(mp, FUSE_BMAP)) { 744 fdisp_init(&fdi, sizeof(*fbi)); 745 fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred); 746 fbi = fdi.indata; 747 fbi->block = lbn; 748 fbi->blocksize = biosize; 749 error = fdisp_wait_answ(&fdi); 750 if (error == ENOSYS) { 751 fdisp_destroy(&fdi); 752 fsess_set_notimpl(mp, FUSE_BMAP); 753 error = 0; 754 } else { 755 fbo = fdi.answ; 756 if (error == 0 && pbn != NULL) 757 *pbn = fbo->block; 758 fdisp_destroy(&fdi); 759 return error; 760 } 761 } 762 763 /* If the daemon doesn't support BMAP, make up a sensible default */ 764 if (pbn != NULL) 765 *pbn = lbn * btodb(biosize); 766 return (error); 767 } 768 769 /* 770 struct vop_close_args { 771 struct vnode *a_vp; 772 int a_fflag; 773 struct ucred *a_cred; 774 struct thread *a_td; 775 }; 776 */ 777 static int 778 fuse_vnop_close(struct vop_close_args *ap) 779 { 780 struct vnode *vp = ap->a_vp; 781 struct mount *mp = vnode_mount(vp); 782 struct ucred *cred = ap->a_cred; 783 int fflag = ap->a_fflag; 784 struct thread *td = ap->a_td; 785 pid_t pid = td->td_proc->p_pid; 786 struct fuse_vnode_data *fvdat = VTOFUD(vp); 787 int err = 0; 788 789 if (fuse_isdeadfs(vp)) 790 return 0; 791 if (vnode_isdir(vp)) 792 return 0; 793 if (fflag & IO_NDELAY) 794 return 0; 795 796 err = fuse_flush(vp, cred, pid, fflag); 797 if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) { 798 struct vattr vap; 799 struct fuse_data *data; 800 int dataflags; 801 int access_e = 0; 802 803 data = fuse_get_mpdata(mp); 804 dataflags = data->dataflags; 805 if (dataflags & FSESS_DEFAULT_PERMISSIONS) { 806 struct vattr va; 807 808 fuse_internal_getattr(vp, &va, cred, td); 809 access_e = vaccess(vp->v_type, va.va_mode, va.va_uid, 810 va.va_gid, VWRITE, cred); 811 } 812 if (access_e == 0) { 813 VATTR_NULL(&vap); 814 vap.va_atime = fvdat->cached_attrs.va_atime; 815 /* 816 * Ignore errors setting when setting atime. That 817 * should not cause close(2) to fail. 818 */ 819 fuse_internal_setattr(vp, &vap, td, NULL); 820 } 821 } 822 /* TODO: close the file handle, if we're sure it's no longer used */ 823 if ((fvdat->flag & FN_SIZECHANGE) != 0) { 824 fuse_vnode_savesize(vp, cred, td->td_proc->p_pid); 825 } 826 return err; 827 } 828 829 /* 830 struct vop_copy_file_range_args { 831 struct vop_generic_args a_gen; 832 struct vnode *a_invp; 833 off_t *a_inoffp; 834 struct vnode *a_outvp; 835 off_t *a_outoffp; 836 size_t *a_lenp; 837 unsigned int a_flags; 838 struct ucred *a_incred; 839 struct ucred *a_outcred; 840 struct thread *a_fsizetd; 841 } 842 */ 843 static int 844 fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) 845 { 846 struct vnode *invp = ap->a_invp; 847 struct vnode *outvp = ap->a_outvp; 848 struct mount *mp = vnode_mount(invp); 849 struct fuse_vnode_data *outfvdat = VTOFUD(outvp); 850 struct fuse_dispatcher fdi; 851 struct fuse_filehandle *infufh, *outfufh; 852 struct fuse_copy_file_range_in *fcfri; 853 struct ucred *incred = ap->a_incred; 854 struct ucred *outcred = ap->a_outcred; 855 struct fuse_write_out *fwo; 856 struct thread *td; 857 struct uio io; 858 off_t outfilesize; 859 ssize_t r = 0; 860 pid_t pid; 861 int err; 862 863 err = ENOSYS; 864 if (mp == NULL || mp != vnode_mount(outvp)) 865 goto fallback; 866 867 if (incred->cr_uid != outcred->cr_uid) 868 goto fallback; 869 870 if (incred->cr_groups[0] != outcred->cr_groups[0]) 871 goto fallback; 872 873 /* Caller busied mp, mnt_data can be safely accessed. */ 874 if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE)) 875 goto fallback; 876 877 if (ap->a_fsizetd == NULL) 878 td = curthread; 879 else 880 td = ap->a_fsizetd; 881 pid = td->td_proc->p_pid; 882 883 vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE); 884 if (invp->v_data == NULL || outvp->v_data == NULL) { 885 err = EBADF; 886 goto unlock; 887 } 888 889 err = fuse_filehandle_getrw(invp, FREAD, &infufh, incred, pid); 890 if (err) 891 goto unlock; 892 893 err = fuse_filehandle_getrw(outvp, FWRITE, &outfufh, outcred, pid); 894 if (err) 895 goto unlock; 896 897 io.uio_resid = *ap->a_lenp; 898 if (ap->a_fsizetd) { 899 io.uio_offset = *ap->a_outoffp; 900 err = vn_rlimit_fsizex(outvp, &io, 0, &r, ap->a_fsizetd); 901 if (err != 0) 902 goto unlock; 903 } 904 905 err = fuse_vnode_size(outvp, &outfilesize, outcred, curthread); 906 if (err) 907 goto unlock; 908 909 vnode_pager_clean_sync(invp); 910 err = fuse_inval_buf_range(outvp, outfilesize, *ap->a_outoffp, 911 *ap->a_outoffp + io.uio_resid); 912 if (err) 913 goto unlock; 914 915 fdisp_init(&fdi, sizeof(*fcfri)); 916 fdisp_make_vp(&fdi, FUSE_COPY_FILE_RANGE, invp, td, incred); 917 fcfri = fdi.indata; 918 fcfri->fh_in = infufh->fh_id; 919 fcfri->off_in = *ap->a_inoffp; 920 fcfri->nodeid_out = VTOI(outvp); 921 fcfri->fh_out = outfufh->fh_id; 922 fcfri->off_out = *ap->a_outoffp; 923 fcfri->len = io.uio_resid; 924 fcfri->flags = 0; 925 926 err = fdisp_wait_answ(&fdi); 927 if (err == 0) { 928 fwo = fdi.answ; 929 *ap->a_lenp = fwo->size; 930 *ap->a_inoffp += fwo->size; 931 *ap->a_outoffp += fwo->size; 932 fuse_internal_clear_suid_on_write(outvp, outcred, td); 933 if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) { 934 fuse_vnode_setsize(outvp, *ap->a_outoffp, false); 935 getnanouptime(&outfvdat->last_local_modify); 936 } 937 fuse_vnode_update(invp, FN_ATIMECHANGE); 938 fuse_vnode_update(outvp, FN_MTIMECHANGE | FN_CTIMECHANGE); 939 } 940 fdisp_destroy(&fdi); 941 942 unlock: 943 if (invp != outvp) 944 VOP_UNLOCK(invp); 945 VOP_UNLOCK(outvp); 946 947 if (err == ENOSYS) 948 fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE); 949 fallback: 950 951 /* 952 * No need to call vn_rlimit_fsizex_res before return, since the uio is 953 * local. 954 */ 955 return (err); 956 } 957 958 static void 959 fdisp_make_mknod_for_fallback( 960 struct fuse_dispatcher *fdip, 961 struct componentname *cnp, 962 struct vnode *dvp, 963 uint64_t parentnid, 964 struct thread *td, 965 struct ucred *cred, 966 mode_t mode, 967 enum fuse_opcode *op) 968 { 969 struct fuse_mknod_in *fmni; 970 971 fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1); 972 *op = FUSE_MKNOD; 973 fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred); 974 fmni = fdip->indata; 975 fmni->mode = mode; 976 fmni->rdev = 0; 977 memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr, 978 cnp->cn_namelen); 979 ((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0'; 980 } 981 /* 982 struct vnop_create_args { 983 struct vnode *a_dvp; 984 struct vnode **a_vpp; 985 struct componentname *a_cnp; 986 struct vattr *a_vap; 987 }; 988 */ 989 static int 990 fuse_vnop_create(struct vop_create_args *ap) 991 { 992 struct vnode *dvp = ap->a_dvp; 993 struct vnode **vpp = ap->a_vpp; 994 struct componentname *cnp = ap->a_cnp; 995 struct vattr *vap = ap->a_vap; 996 struct thread *td = curthread; 997 struct ucred *cred = cnp->cn_cred; 998 999 struct fuse_data *data; 1000 struct fuse_create_in *fci; 1001 struct fuse_entry_out *feo; 1002 struct fuse_open_out *foo; 1003 struct fuse_dispatcher fdi, fdi2; 1004 struct fuse_dispatcher *fdip = &fdi; 1005 struct fuse_dispatcher *fdip2 = NULL; 1006 1007 int err; 1008 1009 struct mount *mp = vnode_mount(dvp); 1010 data = fuse_get_mpdata(mp); 1011 uint64_t parentnid = VTOFUD(dvp)->nid; 1012 mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode); 1013 enum fuse_opcode op; 1014 int flags; 1015 1016 if (fuse_isdeadfs(dvp)) 1017 return ENXIO; 1018 1019 /* FUSE expects sockets to be created with FUSE_MKNOD */ 1020 if (vap->va_type == VSOCK) 1021 return fuse_internal_mknod(dvp, vpp, cnp, vap); 1022 1023 /* 1024 * VOP_CREATE doesn't tell us the open(2) flags, so we guess. Only a 1025 * writable mode makes sense, and we might as well include readability 1026 * too. 1027 */ 1028 flags = O_RDWR; 1029 1030 bzero(&fdi, sizeof(fdi)); 1031 1032 if (vap->va_type != VREG) 1033 return (EINVAL); 1034 1035 if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { 1036 /* Fallback to FUSE_MKNOD/FUSE_OPEN */ 1037 fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td, 1038 cred, mode, &op); 1039 } else { 1040 /* Use FUSE_CREATE */ 1041 size_t insize; 1042 1043 op = FUSE_CREATE; 1044 fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1); 1045 fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred); 1046 fci = fdip->indata; 1047 fci->mode = mode; 1048 fci->flags = O_CREAT | flags; 1049 if (fuse_libabi_geq(data, 7, 12)) { 1050 insize = sizeof(*fci); 1051 fci->umask = td->td_proc->p_pd->pd_cmask; 1052 } else { 1053 insize = sizeof(struct fuse_open_in); 1054 } 1055 1056 memcpy((char *)fdip->indata + insize, cnp->cn_nameptr, 1057 cnp->cn_namelen); 1058 ((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0'; 1059 } 1060 1061 err = fdisp_wait_answ(fdip); 1062 1063 if (err) { 1064 if (err == ENOSYS && op == FUSE_CREATE) { 1065 fsess_set_notimpl(mp, FUSE_CREATE); 1066 fdisp_destroy(fdip); 1067 fdisp_make_mknod_for_fallback(fdip, cnp, dvp, 1068 parentnid, td, cred, mode, &op); 1069 err = fdisp_wait_answ(fdip); 1070 } 1071 if (err) 1072 goto out; 1073 } 1074 1075 feo = fdip->answ; 1076 1077 if ((err = fuse_internal_checkentry(feo, vap->va_type))) { 1078 goto out; 1079 } 1080 1081 if (op == FUSE_CREATE) { 1082 if (fuse_libabi_geq(data, 7, 9)) 1083 foo = (struct fuse_open_out*)(feo + 1); 1084 else 1085 foo = (struct fuse_open_out*)((char*)feo + 1086 FUSE_COMPAT_ENTRY_OUT_SIZE); 1087 } else { 1088 /* Issue a separate FUSE_OPEN */ 1089 struct fuse_open_in *foi; 1090 1091 fdip2 = &fdi2; 1092 fdisp_init(fdip2, sizeof(*foi)); 1093 fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td, 1094 cred); 1095 foi = fdip2->indata; 1096 foi->flags = flags; 1097 err = fdisp_wait_answ(fdip2); 1098 if (err) 1099 goto out; 1100 foo = fdip2->answ; 1101 } 1102 err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type); 1103 if (err) { 1104 struct fuse_release_in *fri; 1105 uint64_t nodeid = feo->nodeid; 1106 uint64_t fh_id = foo->fh; 1107 1108 fdisp_destroy(fdip); 1109 fdisp_init(fdip, sizeof(*fri)); 1110 fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred); 1111 fri = fdip->indata; 1112 fri->fh = fh_id; 1113 fri->flags = flags; 1114 fuse_insert_callback(fdip->tick, fuse_internal_forget_callback); 1115 fuse_insert_message(fdip->tick, false); 1116 goto out; 1117 } 1118 ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create"); 1119 fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, 1120 feo->attr_valid_nsec, NULL, true); 1121 1122 fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo); 1123 fuse_vnode_open(*vpp, foo->open_flags, td); 1124 /* 1125 * Purge the parent's attribute cache because the daemon should've 1126 * updated its mtime and ctime 1127 */ 1128 fuse_vnode_clear_attr_cache(dvp); 1129 cache_purge_negative(dvp); 1130 1131 out: 1132 if (fdip2) 1133 fdisp_destroy(fdip2); 1134 fdisp_destroy(fdip); 1135 return err; 1136 } 1137 1138 /* 1139 struct vnop_fdatasync_args { 1140 struct vop_generic_args a_gen; 1141 struct vnode * a_vp; 1142 struct thread * a_td; 1143 }; 1144 */ 1145 static int 1146 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap) 1147 { 1148 struct vnode *vp = ap->a_vp; 1149 struct thread *td = ap->a_td; 1150 int waitfor = MNT_WAIT; 1151 1152 int err = 0; 1153 1154 if (fuse_isdeadfs(vp)) { 1155 return 0; 1156 } 1157 if ((err = vop_stdfdatasync_buf(ap))) 1158 return err; 1159 1160 return fuse_internal_fsync(vp, td, waitfor, true); 1161 } 1162 1163 /* 1164 struct vnop_fsync_args { 1165 struct vop_generic_args a_gen; 1166 struct vnode * a_vp; 1167 int a_waitfor; 1168 struct thread * a_td; 1169 }; 1170 */ 1171 static int 1172 fuse_vnop_fsync(struct vop_fsync_args *ap) 1173 { 1174 struct vnode *vp = ap->a_vp; 1175 struct thread *td = ap->a_td; 1176 int waitfor = ap->a_waitfor; 1177 int err = 0; 1178 1179 if (fuse_isdeadfs(vp)) { 1180 return 0; 1181 } 1182 if ((err = vop_stdfsync(ap))) 1183 return err; 1184 1185 return fuse_internal_fsync(vp, td, waitfor, false); 1186 } 1187 1188 /* 1189 struct vnop_getattr_args { 1190 struct vnode *a_vp; 1191 struct vattr *a_vap; 1192 struct ucred *a_cred; 1193 struct thread *a_td; 1194 }; 1195 */ 1196 static int 1197 fuse_vnop_getattr(struct vop_getattr_args *ap) 1198 { 1199 struct vnode *vp = ap->a_vp; 1200 struct vattr *vap = ap->a_vap; 1201 struct ucred *cred = ap->a_cred; 1202 struct thread *td = curthread; 1203 1204 int err = 0; 1205 int dataflags; 1206 1207 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; 1208 1209 /* Note that we are not bailing out on a dead file system just yet. */ 1210 1211 if (!(dataflags & FSESS_INITED)) { 1212 if (!vnode_isvroot(vp)) { 1213 fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); 1214 err = ENOTCONN; 1215 return err; 1216 } else { 1217 goto fake; 1218 } 1219 } 1220 err = fuse_internal_getattr(vp, vap, cred, td); 1221 if (err == ENOTCONN && vnode_isvroot(vp)) { 1222 /* see comment in fuse_vfsop_statfs() */ 1223 goto fake; 1224 } else { 1225 return err; 1226 } 1227 1228 fake: 1229 bzero(vap, sizeof(*vap)); 1230 vap->va_type = vnode_vtype(vp); 1231 1232 return 0; 1233 } 1234 1235 /* 1236 struct vnop_inactive_args { 1237 struct vnode *a_vp; 1238 }; 1239 */ 1240 static int 1241 fuse_vnop_inactive(struct vop_inactive_args *ap) 1242 { 1243 struct vnode *vp = ap->a_vp; 1244 struct thread *td = curthread; 1245 1246 struct fuse_vnode_data *fvdat = VTOFUD(vp); 1247 struct fuse_filehandle *fufh, *fufh_tmp; 1248 1249 int need_flush = 1; 1250 1251 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { 1252 if (need_flush && vp->v_type == VREG) { 1253 if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { 1254 fuse_vnode_savesize(vp, NULL, 0); 1255 } 1256 if ((fvdat->flag & FN_REVOKED) != 0) 1257 fuse_io_invalbuf(vp, td); 1258 else 1259 fuse_io_flushbuf(vp, MNT_WAIT, td); 1260 need_flush = 0; 1261 } 1262 fuse_filehandle_close(vp, fufh, td, NULL); 1263 } 1264 1265 if ((fvdat->flag & FN_REVOKED) != 0) 1266 vrecycle(vp); 1267 1268 return 0; 1269 } 1270 1271 /* 1272 struct vnop_ioctl_args { 1273 struct vnode *a_vp; 1274 u_long a_command; 1275 caddr_t a_data; 1276 int a_fflag; 1277 struct ucred *a_cred; 1278 struct thread *a_td; 1279 }; 1280 */ 1281 static int 1282 fuse_vnop_ioctl(struct vop_ioctl_args *ap) 1283 { 1284 struct vnode *vp = ap->a_vp; 1285 struct mount *mp = vnode_mount(vp); 1286 struct ucred *cred = ap->a_cred; 1287 off_t *offp; 1288 pid_t pid = ap->a_td->td_proc->p_pid; 1289 int err; 1290 1291 switch (ap->a_command) { 1292 case FIOSEEKDATA: 1293 case FIOSEEKHOLE: 1294 /* Call FUSE_LSEEK, if we can, or fall back to vop_stdioctl */ 1295 if (fsess_maybe_impl(mp, FUSE_LSEEK)) { 1296 int whence; 1297 1298 offp = ap->a_data; 1299 if (ap->a_command == FIOSEEKDATA) 1300 whence = SEEK_DATA; 1301 else 1302 whence = SEEK_HOLE; 1303 1304 vn_lock(vp, LK_SHARED | LK_RETRY); 1305 err = fuse_vnop_do_lseek(vp, ap->a_td, cred, pid, offp, 1306 whence); 1307 VOP_UNLOCK(vp); 1308 } 1309 if (fsess_not_impl(mp, FUSE_LSEEK)) 1310 err = vop_stdioctl(ap); 1311 break; 1312 default: 1313 /* TODO: implement FUSE_IOCTL */ 1314 err = ENOTTY; 1315 break; 1316 } 1317 return (err); 1318 } 1319 1320 1321 /* 1322 struct vnop_link_args { 1323 struct vnode *a_tdvp; 1324 struct vnode *a_vp; 1325 struct componentname *a_cnp; 1326 }; 1327 */ 1328 static int 1329 fuse_vnop_link(struct vop_link_args *ap) 1330 { 1331 struct vnode *vp = ap->a_vp; 1332 struct vnode *tdvp = ap->a_tdvp; 1333 struct componentname *cnp = ap->a_cnp; 1334 1335 struct vattr *vap = VTOVA(vp); 1336 1337 struct fuse_dispatcher fdi; 1338 struct fuse_entry_out *feo; 1339 struct fuse_link_in fli; 1340 1341 int err; 1342 1343 if (fuse_isdeadfs(vp)) { 1344 return ENXIO; 1345 } 1346 if (vnode_mount(tdvp) != vnode_mount(vp)) { 1347 return EXDEV; 1348 } 1349 1350 /* 1351 * This is a seatbelt check to protect naive userspace filesystems from 1352 * themselves and the limitations of the FUSE IPC protocol. If a 1353 * filesystem does not allow attribute caching, assume it is capable of 1354 * validating that nlink does not overflow. 1355 */ 1356 if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) 1357 return EMLINK; 1358 fli.oldnodeid = VTOI(vp); 1359 1360 fdisp_init(&fdi, 0); 1361 fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp, 1362 FUSE_LINK, &fli, sizeof(fli), &fdi); 1363 if ((err = fdisp_wait_answ(&fdi))) { 1364 goto out; 1365 } 1366 feo = fdi.answ; 1367 1368 if (fli.oldnodeid != feo->nodeid) { 1369 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 1370 fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, 1371 "Assigned wrong inode for a hard link."); 1372 fuse_vnode_clear_attr_cache(vp); 1373 fuse_vnode_clear_attr_cache(tdvp); 1374 err = EIO; 1375 goto out; 1376 } 1377 1378 err = fuse_internal_checkentry(feo, vnode_vtype(vp)); 1379 if (!err) { 1380 /* 1381 * Purge the parent's attribute cache because the daemon 1382 * should've updated its mtime and ctime 1383 */ 1384 fuse_vnode_clear_attr_cache(tdvp); 1385 fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid, 1386 feo->attr_valid_nsec, NULL, true); 1387 } 1388 out: 1389 fdisp_destroy(&fdi); 1390 return err; 1391 } 1392 1393 struct fuse_lookup_alloc_arg { 1394 struct fuse_entry_out *feo; 1395 struct componentname *cnp; 1396 uint64_t nid; 1397 __enum_uint8(vtype) vtyp; 1398 }; 1399 1400 /* Callback for vn_get_ino */ 1401 static int 1402 fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1403 { 1404 struct fuse_lookup_alloc_arg *flaa = arg; 1405 1406 return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp, 1407 flaa->vtyp); 1408 } 1409 1410 SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup, 1411 "int", "struct timespec*", "struct timespec*"); 1412 /* 1413 struct vnop_lookup_args { 1414 struct vnodeop_desc *a_desc; 1415 struct vnode *a_dvp; 1416 struct vnode **a_vpp; 1417 struct componentname *a_cnp; 1418 }; 1419 */ 1420 int 1421 fuse_vnop_lookup(struct vop_lookup_args *ap) 1422 { 1423 struct vnode *dvp = ap->a_dvp; 1424 struct vnode **vpp = ap->a_vpp; 1425 struct componentname *cnp = ap->a_cnp; 1426 struct thread *td = curthread; 1427 struct ucred *cred = cnp->cn_cred; 1428 struct timespec now; 1429 1430 int nameiop = cnp->cn_nameiop; 1431 int flags = cnp->cn_flags; 1432 int islastcn = flags & ISLASTCN; 1433 struct mount *mp = vnode_mount(dvp); 1434 struct fuse_data *data = fuse_get_mpdata(mp); 1435 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; 1436 bool is_dot; 1437 1438 int err = 0; 1439 int lookup_err = 0; 1440 struct vnode *vp = NULL; 1441 1442 struct fuse_dispatcher fdi; 1443 bool did_lookup = false; 1444 struct fuse_entry_out *feo = NULL; 1445 __enum_uint8(vtype) vtyp; /* vnode type of target */ 1446 1447 uint64_t nid; 1448 1449 if (fuse_isdeadfs(dvp)) { 1450 *vpp = NULL; 1451 return ENXIO; 1452 } 1453 if (!vnode_isdir(dvp)) 1454 return ENOTDIR; 1455 1456 if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) 1457 return EROFS; 1458 1459 if ((cnp->cn_flags & NOEXECCHECK) != 0) 1460 cnp->cn_flags &= ~NOEXECCHECK; 1461 else if ((err = fuse_internal_access(dvp, VEXEC, td, cred))) 1462 return err; 1463 1464 is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.'; 1465 if ((flags & ISDOTDOT) && !(data->dataflags & FSESS_EXPORT_SUPPORT)) 1466 { 1467 if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) { 1468 /* 1469 * Since the file system doesn't support ".." lookups, 1470 * we have no way to find this entry. 1471 */ 1472 return ESTALE; 1473 } 1474 nid = VTOFUD(dvp)->parent_nid; 1475 if (nid == 0) 1476 return ENOENT; 1477 /* .. is obviously a directory */ 1478 vtyp = VDIR; 1479 } else if (is_dot) { 1480 nid = VTOI(dvp); 1481 /* . is obviously a directory */ 1482 vtyp = VDIR; 1483 } else { 1484 struct timespec timeout; 1485 int ncpticks; /* here to accommodate for API contract */ 1486 1487 err = cache_lookup(dvp, vpp, cnp, &timeout, &ncpticks); 1488 getnanouptime(&now); 1489 SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now); 1490 switch (err) { 1491 case -1: /* positive match */ 1492 if (timespeccmp(&timeout, &now, >)) { 1493 counter_u64_add(fuse_lookup_cache_hits, 1); 1494 } else { 1495 /* Cache timeout */ 1496 counter_u64_add(fuse_lookup_cache_misses, 1); 1497 bintime_clear( 1498 &VTOFUD(*vpp)->entry_cache_timeout); 1499 cache_purge(*vpp); 1500 if (dvp != *vpp) 1501 vput(*vpp); 1502 else 1503 vrele(*vpp); 1504 *vpp = NULL; 1505 break; 1506 } 1507 return 0; 1508 1509 case 0: /* no match in cache */ 1510 counter_u64_add(fuse_lookup_cache_misses, 1); 1511 break; 1512 1513 case ENOENT: /* negative match */ 1514 if (timespeccmp(&timeout, &now, <=)) { 1515 /* Cache timeout */ 1516 cache_purge_negative(dvp); 1517 break; 1518 } 1519 /* fall through */ 1520 default: 1521 return err; 1522 } 1523 1524 fdisp_init(&fdi, cnp->cn_namelen + 1); 1525 fdisp_make(&fdi, FUSE_LOOKUP, mp, VTOI(dvp), td, cred); 1526 1527 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); 1528 ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; 1529 lookup_err = fdisp_wait_answ(&fdi); 1530 did_lookup = true; 1531 1532 if (!lookup_err) { 1533 /* lookup call succeeded */ 1534 feo = (struct fuse_entry_out *)fdi.answ; 1535 nid = feo->nodeid; 1536 if (nid == 0) { 1537 /* zero nodeid means ENOENT and cache it */ 1538 struct timespec timeout; 1539 1540 fdi.answ_stat = ENOENT; 1541 lookup_err = ENOENT; 1542 if (cnp->cn_flags & MAKEENTRY) { 1543 fuse_validity_2_timespec(feo, &timeout); 1544 /* Use the same entry_time for .. as for 1545 * the file itself. That doesn't honor 1546 * exactly what the fuse server tells 1547 * us, but to do otherwise would require 1548 * another cache lookup at this point. 1549 */ 1550 struct timespec *dtsp = NULL; 1551 cache_enter_time(dvp, *vpp, cnp, 1552 &timeout, dtsp); 1553 } 1554 } 1555 vtyp = IFTOVT(feo->attr.mode); 1556 } 1557 if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) { 1558 fdisp_destroy(&fdi); 1559 return lookup_err; 1560 } 1561 } 1562 /* lookup_err, if non-zero, must be ENOENT at this point */ 1563 1564 if (lookup_err) { 1565 /* Entry not found */ 1566 if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { 1567 if (default_permissions) 1568 err = fuse_internal_access(dvp, VWRITE, td, 1569 cred); 1570 else 1571 err = 0; 1572 if (!err) { 1573 err = EJUSTRETURN; 1574 } 1575 } else { 1576 err = ENOENT; 1577 } 1578 } else { 1579 /* Entry was found */ 1580 if (flags & ISDOTDOT) { 1581 struct fuse_lookup_alloc_arg flaa; 1582 1583 flaa.nid = nid; 1584 flaa.feo = feo; 1585 flaa.cnp = cnp; 1586 flaa.vtyp = vtyp; 1587 err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0, 1588 &vp); 1589 *vpp = vp; 1590 } else if (nid == VTOI(dvp)) { 1591 if (is_dot) { 1592 vref(dvp); 1593 *vpp = dvp; 1594 } else { 1595 fuse_warn(fuse_get_mpdata(mp), 1596 FSESS_WARN_ILLEGAL_INODE, 1597 "Assigned same inode to both parent and " 1598 "child."); 1599 err = EIO; 1600 } 1601 1602 } else { 1603 struct fuse_vnode_data *fvdat; 1604 1605 err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, 1606 &vp, cnp, vtyp); 1607 if (err) 1608 goto out; 1609 *vpp = vp; 1610 fvdat = VTOFUD(vp); 1611 1612 MPASS(feo != NULL); 1613 if (timespeccmp(&now, &fvdat->last_local_modify, >)) { 1614 /* 1615 * Attributes from the server are definitely 1616 * newer than the last attributes we sent to 1617 * the server, so cache them. 1618 */ 1619 fuse_internal_cache_attrs(*vpp, &feo->attr, 1620 feo->attr_valid, feo->attr_valid_nsec, 1621 NULL, true); 1622 } 1623 fuse_validity_2_bintime(feo->entry_valid, 1624 feo->entry_valid_nsec, 1625 &fvdat->entry_cache_timeout); 1626 1627 if ((nameiop == DELETE || nameiop == RENAME) && 1628 islastcn && default_permissions) 1629 { 1630 struct vattr dvattr; 1631 1632 err = fuse_internal_access(dvp, VWRITE, td, 1633 cred); 1634 if (err != 0) 1635 goto out; 1636 /* 1637 * if the parent's sticky bit is set, check 1638 * whether we're allowed to remove the file. 1639 * Need to figure out the vnode locking to make 1640 * this work. 1641 */ 1642 fuse_internal_getattr(dvp, &dvattr, cred, td); 1643 if ((dvattr.va_mode & S_ISTXT) && 1644 fuse_internal_access(dvp, VADMIN, td, 1645 cred) && 1646 fuse_internal_access(*vpp, VADMIN, td, 1647 cred)) { 1648 err = EPERM; 1649 goto out; 1650 } 1651 } 1652 } 1653 } 1654 out: 1655 if (err) { 1656 if (vp != NULL && dvp != vp) 1657 vput(vp); 1658 else if (vp != NULL) 1659 vrele(vp); 1660 *vpp = NULL; 1661 } 1662 if (did_lookup) 1663 fdisp_destroy(&fdi); 1664 1665 return err; 1666 } 1667 1668 /* 1669 struct vnop_mkdir_args { 1670 struct vnode *a_dvp; 1671 struct vnode **a_vpp; 1672 struct componentname *a_cnp; 1673 struct vattr *a_vap; 1674 }; 1675 */ 1676 static int 1677 fuse_vnop_mkdir(struct vop_mkdir_args *ap) 1678 { 1679 struct vnode *dvp = ap->a_dvp; 1680 struct vnode **vpp = ap->a_vpp; 1681 struct componentname *cnp = ap->a_cnp; 1682 struct vattr *vap = ap->a_vap; 1683 1684 struct fuse_mkdir_in fmdi; 1685 1686 if (fuse_isdeadfs(dvp)) { 1687 return ENXIO; 1688 } 1689 fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); 1690 fmdi.umask = curthread->td_proc->p_pd->pd_cmask; 1691 1692 return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi, 1693 sizeof(fmdi), VDIR)); 1694 } 1695 1696 /* 1697 struct vnop_mknod_args { 1698 struct vnode *a_dvp; 1699 struct vnode **a_vpp; 1700 struct componentname *a_cnp; 1701 struct vattr *a_vap; 1702 }; 1703 */ 1704 static int 1705 fuse_vnop_mknod(struct vop_mknod_args *ap) 1706 { 1707 1708 struct vnode *dvp = ap->a_dvp; 1709 struct vnode **vpp = ap->a_vpp; 1710 struct componentname *cnp = ap->a_cnp; 1711 struct vattr *vap = ap->a_vap; 1712 1713 if (fuse_isdeadfs(dvp)) 1714 return ENXIO; 1715 1716 return fuse_internal_mknod(dvp, vpp, cnp, vap); 1717 } 1718 1719 /* 1720 struct vop_open_args { 1721 struct vnode *a_vp; 1722 int a_mode; 1723 struct ucred *a_cred; 1724 struct thread *a_td; 1725 int a_fdidx; / struct file *a_fp; 1726 }; 1727 */ 1728 static int 1729 fuse_vnop_open(struct vop_open_args *ap) 1730 { 1731 struct vnode *vp = ap->a_vp; 1732 int a_mode = ap->a_mode; 1733 struct thread *td = ap->a_td; 1734 struct ucred *cred = ap->a_cred; 1735 pid_t pid = td->td_proc->p_pid; 1736 1737 if (fuse_isdeadfs(vp)) 1738 return ENXIO; 1739 if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) 1740 return (EOPNOTSUPP); 1741 if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) 1742 return EINVAL; 1743 1744 if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { 1745 fuse_vnode_open(vp, 0, td); 1746 return 0; 1747 } 1748 1749 return fuse_filehandle_open(vp, a_mode, NULL, td, cred); 1750 } 1751 1752 static int 1753 fuse_vnop_pathconf(struct vop_pathconf_args *ap) 1754 { 1755 struct vnode *vp = ap->a_vp; 1756 struct mount *mp; 1757 1758 switch (ap->a_name) { 1759 case _PC_FILESIZEBITS: 1760 *ap->a_retval = 64; 1761 return (0); 1762 case _PC_NAME_MAX: 1763 *ap->a_retval = NAME_MAX; 1764 return (0); 1765 case _PC_LINK_MAX: 1766 *ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX); 1767 return (0); 1768 case _PC_SYMLINK_MAX: 1769 *ap->a_retval = MAXPATHLEN; 1770 return (0); 1771 case _PC_NO_TRUNC: 1772 *ap->a_retval = 1; 1773 return (0); 1774 case _PC_MIN_HOLE_SIZE: 1775 /* 1776 * The FUSE protocol provides no mechanism for a server to 1777 * report _PC_MIN_HOLE_SIZE. It's a protocol bug. Instead, 1778 * return EINVAL if the server does not support FUSE_LSEEK, or 1779 * 1 if it does. 1780 */ 1781 mp = vnode_mount(vp); 1782 if (!fsess_is_impl(mp, FUSE_LSEEK) && 1783 !fsess_not_impl(mp, FUSE_LSEEK)) { 1784 off_t offset = 0; 1785 1786 /* Issue a FUSE_LSEEK to find out if it's implemented */ 1787 fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred, 1788 curthread->td_proc->p_pid, &offset, SEEK_DATA); 1789 } 1790 1791 if (fsess_is_impl(mp, FUSE_LSEEK)) { 1792 *ap->a_retval = 1; 1793 return (0); 1794 } else { 1795 /* 1796 * Probably FUSE_LSEEK is not implemented. It might 1797 * be, if the FUSE_LSEEK above returned an error like 1798 * EACCES, but in that case we can't tell, so it's 1799 * safest to report EINVAL anyway. 1800 */ 1801 return (EINVAL); 1802 } 1803 default: 1804 return (vop_stdpathconf(ap)); 1805 } 1806 } 1807 1808 SDT_PROBE_DEFINE3(fusefs, , vnops, filehandles_closed, "struct vnode*", 1809 "struct uio*", "struct ucred*"); 1810 /* 1811 struct vnop_read_args { 1812 struct vnode *a_vp; 1813 struct uio *a_uio; 1814 int a_ioflag; 1815 struct ucred *a_cred; 1816 }; 1817 */ 1818 static int 1819 fuse_vnop_read(struct vop_read_args *ap) 1820 { 1821 struct vnode *vp = ap->a_vp; 1822 struct uio *uio = ap->a_uio; 1823 int ioflag = ap->a_ioflag; 1824 struct ucred *cred = ap->a_cred; 1825 pid_t pid = curthread->td_proc->p_pid; 1826 struct fuse_filehandle *fufh; 1827 int err; 1828 bool closefufh = false, directio; 1829 1830 MPASS(vp->v_type == VREG || vp->v_type == VDIR); 1831 1832 if (fuse_isdeadfs(vp)) { 1833 return ENXIO; 1834 } 1835 1836 if (VTOFUD(vp)->flag & FN_DIRECTIO) { 1837 ioflag |= IO_DIRECT; 1838 } 1839 1840 err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid); 1841 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 1842 /* 1843 * nfsd will do I/O without first doing VOP_OPEN. We 1844 * must implicitly open the file here 1845 */ 1846 err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); 1847 closefufh = true; 1848 } 1849 if (err) { 1850 SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred); 1851 return err; 1852 } 1853 1854 /* 1855 * Ideally, when the daemon asks for direct io at open time, the 1856 * standard file flag should be set according to this, so that would 1857 * just change the default mode, which later on could be changed via 1858 * fcntl(2). 1859 * But this doesn't work, the O_DIRECT flag gets cleared at some point 1860 * (don't know where). So to make any use of the Fuse direct_io option, 1861 * we hardwire it into the file's private data (similarly to Linux, 1862 * btw.). 1863 */ 1864 directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); 1865 1866 fuse_vnode_update(vp, FN_ATIMECHANGE); 1867 if (directio) { 1868 SDT_PROBE2(fusefs, , vnops, trace, 1, "direct read of vnode"); 1869 err = fuse_read_directbackend(vp, uio, cred, fufh); 1870 } else { 1871 SDT_PROBE2(fusefs, , vnops, trace, 1, "buffered read of vnode"); 1872 err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid); 1873 } 1874 1875 if (closefufh) 1876 fuse_filehandle_close(vp, fufh, curthread, cred); 1877 1878 return (err); 1879 } 1880 1881 /* 1882 struct vnop_readdir_args { 1883 struct vnode *a_vp; 1884 struct uio *a_uio; 1885 struct ucred *a_cred; 1886 int *a_eofflag; 1887 int *a_ncookies; 1888 uint64_t **a_cookies; 1889 }; 1890 */ 1891 static int 1892 fuse_vnop_readdir(struct vop_readdir_args *ap) 1893 { 1894 struct vnode *vp = ap->a_vp; 1895 struct uio *uio = ap->a_uio; 1896 struct ucred *cred = ap->a_cred; 1897 struct fuse_filehandle *fufh = NULL; 1898 struct mount *mp = vnode_mount(vp); 1899 struct fuse_iov cookediov; 1900 int err = 0; 1901 uint64_t *cookies; 1902 ssize_t tresid; 1903 int ncookies; 1904 bool closefufh = false; 1905 pid_t pid = curthread->td_proc->p_pid; 1906 1907 if (ap->a_eofflag) 1908 *ap->a_eofflag = 0; 1909 if (fuse_isdeadfs(vp)) { 1910 return ENXIO; 1911 } 1912 if ( /* XXXIP ((uio_iovcnt(uio) > 1)) || */ 1913 (uio_resid(uio) < sizeof(struct dirent))) { 1914 return EINVAL; 1915 } 1916 1917 tresid = uio->uio_resid; 1918 err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); 1919 if (err == EBADF && mp->mnt_flag & MNT_EXPORTED) { 1920 KASSERT(fuse_get_mpdata(mp)->dataflags 1921 & FSESS_NO_OPENDIR_SUPPORT, 1922 ("FUSE file systems that don't set " 1923 "FUSE_NO_OPENDIR_SUPPORT should not be exported")); 1924 /* 1925 * nfsd will do VOP_READDIR without first doing VOP_OPEN. We 1926 * must implicitly open the directory here. 1927 */ 1928 err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); 1929 closefufh = true; 1930 } 1931 if (err) 1932 return (err); 1933 if (ap->a_ncookies != NULL) { 1934 ncookies = uio->uio_resid / 1935 (offsetof(struct dirent, d_name) + 4) + 1; 1936 cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); 1937 *ap->a_ncookies = ncookies; 1938 *ap->a_cookies = cookies; 1939 } else { 1940 ncookies = 0; 1941 cookies = NULL; 1942 } 1943 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1) 1944 fiov_init(&cookediov, DIRCOOKEDSIZE); 1945 1946 err = fuse_internal_readdir(vp, uio, fufh, &cookediov, 1947 &ncookies, cookies); 1948 1949 fiov_teardown(&cookediov); 1950 if (closefufh) 1951 fuse_filehandle_close(vp, fufh, curthread, cred); 1952 1953 if (ap->a_ncookies != NULL) { 1954 if (err == 0) { 1955 *ap->a_ncookies -= ncookies; 1956 } else { 1957 free(*ap->a_cookies, M_TEMP); 1958 *ap->a_ncookies = 0; 1959 *ap->a_cookies = NULL; 1960 } 1961 } 1962 if (err == 0 && tresid == uio->uio_resid) 1963 *ap->a_eofflag = 1; 1964 1965 return err; 1966 } 1967 1968 /* 1969 struct vnop_readlink_args { 1970 struct vnode *a_vp; 1971 struct uio *a_uio; 1972 struct ucred *a_cred; 1973 }; 1974 */ 1975 static int 1976 fuse_vnop_readlink(struct vop_readlink_args *ap) 1977 { 1978 struct vnode *vp = ap->a_vp; 1979 struct uio *uio = ap->a_uio; 1980 struct ucred *cred = ap->a_cred; 1981 1982 struct fuse_dispatcher fdi; 1983 int err; 1984 1985 if (fuse_isdeadfs(vp)) { 1986 return ENXIO; 1987 } 1988 if (!vnode_islnk(vp)) { 1989 return EINVAL; 1990 } 1991 fdisp_init(&fdi, 0); 1992 err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred); 1993 if (err) { 1994 goto out; 1995 } 1996 if (strnlen(fdi.answ, fdi.iosize) + 1 < fdi.iosize) { 1997 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 1998 fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, 1999 "Returned an embedded NUL from FUSE_READLINK."); 2000 err = EIO; 2001 goto out; 2002 } 2003 if (((char *)fdi.answ)[0] == '/' && 2004 fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) { 2005 char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname; 2006 2007 err = uiomove(mpth, strlen(mpth), uio); 2008 } 2009 if (!err) { 2010 err = uiomove(fdi.answ, fdi.iosize, uio); 2011 } 2012 out: 2013 fdisp_destroy(&fdi); 2014 return err; 2015 } 2016 2017 /* 2018 struct vnop_reclaim_args { 2019 struct vnode *a_vp; 2020 }; 2021 */ 2022 static int 2023 fuse_vnop_reclaim(struct vop_reclaim_args *ap) 2024 { 2025 struct vnode *vp = ap->a_vp; 2026 struct thread *td = curthread; 2027 struct fuse_vnode_data *fvdat = VTOFUD(vp); 2028 struct fuse_filehandle *fufh, *fufh_tmp; 2029 2030 if (!fvdat) { 2031 panic("FUSE: no vnode data during recycling"); 2032 } 2033 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { 2034 printf("FUSE: vnode being reclaimed with open fufh " 2035 "(type=%#x)", fufh->fufh_type); 2036 fuse_filehandle_close(vp, fufh, td, NULL); 2037 } 2038 2039 if (VTOI(vp) == 1) { 2040 /* 2041 * Don't send FUSE_FORGET for the root inode, because 2042 * we never send FUSE_LOOKUP for it (see 2043 * fuse_vfsop_root) and we don't want the server to see 2044 * mismatched lookup counts. 2045 */ 2046 struct fuse_data *data; 2047 struct vnode *vroot; 2048 2049 data = fuse_get_mpdata(vnode_mount(vp)); 2050 FUSE_LOCK(); 2051 vroot = data->vroot; 2052 data->vroot = NULL; 2053 FUSE_UNLOCK(); 2054 if (vroot) 2055 vrele(vroot); 2056 } else if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) { 2057 fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp), 2058 fvdat->nlookup); 2059 } 2060 cache_purge(vp); 2061 vfs_hash_remove(vp); 2062 fuse_vnode_destroy(vp); 2063 2064 return 0; 2065 } 2066 2067 /* 2068 struct vnop_remove_args { 2069 struct vnode *a_dvp; 2070 struct vnode *a_vp; 2071 struct componentname *a_cnp; 2072 }; 2073 */ 2074 static int 2075 fuse_vnop_remove(struct vop_remove_args *ap) 2076 { 2077 struct vnode *dvp = ap->a_dvp; 2078 struct vnode *vp = ap->a_vp; 2079 struct componentname *cnp = ap->a_cnp; 2080 2081 int err; 2082 2083 if (fuse_isdeadfs(vp)) { 2084 return ENXIO; 2085 } 2086 if (vnode_isdir(vp)) { 2087 return EPERM; 2088 } 2089 2090 err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); 2091 2092 return err; 2093 } 2094 2095 /* 2096 struct vnop_rename_args { 2097 struct vnode *a_fdvp; 2098 struct vnode *a_fvp; 2099 struct componentname *a_fcnp; 2100 struct vnode *a_tdvp; 2101 struct vnode *a_tvp; 2102 struct componentname *a_tcnp; 2103 }; 2104 */ 2105 static int 2106 fuse_vnop_rename(struct vop_rename_args *ap) 2107 { 2108 struct vnode *fdvp = ap->a_fdvp; 2109 struct vnode *fvp = ap->a_fvp; 2110 struct componentname *fcnp = ap->a_fcnp; 2111 struct vnode *tdvp = ap->a_tdvp; 2112 struct vnode *tvp = ap->a_tvp; 2113 struct componentname *tcnp = ap->a_tcnp; 2114 struct fuse_data *data; 2115 bool newparent = fdvp != tdvp; 2116 bool isdir = fvp->v_type == VDIR; 2117 int err = 0; 2118 2119 if (fuse_isdeadfs(fdvp)) { 2120 return ENXIO; 2121 } 2122 if (fvp->v_mount != tdvp->v_mount || 2123 (tvp && fvp->v_mount != tvp->v_mount)) { 2124 SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename"); 2125 err = EXDEV; 2126 goto out; 2127 } 2128 cache_purge(fvp); 2129 2130 /* 2131 * FUSE library is expected to check if target directory is not 2132 * under the source directory in the file system tree. 2133 * Linux performs this check at VFS level. 2134 */ 2135 /* 2136 * If source is a directory, and it will get a new parent, user must 2137 * have write permission to it, so ".." can be modified. 2138 */ 2139 data = fuse_get_mpdata(vnode_mount(tdvp)); 2140 if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) { 2141 err = fuse_internal_access(fvp, VWRITE, 2142 curthread, tcnp->cn_cred); 2143 if (err) 2144 goto out; 2145 } 2146 sx_xlock(&data->rename_lock); 2147 err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp); 2148 if (err == 0) { 2149 if (tdvp != fdvp) 2150 fuse_vnode_setparent(fvp, tdvp); 2151 if (tvp != NULL) 2152 fuse_vnode_setparent(tvp, NULL); 2153 } 2154 sx_unlock(&data->rename_lock); 2155 2156 if (tvp != NULL && tvp != fvp) { 2157 cache_purge(tvp); 2158 } 2159 if (vnode_isdir(fvp)) { 2160 if (((tvp != NULL) && vnode_isdir(tvp)) || vnode_isdir(fvp)) { 2161 cache_purge(tdvp); 2162 } 2163 cache_purge(fdvp); 2164 } 2165 out: 2166 if (tdvp == tvp) { 2167 vrele(tdvp); 2168 } else { 2169 vput(tdvp); 2170 } 2171 if (tvp != NULL) { 2172 vput(tvp); 2173 } 2174 vrele(fdvp); 2175 vrele(fvp); 2176 2177 return err; 2178 } 2179 2180 /* 2181 struct vnop_rmdir_args { 2182 struct vnode *a_dvp; 2183 struct vnode *a_vp; 2184 struct componentname *a_cnp; 2185 } *ap; 2186 */ 2187 static int 2188 fuse_vnop_rmdir(struct vop_rmdir_args *ap) 2189 { 2190 struct vnode *dvp = ap->a_dvp; 2191 struct vnode *vp = ap->a_vp; 2192 2193 int err; 2194 2195 if (fuse_isdeadfs(vp)) { 2196 return ENXIO; 2197 } 2198 if (VTOFUD(vp) == VTOFUD(dvp)) { 2199 return EINVAL; 2200 } 2201 err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); 2202 2203 return err; 2204 } 2205 2206 /* 2207 struct vnop_setattr_args { 2208 struct vnode *a_vp; 2209 struct vattr *a_vap; 2210 struct ucred *a_cred; 2211 struct thread *a_td; 2212 }; 2213 */ 2214 static int 2215 fuse_vnop_setattr(struct vop_setattr_args *ap) 2216 { 2217 struct vnode *vp = ap->a_vp; 2218 struct vattr *vap = ap->a_vap; 2219 struct ucred *cred = ap->a_cred; 2220 struct thread *td = curthread; 2221 struct mount *mp; 2222 struct fuse_data *data; 2223 struct vattr old_va; 2224 int dataflags; 2225 int err = 0, err2; 2226 accmode_t accmode = 0; 2227 bool checkperm; 2228 bool drop_suid = false; 2229 2230 mp = vnode_mount(vp); 2231 data = fuse_get_mpdata(mp); 2232 dataflags = data->dataflags; 2233 checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; 2234 2235 if (fuse_isdeadfs(vp)) { 2236 return ENXIO; 2237 } 2238 2239 if (vap->va_uid != (uid_t)VNOVAL) { 2240 if (checkperm) { 2241 /* Only root may change a file's owner */ 2242 err = priv_check_cred(cred, PRIV_VFS_CHOWN); 2243 if (err) { 2244 /* As a special case, allow the null chown */ 2245 err2 = fuse_internal_getattr(vp, &old_va, cred, 2246 td); 2247 if (err2) 2248 return (err2); 2249 if (vap->va_uid != old_va.va_uid) 2250 return err; 2251 drop_suid = true; 2252 } 2253 } 2254 accmode |= VADMIN; 2255 } 2256 if (vap->va_gid != (gid_t)VNOVAL) { 2257 if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN)) 2258 drop_suid = true; 2259 if (checkperm && !groupmember(vap->va_gid, cred)) { 2260 /* 2261 * Non-root users may only chgrp to one of their own 2262 * groups 2263 */ 2264 err = priv_check_cred(cred, PRIV_VFS_CHOWN); 2265 if (err) { 2266 /* As a special case, allow the null chgrp */ 2267 err2 = fuse_internal_getattr(vp, &old_va, cred, 2268 td); 2269 if (err2) 2270 return (err2); 2271 if (vap->va_gid != old_va.va_gid) 2272 return err; 2273 } 2274 } 2275 accmode |= VADMIN; 2276 } 2277 if (vap->va_size != VNOVAL) { 2278 switch (vp->v_type) { 2279 case VDIR: 2280 return (EISDIR); 2281 case VLNK: 2282 case VREG: 2283 if (vfs_isrdonly(mp)) 2284 return (EROFS); 2285 err = vn_rlimit_trunc(vap->va_size, td); 2286 if (err) 2287 return (err); 2288 break; 2289 default: 2290 /* 2291 * According to POSIX, the result is unspecified 2292 * for file types other than regular files, 2293 * directories and shared memory objects. We 2294 * don't support shared memory objects in the file 2295 * system, and have dubious support for truncating 2296 * symlinks. Just ignore the request in other cases. 2297 */ 2298 return (0); 2299 } 2300 /* Don't set accmode. Permission to trunc is checked upstack */ 2301 } 2302 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 2303 if (vap->va_vaflags & VA_UTIMES_NULL) 2304 accmode |= VWRITE; 2305 else 2306 accmode |= VADMIN; 2307 } 2308 if (drop_suid) { 2309 if (vap->va_mode != (mode_t)VNOVAL) 2310 vap->va_mode &= ~(S_ISUID | S_ISGID); 2311 else { 2312 err = fuse_internal_getattr(vp, &old_va, cred, td); 2313 if (err) 2314 return (err); 2315 vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID); 2316 } 2317 } 2318 if (vap->va_mode != (mode_t)VNOVAL) { 2319 /* Only root may set the sticky bit on non-directories */ 2320 if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT) 2321 && priv_check_cred(cred, PRIV_VFS_STICKYFILE)) 2322 return EFTYPE; 2323 if (checkperm && (vap->va_mode & S_ISGID)) { 2324 err = fuse_internal_getattr(vp, &old_va, cred, td); 2325 if (err) 2326 return (err); 2327 if (!groupmember(old_va.va_gid, cred)) { 2328 err = priv_check_cred(cred, PRIV_VFS_SETGID); 2329 if (err) 2330 return (err); 2331 } 2332 } 2333 accmode |= VADMIN; 2334 } 2335 2336 if (vfs_isrdonly(mp)) 2337 return EROFS; 2338 2339 if (checkperm) { 2340 err = fuse_internal_access(vp, accmode, td, cred); 2341 } else { 2342 err = 0; 2343 } 2344 if (err) 2345 return err; 2346 else 2347 return fuse_internal_setattr(vp, vap, td, cred); 2348 } 2349 2350 /* 2351 struct vnop_strategy_args { 2352 struct vnode *a_vp; 2353 struct buf *a_bp; 2354 }; 2355 */ 2356 static int 2357 fuse_vnop_strategy(struct vop_strategy_args *ap) 2358 { 2359 struct vnode *vp = ap->a_vp; 2360 struct buf *bp = ap->a_bp; 2361 2362 if (!vp || fuse_isdeadfs(vp)) { 2363 bp->b_ioflags |= BIO_ERROR; 2364 bp->b_error = ENXIO; 2365 bufdone(bp); 2366 return 0; 2367 } 2368 2369 /* 2370 * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags. 2371 * fuse_io_strategy sets bp's error fields 2372 */ 2373 (void)fuse_io_strategy(vp, bp); 2374 2375 return 0; 2376 } 2377 2378 /* 2379 struct vnop_symlink_args { 2380 struct vnode *a_dvp; 2381 struct vnode **a_vpp; 2382 struct componentname *a_cnp; 2383 struct vattr *a_vap; 2384 char *a_target; 2385 }; 2386 */ 2387 static int 2388 fuse_vnop_symlink(struct vop_symlink_args *ap) 2389 { 2390 struct vnode *dvp = ap->a_dvp; 2391 struct vnode **vpp = ap->a_vpp; 2392 struct componentname *cnp = ap->a_cnp; 2393 const char *target = ap->a_target; 2394 2395 struct fuse_dispatcher fdi; 2396 2397 int err; 2398 size_t len; 2399 2400 if (fuse_isdeadfs(dvp)) { 2401 return ENXIO; 2402 } 2403 /* 2404 * Unlike the other creator type calls, here we have to create a message 2405 * where the name of the new entry comes first, and the data describing 2406 * the entry comes second. 2407 * Hence we can't rely on our handy fuse_internal_newentry() routine, 2408 * but put together the message manually and just call the core part. 2409 */ 2410 2411 len = strlen(target) + 1; 2412 fdisp_init(&fdi, len + cnp->cn_namelen + 1); 2413 fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL); 2414 2415 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); 2416 ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; 2417 memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len); 2418 2419 err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi); 2420 fdisp_destroy(&fdi); 2421 return err; 2422 } 2423 2424 /* 2425 struct vnop_write_args { 2426 struct vnode *a_vp; 2427 struct uio *a_uio; 2428 int a_ioflag; 2429 struct ucred *a_cred; 2430 }; 2431 */ 2432 static int 2433 fuse_vnop_write(struct vop_write_args *ap) 2434 { 2435 struct vnode *vp = ap->a_vp; 2436 struct uio *uio = ap->a_uio; 2437 int ioflag = ap->a_ioflag; 2438 struct ucred *cred = ap->a_cred; 2439 pid_t pid = curthread->td_proc->p_pid; 2440 struct fuse_filehandle *fufh; 2441 int err; 2442 bool closefufh = false, directio; 2443 2444 MPASS(vp->v_type == VREG || vp->v_type == VDIR); 2445 2446 if (fuse_isdeadfs(vp)) { 2447 return ENXIO; 2448 } 2449 2450 if (VTOFUD(vp)->flag & FN_DIRECTIO) { 2451 ioflag |= IO_DIRECT; 2452 } 2453 2454 err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 2455 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 2456 /* 2457 * nfsd will do I/O without first doing VOP_OPEN. We 2458 * must implicitly open the file here 2459 */ 2460 err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred); 2461 closefufh = true; 2462 } 2463 if (err) { 2464 SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred); 2465 return err; 2466 } 2467 2468 /* 2469 * Ideally, when the daemon asks for direct io at open time, the 2470 * standard file flag should be set according to this, so that would 2471 * just change the default mode, which later on could be changed via 2472 * fcntl(2). 2473 * But this doesn't work, the O_DIRECT flag gets cleared at some point 2474 * (don't know where). So to make any use of the Fuse direct_io option, 2475 * we hardwire it into the file's private data (similarly to Linux, 2476 * btw.). 2477 */ 2478 directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); 2479 2480 fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); 2481 if (directio) { 2482 off_t start, end, filesize; 2483 bool pages = (ioflag & IO_VMIO) != 0; 2484 2485 SDT_PROBE2(fusefs, , vnops, trace, 1, "direct write of vnode"); 2486 2487 err = fuse_vnode_size(vp, &filesize, cred, curthread); 2488 if (err) 2489 goto out; 2490 2491 start = uio->uio_offset; 2492 end = start + uio->uio_resid; 2493 if (!pages) { 2494 err = fuse_inval_buf_range(vp, filesize, start, 2495 end); 2496 if (err) 2497 goto out; 2498 } 2499 err = fuse_write_directbackend(vp, uio, cred, fufh, 2500 filesize, ioflag, pages); 2501 } else { 2502 SDT_PROBE2(fusefs, , vnops, trace, 1, 2503 "buffered write of vnode"); 2504 if (!fsess_opt_writeback(vnode_mount(vp))) 2505 ioflag |= IO_SYNC; 2506 err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid); 2507 } 2508 fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td); 2509 2510 out: 2511 if (closefufh) 2512 fuse_filehandle_close(vp, fufh, curthread, cred); 2513 2514 return (err); 2515 } 2516 2517 static daddr_t 2518 fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) 2519 { 2520 const int biosize = fuse_iosize(vp); 2521 2522 return (off / biosize); 2523 } 2524 2525 static int 2526 fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *blksz) 2527 { 2528 off_t filesize; 2529 int err; 2530 const int biosize = fuse_iosize(vp); 2531 2532 err = fuse_vnode_size(vp, &filesize, NULL, NULL); 2533 if (err) { 2534 /* This will turn into a SIGBUS */ 2535 return (EIO); 2536 } else if ((off_t)lbn * biosize >= filesize) { 2537 *blksz = 0; 2538 } else if ((off_t)(lbn + 1) * biosize > filesize) { 2539 *blksz = filesize - (off_t)lbn *biosize; 2540 } else { 2541 *blksz = biosize; 2542 } 2543 return (0); 2544 } 2545 2546 /* 2547 struct vnop_getpages_args { 2548 struct vnode *a_vp; 2549 vm_page_t *a_m; 2550 int a_count; 2551 int a_reqpage; 2552 }; 2553 */ 2554 static int 2555 fuse_vnop_getpages(struct vop_getpages_args *ap) 2556 { 2557 struct vnode *vp = ap->a_vp; 2558 2559 if (!fsess_opt_mmap(vnode_mount(vp))) { 2560 SDT_PROBE2(fusefs, , vnops, trace, 1, 2561 "called on non-cacheable vnode??\n"); 2562 return (VM_PAGER_ERROR); 2563 } 2564 2565 return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, 2566 ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz)); 2567 } 2568 2569 static const char extattr_namespace_separator = '.'; 2570 2571 /* 2572 struct vop_getextattr_args { 2573 struct vop_generic_args a_gen; 2574 struct vnode *a_vp; 2575 int a_attrnamespace; 2576 const char *a_name; 2577 struct uio *a_uio; 2578 size_t *a_size; 2579 struct ucred *a_cred; 2580 struct thread *a_td; 2581 }; 2582 */ 2583 static int 2584 fuse_vnop_getextattr(struct vop_getextattr_args *ap) 2585 { 2586 struct vnode *vp = ap->a_vp; 2587 struct uio *uio = ap->a_uio; 2588 struct fuse_dispatcher fdi; 2589 struct fuse_getxattr_in *get_xattr_in; 2590 struct fuse_getxattr_out *get_xattr_out; 2591 struct mount *mp = vnode_mount(vp); 2592 struct thread *td = ap->a_td; 2593 struct ucred *cred = ap->a_cred; 2594 char *prefix; 2595 char *attr_str; 2596 size_t len; 2597 int err; 2598 2599 if (fuse_isdeadfs(vp)) 2600 return (ENXIO); 2601 2602 if (fsess_not_impl(mp, FUSE_GETXATTR)) 2603 return EOPNOTSUPP; 2604 2605 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); 2606 if (err) 2607 return err; 2608 2609 /* Default to looking for user attributes. */ 2610 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2611 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2612 else 2613 prefix = EXTATTR_NAMESPACE_USER_STRING; 2614 2615 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 2616 strlen(ap->a_name) + 1; 2617 2618 fdisp_init(&fdi, len + sizeof(*get_xattr_in)); 2619 fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred); 2620 2621 get_xattr_in = fdi.indata; 2622 /* 2623 * Check to see whether we're querying the available size or 2624 * issuing the actual request. If we pass in 0, we get back struct 2625 * fuse_getxattr_out. If we pass in a non-zero size, we get back 2626 * that much data, without the struct fuse_getxattr_out header. 2627 */ 2628 if (uio == NULL) 2629 get_xattr_in->size = 0; 2630 else 2631 get_xattr_in->size = uio->uio_resid; 2632 2633 attr_str = (char *)fdi.indata + sizeof(*get_xattr_in); 2634 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 2635 ap->a_name); 2636 2637 err = fdisp_wait_answ(&fdi); 2638 if (err != 0) { 2639 if (err == ENOSYS) { 2640 fsess_set_notimpl(mp, FUSE_GETXATTR); 2641 err = EOPNOTSUPP; 2642 } 2643 goto out; 2644 } 2645 2646 get_xattr_out = fdi.answ; 2647 2648 if (ap->a_size != NULL) 2649 *ap->a_size = get_xattr_out->size; 2650 2651 if (uio != NULL) 2652 err = uiomove(fdi.answ, fdi.iosize, uio); 2653 2654 out: 2655 fdisp_destroy(&fdi); 2656 return (err); 2657 } 2658 2659 /* 2660 struct vop_setextattr_args { 2661 struct vop_generic_args a_gen; 2662 struct vnode *a_vp; 2663 int a_attrnamespace; 2664 const char *a_name; 2665 struct uio *a_uio; 2666 struct ucred *a_cred; 2667 struct thread *a_td; 2668 }; 2669 */ 2670 static int 2671 fuse_vnop_setextattr(struct vop_setextattr_args *ap) 2672 { 2673 struct vnode *vp = ap->a_vp; 2674 struct uio *uio = ap->a_uio; 2675 struct fuse_dispatcher fdi; 2676 struct fuse_setxattr_in *set_xattr_in; 2677 struct mount *mp = vnode_mount(vp); 2678 struct thread *td = ap->a_td; 2679 struct ucred *cred = ap->a_cred; 2680 char *prefix; 2681 size_t len; 2682 char *attr_str; 2683 int err; 2684 2685 if (fuse_isdeadfs(vp)) 2686 return (ENXIO); 2687 2688 if (fsess_not_impl(mp, FUSE_SETXATTR)) 2689 return EOPNOTSUPP; 2690 2691 if (vfs_isrdonly(mp)) 2692 return EROFS; 2693 2694 /* Deleting xattrs must use VOP_DELETEEXTATTR instead */ 2695 if (ap->a_uio == NULL) { 2696 /* 2697 * If we got here as fallback from VOP_DELETEEXTATTR, then 2698 * return EOPNOTSUPP. 2699 */ 2700 if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) 2701 return (EOPNOTSUPP); 2702 else 2703 return (EINVAL); 2704 } 2705 2706 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, 2707 VWRITE); 2708 if (err) 2709 return err; 2710 2711 /* Default to looking for user attributes. */ 2712 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2713 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2714 else 2715 prefix = EXTATTR_NAMESPACE_USER_STRING; 2716 2717 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 2718 strlen(ap->a_name) + 1; 2719 2720 fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid); 2721 fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred); 2722 2723 set_xattr_in = fdi.indata; 2724 set_xattr_in->size = uio->uio_resid; 2725 2726 attr_str = (char *)fdi.indata + sizeof(*set_xattr_in); 2727 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 2728 ap->a_name); 2729 2730 err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len, 2731 uio->uio_resid, uio); 2732 if (err != 0) { 2733 goto out; 2734 } 2735 2736 err = fdisp_wait_answ(&fdi); 2737 2738 if (err == ENOSYS) { 2739 fsess_set_notimpl(mp, FUSE_SETXATTR); 2740 err = EOPNOTSUPP; 2741 } 2742 if (err == ERESTART) { 2743 /* Can't restart after calling uiomove */ 2744 err = EINTR; 2745 } 2746 2747 out: 2748 fdisp_destroy(&fdi); 2749 return (err); 2750 } 2751 2752 /* 2753 * The Linux / FUSE extended attribute list is simply a collection of 2754 * NUL-terminated strings. The FreeBSD extended attribute list is a single 2755 * byte length followed by a non-NUL terminated string. So, this allows 2756 * conversion of the Linux / FUSE format to the FreeBSD format in place. 2757 * Linux attribute names are reported with the namespace as a prefix (e.g. 2758 * "user.attribute_name"), but in FreeBSD they are reported without the 2759 * namespace prefix (e.g. "attribute_name"). So, we're going from: 2760 * 2761 * user.attr_name1\0user.attr_name2\0 2762 * 2763 * to: 2764 * 2765 * <num>attr_name1<num>attr_name2 2766 * 2767 * Where "<num>" is a single byte number of characters in the attribute name. 2768 * 2769 * Args: 2770 * prefix - exattr namespace prefix string 2771 * list, list_len - input list with namespace prefixes 2772 * bsd_list, bsd_list_len - output list compatible with bsd vfs 2773 */ 2774 static int 2775 fuse_xattrlist_convert(char *prefix, const char *list, int list_len, 2776 char *bsd_list, int *bsd_list_len) 2777 { 2778 int len, pos, dist_to_next, prefix_len; 2779 2780 pos = 0; 2781 *bsd_list_len = 0; 2782 prefix_len = strlen(prefix); 2783 2784 while (pos < list_len && list[pos] != '\0') { 2785 dist_to_next = strlen(&list[pos]) + 1; 2786 if (bcmp(&list[pos], prefix, prefix_len) == 0 && 2787 list[pos + prefix_len] == extattr_namespace_separator) { 2788 len = dist_to_next - 2789 (prefix_len + sizeof(extattr_namespace_separator)) - 1; 2790 if (len >= EXTATTR_MAXNAMELEN) 2791 return (ENAMETOOLONG); 2792 2793 bsd_list[*bsd_list_len] = len; 2794 memcpy(&bsd_list[*bsd_list_len + 1], 2795 &list[pos + prefix_len + 2796 sizeof(extattr_namespace_separator)], len); 2797 2798 *bsd_list_len += len + 1; 2799 } 2800 2801 pos += dist_to_next; 2802 } 2803 2804 return (0); 2805 } 2806 2807 /* 2808 * List extended attributes 2809 * 2810 * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which 2811 * has a number of differences compared to its FreeBSD equivalent, 2812 * extattr_list_file: 2813 * 2814 * - FUSE_LISTXATTR returns all extended attributes across all namespaces, 2815 * whereas listxattr(2) only returns attributes for a single namespace 2816 * - FUSE_LISTXATTR prepends each attribute name with "namespace." 2817 * - If the provided buffer is not large enough to hold the result, 2818 * FUSE_LISTXATTR should return ERANGE, whereas listxattr is expected to 2819 * return as many results as will fit. 2820 */ 2821 /* 2822 struct vop_listextattr_args { 2823 struct vop_generic_args a_gen; 2824 struct vnode *a_vp; 2825 int a_attrnamespace; 2826 struct uio *a_uio; 2827 size_t *a_size; 2828 struct ucred *a_cred; 2829 struct thread *a_td; 2830 }; 2831 */ 2832 static int 2833 fuse_vnop_listextattr(struct vop_listextattr_args *ap) 2834 { 2835 struct vnode *vp = ap->a_vp; 2836 struct uio *uio = ap->a_uio; 2837 struct fuse_dispatcher fdi; 2838 struct fuse_listxattr_in *list_xattr_in; 2839 struct fuse_listxattr_out *list_xattr_out; 2840 struct mount *mp = vnode_mount(vp); 2841 struct thread *td = ap->a_td; 2842 struct ucred *cred = ap->a_cred; 2843 char *prefix; 2844 char *bsd_list = NULL; 2845 char *linux_list; 2846 int bsd_list_len; 2847 int linux_list_len; 2848 int err; 2849 2850 if (fuse_isdeadfs(vp)) 2851 return (ENXIO); 2852 2853 if (fsess_not_impl(mp, FUSE_LISTXATTR)) 2854 return EOPNOTSUPP; 2855 2856 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); 2857 if (err) 2858 return err; 2859 2860 /* 2861 * Add space for a NUL and the period separator if enabled. 2862 * Default to looking for user attributes. 2863 */ 2864 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2865 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2866 else 2867 prefix = EXTATTR_NAMESPACE_USER_STRING; 2868 2869 fdisp_init(&fdi, sizeof(*list_xattr_in)); 2870 fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); 2871 2872 /* 2873 * Retrieve Linux / FUSE compatible list size. 2874 */ 2875 list_xattr_in = fdi.indata; 2876 list_xattr_in->size = 0; 2877 2878 err = fdisp_wait_answ(&fdi); 2879 if (err != 0) { 2880 if (err == ENOSYS) { 2881 fsess_set_notimpl(mp, FUSE_LISTXATTR); 2882 err = EOPNOTSUPP; 2883 } 2884 goto out; 2885 } 2886 2887 list_xattr_out = fdi.answ; 2888 linux_list_len = list_xattr_out->size; 2889 if (linux_list_len == 0) { 2890 if (ap->a_size != NULL) 2891 *ap->a_size = linux_list_len; 2892 goto out; 2893 } 2894 2895 /* 2896 * Retrieve Linux / FUSE compatible list values. 2897 */ 2898 fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); 2899 list_xattr_in = fdi.indata; 2900 list_xattr_in->size = linux_list_len; 2901 2902 err = fdisp_wait_answ(&fdi); 2903 if (err == ERANGE) { 2904 /* 2905 * Race detected. The attribute list must've grown since the 2906 * first FUSE_LISTXATTR call. Start over. Go all the way back 2907 * to userland so we can process signals, if necessary, before 2908 * restarting. 2909 */ 2910 err = ERESTART; 2911 goto out; 2912 } else if (err != 0) 2913 goto out; 2914 2915 linux_list = fdi.answ; 2916 /* FUSE doesn't allow the server to return more data than requested */ 2917 if (fdi.iosize > linux_list_len) { 2918 struct fuse_data *data = fuse_get_mpdata(mp); 2919 2920 fuse_warn(data, FSESS_WARN_LSEXTATTR_LONG, 2921 "server returned " 2922 "more extended attribute data than requested; " 2923 "should've returned ERANGE instead."); 2924 } else { 2925 /* But returning less data is fine */ 2926 linux_list_len = fdi.iosize; 2927 } 2928 2929 /* 2930 * Retrieve the BSD compatible list values. 2931 * The Linux / FUSE attribute list format isn't the same 2932 * as FreeBSD's format. So we need to transform it into 2933 * FreeBSD's format before giving it to the user. 2934 */ 2935 bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK); 2936 err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len, 2937 bsd_list, &bsd_list_len); 2938 if (err != 0) 2939 goto out; 2940 2941 if (ap->a_size != NULL) 2942 *ap->a_size = bsd_list_len; 2943 2944 if (uio != NULL) 2945 err = uiomove(bsd_list, bsd_list_len, uio); 2946 2947 out: 2948 free(bsd_list, M_TEMP); 2949 fdisp_destroy(&fdi); 2950 return (err); 2951 } 2952 2953 /* 2954 struct vop_deallocate_args { 2955 struct vop_generic_args a_gen; 2956 struct vnode *a_vp; 2957 off_t *a_offset; 2958 off_t *a_len; 2959 int a_flags; 2960 int a_ioflag; 2961 struct ucred *a_cred; 2962 }; 2963 */ 2964 static int 2965 fuse_vnop_deallocate(struct vop_deallocate_args *ap) 2966 { 2967 struct vnode *vp = ap->a_vp; 2968 struct mount *mp = vnode_mount(vp); 2969 struct fuse_filehandle *fufh; 2970 struct fuse_dispatcher fdi; 2971 struct fuse_fallocate_in *ffi; 2972 struct ucred *cred = ap->a_cred; 2973 pid_t pid = curthread->td_proc->p_pid; 2974 off_t *len = ap->a_len; 2975 off_t *offset = ap->a_offset; 2976 int ioflag = ap->a_ioflag; 2977 off_t filesize; 2978 int err; 2979 bool closefufh = false; 2980 2981 if (fuse_isdeadfs(vp)) 2982 return (ENXIO); 2983 2984 if (vfs_isrdonly(mp)) 2985 return (EROFS); 2986 2987 if (fsess_not_impl(mp, FUSE_FALLOCATE)) 2988 goto fallback; 2989 2990 err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 2991 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 2992 /* 2993 * nfsd will do I/O without first doing VOP_OPEN. We 2994 * must implicitly open the file here 2995 */ 2996 err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred); 2997 closefufh = true; 2998 } 2999 if (err) 3000 return (err); 3001 3002 fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); 3003 3004 err = fuse_vnode_size(vp, &filesize, cred, curthread); 3005 if (err) 3006 goto out; 3007 fuse_inval_buf_range(vp, filesize, *offset, *offset + *len); 3008 3009 fdisp_init(&fdi, sizeof(*ffi)); 3010 fdisp_make_vp(&fdi, FUSE_FALLOCATE, vp, curthread, cred); 3011 ffi = fdi.indata; 3012 ffi->fh = fufh->fh_id; 3013 ffi->offset = *offset; 3014 ffi->length = *len; 3015 /* 3016 * FreeBSD's fspacectl is equivalent to Linux's fallocate with 3017 * mode == FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE 3018 */ 3019 ffi->mode = FUSE_FALLOC_FL_PUNCH_HOLE | FUSE_FALLOC_FL_KEEP_SIZE; 3020 err = fdisp_wait_answ(&fdi); 3021 3022 if (err == ENOSYS) { 3023 fdisp_destroy(&fdi); 3024 fsess_set_notimpl(mp, FUSE_FALLOCATE); 3025 goto fallback; 3026 } else if (err == EOPNOTSUPP) { 3027 /* 3028 * The file system server does not support FUSE_FALLOCATE with 3029 * the supplied mode for this particular file. 3030 */ 3031 fdisp_destroy(&fdi); 3032 goto fallback; 3033 } else if (!err) { 3034 /* 3035 * Clip the returned offset to EoF. Do it here rather than 3036 * before FUSE_FALLOCATE just in case the kernel's cached file 3037 * size is out of date. Unfortunately, FUSE does not return 3038 * any information about filesize from that operation. 3039 */ 3040 *offset = MIN(*offset + *len, filesize); 3041 *len = 0; 3042 fuse_vnode_undirty_cached_timestamps(vp, false); 3043 fuse_internal_clear_suid_on_write(vp, cred, curthread); 3044 3045 if (ioflag & IO_SYNC) 3046 err = fuse_internal_fsync(vp, curthread, MNT_WAIT, 3047 false); 3048 } 3049 3050 out: 3051 fdisp_destroy(&fdi); 3052 if (closefufh) 3053 fuse_filehandle_close(vp, fufh, curthread, cred); 3054 3055 return (err); 3056 3057 fallback: 3058 if (closefufh) 3059 fuse_filehandle_close(vp, fufh, curthread, cred); 3060 3061 return (vop_stddeallocate(ap)); 3062 } 3063 3064 /* 3065 struct vop_deleteextattr_args { 3066 struct vop_generic_args a_gen; 3067 struct vnode *a_vp; 3068 int a_attrnamespace; 3069 const char *a_name; 3070 struct ucred *a_cred; 3071 struct thread *a_td; 3072 }; 3073 */ 3074 static int 3075 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) 3076 { 3077 struct vnode *vp = ap->a_vp; 3078 struct fuse_dispatcher fdi; 3079 struct mount *mp = vnode_mount(vp); 3080 struct thread *td = ap->a_td; 3081 struct ucred *cred = ap->a_cred; 3082 char *prefix; 3083 size_t len; 3084 char *attr_str; 3085 int err; 3086 3087 if (fuse_isdeadfs(vp)) 3088 return (ENXIO); 3089 3090 if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) 3091 return EOPNOTSUPP; 3092 3093 if (vfs_isrdonly(mp)) 3094 return EROFS; 3095 3096 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, 3097 VWRITE); 3098 if (err) 3099 return err; 3100 3101 /* Default to looking for user attributes. */ 3102 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 3103 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 3104 else 3105 prefix = EXTATTR_NAMESPACE_USER_STRING; 3106 3107 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 3108 strlen(ap->a_name) + 1; 3109 3110 fdisp_init(&fdi, len); 3111 fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred); 3112 3113 attr_str = fdi.indata; 3114 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 3115 ap->a_name); 3116 3117 err = fdisp_wait_answ(&fdi); 3118 if (err == ENOSYS) { 3119 fsess_set_notimpl(mp, FUSE_REMOVEXATTR); 3120 err = EOPNOTSUPP; 3121 } 3122 3123 fdisp_destroy(&fdi); 3124 return (err); 3125 } 3126 3127 /* 3128 struct vnop_print_args { 3129 struct vnode *a_vp; 3130 }; 3131 */ 3132 static int 3133 fuse_vnop_print(struct vop_print_args *ap) 3134 { 3135 struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp); 3136 3137 printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n", 3138 (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid, 3139 (uintmax_t)fvdat->nlookup, 3140 fvdat->flag); 3141 3142 return 0; 3143 } 3144 3145 /* 3146 * Get an NFS filehandle for a FUSE file. 3147 * 3148 * This will only work for FUSE file systems that guarantee the uniqueness of 3149 * nodeid:generation, which most don't. 3150 */ 3151 /* 3152 vop_vptofh { 3153 IN struct vnode *a_vp; 3154 IN struct fid *a_fhp; 3155 }; 3156 */ 3157 static int 3158 fuse_vnop_vptofh(struct vop_vptofh_args *ap) 3159 { 3160 struct vnode *vp = ap->a_vp; 3161 struct fuse_vnode_data *fvdat = VTOFUD(vp); 3162 struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp); 3163 _Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid), 3164 "FUSE fid type is too big"); 3165 struct mount *mp = vnode_mount(vp); 3166 struct fuse_data *data = fuse_get_mpdata(mp); 3167 struct vattr va; 3168 int err; 3169 3170 if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) { 3171 /* NFS requires lookups for "." and ".." */ 3172 SDT_PROBE2(fusefs, , vnops, trace, 1, 3173 "VOP_VPTOFH without FUSE_EXPORT_SUPPORT"); 3174 return EOPNOTSUPP; 3175 } 3176 if ((mp->mnt_flag & MNT_EXPORTED) && 3177 !(data->dataflags & FSESS_NO_OPENDIR_SUPPORT)) 3178 { 3179 /* 3180 * NFS is stateless, so nfsd must reopen a directory on every 3181 * call to VOP_READDIR, passing in the d_off field from the 3182 * final dirent of the previous invocation. But without 3183 * FUSE_NO_OPENDIR_SUPPORT, the FUSE protocol does not 3184 * guarantee that d_off will be valid after a directory is 3185 * closed and reopened. So prohibit exporting FUSE file 3186 * systems that don't set that flag. 3187 * 3188 * But userspace NFS servers don't have this problem. 3189 */ 3190 SDT_PROBE2(fusefs, , vnops, trace, 1, 3191 "VOP_VPTOFH without FUSE_NO_OPENDIR_SUPPORT"); 3192 return EOPNOTSUPP; 3193 } 3194 3195 err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); 3196 if (err) 3197 return err; 3198 3199 /*ip = VTOI(ap->a_vp);*/ 3200 /*ufhp = (struct ufid *)ap->a_fhp;*/ 3201 fhp->len = sizeof(struct fuse_fid); 3202 fhp->nid = fvdat->nid; 3203 if (fvdat->generation <= UINT32_MAX) 3204 fhp->gen = fvdat->generation; 3205 else 3206 return EOVERFLOW; 3207 return (0); 3208 } 3209