1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Google Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Copyright (C) 2005 Csaba Henk. 34 * All rights reserved. 35 * 36 * Copyright (c) 2019 The FreeBSD Foundation 37 * 38 * Portions of this software were developed by BFF Storage Systems, LLC under 39 * sponsorship from the FreeBSD Foundation. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 */ 62 63 #include <sys/cdefs.h> 64 #include <sys/param.h> 65 #include <sys/module.h> 66 #include <sys/systm.h> 67 #include <sys/errno.h> 68 #include <sys/kernel.h> 69 #include <sys/conf.h> 70 #include <sys/filio.h> 71 #include <sys/uio.h> 72 #include <sys/malloc.h> 73 #include <sys/queue.h> 74 #include <sys/limits.h> 75 #include <sys/lock.h> 76 #include <sys/rwlock.h> 77 #include <sys/sx.h> 78 #include <sys/proc.h> 79 #include <sys/mount.h> 80 #include <sys/vnode.h> 81 #include <sys/namei.h> 82 #include <sys/extattr.h> 83 #include <sys/stat.h> 84 #include <sys/unistd.h> 85 #include <sys/filedesc.h> 86 #include <sys/file.h> 87 #include <sys/fcntl.h> 88 #include <sys/dirent.h> 89 #include <sys/bio.h> 90 #include <sys/buf.h> 91 #include <sys/sysctl.h> 92 #include <sys/vmmeter.h> 93 94 #include <vm/vm.h> 95 #include <vm/vm_extern.h> 96 #include <vm/pmap.h> 97 #include <vm/vm_map.h> 98 #include <vm/vm_page.h> 99 #include <vm/vm_param.h> 100 #include <vm/vm_object.h> 101 #include <vm/vm_pager.h> 102 #include <vm/vnode_pager.h> 103 #include <vm/vm_object.h> 104 105 #include "fuse.h" 106 #include "fuse_file.h" 107 #include "fuse_internal.h" 108 #include "fuse_ipc.h" 109 #include "fuse_node.h" 110 #include "fuse_io.h" 111 112 #include <sys/priv.h> 113 114 /* Maximum number of hardlinks to a single FUSE file */ 115 #define FUSE_LINK_MAX UINT32_MAX 116 117 SDT_PROVIDER_DECLARE(fusefs); 118 /* 119 * Fuse trace probe: 120 * arg0: verbosity. Higher numbers give more verbose messages 121 * arg1: Textual message 122 */ 123 SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*"); 124 125 /* vnode ops */ 126 static vop_access_t fuse_vnop_access; 127 static vop_advlock_t fuse_vnop_advlock; 128 static vop_allocate_t fuse_vnop_allocate; 129 static vop_bmap_t fuse_vnop_bmap; 130 static vop_close_t fuse_fifo_close; 131 static vop_close_t fuse_vnop_close; 132 static vop_copy_file_range_t fuse_vnop_copy_file_range; 133 static vop_create_t fuse_vnop_create; 134 static vop_deallocate_t fuse_vnop_deallocate; 135 static vop_deleteextattr_t fuse_vnop_deleteextattr; 136 static vop_fdatasync_t fuse_vnop_fdatasync; 137 static vop_fsync_t fuse_vnop_fsync; 138 static vop_getattr_t fuse_vnop_getattr; 139 static vop_getextattr_t fuse_vnop_getextattr; 140 static vop_inactive_t fuse_vnop_inactive; 141 static vop_ioctl_t fuse_vnop_ioctl; 142 static vop_link_t fuse_vnop_link; 143 static vop_listextattr_t fuse_vnop_listextattr; 144 static vop_lookup_t fuse_vnop_lookup; 145 static vop_mkdir_t fuse_vnop_mkdir; 146 static vop_mknod_t fuse_vnop_mknod; 147 static vop_open_t fuse_vnop_open; 148 static vop_pathconf_t fuse_vnop_pathconf; 149 static vop_read_t fuse_vnop_read; 150 static vop_readdir_t fuse_vnop_readdir; 151 static vop_readlink_t fuse_vnop_readlink; 152 static vop_reclaim_t fuse_vnop_reclaim; 153 static vop_remove_t fuse_vnop_remove; 154 static vop_rename_t fuse_vnop_rename; 155 static vop_rmdir_t fuse_vnop_rmdir; 156 static vop_setattr_t fuse_vnop_setattr; 157 static vop_setextattr_t fuse_vnop_setextattr; 158 static vop_strategy_t fuse_vnop_strategy; 159 static vop_symlink_t fuse_vnop_symlink; 160 static vop_write_t fuse_vnop_write; 161 static vop_getpages_t fuse_vnop_getpages; 162 static vop_print_t fuse_vnop_print; 163 static vop_vptofh_t fuse_vnop_vptofh; 164 165 struct vop_vector fuse_fifoops = { 166 .vop_default = &fifo_specops, 167 .vop_access = fuse_vnop_access, 168 .vop_close = fuse_fifo_close, 169 .vop_fsync = fuse_vnop_fsync, 170 .vop_getattr = fuse_vnop_getattr, 171 .vop_inactive = fuse_vnop_inactive, 172 .vop_pathconf = fuse_vnop_pathconf, 173 .vop_print = fuse_vnop_print, 174 .vop_read = VOP_PANIC, 175 .vop_reclaim = fuse_vnop_reclaim, 176 .vop_setattr = fuse_vnop_setattr, 177 .vop_write = VOP_PANIC, 178 .vop_vptofh = fuse_vnop_vptofh, 179 }; 180 VFS_VOP_VECTOR_REGISTER(fuse_fifoops); 181 182 struct vop_vector fuse_vnops = { 183 .vop_allocate = fuse_vnop_allocate, 184 .vop_default = &default_vnodeops, 185 .vop_access = fuse_vnop_access, 186 .vop_advlock = fuse_vnop_advlock, 187 .vop_bmap = fuse_vnop_bmap, 188 .vop_close = fuse_vnop_close, 189 .vop_copy_file_range = fuse_vnop_copy_file_range, 190 .vop_create = fuse_vnop_create, 191 .vop_deallocate = fuse_vnop_deallocate, 192 .vop_deleteextattr = fuse_vnop_deleteextattr, 193 .vop_fsync = fuse_vnop_fsync, 194 .vop_fdatasync = fuse_vnop_fdatasync, 195 .vop_getattr = fuse_vnop_getattr, 196 .vop_getextattr = fuse_vnop_getextattr, 197 .vop_inactive = fuse_vnop_inactive, 198 .vop_ioctl = fuse_vnop_ioctl, 199 .vop_link = fuse_vnop_link, 200 .vop_listextattr = fuse_vnop_listextattr, 201 .vop_lookup = fuse_vnop_lookup, 202 .vop_mkdir = fuse_vnop_mkdir, 203 .vop_mknod = fuse_vnop_mknod, 204 .vop_open = fuse_vnop_open, 205 .vop_pathconf = fuse_vnop_pathconf, 206 /* 207 * TODO: implement vop_poll after upgrading to protocol 7.21. 208 * FUSE_POLL was added in protocol 7.11, but it's kind of broken until 209 * 7.21, which adds the ability for the client to choose which poll 210 * events it wants, and for a client to deregister a file handle 211 */ 212 .vop_read = fuse_vnop_read, 213 .vop_readdir = fuse_vnop_readdir, 214 .vop_readlink = fuse_vnop_readlink, 215 .vop_reclaim = fuse_vnop_reclaim, 216 .vop_remove = fuse_vnop_remove, 217 .vop_rename = fuse_vnop_rename, 218 .vop_rmdir = fuse_vnop_rmdir, 219 .vop_setattr = fuse_vnop_setattr, 220 .vop_setextattr = fuse_vnop_setextattr, 221 .vop_strategy = fuse_vnop_strategy, 222 .vop_symlink = fuse_vnop_symlink, 223 .vop_write = fuse_vnop_write, 224 .vop_getpages = fuse_vnop_getpages, 225 .vop_print = fuse_vnop_print, 226 .vop_vptofh = fuse_vnop_vptofh, 227 }; 228 VFS_VOP_VECTOR_REGISTER(fuse_vnops); 229 230 /* Check permission for extattr operations, much like extattr_check_cred */ 231 static int 232 fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred, 233 struct thread *td, accmode_t accmode) 234 { 235 struct mount *mp = vnode_mount(vp); 236 struct fuse_data *data = fuse_get_mpdata(mp); 237 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; 238 239 /* 240 * Kernel-invoked always succeeds. 241 */ 242 if (cred == NOCRED) 243 return (0); 244 245 /* 246 * Do not allow privileged processes in jail to directly manipulate 247 * system attributes. 248 */ 249 switch (ns) { 250 case EXTATTR_NAMESPACE_SYSTEM: 251 if (default_permissions) { 252 return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); 253 } 254 return (0); 255 case EXTATTR_NAMESPACE_USER: 256 if (default_permissions) { 257 return (fuse_internal_access(vp, accmode, td, cred)); 258 } 259 return (0); 260 default: 261 return (EPERM); 262 } 263 } 264 265 /* Get a filehandle for a directory */ 266 static int 267 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp, 268 struct ucred *cred, pid_t pid) 269 { 270 if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0) 271 return 0; 272 return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid); 273 } 274 275 /* Send FUSE_FLUSH for this vnode */ 276 static int 277 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) 278 { 279 struct fuse_flush_in *ffi; 280 struct fuse_filehandle *fufh; 281 struct fuse_dispatcher fdi; 282 struct thread *td = curthread; 283 struct mount *mp = vnode_mount(vp); 284 int err; 285 286 if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH)) 287 return 0; 288 289 err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); 290 if (err) 291 return err; 292 293 fdisp_init(&fdi, sizeof(*ffi)); 294 fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred); 295 ffi = fdi.indata; 296 ffi->fh = fufh->fh_id; 297 /* 298 * If the file has a POSIX lock then we're supposed to set lock_owner. 299 * If not, then lock_owner is undefined. So we may as well always set 300 * it. 301 */ 302 ffi->lock_owner = td->td_proc->p_pid; 303 304 err = fdisp_wait_answ(&fdi); 305 if (err == ENOSYS) { 306 fsess_set_notimpl(mp, FUSE_FLUSH); 307 err = 0; 308 } 309 fdisp_destroy(&fdi); 310 return err; 311 } 312 313 /* Close wrapper for fifos. */ 314 static int 315 fuse_fifo_close(struct vop_close_args *ap) 316 { 317 return (fifo_specops.vop_close(ap)); 318 } 319 320 /* Invalidate a range of cached data, whether dirty of not */ 321 static int 322 fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end) 323 { 324 struct buf *bp; 325 daddr_t left_lbn, end_lbn, right_lbn; 326 off_t new_filesize; 327 int iosize, left_on, right_on, right_blksize; 328 329 iosize = fuse_iosize(vp); 330 left_lbn = start / iosize; 331 end_lbn = howmany(end, iosize); 332 left_on = start & (iosize - 1); 333 if (left_on != 0) { 334 bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0); 335 if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) { 336 /* 337 * Flush the dirty buffer, because we don't have a 338 * byte-granular way to record which parts of the 339 * buffer are valid. 340 */ 341 bwrite(bp); 342 if (bp->b_error) 343 return (bp->b_error); 344 } else { 345 brelse(bp); 346 } 347 } 348 right_on = end & (iosize - 1); 349 if (right_on != 0) { 350 right_lbn = end / iosize; 351 new_filesize = MAX(filesize, end); 352 right_blksize = MIN(iosize, new_filesize - iosize * right_lbn); 353 bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0); 354 if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) { 355 /* 356 * Flush the dirty buffer, because we don't have a 357 * byte-granular way to record which parts of the 358 * buffer are valid. 359 */ 360 bwrite(bp); 361 if (bp->b_error) 362 return (bp->b_error); 363 } else { 364 brelse(bp); 365 } 366 } 367 368 v_inval_buf_range(vp, left_lbn, end_lbn, iosize); 369 return (0); 370 } 371 372 373 /* Send FUSE_LSEEK for this node */ 374 static int 375 fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred, 376 pid_t pid, off_t *offp, int whence) 377 { 378 struct fuse_dispatcher fdi; 379 struct fuse_filehandle *fufh; 380 struct fuse_lseek_in *flsi; 381 struct fuse_lseek_out *flso; 382 struct mount *mp = vnode_mount(vp); 383 int err; 384 385 ASSERT_VOP_LOCKED(vp, __func__); 386 387 err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid); 388 if (err) 389 return (err); 390 fdisp_init(&fdi, sizeof(*flsi)); 391 fdisp_make_vp(&fdi, FUSE_LSEEK, vp, td, cred); 392 flsi = fdi.indata; 393 flsi->fh = fufh->fh_id; 394 flsi->offset = *offp; 395 flsi->whence = whence; 396 err = fdisp_wait_answ(&fdi); 397 if (err == ENOSYS) { 398 fsess_set_notimpl(mp, FUSE_LSEEK); 399 } else if (err == 0) { 400 fsess_set_impl(mp, FUSE_LSEEK); 401 flso = fdi.answ; 402 *offp = flso->offset; 403 } 404 fdisp_destroy(&fdi); 405 406 return (err); 407 } 408 409 /* 410 struct vnop_access_args { 411 struct vnode *a_vp; 412 #if VOP_ACCESS_TAKES_ACCMODE_T 413 accmode_t a_accmode; 414 #else 415 int a_mode; 416 #endif 417 struct ucred *a_cred; 418 struct thread *a_td; 419 }; 420 */ 421 static int 422 fuse_vnop_access(struct vop_access_args *ap) 423 { 424 struct vnode *vp = ap->a_vp; 425 int accmode = ap->a_accmode; 426 struct ucred *cred = ap->a_cred; 427 428 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 429 430 int err; 431 432 if (fuse_isdeadfs(vp)) { 433 if (vnode_isvroot(vp)) { 434 return 0; 435 } 436 return ENXIO; 437 } 438 if (!(data->dataflags & FSESS_INITED)) { 439 if (vnode_isvroot(vp)) { 440 if (priv_check_cred(cred, PRIV_VFS_ADMIN) || 441 (fuse_match_cred(data->daemoncred, cred) == 0)) { 442 return 0; 443 } 444 } 445 return EBADF; 446 } 447 if (vnode_islnk(vp)) { 448 return 0; 449 } 450 451 err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred); 452 return err; 453 } 454 455 /* 456 * struct vop_advlock_args { 457 * struct vop_generic_args a_gen; 458 * struct vnode *a_vp; 459 * void *a_id; 460 * int a_op; 461 * struct flock *a_fl; 462 * int a_flags; 463 * } 464 */ 465 static int 466 fuse_vnop_advlock(struct vop_advlock_args *ap) 467 { 468 struct vnode *vp = ap->a_vp; 469 struct flock *fl = ap->a_fl; 470 struct thread *td = curthread; 471 struct ucred *cred = td->td_ucred; 472 pid_t pid = td->td_proc->p_pid; 473 struct fuse_filehandle *fufh; 474 struct fuse_dispatcher fdi; 475 struct fuse_lk_in *fli; 476 struct fuse_lk_out *flo; 477 struct vattr vattr; 478 enum fuse_opcode op; 479 off_t size, start; 480 int dataflags, err; 481 int flags = ap->a_flags; 482 483 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; 484 485 if (fuse_isdeadfs(vp)) { 486 return ENXIO; 487 } 488 489 switch(ap->a_op) { 490 case F_GETLK: 491 op = FUSE_GETLK; 492 break; 493 case F_SETLK: 494 if (flags & F_WAIT) 495 op = FUSE_SETLKW; 496 else 497 op = FUSE_SETLK; 498 break; 499 case F_UNLCK: 500 op = FUSE_SETLK; 501 break; 502 default: 503 return EINVAL; 504 } 505 506 if (!(dataflags & FSESS_POSIX_LOCKS)) 507 return vop_stdadvlock(ap); 508 /* FUSE doesn't properly support flock until protocol 7.17 */ 509 if (flags & F_FLOCK) 510 return vop_stdadvlock(ap); 511 512 vn_lock(vp, LK_SHARED | LK_RETRY); 513 514 switch (fl->l_whence) { 515 case SEEK_SET: 516 case SEEK_CUR: 517 /* 518 * Caller is responsible for adding any necessary offset 519 * when SEEK_CUR is used. 520 */ 521 start = fl->l_start; 522 break; 523 524 case SEEK_END: 525 err = fuse_internal_getattr(vp, &vattr, cred, td); 526 if (err) 527 goto out; 528 size = vattr.va_size; 529 if (size > OFF_MAX || 530 (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) { 531 err = EOVERFLOW; 532 goto out; 533 } 534 start = size + fl->l_start; 535 break; 536 537 default: 538 return (EINVAL); 539 } 540 541 err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); 542 if (err) 543 goto out; 544 545 fdisp_init(&fdi, sizeof(*fli)); 546 547 fdisp_make_vp(&fdi, op, vp, td, cred); 548 fli = fdi.indata; 549 fli->fh = fufh->fh_id; 550 fli->owner = td->td_proc->p_pid; 551 fli->lk.start = start; 552 if (fl->l_len != 0) 553 fli->lk.end = start + fl->l_len - 1; 554 else 555 fli->lk.end = INT64_MAX; 556 fli->lk.type = fl->l_type; 557 fli->lk.pid = td->td_proc->p_pid; 558 559 err = fdisp_wait_answ(&fdi); 560 fdisp_destroy(&fdi); 561 562 if (err == 0 && op == FUSE_GETLK) { 563 flo = fdi.answ; 564 fl->l_type = flo->lk.type; 565 fl->l_whence = SEEK_SET; 566 if (flo->lk.type != F_UNLCK) { 567 fl->l_pid = flo->lk.pid; 568 fl->l_start = flo->lk.start; 569 if (flo->lk.end == INT64_MAX) 570 fl->l_len = 0; 571 else 572 fl->l_len = flo->lk.end - flo->lk.start + 1; 573 fl->l_start = flo->lk.start; 574 } 575 } 576 577 out: 578 VOP_UNLOCK(vp); 579 return err; 580 } 581 582 static int 583 fuse_vnop_allocate(struct vop_allocate_args *ap) 584 { 585 struct vnode *vp = ap->a_vp; 586 off_t *len = ap->a_len; 587 off_t *offset = ap->a_offset; 588 struct ucred *cred = ap->a_cred; 589 struct fuse_filehandle *fufh; 590 struct mount *mp = vnode_mount(vp); 591 struct fuse_dispatcher fdi; 592 struct fuse_fallocate_in *ffi; 593 struct uio io; 594 pid_t pid = curthread->td_proc->p_pid; 595 struct fuse_vnode_data *fvdat = VTOFUD(vp); 596 off_t filesize; 597 int err; 598 599 if (fuse_isdeadfs(vp)) 600 return (ENXIO); 601 602 switch (vp->v_type) { 603 case VFIFO: 604 return (ESPIPE); 605 case VLNK: 606 case VREG: 607 if (vfs_isrdonly(mp)) 608 return (EROFS); 609 break; 610 default: 611 return (ENODEV); 612 } 613 614 if (vfs_isrdonly(mp)) 615 return (EROFS); 616 617 if (fsess_not_impl(mp, FUSE_FALLOCATE)) 618 return (EINVAL); 619 620 io.uio_offset = *offset; 621 io.uio_resid = *len; 622 err = vn_rlimit_fsize(vp, &io, curthread); 623 if (err) 624 return (err); 625 626 err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 627 if (err) 628 return (err); 629 630 fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); 631 632 err = fuse_vnode_size(vp, &filesize, cred, curthread); 633 if (err) 634 return (err); 635 fuse_inval_buf_range(vp, filesize, *offset, *offset + *len); 636 637 fdisp_init(&fdi, sizeof(*ffi)); 638 fdisp_make_vp(&fdi, FUSE_FALLOCATE, vp, curthread, cred); 639 ffi = fdi.indata; 640 ffi->fh = fufh->fh_id; 641 ffi->offset = *offset; 642 ffi->length = *len; 643 ffi->mode = 0; 644 err = fdisp_wait_answ(&fdi); 645 646 if (err == ENOSYS) { 647 fsess_set_notimpl(mp, FUSE_FALLOCATE); 648 err = EINVAL; 649 } else if (err == EOPNOTSUPP) { 650 /* 651 * The file system server does not support FUSE_FALLOCATE with 652 * the supplied mode for this particular file. 653 */ 654 err = EINVAL; 655 } else if (!err) { 656 *offset += *len; 657 *len = 0; 658 fuse_vnode_undirty_cached_timestamps(vp, false); 659 fuse_internal_clear_suid_on_write(vp, cred, curthread); 660 if (*offset > fvdat->cached_attrs.va_size) { 661 fuse_vnode_setsize(vp, *offset, false); 662 getnanouptime(&fvdat->last_local_modify); 663 } 664 } 665 666 fdisp_destroy(&fdi); 667 return (err); 668 } 669 670 /* { 671 struct vnode *a_vp; 672 daddr_t a_bn; 673 struct bufobj **a_bop; 674 daddr_t *a_bnp; 675 int *a_runp; 676 int *a_runb; 677 } */ 678 static int 679 fuse_vnop_bmap(struct vop_bmap_args *ap) 680 { 681 struct vnode *vp = ap->a_vp; 682 struct bufobj **bo = ap->a_bop; 683 struct thread *td = curthread; 684 struct mount *mp; 685 struct fuse_dispatcher fdi; 686 struct fuse_bmap_in *fbi; 687 struct fuse_bmap_out *fbo; 688 struct fuse_data *data; 689 struct fuse_vnode_data *fvdat = VTOFUD(vp); 690 uint64_t biosize; 691 off_t fsize; 692 daddr_t lbn = ap->a_bn; 693 daddr_t *pbn = ap->a_bnp; 694 int *runp = ap->a_runp; 695 int *runb = ap->a_runb; 696 int error = 0; 697 int maxrun; 698 699 if (fuse_isdeadfs(vp)) { 700 return ENXIO; 701 } 702 703 mp = vnode_mount(vp); 704 data = fuse_get_mpdata(mp); 705 biosize = fuse_iosize(vp); 706 maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1, 707 data->max_readahead_blocks); 708 709 if (bo != NULL) 710 *bo = &vp->v_bufobj; 711 712 /* 713 * The FUSE_BMAP operation does not include the runp and runb 714 * variables, so we must guess. Report nonzero contiguous runs so 715 * cluster_read will combine adjacent reads. It's worthwhile to reduce 716 * upcalls even if we don't know the true physical layout of the file. 717 * 718 * FUSE file systems may opt out of read clustering in two ways: 719 * * mounting with -onoclusterr 720 * * Setting max_readahead <= maxbcachebuf during FUSE_INIT 721 */ 722 if (runb != NULL) 723 *runb = MIN(lbn, maxrun); 724 if (runp != NULL && maxrun == 0) 725 *runp = 0; 726 else if (runp != NULL) { 727 /* 728 * If the file's size is cached, use that value to calculate 729 * runp, even if the cache is expired. runp is only advisory, 730 * and the risk of getting it wrong is not worth the cost of 731 * another upcall. 732 */ 733 if (fvdat->cached_attrs.va_size != VNOVAL) 734 fsize = fvdat->cached_attrs.va_size; 735 else 736 error = fuse_vnode_size(vp, &fsize, td->td_ucred, td); 737 if (error == 0) 738 *runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1), 739 maxrun); 740 else 741 *runp = 0; 742 } 743 744 if (fsess_maybe_impl(mp, FUSE_BMAP)) { 745 fdisp_init(&fdi, sizeof(*fbi)); 746 fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred); 747 fbi = fdi.indata; 748 fbi->block = lbn; 749 fbi->blocksize = biosize; 750 error = fdisp_wait_answ(&fdi); 751 if (error == ENOSYS) { 752 fdisp_destroy(&fdi); 753 fsess_set_notimpl(mp, FUSE_BMAP); 754 error = 0; 755 } else { 756 fbo = fdi.answ; 757 if (error == 0 && pbn != NULL) 758 *pbn = fbo->block; 759 fdisp_destroy(&fdi); 760 return error; 761 } 762 } 763 764 /* If the daemon doesn't support BMAP, make up a sensible default */ 765 if (pbn != NULL) 766 *pbn = lbn * btodb(biosize); 767 return (error); 768 } 769 770 /* 771 struct vop_close_args { 772 struct vnode *a_vp; 773 int a_fflag; 774 struct ucred *a_cred; 775 struct thread *a_td; 776 }; 777 */ 778 static int 779 fuse_vnop_close(struct vop_close_args *ap) 780 { 781 struct vnode *vp = ap->a_vp; 782 struct mount *mp = vnode_mount(vp); 783 struct ucred *cred = ap->a_cred; 784 int fflag = ap->a_fflag; 785 struct thread *td = ap->a_td; 786 pid_t pid = td->td_proc->p_pid; 787 struct fuse_vnode_data *fvdat = VTOFUD(vp); 788 int err = 0; 789 790 if (fuse_isdeadfs(vp)) 791 return 0; 792 if (vnode_isdir(vp)) 793 return 0; 794 if (fflag & IO_NDELAY) 795 return 0; 796 797 err = fuse_flush(vp, cred, pid, fflag); 798 if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) { 799 struct vattr vap; 800 struct fuse_data *data; 801 int dataflags; 802 int access_e = 0; 803 804 data = fuse_get_mpdata(mp); 805 dataflags = data->dataflags; 806 if (dataflags & FSESS_DEFAULT_PERMISSIONS) { 807 struct vattr va; 808 809 fuse_internal_getattr(vp, &va, cred, td); 810 access_e = vaccess(vp->v_type, va.va_mode, va.va_uid, 811 va.va_gid, VWRITE, cred); 812 } 813 if (access_e == 0) { 814 VATTR_NULL(&vap); 815 vap.va_atime = fvdat->cached_attrs.va_atime; 816 /* 817 * Ignore errors setting when setting atime. That 818 * should not cause close(2) to fail. 819 */ 820 fuse_internal_setattr(vp, &vap, td, NULL); 821 } 822 } 823 /* TODO: close the file handle, if we're sure it's no longer used */ 824 if ((fvdat->flag & FN_SIZECHANGE) != 0) { 825 fuse_vnode_savesize(vp, cred, td->td_proc->p_pid); 826 } 827 return err; 828 } 829 830 /* 831 struct vop_copy_file_range_args { 832 struct vop_generic_args a_gen; 833 struct vnode *a_invp; 834 off_t *a_inoffp; 835 struct vnode *a_outvp; 836 off_t *a_outoffp; 837 size_t *a_lenp; 838 unsigned int a_flags; 839 struct ucred *a_incred; 840 struct ucred *a_outcred; 841 struct thread *a_fsizetd; 842 } 843 */ 844 static int 845 fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) 846 { 847 struct vnode *invp = ap->a_invp; 848 struct vnode *outvp = ap->a_outvp; 849 struct mount *mp = vnode_mount(invp); 850 struct fuse_vnode_data *outfvdat = VTOFUD(outvp); 851 struct fuse_dispatcher fdi; 852 struct fuse_filehandle *infufh, *outfufh; 853 struct fuse_copy_file_range_in *fcfri; 854 struct ucred *incred = ap->a_incred; 855 struct ucred *outcred = ap->a_outcred; 856 struct fuse_write_out *fwo; 857 struct thread *td; 858 struct uio io; 859 off_t outfilesize; 860 ssize_t r = 0; 861 pid_t pid; 862 int err; 863 864 if (mp == NULL || mp != vnode_mount(outvp)) 865 goto fallback; 866 867 if (incred->cr_uid != outcred->cr_uid) 868 goto fallback; 869 870 if (incred->cr_groups[0] != outcred->cr_groups[0]) 871 goto fallback; 872 873 /* Caller busied mp, mnt_data can be safely accessed. */ 874 if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE)) 875 goto fallback; 876 877 if (ap->a_fsizetd == NULL) 878 td = curthread; 879 else 880 td = ap->a_fsizetd; 881 pid = td->td_proc->p_pid; 882 883 vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE); 884 if (invp->v_data == NULL || outvp->v_data == NULL) { 885 err = EBADF; 886 goto unlock; 887 } 888 889 err = fuse_filehandle_getrw(invp, FREAD, &infufh, incred, pid); 890 if (err) 891 goto unlock; 892 893 err = fuse_filehandle_getrw(outvp, FWRITE, &outfufh, outcred, pid); 894 if (err) 895 goto unlock; 896 897 io.uio_resid = *ap->a_lenp; 898 if (ap->a_fsizetd) { 899 io.uio_offset = *ap->a_outoffp; 900 err = vn_rlimit_fsizex(outvp, &io, 0, &r, ap->a_fsizetd); 901 if (err != 0) 902 goto unlock; 903 } 904 905 err = fuse_vnode_size(outvp, &outfilesize, outcred, curthread); 906 if (err) 907 goto unlock; 908 909 err = fuse_inval_buf_range(outvp, outfilesize, *ap->a_outoffp, 910 *ap->a_outoffp + io.uio_resid); 911 if (err) 912 goto unlock; 913 914 fdisp_init(&fdi, sizeof(*fcfri)); 915 fdisp_make_vp(&fdi, FUSE_COPY_FILE_RANGE, invp, td, incred); 916 fcfri = fdi.indata; 917 fcfri->fh_in = infufh->fh_id; 918 fcfri->off_in = *ap->a_inoffp; 919 fcfri->nodeid_out = VTOI(outvp); 920 fcfri->fh_out = outfufh->fh_id; 921 fcfri->off_out = *ap->a_outoffp; 922 fcfri->len = io.uio_resid; 923 fcfri->flags = 0; 924 925 err = fdisp_wait_answ(&fdi); 926 if (err == 0) { 927 fwo = fdi.answ; 928 *ap->a_lenp = fwo->size; 929 *ap->a_inoffp += fwo->size; 930 *ap->a_outoffp += fwo->size; 931 fuse_internal_clear_suid_on_write(outvp, outcred, td); 932 if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) { 933 fuse_vnode_setsize(outvp, *ap->a_outoffp, false); 934 getnanouptime(&outfvdat->last_local_modify); 935 } 936 fuse_vnode_update(invp, FN_ATIMECHANGE); 937 fuse_vnode_update(outvp, FN_MTIMECHANGE | FN_CTIMECHANGE); 938 } 939 fdisp_destroy(&fdi); 940 941 unlock: 942 if (invp != outvp) 943 VOP_UNLOCK(invp); 944 VOP_UNLOCK(outvp); 945 946 if (err == ENOSYS) { 947 fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE); 948 fallback: 949 err = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 950 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 951 ap->a_incred, ap->a_outcred, ap->a_fsizetd); 952 } 953 954 /* 955 * No need to call vn_rlimit_fsizex_res before return, since the uio is 956 * local. 957 */ 958 return (err); 959 } 960 961 static void 962 fdisp_make_mknod_for_fallback( 963 struct fuse_dispatcher *fdip, 964 struct componentname *cnp, 965 struct vnode *dvp, 966 uint64_t parentnid, 967 struct thread *td, 968 struct ucred *cred, 969 mode_t mode, 970 enum fuse_opcode *op) 971 { 972 struct fuse_mknod_in *fmni; 973 974 fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1); 975 *op = FUSE_MKNOD; 976 fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred); 977 fmni = fdip->indata; 978 fmni->mode = mode; 979 fmni->rdev = 0; 980 memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr, 981 cnp->cn_namelen); 982 ((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0'; 983 } 984 /* 985 struct vnop_create_args { 986 struct vnode *a_dvp; 987 struct vnode **a_vpp; 988 struct componentname *a_cnp; 989 struct vattr *a_vap; 990 }; 991 */ 992 static int 993 fuse_vnop_create(struct vop_create_args *ap) 994 { 995 struct vnode *dvp = ap->a_dvp; 996 struct vnode **vpp = ap->a_vpp; 997 struct componentname *cnp = ap->a_cnp; 998 struct vattr *vap = ap->a_vap; 999 struct thread *td = curthread; 1000 struct ucred *cred = cnp->cn_cred; 1001 1002 struct fuse_data *data; 1003 struct fuse_create_in *fci; 1004 struct fuse_entry_out *feo; 1005 struct fuse_open_out *foo; 1006 struct fuse_dispatcher fdi, fdi2; 1007 struct fuse_dispatcher *fdip = &fdi; 1008 struct fuse_dispatcher *fdip2 = NULL; 1009 1010 int err; 1011 1012 struct mount *mp = vnode_mount(dvp); 1013 data = fuse_get_mpdata(mp); 1014 uint64_t parentnid = VTOFUD(dvp)->nid; 1015 mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode); 1016 enum fuse_opcode op; 1017 int flags; 1018 1019 if (fuse_isdeadfs(dvp)) 1020 return ENXIO; 1021 1022 /* FUSE expects sockets to be created with FUSE_MKNOD */ 1023 if (vap->va_type == VSOCK) 1024 return fuse_internal_mknod(dvp, vpp, cnp, vap); 1025 1026 /* 1027 * VOP_CREATE doesn't tell us the open(2) flags, so we guess. Only a 1028 * writable mode makes sense, and we might as well include readability 1029 * too. 1030 */ 1031 flags = O_RDWR; 1032 1033 bzero(&fdi, sizeof(fdi)); 1034 1035 if (vap->va_type != VREG) 1036 return (EINVAL); 1037 1038 if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { 1039 /* Fallback to FUSE_MKNOD/FUSE_OPEN */ 1040 fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td, 1041 cred, mode, &op); 1042 } else { 1043 /* Use FUSE_CREATE */ 1044 size_t insize; 1045 1046 op = FUSE_CREATE; 1047 fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1); 1048 fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred); 1049 fci = fdip->indata; 1050 fci->mode = mode; 1051 fci->flags = O_CREAT | flags; 1052 if (fuse_libabi_geq(data, 7, 12)) { 1053 insize = sizeof(*fci); 1054 fci->umask = td->td_proc->p_pd->pd_cmask; 1055 } else { 1056 insize = sizeof(struct fuse_open_in); 1057 } 1058 1059 memcpy((char *)fdip->indata + insize, cnp->cn_nameptr, 1060 cnp->cn_namelen); 1061 ((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0'; 1062 } 1063 1064 err = fdisp_wait_answ(fdip); 1065 1066 if (err) { 1067 if (err == ENOSYS && op == FUSE_CREATE) { 1068 fsess_set_notimpl(mp, FUSE_CREATE); 1069 fdisp_destroy(fdip); 1070 fdisp_make_mknod_for_fallback(fdip, cnp, dvp, 1071 parentnid, td, cred, mode, &op); 1072 err = fdisp_wait_answ(fdip); 1073 } 1074 if (err) 1075 goto out; 1076 } 1077 1078 feo = fdip->answ; 1079 1080 if ((err = fuse_internal_checkentry(feo, vap->va_type))) { 1081 goto out; 1082 } 1083 1084 if (op == FUSE_CREATE) { 1085 if (fuse_libabi_geq(data, 7, 9)) 1086 foo = (struct fuse_open_out*)(feo + 1); 1087 else 1088 foo = (struct fuse_open_out*)((char*)feo + 1089 FUSE_COMPAT_ENTRY_OUT_SIZE); 1090 } else { 1091 /* Issue a separate FUSE_OPEN */ 1092 struct fuse_open_in *foi; 1093 1094 fdip2 = &fdi2; 1095 fdisp_init(fdip2, sizeof(*foi)); 1096 fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td, 1097 cred); 1098 foi = fdip2->indata; 1099 foi->flags = flags; 1100 err = fdisp_wait_answ(fdip2); 1101 if (err) 1102 goto out; 1103 foo = fdip2->answ; 1104 } 1105 err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type); 1106 if (err) { 1107 struct fuse_release_in *fri; 1108 uint64_t nodeid = feo->nodeid; 1109 uint64_t fh_id = foo->fh; 1110 1111 fdisp_destroy(fdip); 1112 fdisp_init(fdip, sizeof(*fri)); 1113 fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred); 1114 fri = fdip->indata; 1115 fri->fh = fh_id; 1116 fri->flags = flags; 1117 fuse_insert_callback(fdip->tick, fuse_internal_forget_callback); 1118 fuse_insert_message(fdip->tick, false); 1119 goto out; 1120 } 1121 ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create"); 1122 fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, 1123 feo->attr_valid_nsec, NULL, true); 1124 1125 fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo); 1126 fuse_vnode_open(*vpp, foo->open_flags, td); 1127 /* 1128 * Purge the parent's attribute cache because the daemon should've 1129 * updated its mtime and ctime 1130 */ 1131 fuse_vnode_clear_attr_cache(dvp); 1132 cache_purge_negative(dvp); 1133 1134 out: 1135 if (fdip2) 1136 fdisp_destroy(fdip2); 1137 fdisp_destroy(fdip); 1138 return err; 1139 } 1140 1141 /* 1142 struct vnop_fdatasync_args { 1143 struct vop_generic_args a_gen; 1144 struct vnode * a_vp; 1145 struct thread * a_td; 1146 }; 1147 */ 1148 static int 1149 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap) 1150 { 1151 struct vnode *vp = ap->a_vp; 1152 struct thread *td = ap->a_td; 1153 int waitfor = MNT_WAIT; 1154 1155 int err = 0; 1156 1157 if (fuse_isdeadfs(vp)) { 1158 return 0; 1159 } 1160 if ((err = vop_stdfdatasync_buf(ap))) 1161 return err; 1162 1163 return fuse_internal_fsync(vp, td, waitfor, true); 1164 } 1165 1166 /* 1167 struct vnop_fsync_args { 1168 struct vop_generic_args a_gen; 1169 struct vnode * a_vp; 1170 int a_waitfor; 1171 struct thread * a_td; 1172 }; 1173 */ 1174 static int 1175 fuse_vnop_fsync(struct vop_fsync_args *ap) 1176 { 1177 struct vnode *vp = ap->a_vp; 1178 struct thread *td = ap->a_td; 1179 int waitfor = ap->a_waitfor; 1180 int err = 0; 1181 1182 if (fuse_isdeadfs(vp)) { 1183 return 0; 1184 } 1185 if ((err = vop_stdfsync(ap))) 1186 return err; 1187 1188 return fuse_internal_fsync(vp, td, waitfor, false); 1189 } 1190 1191 /* 1192 struct vnop_getattr_args { 1193 struct vnode *a_vp; 1194 struct vattr *a_vap; 1195 struct ucred *a_cred; 1196 struct thread *a_td; 1197 }; 1198 */ 1199 static int 1200 fuse_vnop_getattr(struct vop_getattr_args *ap) 1201 { 1202 struct vnode *vp = ap->a_vp; 1203 struct vattr *vap = ap->a_vap; 1204 struct ucred *cred = ap->a_cred; 1205 struct thread *td = curthread; 1206 1207 int err = 0; 1208 int dataflags; 1209 1210 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; 1211 1212 /* Note that we are not bailing out on a dead file system just yet. */ 1213 1214 if (!(dataflags & FSESS_INITED)) { 1215 if (!vnode_isvroot(vp)) { 1216 fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); 1217 err = ENOTCONN; 1218 return err; 1219 } else { 1220 goto fake; 1221 } 1222 } 1223 err = fuse_internal_getattr(vp, vap, cred, td); 1224 if (err == ENOTCONN && vnode_isvroot(vp)) { 1225 /* see comment in fuse_vfsop_statfs() */ 1226 goto fake; 1227 } else { 1228 return err; 1229 } 1230 1231 fake: 1232 bzero(vap, sizeof(*vap)); 1233 vap->va_type = vnode_vtype(vp); 1234 1235 return 0; 1236 } 1237 1238 /* 1239 struct vnop_inactive_args { 1240 struct vnode *a_vp; 1241 }; 1242 */ 1243 static int 1244 fuse_vnop_inactive(struct vop_inactive_args *ap) 1245 { 1246 struct vnode *vp = ap->a_vp; 1247 struct thread *td = curthread; 1248 1249 struct fuse_vnode_data *fvdat = VTOFUD(vp); 1250 struct fuse_filehandle *fufh, *fufh_tmp; 1251 1252 int need_flush = 1; 1253 1254 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { 1255 if (need_flush && vp->v_type == VREG) { 1256 if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { 1257 fuse_vnode_savesize(vp, NULL, 0); 1258 } 1259 if ((fvdat->flag & FN_REVOKED) != 0) 1260 fuse_io_invalbuf(vp, td); 1261 else 1262 fuse_io_flushbuf(vp, MNT_WAIT, td); 1263 need_flush = 0; 1264 } 1265 fuse_filehandle_close(vp, fufh, td, NULL); 1266 } 1267 1268 if ((fvdat->flag & FN_REVOKED) != 0) 1269 vrecycle(vp); 1270 1271 return 0; 1272 } 1273 1274 /* 1275 struct vnop_ioctl_args { 1276 struct vnode *a_vp; 1277 u_long a_command; 1278 caddr_t a_data; 1279 int a_fflag; 1280 struct ucred *a_cred; 1281 struct thread *a_td; 1282 }; 1283 */ 1284 static int 1285 fuse_vnop_ioctl(struct vop_ioctl_args *ap) 1286 { 1287 struct vnode *vp = ap->a_vp; 1288 struct mount *mp = vnode_mount(vp); 1289 struct ucred *cred = ap->a_cred; 1290 off_t *offp; 1291 pid_t pid = ap->a_td->td_proc->p_pid; 1292 int err; 1293 1294 switch (ap->a_command) { 1295 case FIOSEEKDATA: 1296 case FIOSEEKHOLE: 1297 /* Call FUSE_LSEEK, if we can, or fall back to vop_stdioctl */ 1298 if (fsess_maybe_impl(mp, FUSE_LSEEK)) { 1299 int whence; 1300 1301 offp = ap->a_data; 1302 if (ap->a_command == FIOSEEKDATA) 1303 whence = SEEK_DATA; 1304 else 1305 whence = SEEK_HOLE; 1306 1307 vn_lock(vp, LK_SHARED | LK_RETRY); 1308 err = fuse_vnop_do_lseek(vp, ap->a_td, cred, pid, offp, 1309 whence); 1310 VOP_UNLOCK(vp); 1311 } 1312 if (fsess_not_impl(mp, FUSE_LSEEK)) 1313 err = vop_stdioctl(ap); 1314 break; 1315 default: 1316 /* TODO: implement FUSE_IOCTL */ 1317 err = ENOTTY; 1318 break; 1319 } 1320 return (err); 1321 } 1322 1323 1324 /* 1325 struct vnop_link_args { 1326 struct vnode *a_tdvp; 1327 struct vnode *a_vp; 1328 struct componentname *a_cnp; 1329 }; 1330 */ 1331 static int 1332 fuse_vnop_link(struct vop_link_args *ap) 1333 { 1334 struct vnode *vp = ap->a_vp; 1335 struct vnode *tdvp = ap->a_tdvp; 1336 struct componentname *cnp = ap->a_cnp; 1337 1338 struct vattr *vap = VTOVA(vp); 1339 1340 struct fuse_dispatcher fdi; 1341 struct fuse_entry_out *feo; 1342 struct fuse_link_in fli; 1343 1344 int err; 1345 1346 if (fuse_isdeadfs(vp)) { 1347 return ENXIO; 1348 } 1349 if (vnode_mount(tdvp) != vnode_mount(vp)) { 1350 return EXDEV; 1351 } 1352 1353 /* 1354 * This is a seatbelt check to protect naive userspace filesystems from 1355 * themselves and the limitations of the FUSE IPC protocol. If a 1356 * filesystem does not allow attribute caching, assume it is capable of 1357 * validating that nlink does not overflow. 1358 */ 1359 if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) 1360 return EMLINK; 1361 fli.oldnodeid = VTOI(vp); 1362 1363 fdisp_init(&fdi, 0); 1364 fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp, 1365 FUSE_LINK, &fli, sizeof(fli), &fdi); 1366 if ((err = fdisp_wait_answ(&fdi))) { 1367 goto out; 1368 } 1369 feo = fdi.answ; 1370 1371 if (fli.oldnodeid != feo->nodeid) { 1372 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 1373 fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, 1374 "Assigned wrong inode for a hard link."); 1375 fuse_vnode_clear_attr_cache(vp); 1376 fuse_vnode_clear_attr_cache(tdvp); 1377 err = EIO; 1378 goto out; 1379 } 1380 1381 err = fuse_internal_checkentry(feo, vnode_vtype(vp)); 1382 if (!err) { 1383 /* 1384 * Purge the parent's attribute cache because the daemon 1385 * should've updated its mtime and ctime 1386 */ 1387 fuse_vnode_clear_attr_cache(tdvp); 1388 fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid, 1389 feo->attr_valid_nsec, NULL, true); 1390 } 1391 out: 1392 fdisp_destroy(&fdi); 1393 return err; 1394 } 1395 1396 struct fuse_lookup_alloc_arg { 1397 struct fuse_entry_out *feo; 1398 struct componentname *cnp; 1399 uint64_t nid; 1400 __enum_uint8(vtype) vtyp; 1401 }; 1402 1403 /* Callback for vn_get_ino */ 1404 static int 1405 fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1406 { 1407 struct fuse_lookup_alloc_arg *flaa = arg; 1408 1409 return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp, 1410 flaa->vtyp); 1411 } 1412 1413 SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup, 1414 "int", "struct timespec*", "struct timespec*"); 1415 /* 1416 struct vnop_lookup_args { 1417 struct vnodeop_desc *a_desc; 1418 struct vnode *a_dvp; 1419 struct vnode **a_vpp; 1420 struct componentname *a_cnp; 1421 }; 1422 */ 1423 int 1424 fuse_vnop_lookup(struct vop_lookup_args *ap) 1425 { 1426 struct vnode *dvp = ap->a_dvp; 1427 struct vnode **vpp = ap->a_vpp; 1428 struct componentname *cnp = ap->a_cnp; 1429 struct thread *td = curthread; 1430 struct ucred *cred = cnp->cn_cred; 1431 struct timespec now; 1432 1433 int nameiop = cnp->cn_nameiop; 1434 int flags = cnp->cn_flags; 1435 int islastcn = flags & ISLASTCN; 1436 struct mount *mp = vnode_mount(dvp); 1437 struct fuse_data *data = fuse_get_mpdata(mp); 1438 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; 1439 bool is_dot; 1440 1441 int err = 0; 1442 int lookup_err = 0; 1443 struct vnode *vp = NULL; 1444 1445 struct fuse_dispatcher fdi; 1446 bool did_lookup = false; 1447 struct fuse_entry_out *feo = NULL; 1448 __enum_uint8(vtype) vtyp; /* vnode type of target */ 1449 1450 uint64_t nid; 1451 1452 if (fuse_isdeadfs(dvp)) { 1453 *vpp = NULL; 1454 return ENXIO; 1455 } 1456 if (!vnode_isdir(dvp)) 1457 return ENOTDIR; 1458 1459 if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) 1460 return EROFS; 1461 1462 if ((cnp->cn_flags & NOEXECCHECK) != 0) 1463 cnp->cn_flags &= ~NOEXECCHECK; 1464 else if ((err = fuse_internal_access(dvp, VEXEC, td, cred))) 1465 return err; 1466 1467 is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.'; 1468 if ((flags & ISDOTDOT) && !(data->dataflags & FSESS_EXPORT_SUPPORT)) 1469 { 1470 if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) { 1471 /* 1472 * Since the file system doesn't support ".." lookups, 1473 * we have no way to find this entry. 1474 */ 1475 return ESTALE; 1476 } 1477 nid = VTOFUD(dvp)->parent_nid; 1478 if (nid == 0) 1479 return ENOENT; 1480 /* .. is obviously a directory */ 1481 vtyp = VDIR; 1482 } else if (is_dot) { 1483 nid = VTOI(dvp); 1484 /* . is obviously a directory */ 1485 vtyp = VDIR; 1486 } else { 1487 struct timespec timeout; 1488 int ncpticks; /* here to accommodate for API contract */ 1489 1490 err = cache_lookup(dvp, vpp, cnp, &timeout, &ncpticks); 1491 getnanouptime(&now); 1492 SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now); 1493 switch (err) { 1494 case -1: /* positive match */ 1495 if (timespeccmp(&timeout, &now, >)) { 1496 counter_u64_add(fuse_lookup_cache_hits, 1); 1497 } else { 1498 /* Cache timeout */ 1499 counter_u64_add(fuse_lookup_cache_misses, 1); 1500 bintime_clear( 1501 &VTOFUD(*vpp)->entry_cache_timeout); 1502 cache_purge(*vpp); 1503 if (dvp != *vpp) 1504 vput(*vpp); 1505 else 1506 vrele(*vpp); 1507 *vpp = NULL; 1508 break; 1509 } 1510 return 0; 1511 1512 case 0: /* no match in cache */ 1513 counter_u64_add(fuse_lookup_cache_misses, 1); 1514 break; 1515 1516 case ENOENT: /* negative match */ 1517 if (timespeccmp(&timeout, &now, <=)) { 1518 /* Cache timeout */ 1519 cache_purge_negative(dvp); 1520 break; 1521 } 1522 /* fall through */ 1523 default: 1524 return err; 1525 } 1526 1527 fdisp_init(&fdi, cnp->cn_namelen + 1); 1528 fdisp_make(&fdi, FUSE_LOOKUP, mp, VTOI(dvp), td, cred); 1529 1530 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); 1531 ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; 1532 lookup_err = fdisp_wait_answ(&fdi); 1533 did_lookup = true; 1534 1535 if (!lookup_err) { 1536 /* lookup call succeeded */ 1537 feo = (struct fuse_entry_out *)fdi.answ; 1538 nid = feo->nodeid; 1539 if (nid == 0) { 1540 /* zero nodeid means ENOENT and cache it */ 1541 struct timespec timeout; 1542 1543 fdi.answ_stat = ENOENT; 1544 lookup_err = ENOENT; 1545 if (cnp->cn_flags & MAKEENTRY) { 1546 fuse_validity_2_timespec(feo, &timeout); 1547 /* Use the same entry_time for .. as for 1548 * the file itself. That doesn't honor 1549 * exactly what the fuse server tells 1550 * us, but to do otherwise would require 1551 * another cache lookup at this point. 1552 */ 1553 struct timespec *dtsp = NULL; 1554 cache_enter_time(dvp, *vpp, cnp, 1555 &timeout, dtsp); 1556 } 1557 } 1558 vtyp = IFTOVT(feo->attr.mode); 1559 } 1560 if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) { 1561 fdisp_destroy(&fdi); 1562 return lookup_err; 1563 } 1564 } 1565 /* lookup_err, if non-zero, must be ENOENT at this point */ 1566 1567 if (lookup_err) { 1568 /* Entry not found */ 1569 if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { 1570 if (default_permissions) 1571 err = fuse_internal_access(dvp, VWRITE, td, 1572 cred); 1573 else 1574 err = 0; 1575 if (!err) { 1576 err = EJUSTRETURN; 1577 } 1578 } else { 1579 err = ENOENT; 1580 } 1581 } else { 1582 /* Entry was found */ 1583 if (flags & ISDOTDOT) { 1584 struct fuse_lookup_alloc_arg flaa; 1585 1586 flaa.nid = nid; 1587 flaa.feo = feo; 1588 flaa.cnp = cnp; 1589 flaa.vtyp = vtyp; 1590 err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0, 1591 &vp); 1592 *vpp = vp; 1593 } else if (nid == VTOI(dvp)) { 1594 if (is_dot) { 1595 vref(dvp); 1596 *vpp = dvp; 1597 } else { 1598 fuse_warn(fuse_get_mpdata(mp), 1599 FSESS_WARN_ILLEGAL_INODE, 1600 "Assigned same inode to both parent and " 1601 "child."); 1602 err = EIO; 1603 } 1604 1605 } else { 1606 struct fuse_vnode_data *fvdat; 1607 1608 err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, 1609 &vp, cnp, vtyp); 1610 if (err) 1611 goto out; 1612 *vpp = vp; 1613 fvdat = VTOFUD(vp); 1614 1615 MPASS(feo != NULL); 1616 if (timespeccmp(&now, &fvdat->last_local_modify, >)) { 1617 /* 1618 * Attributes from the server are definitely 1619 * newer than the last attributes we sent to 1620 * the server, so cache them. 1621 */ 1622 fuse_internal_cache_attrs(*vpp, &feo->attr, 1623 feo->attr_valid, feo->attr_valid_nsec, 1624 NULL, true); 1625 } 1626 fuse_validity_2_bintime(feo->entry_valid, 1627 feo->entry_valid_nsec, 1628 &fvdat->entry_cache_timeout); 1629 1630 if ((nameiop == DELETE || nameiop == RENAME) && 1631 islastcn && default_permissions) 1632 { 1633 struct vattr dvattr; 1634 1635 err = fuse_internal_access(dvp, VWRITE, td, 1636 cred); 1637 if (err != 0) 1638 goto out; 1639 /* 1640 * if the parent's sticky bit is set, check 1641 * whether we're allowed to remove the file. 1642 * Need to figure out the vnode locking to make 1643 * this work. 1644 */ 1645 fuse_internal_getattr(dvp, &dvattr, cred, td); 1646 if ((dvattr.va_mode & S_ISTXT) && 1647 fuse_internal_access(dvp, VADMIN, td, 1648 cred) && 1649 fuse_internal_access(*vpp, VADMIN, td, 1650 cred)) { 1651 err = EPERM; 1652 goto out; 1653 } 1654 } 1655 } 1656 } 1657 out: 1658 if (err) { 1659 if (vp != NULL && dvp != vp) 1660 vput(vp); 1661 else if (vp != NULL) 1662 vrele(vp); 1663 *vpp = NULL; 1664 } 1665 if (did_lookup) 1666 fdisp_destroy(&fdi); 1667 1668 return err; 1669 } 1670 1671 /* 1672 struct vnop_mkdir_args { 1673 struct vnode *a_dvp; 1674 struct vnode **a_vpp; 1675 struct componentname *a_cnp; 1676 struct vattr *a_vap; 1677 }; 1678 */ 1679 static int 1680 fuse_vnop_mkdir(struct vop_mkdir_args *ap) 1681 { 1682 struct vnode *dvp = ap->a_dvp; 1683 struct vnode **vpp = ap->a_vpp; 1684 struct componentname *cnp = ap->a_cnp; 1685 struct vattr *vap = ap->a_vap; 1686 1687 struct fuse_mkdir_in fmdi; 1688 1689 if (fuse_isdeadfs(dvp)) { 1690 return ENXIO; 1691 } 1692 fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); 1693 fmdi.umask = curthread->td_proc->p_pd->pd_cmask; 1694 1695 return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi, 1696 sizeof(fmdi), VDIR)); 1697 } 1698 1699 /* 1700 struct vnop_mknod_args { 1701 struct vnode *a_dvp; 1702 struct vnode **a_vpp; 1703 struct componentname *a_cnp; 1704 struct vattr *a_vap; 1705 }; 1706 */ 1707 static int 1708 fuse_vnop_mknod(struct vop_mknod_args *ap) 1709 { 1710 1711 struct vnode *dvp = ap->a_dvp; 1712 struct vnode **vpp = ap->a_vpp; 1713 struct componentname *cnp = ap->a_cnp; 1714 struct vattr *vap = ap->a_vap; 1715 1716 if (fuse_isdeadfs(dvp)) 1717 return ENXIO; 1718 1719 return fuse_internal_mknod(dvp, vpp, cnp, vap); 1720 } 1721 1722 /* 1723 struct vop_open_args { 1724 struct vnode *a_vp; 1725 int a_mode; 1726 struct ucred *a_cred; 1727 struct thread *a_td; 1728 int a_fdidx; / struct file *a_fp; 1729 }; 1730 */ 1731 static int 1732 fuse_vnop_open(struct vop_open_args *ap) 1733 { 1734 struct vnode *vp = ap->a_vp; 1735 int a_mode = ap->a_mode; 1736 struct thread *td = ap->a_td; 1737 struct ucred *cred = ap->a_cred; 1738 pid_t pid = td->td_proc->p_pid; 1739 1740 if (fuse_isdeadfs(vp)) 1741 return ENXIO; 1742 if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) 1743 return (EOPNOTSUPP); 1744 if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) 1745 return EINVAL; 1746 1747 if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { 1748 fuse_vnode_open(vp, 0, td); 1749 return 0; 1750 } 1751 1752 return fuse_filehandle_open(vp, a_mode, NULL, td, cred); 1753 } 1754 1755 static int 1756 fuse_vnop_pathconf(struct vop_pathconf_args *ap) 1757 { 1758 struct vnode *vp = ap->a_vp; 1759 struct mount *mp; 1760 1761 switch (ap->a_name) { 1762 case _PC_FILESIZEBITS: 1763 *ap->a_retval = 64; 1764 return (0); 1765 case _PC_NAME_MAX: 1766 *ap->a_retval = NAME_MAX; 1767 return (0); 1768 case _PC_LINK_MAX: 1769 *ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX); 1770 return (0); 1771 case _PC_SYMLINK_MAX: 1772 *ap->a_retval = MAXPATHLEN; 1773 return (0); 1774 case _PC_NO_TRUNC: 1775 *ap->a_retval = 1; 1776 return (0); 1777 case _PC_MIN_HOLE_SIZE: 1778 /* 1779 * The FUSE protocol provides no mechanism for a server to 1780 * report _PC_MIN_HOLE_SIZE. It's a protocol bug. Instead, 1781 * return EINVAL if the server does not support FUSE_LSEEK, or 1782 * 1 if it does. 1783 */ 1784 mp = vnode_mount(vp); 1785 if (!fsess_is_impl(mp, FUSE_LSEEK) && 1786 !fsess_not_impl(mp, FUSE_LSEEK)) { 1787 off_t offset = 0; 1788 1789 /* Issue a FUSE_LSEEK to find out if it's implemented */ 1790 fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred, 1791 curthread->td_proc->p_pid, &offset, SEEK_DATA); 1792 } 1793 1794 if (fsess_is_impl(mp, FUSE_LSEEK)) { 1795 *ap->a_retval = 1; 1796 return (0); 1797 } else { 1798 /* 1799 * Probably FUSE_LSEEK is not implemented. It might 1800 * be, if the FUSE_LSEEK above returned an error like 1801 * EACCES, but in that case we can't tell, so it's 1802 * safest to report EINVAL anyway. 1803 */ 1804 return (EINVAL); 1805 } 1806 default: 1807 return (vop_stdpathconf(ap)); 1808 } 1809 } 1810 1811 SDT_PROBE_DEFINE3(fusefs, , vnops, filehandles_closed, "struct vnode*", 1812 "struct uio*", "struct ucred*"); 1813 /* 1814 struct vnop_read_args { 1815 struct vnode *a_vp; 1816 struct uio *a_uio; 1817 int a_ioflag; 1818 struct ucred *a_cred; 1819 }; 1820 */ 1821 static int 1822 fuse_vnop_read(struct vop_read_args *ap) 1823 { 1824 struct vnode *vp = ap->a_vp; 1825 struct uio *uio = ap->a_uio; 1826 int ioflag = ap->a_ioflag; 1827 struct ucred *cred = ap->a_cred; 1828 pid_t pid = curthread->td_proc->p_pid; 1829 struct fuse_filehandle *fufh; 1830 int err; 1831 bool closefufh = false, directio; 1832 1833 MPASS(vp->v_type == VREG || vp->v_type == VDIR); 1834 1835 if (fuse_isdeadfs(vp)) { 1836 return ENXIO; 1837 } 1838 1839 if (VTOFUD(vp)->flag & FN_DIRECTIO) { 1840 ioflag |= IO_DIRECT; 1841 } 1842 1843 err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid); 1844 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 1845 /* 1846 * nfsd will do I/O without first doing VOP_OPEN. We 1847 * must implicitly open the file here 1848 */ 1849 err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); 1850 closefufh = true; 1851 } 1852 if (err) { 1853 SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred); 1854 return err; 1855 } 1856 1857 /* 1858 * Ideally, when the daemon asks for direct io at open time, the 1859 * standard file flag should be set according to this, so that would 1860 * just change the default mode, which later on could be changed via 1861 * fcntl(2). 1862 * But this doesn't work, the O_DIRECT flag gets cleared at some point 1863 * (don't know where). So to make any use of the Fuse direct_io option, 1864 * we hardwire it into the file's private data (similarly to Linux, 1865 * btw.). 1866 */ 1867 directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); 1868 1869 fuse_vnode_update(vp, FN_ATIMECHANGE); 1870 if (directio) { 1871 SDT_PROBE2(fusefs, , vnops, trace, 1, "direct read of vnode"); 1872 err = fuse_read_directbackend(vp, uio, cred, fufh); 1873 } else { 1874 SDT_PROBE2(fusefs, , vnops, trace, 1, "buffered read of vnode"); 1875 err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid); 1876 } 1877 1878 if (closefufh) 1879 fuse_filehandle_close(vp, fufh, curthread, cred); 1880 1881 return (err); 1882 } 1883 1884 /* 1885 struct vnop_readdir_args { 1886 struct vnode *a_vp; 1887 struct uio *a_uio; 1888 struct ucred *a_cred; 1889 int *a_eofflag; 1890 int *a_ncookies; 1891 uint64_t **a_cookies; 1892 }; 1893 */ 1894 static int 1895 fuse_vnop_readdir(struct vop_readdir_args *ap) 1896 { 1897 struct vnode *vp = ap->a_vp; 1898 struct uio *uio = ap->a_uio; 1899 struct ucred *cred = ap->a_cred; 1900 struct fuse_filehandle *fufh = NULL; 1901 struct mount *mp = vnode_mount(vp); 1902 struct fuse_iov cookediov; 1903 int err = 0; 1904 uint64_t *cookies; 1905 ssize_t tresid; 1906 int ncookies; 1907 bool closefufh = false; 1908 pid_t pid = curthread->td_proc->p_pid; 1909 1910 if (ap->a_eofflag) 1911 *ap->a_eofflag = 0; 1912 if (fuse_isdeadfs(vp)) { 1913 return ENXIO; 1914 } 1915 if ( /* XXXIP ((uio_iovcnt(uio) > 1)) || */ 1916 (uio_resid(uio) < sizeof(struct dirent))) { 1917 return EINVAL; 1918 } 1919 1920 tresid = uio->uio_resid; 1921 err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); 1922 if (err == EBADF && mp->mnt_flag & MNT_EXPORTED) { 1923 KASSERT(fuse_get_mpdata(mp)->dataflags 1924 & FSESS_NO_OPENDIR_SUPPORT, 1925 ("FUSE file systems that don't set " 1926 "FUSE_NO_OPENDIR_SUPPORT should not be exported")); 1927 /* 1928 * nfsd will do VOP_READDIR without first doing VOP_OPEN. We 1929 * must implicitly open the directory here. 1930 */ 1931 err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); 1932 closefufh = true; 1933 } 1934 if (err) 1935 return (err); 1936 if (ap->a_ncookies != NULL) { 1937 ncookies = uio->uio_resid / 1938 (offsetof(struct dirent, d_name) + 4) + 1; 1939 cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); 1940 *ap->a_ncookies = ncookies; 1941 *ap->a_cookies = cookies; 1942 } else { 1943 ncookies = 0; 1944 cookies = NULL; 1945 } 1946 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1) 1947 fiov_init(&cookediov, DIRCOOKEDSIZE); 1948 1949 err = fuse_internal_readdir(vp, uio, fufh, &cookediov, 1950 &ncookies, cookies); 1951 1952 fiov_teardown(&cookediov); 1953 if (closefufh) 1954 fuse_filehandle_close(vp, fufh, curthread, cred); 1955 1956 if (ap->a_ncookies != NULL) { 1957 if (err == 0) { 1958 *ap->a_ncookies -= ncookies; 1959 } else { 1960 free(*ap->a_cookies, M_TEMP); 1961 *ap->a_ncookies = 0; 1962 *ap->a_cookies = NULL; 1963 } 1964 } 1965 if (err == 0 && tresid == uio->uio_resid) 1966 *ap->a_eofflag = 1; 1967 1968 return err; 1969 } 1970 1971 /* 1972 struct vnop_readlink_args { 1973 struct vnode *a_vp; 1974 struct uio *a_uio; 1975 struct ucred *a_cred; 1976 }; 1977 */ 1978 static int 1979 fuse_vnop_readlink(struct vop_readlink_args *ap) 1980 { 1981 struct vnode *vp = ap->a_vp; 1982 struct uio *uio = ap->a_uio; 1983 struct ucred *cred = ap->a_cred; 1984 1985 struct fuse_dispatcher fdi; 1986 int err; 1987 1988 if (fuse_isdeadfs(vp)) { 1989 return ENXIO; 1990 } 1991 if (!vnode_islnk(vp)) { 1992 return EINVAL; 1993 } 1994 fdisp_init(&fdi, 0); 1995 err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred); 1996 if (err) { 1997 goto out; 1998 } 1999 if (strnlen(fdi.answ, fdi.iosize) + 1 < fdi.iosize) { 2000 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 2001 fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, 2002 "Returned an embedded NUL from FUSE_READLINK."); 2003 err = EIO; 2004 goto out; 2005 } 2006 if (((char *)fdi.answ)[0] == '/' && 2007 fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) { 2008 char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname; 2009 2010 err = uiomove(mpth, strlen(mpth), uio); 2011 } 2012 if (!err) { 2013 err = uiomove(fdi.answ, fdi.iosize, uio); 2014 } 2015 out: 2016 fdisp_destroy(&fdi); 2017 return err; 2018 } 2019 2020 /* 2021 struct vnop_reclaim_args { 2022 struct vnode *a_vp; 2023 }; 2024 */ 2025 static int 2026 fuse_vnop_reclaim(struct vop_reclaim_args *ap) 2027 { 2028 struct vnode *vp = ap->a_vp; 2029 struct thread *td = curthread; 2030 struct fuse_vnode_data *fvdat = VTOFUD(vp); 2031 struct fuse_filehandle *fufh, *fufh_tmp; 2032 2033 if (!fvdat) { 2034 panic("FUSE: no vnode data during recycling"); 2035 } 2036 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { 2037 printf("FUSE: vnode being reclaimed with open fufh " 2038 "(type=%#x)", fufh->fufh_type); 2039 fuse_filehandle_close(vp, fufh, td, NULL); 2040 } 2041 2042 if (VTOI(vp) == 1) { 2043 /* 2044 * Don't send FUSE_FORGET for the root inode, because 2045 * we never send FUSE_LOOKUP for it (see 2046 * fuse_vfsop_root) and we don't want the server to see 2047 * mismatched lookup counts. 2048 */ 2049 struct fuse_data *data; 2050 struct vnode *vroot; 2051 2052 data = fuse_get_mpdata(vnode_mount(vp)); 2053 FUSE_LOCK(); 2054 vroot = data->vroot; 2055 data->vroot = NULL; 2056 FUSE_UNLOCK(); 2057 if (vroot) 2058 vrele(vroot); 2059 } else if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) { 2060 fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp), 2061 fvdat->nlookup); 2062 } 2063 cache_purge(vp); 2064 vfs_hash_remove(vp); 2065 fuse_vnode_destroy(vp); 2066 2067 return 0; 2068 } 2069 2070 /* 2071 struct vnop_remove_args { 2072 struct vnode *a_dvp; 2073 struct vnode *a_vp; 2074 struct componentname *a_cnp; 2075 }; 2076 */ 2077 static int 2078 fuse_vnop_remove(struct vop_remove_args *ap) 2079 { 2080 struct vnode *dvp = ap->a_dvp; 2081 struct vnode *vp = ap->a_vp; 2082 struct componentname *cnp = ap->a_cnp; 2083 2084 int err; 2085 2086 if (fuse_isdeadfs(vp)) { 2087 return ENXIO; 2088 } 2089 if (vnode_isdir(vp)) { 2090 return EPERM; 2091 } 2092 2093 err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); 2094 2095 return err; 2096 } 2097 2098 /* 2099 struct vnop_rename_args { 2100 struct vnode *a_fdvp; 2101 struct vnode *a_fvp; 2102 struct componentname *a_fcnp; 2103 struct vnode *a_tdvp; 2104 struct vnode *a_tvp; 2105 struct componentname *a_tcnp; 2106 }; 2107 */ 2108 static int 2109 fuse_vnop_rename(struct vop_rename_args *ap) 2110 { 2111 struct vnode *fdvp = ap->a_fdvp; 2112 struct vnode *fvp = ap->a_fvp; 2113 struct componentname *fcnp = ap->a_fcnp; 2114 struct vnode *tdvp = ap->a_tdvp; 2115 struct vnode *tvp = ap->a_tvp; 2116 struct componentname *tcnp = ap->a_tcnp; 2117 struct fuse_data *data; 2118 bool newparent = fdvp != tdvp; 2119 bool isdir = fvp->v_type == VDIR; 2120 int err = 0; 2121 2122 if (fuse_isdeadfs(fdvp)) { 2123 return ENXIO; 2124 } 2125 if (fvp->v_mount != tdvp->v_mount || 2126 (tvp && fvp->v_mount != tvp->v_mount)) { 2127 SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename"); 2128 err = EXDEV; 2129 goto out; 2130 } 2131 cache_purge(fvp); 2132 2133 /* 2134 * FUSE library is expected to check if target directory is not 2135 * under the source directory in the file system tree. 2136 * Linux performs this check at VFS level. 2137 */ 2138 /* 2139 * If source is a directory, and it will get a new parent, user must 2140 * have write permission to it, so ".." can be modified. 2141 */ 2142 data = fuse_get_mpdata(vnode_mount(tdvp)); 2143 if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) { 2144 err = fuse_internal_access(fvp, VWRITE, 2145 curthread, tcnp->cn_cred); 2146 if (err) 2147 goto out; 2148 } 2149 sx_xlock(&data->rename_lock); 2150 err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp); 2151 if (err == 0) { 2152 if (tdvp != fdvp) 2153 fuse_vnode_setparent(fvp, tdvp); 2154 if (tvp != NULL) 2155 fuse_vnode_setparent(tvp, NULL); 2156 } 2157 sx_unlock(&data->rename_lock); 2158 2159 if (tvp != NULL && tvp != fvp) { 2160 cache_purge(tvp); 2161 } 2162 if (vnode_isdir(fvp)) { 2163 if (((tvp != NULL) && vnode_isdir(tvp)) || vnode_isdir(fvp)) { 2164 cache_purge(tdvp); 2165 } 2166 cache_purge(fdvp); 2167 } 2168 out: 2169 if (tdvp == tvp) { 2170 vrele(tdvp); 2171 } else { 2172 vput(tdvp); 2173 } 2174 if (tvp != NULL) { 2175 vput(tvp); 2176 } 2177 vrele(fdvp); 2178 vrele(fvp); 2179 2180 return err; 2181 } 2182 2183 /* 2184 struct vnop_rmdir_args { 2185 struct vnode *a_dvp; 2186 struct vnode *a_vp; 2187 struct componentname *a_cnp; 2188 } *ap; 2189 */ 2190 static int 2191 fuse_vnop_rmdir(struct vop_rmdir_args *ap) 2192 { 2193 struct vnode *dvp = ap->a_dvp; 2194 struct vnode *vp = ap->a_vp; 2195 2196 int err; 2197 2198 if (fuse_isdeadfs(vp)) { 2199 return ENXIO; 2200 } 2201 if (VTOFUD(vp) == VTOFUD(dvp)) { 2202 return EINVAL; 2203 } 2204 err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); 2205 2206 return err; 2207 } 2208 2209 /* 2210 struct vnop_setattr_args { 2211 struct vnode *a_vp; 2212 struct vattr *a_vap; 2213 struct ucred *a_cred; 2214 struct thread *a_td; 2215 }; 2216 */ 2217 static int 2218 fuse_vnop_setattr(struct vop_setattr_args *ap) 2219 { 2220 struct vnode *vp = ap->a_vp; 2221 struct vattr *vap = ap->a_vap; 2222 struct ucred *cred = ap->a_cred; 2223 struct thread *td = curthread; 2224 struct mount *mp; 2225 struct fuse_data *data; 2226 struct vattr old_va; 2227 int dataflags; 2228 int err = 0, err2; 2229 accmode_t accmode = 0; 2230 bool checkperm; 2231 bool drop_suid = false; 2232 2233 mp = vnode_mount(vp); 2234 data = fuse_get_mpdata(mp); 2235 dataflags = data->dataflags; 2236 checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; 2237 2238 if (fuse_isdeadfs(vp)) { 2239 return ENXIO; 2240 } 2241 2242 if (vap->va_uid != (uid_t)VNOVAL) { 2243 if (checkperm) { 2244 /* Only root may change a file's owner */ 2245 err = priv_check_cred(cred, PRIV_VFS_CHOWN); 2246 if (err) { 2247 /* As a special case, allow the null chown */ 2248 err2 = fuse_internal_getattr(vp, &old_va, cred, 2249 td); 2250 if (err2) 2251 return (err2); 2252 if (vap->va_uid != old_va.va_uid) 2253 return err; 2254 else 2255 accmode |= VADMIN; 2256 drop_suid = true; 2257 } else 2258 accmode |= VADMIN; 2259 } else 2260 accmode |= VADMIN; 2261 } 2262 if (vap->va_gid != (gid_t)VNOVAL) { 2263 if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN)) 2264 drop_suid = true; 2265 if (checkperm && !groupmember(vap->va_gid, cred)) 2266 { 2267 /* 2268 * Non-root users may only chgrp to one of their own 2269 * groups 2270 */ 2271 err = priv_check_cred(cred, PRIV_VFS_CHOWN); 2272 if (err) { 2273 /* As a special case, allow the null chgrp */ 2274 err2 = fuse_internal_getattr(vp, &old_va, cred, 2275 td); 2276 if (err2) 2277 return (err2); 2278 if (vap->va_gid != old_va.va_gid) 2279 return err; 2280 accmode |= VADMIN; 2281 } else 2282 accmode |= VADMIN; 2283 } else 2284 accmode |= VADMIN; 2285 } 2286 if (vap->va_size != VNOVAL) { 2287 switch (vp->v_type) { 2288 case VDIR: 2289 return (EISDIR); 2290 case VLNK: 2291 case VREG: 2292 if (vfs_isrdonly(mp)) 2293 return (EROFS); 2294 err = vn_rlimit_trunc(vap->va_size, td); 2295 if (err) 2296 return (err); 2297 break; 2298 default: 2299 /* 2300 * According to POSIX, the result is unspecified 2301 * for file types other than regular files, 2302 * directories and shared memory objects. We 2303 * don't support shared memory objects in the file 2304 * system, and have dubious support for truncating 2305 * symlinks. Just ignore the request in other cases. 2306 */ 2307 return (0); 2308 } 2309 /* Don't set accmode. Permission to trunc is checked upstack */ 2310 } 2311 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 2312 if (vap->va_vaflags & VA_UTIMES_NULL) 2313 accmode |= VWRITE; 2314 else 2315 accmode |= VADMIN; 2316 } 2317 if (drop_suid) { 2318 if (vap->va_mode != (mode_t)VNOVAL) 2319 vap->va_mode &= ~(S_ISUID | S_ISGID); 2320 else { 2321 err = fuse_internal_getattr(vp, &old_va, cred, td); 2322 if (err) 2323 return (err); 2324 vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID); 2325 } 2326 } 2327 if (vap->va_mode != (mode_t)VNOVAL) { 2328 /* Only root may set the sticky bit on non-directories */ 2329 if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT) 2330 && priv_check_cred(cred, PRIV_VFS_STICKYFILE)) 2331 return EFTYPE; 2332 if (checkperm && (vap->va_mode & S_ISGID)) { 2333 err = fuse_internal_getattr(vp, &old_va, cred, td); 2334 if (err) 2335 return (err); 2336 if (!groupmember(old_va.va_gid, cred)) { 2337 err = priv_check_cred(cred, PRIV_VFS_SETGID); 2338 if (err) 2339 return (err); 2340 } 2341 } 2342 accmode |= VADMIN; 2343 } 2344 2345 if (vfs_isrdonly(mp)) 2346 return EROFS; 2347 2348 if (checkperm) { 2349 err = fuse_internal_access(vp, accmode, td, cred); 2350 } else { 2351 err = 0; 2352 } 2353 if (err) 2354 return err; 2355 else 2356 return fuse_internal_setattr(vp, vap, td, cred); 2357 } 2358 2359 /* 2360 struct vnop_strategy_args { 2361 struct vnode *a_vp; 2362 struct buf *a_bp; 2363 }; 2364 */ 2365 static int 2366 fuse_vnop_strategy(struct vop_strategy_args *ap) 2367 { 2368 struct vnode *vp = ap->a_vp; 2369 struct buf *bp = ap->a_bp; 2370 2371 if (!vp || fuse_isdeadfs(vp)) { 2372 bp->b_ioflags |= BIO_ERROR; 2373 bp->b_error = ENXIO; 2374 bufdone(bp); 2375 return 0; 2376 } 2377 2378 /* 2379 * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags. 2380 * fuse_io_strategy sets bp's error fields 2381 */ 2382 (void)fuse_io_strategy(vp, bp); 2383 2384 return 0; 2385 } 2386 2387 /* 2388 struct vnop_symlink_args { 2389 struct vnode *a_dvp; 2390 struct vnode **a_vpp; 2391 struct componentname *a_cnp; 2392 struct vattr *a_vap; 2393 char *a_target; 2394 }; 2395 */ 2396 static int 2397 fuse_vnop_symlink(struct vop_symlink_args *ap) 2398 { 2399 struct vnode *dvp = ap->a_dvp; 2400 struct vnode **vpp = ap->a_vpp; 2401 struct componentname *cnp = ap->a_cnp; 2402 const char *target = ap->a_target; 2403 2404 struct fuse_dispatcher fdi; 2405 2406 int err; 2407 size_t len; 2408 2409 if (fuse_isdeadfs(dvp)) { 2410 return ENXIO; 2411 } 2412 /* 2413 * Unlike the other creator type calls, here we have to create a message 2414 * where the name of the new entry comes first, and the data describing 2415 * the entry comes second. 2416 * Hence we can't rely on our handy fuse_internal_newentry() routine, 2417 * but put together the message manually and just call the core part. 2418 */ 2419 2420 len = strlen(target) + 1; 2421 fdisp_init(&fdi, len + cnp->cn_namelen + 1); 2422 fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL); 2423 2424 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); 2425 ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; 2426 memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len); 2427 2428 err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi); 2429 fdisp_destroy(&fdi); 2430 return err; 2431 } 2432 2433 /* 2434 struct vnop_write_args { 2435 struct vnode *a_vp; 2436 struct uio *a_uio; 2437 int a_ioflag; 2438 struct ucred *a_cred; 2439 }; 2440 */ 2441 static int 2442 fuse_vnop_write(struct vop_write_args *ap) 2443 { 2444 struct vnode *vp = ap->a_vp; 2445 struct uio *uio = ap->a_uio; 2446 int ioflag = ap->a_ioflag; 2447 struct ucred *cred = ap->a_cred; 2448 pid_t pid = curthread->td_proc->p_pid; 2449 struct fuse_filehandle *fufh; 2450 int err; 2451 bool closefufh = false, directio; 2452 2453 MPASS(vp->v_type == VREG || vp->v_type == VDIR); 2454 2455 if (fuse_isdeadfs(vp)) { 2456 return ENXIO; 2457 } 2458 2459 if (VTOFUD(vp)->flag & FN_DIRECTIO) { 2460 ioflag |= IO_DIRECT; 2461 } 2462 2463 err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 2464 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 2465 /* 2466 * nfsd will do I/O without first doing VOP_OPEN. We 2467 * must implicitly open the file here 2468 */ 2469 err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred); 2470 closefufh = true; 2471 } 2472 if (err) { 2473 SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred); 2474 return err; 2475 } 2476 2477 /* 2478 * Ideally, when the daemon asks for direct io at open time, the 2479 * standard file flag should be set according to this, so that would 2480 * just change the default mode, which later on could be changed via 2481 * fcntl(2). 2482 * But this doesn't work, the O_DIRECT flag gets cleared at some point 2483 * (don't know where). So to make any use of the Fuse direct_io option, 2484 * we hardwire it into the file's private data (similarly to Linux, 2485 * btw.). 2486 */ 2487 directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); 2488 2489 fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); 2490 if (directio) { 2491 off_t start, end, filesize; 2492 bool pages = (ioflag & IO_VMIO) != 0; 2493 2494 SDT_PROBE2(fusefs, , vnops, trace, 1, "direct write of vnode"); 2495 2496 err = fuse_vnode_size(vp, &filesize, cred, curthread); 2497 if (err) 2498 goto out; 2499 2500 start = uio->uio_offset; 2501 end = start + uio->uio_resid; 2502 if (!pages) { 2503 err = fuse_inval_buf_range(vp, filesize, start, 2504 end); 2505 if (err) 2506 goto out; 2507 } 2508 err = fuse_write_directbackend(vp, uio, cred, fufh, 2509 filesize, ioflag, pages); 2510 } else { 2511 SDT_PROBE2(fusefs, , vnops, trace, 1, 2512 "buffered write of vnode"); 2513 if (!fsess_opt_writeback(vnode_mount(vp))) 2514 ioflag |= IO_SYNC; 2515 err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid); 2516 } 2517 fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td); 2518 2519 out: 2520 if (closefufh) 2521 fuse_filehandle_close(vp, fufh, curthread, cred); 2522 2523 return (err); 2524 } 2525 2526 static daddr_t 2527 fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) 2528 { 2529 const int biosize = fuse_iosize(vp); 2530 2531 return (off / biosize); 2532 } 2533 2534 static int 2535 fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *blksz) 2536 { 2537 off_t filesize; 2538 int err; 2539 const int biosize = fuse_iosize(vp); 2540 2541 err = fuse_vnode_size(vp, &filesize, NULL, NULL); 2542 if (err) { 2543 /* This will turn into a SIGBUS */ 2544 return (EIO); 2545 } else if ((off_t)lbn * biosize >= filesize) { 2546 *blksz = 0; 2547 } else if ((off_t)(lbn + 1) * biosize > filesize) { 2548 *blksz = filesize - (off_t)lbn *biosize; 2549 } else { 2550 *blksz = biosize; 2551 } 2552 return (0); 2553 } 2554 2555 /* 2556 struct vnop_getpages_args { 2557 struct vnode *a_vp; 2558 vm_page_t *a_m; 2559 int a_count; 2560 int a_reqpage; 2561 }; 2562 */ 2563 static int 2564 fuse_vnop_getpages(struct vop_getpages_args *ap) 2565 { 2566 struct vnode *vp = ap->a_vp; 2567 2568 if (!fsess_opt_mmap(vnode_mount(vp))) { 2569 SDT_PROBE2(fusefs, , vnops, trace, 1, 2570 "called on non-cacheable vnode??\n"); 2571 return (VM_PAGER_ERROR); 2572 } 2573 2574 return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, 2575 ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz)); 2576 } 2577 2578 static const char extattr_namespace_separator = '.'; 2579 2580 /* 2581 struct vop_getextattr_args { 2582 struct vop_generic_args a_gen; 2583 struct vnode *a_vp; 2584 int a_attrnamespace; 2585 const char *a_name; 2586 struct uio *a_uio; 2587 size_t *a_size; 2588 struct ucred *a_cred; 2589 struct thread *a_td; 2590 }; 2591 */ 2592 static int 2593 fuse_vnop_getextattr(struct vop_getextattr_args *ap) 2594 { 2595 struct vnode *vp = ap->a_vp; 2596 struct uio *uio = ap->a_uio; 2597 struct fuse_dispatcher fdi; 2598 struct fuse_getxattr_in *get_xattr_in; 2599 struct fuse_getxattr_out *get_xattr_out; 2600 struct mount *mp = vnode_mount(vp); 2601 struct thread *td = ap->a_td; 2602 struct ucred *cred = ap->a_cred; 2603 char *prefix; 2604 char *attr_str; 2605 size_t len; 2606 int err; 2607 2608 if (fuse_isdeadfs(vp)) 2609 return (ENXIO); 2610 2611 if (fsess_not_impl(mp, FUSE_GETXATTR)) 2612 return EOPNOTSUPP; 2613 2614 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); 2615 if (err) 2616 return err; 2617 2618 /* Default to looking for user attributes. */ 2619 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2620 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2621 else 2622 prefix = EXTATTR_NAMESPACE_USER_STRING; 2623 2624 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 2625 strlen(ap->a_name) + 1; 2626 2627 fdisp_init(&fdi, len + sizeof(*get_xattr_in)); 2628 fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred); 2629 2630 get_xattr_in = fdi.indata; 2631 /* 2632 * Check to see whether we're querying the available size or 2633 * issuing the actual request. If we pass in 0, we get back struct 2634 * fuse_getxattr_out. If we pass in a non-zero size, we get back 2635 * that much data, without the struct fuse_getxattr_out header. 2636 */ 2637 if (uio == NULL) 2638 get_xattr_in->size = 0; 2639 else 2640 get_xattr_in->size = uio->uio_resid; 2641 2642 attr_str = (char *)fdi.indata + sizeof(*get_xattr_in); 2643 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 2644 ap->a_name); 2645 2646 err = fdisp_wait_answ(&fdi); 2647 if (err != 0) { 2648 if (err == ENOSYS) { 2649 fsess_set_notimpl(mp, FUSE_GETXATTR); 2650 err = EOPNOTSUPP; 2651 } 2652 goto out; 2653 } 2654 2655 get_xattr_out = fdi.answ; 2656 2657 if (ap->a_size != NULL) 2658 *ap->a_size = get_xattr_out->size; 2659 2660 if (uio != NULL) 2661 err = uiomove(fdi.answ, fdi.iosize, uio); 2662 2663 out: 2664 fdisp_destroy(&fdi); 2665 return (err); 2666 } 2667 2668 /* 2669 struct vop_setextattr_args { 2670 struct vop_generic_args a_gen; 2671 struct vnode *a_vp; 2672 int a_attrnamespace; 2673 const char *a_name; 2674 struct uio *a_uio; 2675 struct ucred *a_cred; 2676 struct thread *a_td; 2677 }; 2678 */ 2679 static int 2680 fuse_vnop_setextattr(struct vop_setextattr_args *ap) 2681 { 2682 struct vnode *vp = ap->a_vp; 2683 struct uio *uio = ap->a_uio; 2684 struct fuse_dispatcher fdi; 2685 struct fuse_setxattr_in *set_xattr_in; 2686 struct mount *mp = vnode_mount(vp); 2687 struct thread *td = ap->a_td; 2688 struct ucred *cred = ap->a_cred; 2689 char *prefix; 2690 size_t len; 2691 char *attr_str; 2692 int err; 2693 2694 if (fuse_isdeadfs(vp)) 2695 return (ENXIO); 2696 2697 if (fsess_not_impl(mp, FUSE_SETXATTR)) 2698 return EOPNOTSUPP; 2699 2700 if (vfs_isrdonly(mp)) 2701 return EROFS; 2702 2703 /* Deleting xattrs must use VOP_DELETEEXTATTR instead */ 2704 if (ap->a_uio == NULL) { 2705 /* 2706 * If we got here as fallback from VOP_DELETEEXTATTR, then 2707 * return EOPNOTSUPP. 2708 */ 2709 if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) 2710 return (EOPNOTSUPP); 2711 else 2712 return (EINVAL); 2713 } 2714 2715 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, 2716 VWRITE); 2717 if (err) 2718 return err; 2719 2720 /* Default to looking for user attributes. */ 2721 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2722 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2723 else 2724 prefix = EXTATTR_NAMESPACE_USER_STRING; 2725 2726 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 2727 strlen(ap->a_name) + 1; 2728 2729 fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid); 2730 fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred); 2731 2732 set_xattr_in = fdi.indata; 2733 set_xattr_in->size = uio->uio_resid; 2734 2735 attr_str = (char *)fdi.indata + sizeof(*set_xattr_in); 2736 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 2737 ap->a_name); 2738 2739 err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len, 2740 uio->uio_resid, uio); 2741 if (err != 0) { 2742 goto out; 2743 } 2744 2745 err = fdisp_wait_answ(&fdi); 2746 2747 if (err == ENOSYS) { 2748 fsess_set_notimpl(mp, FUSE_SETXATTR); 2749 err = EOPNOTSUPP; 2750 } 2751 if (err == ERESTART) { 2752 /* Can't restart after calling uiomove */ 2753 err = EINTR; 2754 } 2755 2756 out: 2757 fdisp_destroy(&fdi); 2758 return (err); 2759 } 2760 2761 /* 2762 * The Linux / FUSE extended attribute list is simply a collection of 2763 * NUL-terminated strings. The FreeBSD extended attribute list is a single 2764 * byte length followed by a non-NUL terminated string. So, this allows 2765 * conversion of the Linux / FUSE format to the FreeBSD format in place. 2766 * Linux attribute names are reported with the namespace as a prefix (e.g. 2767 * "user.attribute_name"), but in FreeBSD they are reported without the 2768 * namespace prefix (e.g. "attribute_name"). So, we're going from: 2769 * 2770 * user.attr_name1\0user.attr_name2\0 2771 * 2772 * to: 2773 * 2774 * <num>attr_name1<num>attr_name2 2775 * 2776 * Where "<num>" is a single byte number of characters in the attribute name. 2777 * 2778 * Args: 2779 * prefix - exattr namespace prefix string 2780 * list, list_len - input list with namespace prefixes 2781 * bsd_list, bsd_list_len - output list compatible with bsd vfs 2782 */ 2783 static int 2784 fuse_xattrlist_convert(char *prefix, const char *list, int list_len, 2785 char *bsd_list, int *bsd_list_len) 2786 { 2787 int len, pos, dist_to_next, prefix_len; 2788 2789 pos = 0; 2790 *bsd_list_len = 0; 2791 prefix_len = strlen(prefix); 2792 2793 while (pos < list_len && list[pos] != '\0') { 2794 dist_to_next = strlen(&list[pos]) + 1; 2795 if (bcmp(&list[pos], prefix, prefix_len) == 0 && 2796 list[pos + prefix_len] == extattr_namespace_separator) { 2797 len = dist_to_next - 2798 (prefix_len + sizeof(extattr_namespace_separator)) - 1; 2799 if (len >= EXTATTR_MAXNAMELEN) 2800 return (ENAMETOOLONG); 2801 2802 bsd_list[*bsd_list_len] = len; 2803 memcpy(&bsd_list[*bsd_list_len + 1], 2804 &list[pos + prefix_len + 2805 sizeof(extattr_namespace_separator)], len); 2806 2807 *bsd_list_len += len + 1; 2808 } 2809 2810 pos += dist_to_next; 2811 } 2812 2813 return (0); 2814 } 2815 2816 /* 2817 * List extended attributes 2818 * 2819 * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which 2820 * has a number of differences compared to its FreeBSD equivalent, 2821 * extattr_list_file: 2822 * 2823 * - FUSE_LISTXATTR returns all extended attributes across all namespaces, 2824 * whereas listxattr(2) only returns attributes for a single namespace 2825 * - FUSE_LISTXATTR prepends each attribute name with "namespace." 2826 * - If the provided buffer is not large enough to hold the result, 2827 * FUSE_LISTXATTR should return ERANGE, whereas listxattr is expected to 2828 * return as many results as will fit. 2829 */ 2830 /* 2831 struct vop_listextattr_args { 2832 struct vop_generic_args a_gen; 2833 struct vnode *a_vp; 2834 int a_attrnamespace; 2835 struct uio *a_uio; 2836 size_t *a_size; 2837 struct ucred *a_cred; 2838 struct thread *a_td; 2839 }; 2840 */ 2841 static int 2842 fuse_vnop_listextattr(struct vop_listextattr_args *ap) 2843 { 2844 struct vnode *vp = ap->a_vp; 2845 struct uio *uio = ap->a_uio; 2846 struct fuse_dispatcher fdi; 2847 struct fuse_listxattr_in *list_xattr_in; 2848 struct fuse_listxattr_out *list_xattr_out; 2849 struct mount *mp = vnode_mount(vp); 2850 struct thread *td = ap->a_td; 2851 struct ucred *cred = ap->a_cred; 2852 char *prefix; 2853 char *bsd_list = NULL; 2854 char *linux_list; 2855 int bsd_list_len; 2856 int linux_list_len; 2857 int err; 2858 2859 if (fuse_isdeadfs(vp)) 2860 return (ENXIO); 2861 2862 if (fsess_not_impl(mp, FUSE_LISTXATTR)) 2863 return EOPNOTSUPP; 2864 2865 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); 2866 if (err) 2867 return err; 2868 2869 /* 2870 * Add space for a NUL and the period separator if enabled. 2871 * Default to looking for user attributes. 2872 */ 2873 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2874 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2875 else 2876 prefix = EXTATTR_NAMESPACE_USER_STRING; 2877 2878 fdisp_init(&fdi, sizeof(*list_xattr_in)); 2879 fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); 2880 2881 /* 2882 * Retrieve Linux / FUSE compatible list size. 2883 */ 2884 list_xattr_in = fdi.indata; 2885 list_xattr_in->size = 0; 2886 2887 err = fdisp_wait_answ(&fdi); 2888 if (err != 0) { 2889 if (err == ENOSYS) { 2890 fsess_set_notimpl(mp, FUSE_LISTXATTR); 2891 err = EOPNOTSUPP; 2892 } 2893 goto out; 2894 } 2895 2896 list_xattr_out = fdi.answ; 2897 linux_list_len = list_xattr_out->size; 2898 if (linux_list_len == 0) { 2899 if (ap->a_size != NULL) 2900 *ap->a_size = linux_list_len; 2901 goto out; 2902 } 2903 2904 /* 2905 * Retrieve Linux / FUSE compatible list values. 2906 */ 2907 fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); 2908 list_xattr_in = fdi.indata; 2909 list_xattr_in->size = linux_list_len; 2910 2911 err = fdisp_wait_answ(&fdi); 2912 if (err == ERANGE) { 2913 /* 2914 * Race detected. The attribute list must've grown since the 2915 * first FUSE_LISTXATTR call. Start over. Go all the way back 2916 * to userland so we can process signals, if necessary, before 2917 * restarting. 2918 */ 2919 err = ERESTART; 2920 goto out; 2921 } else if (err != 0) 2922 goto out; 2923 2924 linux_list = fdi.answ; 2925 /* FUSE doesn't allow the server to return more data than requested */ 2926 if (fdi.iosize > linux_list_len) { 2927 struct fuse_data *data = fuse_get_mpdata(mp); 2928 2929 fuse_warn(data, FSESS_WARN_LSEXTATTR_LONG, 2930 "server returned " 2931 "more extended attribute data than requested; " 2932 "should've returned ERANGE instead."); 2933 } else { 2934 /* But returning less data is fine */ 2935 linux_list_len = fdi.iosize; 2936 } 2937 2938 /* 2939 * Retrieve the BSD compatible list values. 2940 * The Linux / FUSE attribute list format isn't the same 2941 * as FreeBSD's format. So we need to transform it into 2942 * FreeBSD's format before giving it to the user. 2943 */ 2944 bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK); 2945 err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len, 2946 bsd_list, &bsd_list_len); 2947 if (err != 0) 2948 goto out; 2949 2950 if (ap->a_size != NULL) 2951 *ap->a_size = bsd_list_len; 2952 2953 if (uio != NULL) 2954 err = uiomove(bsd_list, bsd_list_len, uio); 2955 2956 out: 2957 free(bsd_list, M_TEMP); 2958 fdisp_destroy(&fdi); 2959 return (err); 2960 } 2961 2962 /* 2963 struct vop_deallocate_args { 2964 struct vop_generic_args a_gen; 2965 struct vnode *a_vp; 2966 off_t *a_offset; 2967 off_t *a_len; 2968 int a_flags; 2969 int a_ioflag; 2970 struct ucred *a_cred; 2971 }; 2972 */ 2973 static int 2974 fuse_vnop_deallocate(struct vop_deallocate_args *ap) 2975 { 2976 struct vnode *vp = ap->a_vp; 2977 struct mount *mp = vnode_mount(vp); 2978 struct fuse_filehandle *fufh; 2979 struct fuse_dispatcher fdi; 2980 struct fuse_fallocate_in *ffi; 2981 struct ucred *cred = ap->a_cred; 2982 pid_t pid = curthread->td_proc->p_pid; 2983 off_t *len = ap->a_len; 2984 off_t *offset = ap->a_offset; 2985 int ioflag = ap->a_ioflag; 2986 off_t filesize; 2987 int err; 2988 bool closefufh = false; 2989 2990 if (fuse_isdeadfs(vp)) 2991 return (ENXIO); 2992 2993 if (vfs_isrdonly(mp)) 2994 return (EROFS); 2995 2996 if (fsess_not_impl(mp, FUSE_FALLOCATE)) 2997 goto fallback; 2998 2999 err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 3000 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 3001 /* 3002 * nfsd will do I/O without first doing VOP_OPEN. We 3003 * must implicitly open the file here 3004 */ 3005 err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred); 3006 closefufh = true; 3007 } 3008 if (err) 3009 return (err); 3010 3011 fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); 3012 3013 err = fuse_vnode_size(vp, &filesize, cred, curthread); 3014 if (err) 3015 goto out; 3016 fuse_inval_buf_range(vp, filesize, *offset, *offset + *len); 3017 3018 fdisp_init(&fdi, sizeof(*ffi)); 3019 fdisp_make_vp(&fdi, FUSE_FALLOCATE, vp, curthread, cred); 3020 ffi = fdi.indata; 3021 ffi->fh = fufh->fh_id; 3022 ffi->offset = *offset; 3023 ffi->length = *len; 3024 /* 3025 * FreeBSD's fspacectl is equivalent to Linux's fallocate with 3026 * mode == FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE 3027 */ 3028 ffi->mode = FUSE_FALLOC_FL_PUNCH_HOLE | FUSE_FALLOC_FL_KEEP_SIZE; 3029 err = fdisp_wait_answ(&fdi); 3030 3031 if (err == ENOSYS) { 3032 fdisp_destroy(&fdi); 3033 fsess_set_notimpl(mp, FUSE_FALLOCATE); 3034 goto fallback; 3035 } else if (err == EOPNOTSUPP) { 3036 /* 3037 * The file system server does not support FUSE_FALLOCATE with 3038 * the supplied mode for this particular file. 3039 */ 3040 fdisp_destroy(&fdi); 3041 goto fallback; 3042 } else if (!err) { 3043 /* 3044 * Clip the returned offset to EoF. Do it here rather than 3045 * before FUSE_FALLOCATE just in case the kernel's cached file 3046 * size is out of date. Unfortunately, FUSE does not return 3047 * any information about filesize from that operation. 3048 */ 3049 *offset = MIN(*offset + *len, filesize); 3050 *len = 0; 3051 fuse_vnode_undirty_cached_timestamps(vp, false); 3052 fuse_internal_clear_suid_on_write(vp, cred, curthread); 3053 3054 if (ioflag & IO_SYNC) 3055 err = fuse_internal_fsync(vp, curthread, MNT_WAIT, 3056 false); 3057 } 3058 3059 out: 3060 fdisp_destroy(&fdi); 3061 if (closefufh) 3062 fuse_filehandle_close(vp, fufh, curthread, cred); 3063 3064 return (err); 3065 3066 fallback: 3067 if (closefufh) 3068 fuse_filehandle_close(vp, fufh, curthread, cred); 3069 3070 return (vop_stddeallocate(ap)); 3071 } 3072 3073 /* 3074 struct vop_deleteextattr_args { 3075 struct vop_generic_args a_gen; 3076 struct vnode *a_vp; 3077 int a_attrnamespace; 3078 const char *a_name; 3079 struct ucred *a_cred; 3080 struct thread *a_td; 3081 }; 3082 */ 3083 static int 3084 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) 3085 { 3086 struct vnode *vp = ap->a_vp; 3087 struct fuse_dispatcher fdi; 3088 struct mount *mp = vnode_mount(vp); 3089 struct thread *td = ap->a_td; 3090 struct ucred *cred = ap->a_cred; 3091 char *prefix; 3092 size_t len; 3093 char *attr_str; 3094 int err; 3095 3096 if (fuse_isdeadfs(vp)) 3097 return (ENXIO); 3098 3099 if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) 3100 return EOPNOTSUPP; 3101 3102 if (vfs_isrdonly(mp)) 3103 return EROFS; 3104 3105 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, 3106 VWRITE); 3107 if (err) 3108 return err; 3109 3110 /* Default to looking for user attributes. */ 3111 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 3112 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 3113 else 3114 prefix = EXTATTR_NAMESPACE_USER_STRING; 3115 3116 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 3117 strlen(ap->a_name) + 1; 3118 3119 fdisp_init(&fdi, len); 3120 fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred); 3121 3122 attr_str = fdi.indata; 3123 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 3124 ap->a_name); 3125 3126 err = fdisp_wait_answ(&fdi); 3127 if (err == ENOSYS) { 3128 fsess_set_notimpl(mp, FUSE_REMOVEXATTR); 3129 err = EOPNOTSUPP; 3130 } 3131 3132 fdisp_destroy(&fdi); 3133 return (err); 3134 } 3135 3136 /* 3137 struct vnop_print_args { 3138 struct vnode *a_vp; 3139 }; 3140 */ 3141 static int 3142 fuse_vnop_print(struct vop_print_args *ap) 3143 { 3144 struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp); 3145 3146 printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n", 3147 (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid, 3148 (uintmax_t)fvdat->nlookup, 3149 fvdat->flag); 3150 3151 return 0; 3152 } 3153 3154 /* 3155 * Get an NFS filehandle for a FUSE file. 3156 * 3157 * This will only work for FUSE file systems that guarantee the uniqueness of 3158 * nodeid:generation, which most don't. 3159 */ 3160 /* 3161 vop_vptofh { 3162 IN struct vnode *a_vp; 3163 IN struct fid *a_fhp; 3164 }; 3165 */ 3166 static int 3167 fuse_vnop_vptofh(struct vop_vptofh_args *ap) 3168 { 3169 struct vnode *vp = ap->a_vp; 3170 struct fuse_vnode_data *fvdat = VTOFUD(vp); 3171 struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp); 3172 _Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid), 3173 "FUSE fid type is too big"); 3174 struct mount *mp = vnode_mount(vp); 3175 struct fuse_data *data = fuse_get_mpdata(mp); 3176 struct vattr va; 3177 int err; 3178 3179 if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) { 3180 /* NFS requires lookups for "." and ".." */ 3181 SDT_PROBE2(fusefs, , vnops, trace, 1, 3182 "VOP_VPTOFH without FUSE_EXPORT_SUPPORT"); 3183 return EOPNOTSUPP; 3184 } 3185 if ((mp->mnt_flag & MNT_EXPORTED) && 3186 !(data->dataflags & FSESS_NO_OPENDIR_SUPPORT)) 3187 { 3188 /* 3189 * NFS is stateless, so nfsd must reopen a directory on every 3190 * call to VOP_READDIR, passing in the d_off field from the 3191 * final dirent of the previous invocation. But without 3192 * FUSE_NO_OPENDIR_SUPPORT, the FUSE protocol does not 3193 * guarantee that d_off will be valid after a directory is 3194 * closed and reopened. So prohibit exporting FUSE file 3195 * systems that don't set that flag. 3196 * 3197 * But userspace NFS servers don't have this problem. 3198 */ 3199 SDT_PROBE2(fusefs, , vnops, trace, 1, 3200 "VOP_VPTOFH without FUSE_NO_OPENDIR_SUPPORT"); 3201 return EOPNOTSUPP; 3202 } 3203 3204 err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); 3205 if (err) 3206 return err; 3207 3208 /*ip = VTOI(ap->a_vp);*/ 3209 /*ufhp = (struct ufid *)ap->a_fhp;*/ 3210 fhp->len = sizeof(struct fuse_fid); 3211 fhp->nid = fvdat->nid; 3212 if (fvdat->generation <= UINT32_MAX) 3213 fhp->gen = fvdat->generation; 3214 else 3215 return EOVERFLOW; 3216 return (0); 3217 } 3218