1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Google Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Copyright (C) 2005 Csaba Henk. 34 * All rights reserved. 35 * 36 * Copyright (c) 2019 The FreeBSD Foundation 37 * 38 * Portions of this software were developed by BFF Storage Systems, LLC under 39 * sponsorship from the FreeBSD Foundation. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 */ 62 63 #include <sys/cdefs.h> 64 __FBSDID("$FreeBSD$"); 65 66 #include <sys/param.h> 67 #include <sys/module.h> 68 #include <sys/systm.h> 69 #include <sys/errno.h> 70 #include <sys/kernel.h> 71 #include <sys/conf.h> 72 #include <sys/uio.h> 73 #include <sys/malloc.h> 74 #include <sys/queue.h> 75 #include <sys/limits.h> 76 #include <sys/lock.h> 77 #include <sys/rwlock.h> 78 #include <sys/sx.h> 79 #include <sys/proc.h> 80 #include <sys/mount.h> 81 #include <sys/vnode.h> 82 #include <sys/namei.h> 83 #include <sys/extattr.h> 84 #include <sys/stat.h> 85 #include <sys/unistd.h> 86 #include <sys/filedesc.h> 87 #include <sys/file.h> 88 #include <sys/fcntl.h> 89 #include <sys/dirent.h> 90 #include <sys/bio.h> 91 #include <sys/buf.h> 92 #include <sys/sysctl.h> 93 #include <sys/vmmeter.h> 94 95 #include <vm/vm.h> 96 #include <vm/vm_extern.h> 97 #include <vm/pmap.h> 98 #include <vm/vm_map.h> 99 #include <vm/vm_page.h> 100 #include <vm/vm_param.h> 101 #include <vm/vm_object.h> 102 #include <vm/vm_pager.h> 103 #include <vm/vnode_pager.h> 104 #include <vm/vm_object.h> 105 106 #include "fuse.h" 107 #include "fuse_file.h" 108 #include "fuse_internal.h" 109 #include "fuse_ipc.h" 110 #include "fuse_node.h" 111 #include "fuse_io.h" 112 113 #include <sys/priv.h> 114 115 /* Maximum number of hardlinks to a single FUSE file */ 116 #define FUSE_LINK_MAX UINT32_MAX 117 118 SDT_PROVIDER_DECLARE(fusefs); 119 /* 120 * Fuse trace probe: 121 * arg0: verbosity. Higher numbers give more verbose messages 122 * arg1: Textual message 123 */ 124 SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*"); 125 126 /* vnode ops */ 127 static vop_access_t fuse_vnop_access; 128 static vop_advlock_t fuse_vnop_advlock; 129 static vop_bmap_t fuse_vnop_bmap; 130 static vop_close_t fuse_fifo_close; 131 static vop_close_t fuse_vnop_close; 132 static vop_create_t fuse_vnop_create; 133 static vop_deleteextattr_t fuse_vnop_deleteextattr; 134 static vop_fdatasync_t fuse_vnop_fdatasync; 135 static vop_fsync_t fuse_vnop_fsync; 136 static vop_getattr_t fuse_vnop_getattr; 137 static vop_getextattr_t fuse_vnop_getextattr; 138 static vop_inactive_t fuse_vnop_inactive; 139 static vop_link_t fuse_vnop_link; 140 static vop_listextattr_t fuse_vnop_listextattr; 141 static vop_lookup_t fuse_vnop_lookup; 142 static vop_mkdir_t fuse_vnop_mkdir; 143 static vop_mknod_t fuse_vnop_mknod; 144 static vop_open_t fuse_vnop_open; 145 static vop_pathconf_t fuse_vnop_pathconf; 146 static vop_read_t fuse_vnop_read; 147 static vop_readdir_t fuse_vnop_readdir; 148 static vop_readlink_t fuse_vnop_readlink; 149 static vop_reclaim_t fuse_vnop_reclaim; 150 static vop_remove_t fuse_vnop_remove; 151 static vop_rename_t fuse_vnop_rename; 152 static vop_rmdir_t fuse_vnop_rmdir; 153 static vop_setattr_t fuse_vnop_setattr; 154 static vop_setextattr_t fuse_vnop_setextattr; 155 static vop_strategy_t fuse_vnop_strategy; 156 static vop_symlink_t fuse_vnop_symlink; 157 static vop_write_t fuse_vnop_write; 158 static vop_getpages_t fuse_vnop_getpages; 159 static vop_print_t fuse_vnop_print; 160 static vop_vptofh_t fuse_vnop_vptofh; 161 162 struct vop_vector fuse_fifoops = { 163 .vop_default = &fifo_specops, 164 .vop_access = fuse_vnop_access, 165 .vop_close = fuse_fifo_close, 166 .vop_fsync = fuse_vnop_fsync, 167 .vop_getattr = fuse_vnop_getattr, 168 .vop_inactive = fuse_vnop_inactive, 169 .vop_pathconf = fuse_vnop_pathconf, 170 .vop_print = fuse_vnop_print, 171 .vop_read = VOP_PANIC, 172 .vop_reclaim = fuse_vnop_reclaim, 173 .vop_setattr = fuse_vnop_setattr, 174 .vop_write = VOP_PANIC, 175 .vop_vptofh = fuse_vnop_vptofh, 176 }; 177 VFS_VOP_VECTOR_REGISTER(fuse_fifoops); 178 179 struct vop_vector fuse_vnops = { 180 .vop_allocate = VOP_EINVAL, 181 .vop_default = &default_vnodeops, 182 .vop_access = fuse_vnop_access, 183 .vop_advlock = fuse_vnop_advlock, 184 .vop_bmap = fuse_vnop_bmap, 185 .vop_close = fuse_vnop_close, 186 .vop_create = fuse_vnop_create, 187 .vop_deleteextattr = fuse_vnop_deleteextattr, 188 .vop_fsync = fuse_vnop_fsync, 189 .vop_fdatasync = fuse_vnop_fdatasync, 190 .vop_getattr = fuse_vnop_getattr, 191 .vop_getextattr = fuse_vnop_getextattr, 192 .vop_inactive = fuse_vnop_inactive, 193 /* 194 * TODO: implement vop_ioctl after upgrading to protocol 7.16. 195 * FUSE_IOCTL was added in 7.11, but 32-bit compat is broken until 196 * 7.16. 197 */ 198 .vop_link = fuse_vnop_link, 199 .vop_listextattr = fuse_vnop_listextattr, 200 .vop_lookup = fuse_vnop_lookup, 201 .vop_mkdir = fuse_vnop_mkdir, 202 .vop_mknod = fuse_vnop_mknod, 203 .vop_open = fuse_vnop_open, 204 .vop_pathconf = fuse_vnop_pathconf, 205 /* 206 * TODO: implement vop_poll after upgrading to protocol 7.21. 207 * FUSE_POLL was added in protocol 7.11, but it's kind of broken until 208 * 7.21, which adds the ability for the client to choose which poll 209 * events it wants, and for a client to deregister a file handle 210 */ 211 .vop_read = fuse_vnop_read, 212 .vop_readdir = fuse_vnop_readdir, 213 .vop_readlink = fuse_vnop_readlink, 214 .vop_reclaim = fuse_vnop_reclaim, 215 .vop_remove = fuse_vnop_remove, 216 .vop_rename = fuse_vnop_rename, 217 .vop_rmdir = fuse_vnop_rmdir, 218 .vop_setattr = fuse_vnop_setattr, 219 .vop_setextattr = fuse_vnop_setextattr, 220 .vop_strategy = fuse_vnop_strategy, 221 .vop_symlink = fuse_vnop_symlink, 222 .vop_write = fuse_vnop_write, 223 .vop_getpages = fuse_vnop_getpages, 224 .vop_print = fuse_vnop_print, 225 .vop_vptofh = fuse_vnop_vptofh, 226 }; 227 VFS_VOP_VECTOR_REGISTER(fuse_vnops); 228 229 uma_zone_t fuse_pbuf_zone; 230 231 /* Check permission for extattr operations, much like extattr_check_cred */ 232 static int 233 fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred, 234 struct thread *td, accmode_t accmode) 235 { 236 struct mount *mp = vnode_mount(vp); 237 struct fuse_data *data = fuse_get_mpdata(mp); 238 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; 239 240 /* 241 * Kernel-invoked always succeeds. 242 */ 243 if (cred == NOCRED) 244 return (0); 245 246 /* 247 * Do not allow privileged processes in jail to directly manipulate 248 * system attributes. 249 */ 250 switch (ns) { 251 case EXTATTR_NAMESPACE_SYSTEM: 252 if (default_permissions) { 253 return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); 254 } 255 return (0); 256 case EXTATTR_NAMESPACE_USER: 257 if (default_permissions) { 258 return (fuse_internal_access(vp, accmode, td, cred)); 259 } 260 return (0); 261 default: 262 return (EPERM); 263 } 264 } 265 266 /* Get a filehandle for a directory */ 267 static int 268 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp, 269 struct ucred *cred, pid_t pid) 270 { 271 if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0) 272 return 0; 273 return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid); 274 } 275 276 /* Send FUSE_FLUSH for this vnode */ 277 static int 278 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) 279 { 280 struct fuse_flush_in *ffi; 281 struct fuse_filehandle *fufh; 282 struct fuse_dispatcher fdi; 283 struct thread *td = curthread; 284 struct mount *mp = vnode_mount(vp); 285 int err; 286 287 if (!fsess_isimpl(vnode_mount(vp), FUSE_FLUSH)) 288 return 0; 289 290 err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); 291 if (err) 292 return err; 293 294 fdisp_init(&fdi, sizeof(*ffi)); 295 fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred); 296 ffi = fdi.indata; 297 ffi->fh = fufh->fh_id; 298 /* 299 * If the file has a POSIX lock then we're supposed to set lock_owner. 300 * If not, then lock_owner is undefined. So we may as well always set 301 * it. 302 */ 303 ffi->lock_owner = td->td_proc->p_pid; 304 305 err = fdisp_wait_answ(&fdi); 306 if (err == ENOSYS) { 307 fsess_set_notimpl(mp, FUSE_FLUSH); 308 err = 0; 309 } 310 fdisp_destroy(&fdi); 311 return err; 312 } 313 314 /* Close wrapper for fifos. */ 315 static int 316 fuse_fifo_close(struct vop_close_args *ap) 317 { 318 return (fifo_specops.vop_close(ap)); 319 } 320 321 /* 322 struct vnop_access_args { 323 struct vnode *a_vp; 324 #if VOP_ACCESS_TAKES_ACCMODE_T 325 accmode_t a_accmode; 326 #else 327 int a_mode; 328 #endif 329 struct ucred *a_cred; 330 struct thread *a_td; 331 }; 332 */ 333 static int 334 fuse_vnop_access(struct vop_access_args *ap) 335 { 336 struct vnode *vp = ap->a_vp; 337 int accmode = ap->a_accmode; 338 struct ucred *cred = ap->a_cred; 339 340 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); 341 342 int err; 343 344 if (fuse_isdeadfs(vp)) { 345 if (vnode_isvroot(vp)) { 346 return 0; 347 } 348 return ENXIO; 349 } 350 if (!(data->dataflags & FSESS_INITED)) { 351 if (vnode_isvroot(vp)) { 352 if (priv_check_cred(cred, PRIV_VFS_ADMIN) || 353 (fuse_match_cred(data->daemoncred, cred) == 0)) { 354 return 0; 355 } 356 } 357 return EBADF; 358 } 359 if (vnode_islnk(vp)) { 360 return 0; 361 } 362 363 err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred); 364 return err; 365 } 366 367 /* 368 * struct vop_advlock_args { 369 * struct vop_generic_args a_gen; 370 * struct vnode *a_vp; 371 * void *a_id; 372 * int a_op; 373 * struct flock *a_fl; 374 * int a_flags; 375 * } 376 */ 377 static int 378 fuse_vnop_advlock(struct vop_advlock_args *ap) 379 { 380 struct vnode *vp = ap->a_vp; 381 struct flock *fl = ap->a_fl; 382 struct thread *td = curthread; 383 struct ucred *cred = td->td_ucred; 384 pid_t pid = td->td_proc->p_pid; 385 struct fuse_filehandle *fufh; 386 struct fuse_dispatcher fdi; 387 struct fuse_lk_in *fli; 388 struct fuse_lk_out *flo; 389 enum fuse_opcode op; 390 int dataflags, err; 391 int flags = ap->a_flags; 392 393 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; 394 395 if (fuse_isdeadfs(vp)) { 396 return ENXIO; 397 } 398 399 if (!(dataflags & FSESS_POSIX_LOCKS)) 400 return vop_stdadvlock(ap); 401 /* FUSE doesn't properly support flock until protocol 7.17 */ 402 if (flags & F_FLOCK) 403 return vop_stdadvlock(ap); 404 405 err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); 406 if (err) 407 return err; 408 409 fdisp_init(&fdi, sizeof(*fli)); 410 411 switch(ap->a_op) { 412 case F_GETLK: 413 op = FUSE_GETLK; 414 break; 415 case F_SETLK: 416 op = FUSE_SETLK; 417 break; 418 case F_SETLKW: 419 op = FUSE_SETLKW; 420 break; 421 default: 422 return EINVAL; 423 } 424 425 fdisp_make_vp(&fdi, op, vp, td, cred); 426 fli = fdi.indata; 427 fli->fh = fufh->fh_id; 428 fli->owner = fl->l_pid; 429 fli->lk.start = fl->l_start; 430 if (fl->l_len != 0) 431 fli->lk.end = fl->l_start + fl->l_len - 1; 432 else 433 fli->lk.end = INT64_MAX; 434 fli->lk.type = fl->l_type; 435 fli->lk.pid = fl->l_pid; 436 437 err = fdisp_wait_answ(&fdi); 438 fdisp_destroy(&fdi); 439 440 if (err == 0 && op == FUSE_GETLK) { 441 flo = fdi.answ; 442 fl->l_type = flo->lk.type; 443 fl->l_pid = flo->lk.pid; 444 if (flo->lk.type != F_UNLCK) { 445 fl->l_start = flo->lk.start; 446 if (flo->lk.end == INT64_MAX) 447 fl->l_len = 0; 448 else 449 fl->l_len = flo->lk.end - flo->lk.start + 1; 450 fl->l_start = flo->lk.start; 451 } 452 } 453 454 return err; 455 } 456 457 /* { 458 struct vnode *a_vp; 459 daddr_t a_bn; 460 struct bufobj **a_bop; 461 daddr_t *a_bnp; 462 int *a_runp; 463 int *a_runb; 464 } */ 465 static int 466 fuse_vnop_bmap(struct vop_bmap_args *ap) 467 { 468 struct vnode *vp = ap->a_vp; 469 struct bufobj **bo = ap->a_bop; 470 struct thread *td = curthread; 471 struct mount *mp; 472 struct fuse_dispatcher fdi; 473 struct fuse_bmap_in *fbi; 474 struct fuse_bmap_out *fbo; 475 struct fuse_data *data; 476 uint64_t biosize; 477 off_t filesize; 478 daddr_t lbn = ap->a_bn; 479 daddr_t *pbn = ap->a_bnp; 480 int *runp = ap->a_runp; 481 int *runb = ap->a_runb; 482 int error = 0; 483 int maxrun; 484 485 if (fuse_isdeadfs(vp)) { 486 return ENXIO; 487 } 488 489 mp = vnode_mount(vp); 490 data = fuse_get_mpdata(mp); 491 biosize = fuse_iosize(vp); 492 maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1, 493 data->max_readahead_blocks); 494 495 if (bo != NULL) 496 *bo = &vp->v_bufobj; 497 498 /* 499 * The FUSE_BMAP operation does not include the runp and runb 500 * variables, so we must guess. Report nonzero contiguous runs so 501 * cluster_read will combine adjacent reads. It's worthwhile to reduce 502 * upcalls even if we don't know the true physical layout of the file. 503 * 504 * FUSE file systems may opt out of read clustering in two ways: 505 * * mounting with -onoclusterr 506 * * Setting max_readahead <= maxbcachebuf during FUSE_INIT 507 */ 508 if (runb != NULL) 509 *runb = MIN(lbn, maxrun); 510 if (runp != NULL) { 511 error = fuse_vnode_size(vp, &filesize, td->td_ucred, td); 512 if (error == 0) 513 *runp = MIN(MAX(0, filesize / (off_t)biosize - lbn - 1), 514 maxrun); 515 else 516 *runp = 0; 517 } 518 519 if (fsess_isimpl(mp, FUSE_BMAP)) { 520 fdisp_init(&fdi, sizeof(*fbi)); 521 fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred); 522 fbi = fdi.indata; 523 fbi->block = lbn; 524 fbi->blocksize = biosize; 525 error = fdisp_wait_answ(&fdi); 526 if (error == ENOSYS) { 527 fdisp_destroy(&fdi); 528 fsess_set_notimpl(mp, FUSE_BMAP); 529 error = 0; 530 } else { 531 fbo = fdi.answ; 532 if (error == 0 && pbn != NULL) 533 *pbn = fbo->block; 534 fdisp_destroy(&fdi); 535 return error; 536 } 537 } 538 539 /* If the daemon doesn't support BMAP, make up a sensible default */ 540 if (pbn != NULL) 541 *pbn = lbn * btodb(biosize); 542 return (error); 543 } 544 545 /* 546 struct vop_close_args { 547 struct vnode *a_vp; 548 int a_fflag; 549 struct ucred *a_cred; 550 struct thread *a_td; 551 }; 552 */ 553 static int 554 fuse_vnop_close(struct vop_close_args *ap) 555 { 556 struct vnode *vp = ap->a_vp; 557 struct ucred *cred = ap->a_cred; 558 int fflag = ap->a_fflag; 559 struct thread *td = ap->a_td; 560 pid_t pid = td->td_proc->p_pid; 561 int err = 0; 562 563 if (fuse_isdeadfs(vp)) 564 return 0; 565 if (vnode_isdir(vp)) 566 return 0; 567 if (fflag & IO_NDELAY) 568 return 0; 569 570 err = fuse_flush(vp, cred, pid, fflag); 571 /* TODO: close the file handle, if we're sure it's no longer used */ 572 if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { 573 fuse_vnode_savesize(vp, cred, td->td_proc->p_pid); 574 } 575 return err; 576 } 577 578 static void 579 fdisp_make_mknod_for_fallback( 580 struct fuse_dispatcher *fdip, 581 struct componentname *cnp, 582 struct vnode *dvp, 583 uint64_t parentnid, 584 struct thread *td, 585 struct ucred *cred, 586 mode_t mode, 587 enum fuse_opcode *op) 588 { 589 struct fuse_mknod_in *fmni; 590 591 fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1); 592 *op = FUSE_MKNOD; 593 fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred); 594 fmni = fdip->indata; 595 fmni->mode = mode; 596 fmni->rdev = 0; 597 memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr, 598 cnp->cn_namelen); 599 ((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0'; 600 } 601 /* 602 struct vnop_create_args { 603 struct vnode *a_dvp; 604 struct vnode **a_vpp; 605 struct componentname *a_cnp; 606 struct vattr *a_vap; 607 }; 608 */ 609 static int 610 fuse_vnop_create(struct vop_create_args *ap) 611 { 612 struct vnode *dvp = ap->a_dvp; 613 struct vnode **vpp = ap->a_vpp; 614 struct componentname *cnp = ap->a_cnp; 615 struct vattr *vap = ap->a_vap; 616 struct thread *td = cnp->cn_thread; 617 struct ucred *cred = cnp->cn_cred; 618 619 struct fuse_data *data; 620 struct fuse_create_in *fci; 621 struct fuse_entry_out *feo; 622 struct fuse_open_out *foo; 623 struct fuse_dispatcher fdi, fdi2; 624 struct fuse_dispatcher *fdip = &fdi; 625 struct fuse_dispatcher *fdip2 = NULL; 626 627 int err; 628 629 struct mount *mp = vnode_mount(dvp); 630 data = fuse_get_mpdata(mp); 631 uint64_t parentnid = VTOFUD(dvp)->nid; 632 mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode); 633 enum fuse_opcode op; 634 int flags; 635 636 if (fuse_isdeadfs(dvp)) 637 return ENXIO; 638 639 /* FUSE expects sockets to be created with FUSE_MKNOD */ 640 if (vap->va_type == VSOCK) 641 return fuse_internal_mknod(dvp, vpp, cnp, vap); 642 643 /* 644 * VOP_CREATE doesn't tell us the open(2) flags, so we guess. Only a 645 * writable mode makes sense, and we might as well include readability 646 * too. 647 */ 648 flags = O_RDWR; 649 650 bzero(&fdi, sizeof(fdi)); 651 652 if (vap->va_type != VREG) 653 return (EINVAL); 654 655 if (!fsess_isimpl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { 656 /* Fallback to FUSE_MKNOD/FUSE_OPEN */ 657 fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td, 658 cred, mode, &op); 659 } else { 660 /* Use FUSE_CREATE */ 661 size_t insize; 662 663 op = FUSE_CREATE; 664 fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1); 665 fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred); 666 fci = fdip->indata; 667 fci->mode = mode; 668 fci->flags = O_CREAT | flags; 669 if (fuse_libabi_geq(data, 7, 12)) { 670 insize = sizeof(*fci); 671 fci->umask = td->td_proc->p_pd->pd_cmask; 672 } else { 673 insize = sizeof(struct fuse_open_in); 674 } 675 676 memcpy((char *)fdip->indata + insize, cnp->cn_nameptr, 677 cnp->cn_namelen); 678 ((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0'; 679 } 680 681 err = fdisp_wait_answ(fdip); 682 683 if (err) { 684 if (err == ENOSYS && op == FUSE_CREATE) { 685 fsess_set_notimpl(mp, FUSE_CREATE); 686 fdisp_destroy(fdip); 687 fdisp_make_mknod_for_fallback(fdip, cnp, dvp, 688 parentnid, td, cred, mode, &op); 689 err = fdisp_wait_answ(fdip); 690 } 691 if (err) 692 goto out; 693 } 694 695 feo = fdip->answ; 696 697 if ((err = fuse_internal_checkentry(feo, vap->va_type))) { 698 goto out; 699 } 700 701 if (op == FUSE_CREATE) { 702 foo = (struct fuse_open_out*)(feo + 1); 703 } else { 704 /* Issue a separate FUSE_OPEN */ 705 struct fuse_open_in *foi; 706 707 fdip2 = &fdi2; 708 fdisp_init(fdip2, sizeof(*foi)); 709 fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td, 710 cred); 711 foi = fdip2->indata; 712 foi->flags = flags; 713 err = fdisp_wait_answ(fdip2); 714 if (err) 715 goto out; 716 foo = fdip2->answ; 717 } 718 err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type); 719 if (err) { 720 struct fuse_release_in *fri; 721 uint64_t nodeid = feo->nodeid; 722 uint64_t fh_id = foo->fh; 723 724 fdisp_init(fdip, sizeof(*fri)); 725 fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred); 726 fri = fdip->indata; 727 fri->fh = fh_id; 728 fri->flags = flags; 729 fuse_insert_callback(fdip->tick, fuse_internal_forget_callback); 730 fuse_insert_message(fdip->tick, false); 731 goto out; 732 } 733 ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create"); 734 fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, 735 feo->attr_valid_nsec, NULL); 736 737 fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo); 738 fuse_vnode_open(*vpp, foo->open_flags, td); 739 /* 740 * Purge the parent's attribute cache because the daemon should've 741 * updated its mtime and ctime 742 */ 743 fuse_vnode_clear_attr_cache(dvp); 744 cache_purge_negative(dvp); 745 746 out: 747 if (fdip2) 748 fdisp_destroy(fdip2); 749 fdisp_destroy(fdip); 750 return err; 751 } 752 753 /* 754 struct vnop_fdatasync_args { 755 struct vop_generic_args a_gen; 756 struct vnode * a_vp; 757 struct thread * a_td; 758 }; 759 */ 760 static int 761 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap) 762 { 763 struct vnode *vp = ap->a_vp; 764 struct thread *td = ap->a_td; 765 int waitfor = MNT_WAIT; 766 767 int err = 0; 768 769 if (fuse_isdeadfs(vp)) { 770 return 0; 771 } 772 if ((err = vop_stdfdatasync_buf(ap))) 773 return err; 774 775 return fuse_internal_fsync(vp, td, waitfor, true); 776 } 777 778 /* 779 struct vnop_fsync_args { 780 struct vop_generic_args a_gen; 781 struct vnode * a_vp; 782 int a_waitfor; 783 struct thread * a_td; 784 }; 785 */ 786 static int 787 fuse_vnop_fsync(struct vop_fsync_args *ap) 788 { 789 struct vnode *vp = ap->a_vp; 790 struct thread *td = ap->a_td; 791 int waitfor = ap->a_waitfor; 792 int err = 0; 793 794 if (fuse_isdeadfs(vp)) { 795 return 0; 796 } 797 if ((err = vop_stdfsync(ap))) 798 return err; 799 800 return fuse_internal_fsync(vp, td, waitfor, false); 801 } 802 803 /* 804 struct vnop_getattr_args { 805 struct vnode *a_vp; 806 struct vattr *a_vap; 807 struct ucred *a_cred; 808 struct thread *a_td; 809 }; 810 */ 811 static int 812 fuse_vnop_getattr(struct vop_getattr_args *ap) 813 { 814 struct vnode *vp = ap->a_vp; 815 struct vattr *vap = ap->a_vap; 816 struct ucred *cred = ap->a_cred; 817 struct thread *td = curthread; 818 819 int err = 0; 820 int dataflags; 821 822 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; 823 824 /* Note that we are not bailing out on a dead file system just yet. */ 825 826 if (!(dataflags & FSESS_INITED)) { 827 if (!vnode_isvroot(vp)) { 828 fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); 829 err = ENOTCONN; 830 return err; 831 } else { 832 goto fake; 833 } 834 } 835 err = fuse_internal_getattr(vp, vap, cred, td); 836 if (err == ENOTCONN && vnode_isvroot(vp)) { 837 /* see comment in fuse_vfsop_statfs() */ 838 goto fake; 839 } else { 840 return err; 841 } 842 843 fake: 844 bzero(vap, sizeof(*vap)); 845 vap->va_type = vnode_vtype(vp); 846 847 return 0; 848 } 849 850 /* 851 struct vnop_inactive_args { 852 struct vnode *a_vp; 853 }; 854 */ 855 static int 856 fuse_vnop_inactive(struct vop_inactive_args *ap) 857 { 858 struct vnode *vp = ap->a_vp; 859 struct thread *td = curthread; 860 861 struct fuse_vnode_data *fvdat = VTOFUD(vp); 862 struct fuse_filehandle *fufh, *fufh_tmp; 863 864 int need_flush = 1; 865 866 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { 867 if (need_flush && vp->v_type == VREG) { 868 if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { 869 fuse_vnode_savesize(vp, NULL, 0); 870 } 871 if ((fvdat->flag & FN_REVOKED) != 0) 872 fuse_io_invalbuf(vp, td); 873 else 874 fuse_io_flushbuf(vp, MNT_WAIT, td); 875 need_flush = 0; 876 } 877 fuse_filehandle_close(vp, fufh, td, NULL); 878 } 879 880 if ((fvdat->flag & FN_REVOKED) != 0) 881 vrecycle(vp); 882 883 return 0; 884 } 885 886 /* 887 struct vnop_link_args { 888 struct vnode *a_tdvp; 889 struct vnode *a_vp; 890 struct componentname *a_cnp; 891 }; 892 */ 893 static int 894 fuse_vnop_link(struct vop_link_args *ap) 895 { 896 struct vnode *vp = ap->a_vp; 897 struct vnode *tdvp = ap->a_tdvp; 898 struct componentname *cnp = ap->a_cnp; 899 900 struct vattr *vap = VTOVA(vp); 901 902 struct fuse_dispatcher fdi; 903 struct fuse_entry_out *feo; 904 struct fuse_link_in fli; 905 906 int err; 907 908 if (fuse_isdeadfs(vp)) { 909 return ENXIO; 910 } 911 if (vnode_mount(tdvp) != vnode_mount(vp)) { 912 return EXDEV; 913 } 914 915 /* 916 * This is a seatbelt check to protect naive userspace filesystems from 917 * themselves and the limitations of the FUSE IPC protocol. If a 918 * filesystem does not allow attribute caching, assume it is capable of 919 * validating that nlink does not overflow. 920 */ 921 if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) 922 return EMLINK; 923 fli.oldnodeid = VTOI(vp); 924 925 fdisp_init(&fdi, 0); 926 fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp, 927 FUSE_LINK, &fli, sizeof(fli), &fdi); 928 if ((err = fdisp_wait_answ(&fdi))) { 929 goto out; 930 } 931 feo = fdi.answ; 932 933 err = fuse_internal_checkentry(feo, vnode_vtype(vp)); 934 if (!err) { 935 /* 936 * Purge the parent's attribute cache because the daemon 937 * should've updated its mtime and ctime 938 */ 939 fuse_vnode_clear_attr_cache(tdvp); 940 fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid, 941 feo->attr_valid_nsec, NULL); 942 } 943 out: 944 fdisp_destroy(&fdi); 945 return err; 946 } 947 948 struct fuse_lookup_alloc_arg { 949 struct fuse_entry_out *feo; 950 struct componentname *cnp; 951 uint64_t nid; 952 enum vtype vtyp; 953 }; 954 955 /* Callback for vn_get_ino */ 956 static int 957 fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 958 { 959 struct fuse_lookup_alloc_arg *flaa = arg; 960 961 return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp, 962 flaa->vtyp); 963 } 964 965 SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup, 966 "int", "struct timespec*", "struct timespec*"); 967 SDT_PROBE_DEFINE2(fusefs, , vnops, lookup_cache_incoherent, 968 "struct vnode*", "struct fuse_entry_out*"); 969 /* 970 struct vnop_lookup_args { 971 struct vnodeop_desc *a_desc; 972 struct vnode *a_dvp; 973 struct vnode **a_vpp; 974 struct componentname *a_cnp; 975 }; 976 */ 977 int 978 fuse_vnop_lookup(struct vop_lookup_args *ap) 979 { 980 struct vnode *dvp = ap->a_dvp; 981 struct vnode **vpp = ap->a_vpp; 982 struct componentname *cnp = ap->a_cnp; 983 struct thread *td = cnp->cn_thread; 984 struct ucred *cred = cnp->cn_cred; 985 986 int nameiop = cnp->cn_nameiop; 987 int flags = cnp->cn_flags; 988 int wantparent = flags & (LOCKPARENT | WANTPARENT); 989 int islastcn = flags & ISLASTCN; 990 struct mount *mp = vnode_mount(dvp); 991 struct fuse_data *data = fuse_get_mpdata(mp); 992 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; 993 994 int err = 0; 995 int lookup_err = 0; 996 struct vnode *vp = NULL; 997 998 struct fuse_dispatcher fdi; 999 bool did_lookup = false; 1000 struct fuse_entry_out *feo = NULL; 1001 enum vtype vtyp; /* vnode type of target */ 1002 off_t filesize; /* filesize of target */ 1003 1004 uint64_t nid; 1005 1006 if (fuse_isdeadfs(dvp)) { 1007 *vpp = NULL; 1008 return ENXIO; 1009 } 1010 if (!vnode_isdir(dvp)) 1011 return ENOTDIR; 1012 1013 if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) 1014 return EROFS; 1015 1016 if ((cnp->cn_flags & NOEXECCHECK) != 0) 1017 cnp->cn_flags &= ~NOEXECCHECK; 1018 else if ((err = fuse_internal_access(dvp, VEXEC, td, cred))) 1019 return err; 1020 1021 if (flags & ISDOTDOT) { 1022 KASSERT(VTOFUD(dvp)->flag & FN_PARENT_NID, 1023 ("Looking up .. is TODO")); 1024 nid = VTOFUD(dvp)->parent_nid; 1025 if (nid == 0) 1026 return ENOENT; 1027 /* .. is obviously a directory */ 1028 vtyp = VDIR; 1029 filesize = 0; 1030 } else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') { 1031 nid = VTOI(dvp); 1032 /* . is obviously a directory */ 1033 vtyp = VDIR; 1034 filesize = 0; 1035 } else { 1036 struct timespec now, timeout; 1037 int ncpticks; /* here to accomodate for API contract */ 1038 1039 err = cache_lookup(dvp, vpp, cnp, &timeout, &ncpticks); 1040 getnanouptime(&now); 1041 SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now); 1042 switch (err) { 1043 case -1: /* positive match */ 1044 if (timespeccmp(&timeout, &now, >)) { 1045 counter_u64_add(fuse_lookup_cache_hits, 1); 1046 } else { 1047 /* Cache timeout */ 1048 counter_u64_add(fuse_lookup_cache_misses, 1); 1049 bintime_clear( 1050 &VTOFUD(*vpp)->entry_cache_timeout); 1051 cache_purge(*vpp); 1052 if (dvp != *vpp) 1053 vput(*vpp); 1054 else 1055 vrele(*vpp); 1056 *vpp = NULL; 1057 break; 1058 } 1059 return 0; 1060 1061 case 0: /* no match in cache */ 1062 counter_u64_add(fuse_lookup_cache_misses, 1); 1063 break; 1064 1065 case ENOENT: /* negative match */ 1066 getnanouptime(&now); 1067 if (timespeccmp(&timeout, &now, <=)) { 1068 /* Cache timeout */ 1069 cache_purge_negative(dvp); 1070 break; 1071 } 1072 /* fall through */ 1073 default: 1074 return err; 1075 } 1076 1077 nid = VTOI(dvp); 1078 fdisp_init(&fdi, cnp->cn_namelen + 1); 1079 fdisp_make(&fdi, FUSE_LOOKUP, mp, nid, td, cred); 1080 1081 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); 1082 ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; 1083 lookup_err = fdisp_wait_answ(&fdi); 1084 did_lookup = true; 1085 1086 if (!lookup_err) { 1087 /* lookup call succeeded */ 1088 feo = (struct fuse_entry_out *)fdi.answ; 1089 nid = feo->nodeid; 1090 if (nid == 0) { 1091 /* zero nodeid means ENOENT and cache it */ 1092 struct timespec timeout; 1093 1094 fdi.answ_stat = ENOENT; 1095 lookup_err = ENOENT; 1096 if (cnp->cn_flags & MAKEENTRY) { 1097 fuse_validity_2_timespec(feo, &timeout); 1098 cache_enter_time(dvp, *vpp, cnp, 1099 &timeout, NULL); 1100 } 1101 } else if (nid == FUSE_ROOT_ID) { 1102 lookup_err = EINVAL; 1103 } 1104 vtyp = IFTOVT(feo->attr.mode); 1105 filesize = feo->attr.size; 1106 } 1107 if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) { 1108 fdisp_destroy(&fdi); 1109 return lookup_err; 1110 } 1111 } 1112 /* lookup_err, if non-zero, must be ENOENT at this point */ 1113 1114 if (lookup_err) { 1115 /* Entry not found */ 1116 if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { 1117 if (default_permissions) 1118 err = fuse_internal_access(dvp, VWRITE, td, 1119 cred); 1120 else 1121 err = 0; 1122 if (!err) { 1123 /* 1124 * Set the SAVENAME flag to hold onto the 1125 * pathname for use later in VOP_CREATE or 1126 * VOP_RENAME. 1127 */ 1128 cnp->cn_flags |= SAVENAME; 1129 1130 err = EJUSTRETURN; 1131 } 1132 } else { 1133 err = ENOENT; 1134 } 1135 } else { 1136 /* Entry was found */ 1137 if (flags & ISDOTDOT) { 1138 struct fuse_lookup_alloc_arg flaa; 1139 1140 flaa.nid = nid; 1141 flaa.feo = feo; 1142 flaa.cnp = cnp; 1143 flaa.vtyp = vtyp; 1144 err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0, 1145 &vp); 1146 *vpp = vp; 1147 } else if (nid == VTOI(dvp)) { 1148 vref(dvp); 1149 *vpp = dvp; 1150 } else { 1151 struct fuse_vnode_data *fvdat; 1152 struct vattr *vap; 1153 1154 err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, 1155 &vp, cnp, vtyp); 1156 if (err) 1157 goto out; 1158 *vpp = vp; 1159 1160 /* 1161 * In the case where we are looking up a FUSE node 1162 * represented by an existing cached vnode, and the 1163 * true size reported by FUSE_LOOKUP doesn't match 1164 * the vnode's cached size, then any cached writes 1165 * beyond the file's current size are lost. 1166 * 1167 * We can get here: 1168 * * following attribute cache expiration, or 1169 * * due a bug in the daemon, or 1170 */ 1171 fvdat = VTOFUD(vp); 1172 if (vnode_isreg(vp) && 1173 ((filesize != fvdat->cached_attrs.va_size && 1174 fvdat->flag & FN_SIZECHANGE) || 1175 ((vap = VTOVA(vp)) && 1176 filesize != vap->va_size))) 1177 { 1178 SDT_PROBE2(fusefs, , vnops, lookup_cache_incoherent, vp, feo); 1179 fvdat->flag &= ~FN_SIZECHANGE; 1180 /* 1181 * The server changed the file's size even 1182 * though we had it cached, or had dirty writes 1183 * in the WB cache! 1184 */ 1185 printf("%s: cache incoherent on %s! " 1186 "Buggy FUSE server detected. To prevent " 1187 "data corruption, disable the data cache " 1188 "by mounting with -o direct_io, or as " 1189 "directed otherwise by your FUSE server's " 1190 "documentation\n", __func__, 1191 vnode_mount(vp)->mnt_stat.f_mntonname); 1192 int iosize = fuse_iosize(vp); 1193 v_inval_buf_range(vp, 0, INT64_MAX, iosize); 1194 } 1195 1196 MPASS(feo != NULL); 1197 fuse_internal_cache_attrs(*vpp, &feo->attr, 1198 feo->attr_valid, feo->attr_valid_nsec, NULL); 1199 fuse_validity_2_bintime(feo->entry_valid, 1200 feo->entry_valid_nsec, 1201 &fvdat->entry_cache_timeout); 1202 1203 if ((nameiop == DELETE || nameiop == RENAME) && 1204 islastcn && default_permissions) 1205 { 1206 struct vattr dvattr; 1207 1208 err = fuse_internal_access(dvp, VWRITE, td, 1209 cred); 1210 if (err != 0) 1211 goto out; 1212 /* 1213 * if the parent's sticky bit is set, check 1214 * whether we're allowed to remove the file. 1215 * Need to figure out the vnode locking to make 1216 * this work. 1217 */ 1218 fuse_internal_getattr(dvp, &dvattr, cred, td); 1219 if ((dvattr.va_mode & S_ISTXT) && 1220 fuse_internal_access(dvp, VADMIN, td, 1221 cred) && 1222 fuse_internal_access(*vpp, VADMIN, td, 1223 cred)) { 1224 err = EPERM; 1225 goto out; 1226 } 1227 } 1228 1229 if (islastcn && ( 1230 (nameiop == DELETE) || 1231 (nameiop == RENAME && wantparent))) { 1232 cnp->cn_flags |= SAVENAME; 1233 } 1234 } 1235 } 1236 out: 1237 if (err) { 1238 if (vp != NULL && dvp != vp) 1239 vput(vp); 1240 else if (vp != NULL) 1241 vrele(vp); 1242 *vpp = NULL; 1243 } 1244 if (did_lookup) 1245 fdisp_destroy(&fdi); 1246 1247 return err; 1248 } 1249 1250 /* 1251 struct vnop_mkdir_args { 1252 struct vnode *a_dvp; 1253 struct vnode **a_vpp; 1254 struct componentname *a_cnp; 1255 struct vattr *a_vap; 1256 }; 1257 */ 1258 static int 1259 fuse_vnop_mkdir(struct vop_mkdir_args *ap) 1260 { 1261 struct vnode *dvp = ap->a_dvp; 1262 struct vnode **vpp = ap->a_vpp; 1263 struct componentname *cnp = ap->a_cnp; 1264 struct vattr *vap = ap->a_vap; 1265 1266 struct fuse_mkdir_in fmdi; 1267 1268 if (fuse_isdeadfs(dvp)) { 1269 return ENXIO; 1270 } 1271 fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); 1272 fmdi.umask = curthread->td_proc->p_pd->pd_cmask; 1273 1274 return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi, 1275 sizeof(fmdi), VDIR)); 1276 } 1277 1278 /* 1279 struct vnop_mknod_args { 1280 struct vnode *a_dvp; 1281 struct vnode **a_vpp; 1282 struct componentname *a_cnp; 1283 struct vattr *a_vap; 1284 }; 1285 */ 1286 static int 1287 fuse_vnop_mknod(struct vop_mknod_args *ap) 1288 { 1289 1290 struct vnode *dvp = ap->a_dvp; 1291 struct vnode **vpp = ap->a_vpp; 1292 struct componentname *cnp = ap->a_cnp; 1293 struct vattr *vap = ap->a_vap; 1294 1295 if (fuse_isdeadfs(dvp)) 1296 return ENXIO; 1297 1298 return fuse_internal_mknod(dvp, vpp, cnp, vap); 1299 } 1300 1301 /* 1302 struct vop_open_args { 1303 struct vnode *a_vp; 1304 int a_mode; 1305 struct ucred *a_cred; 1306 struct thread *a_td; 1307 int a_fdidx; / struct file *a_fp; 1308 }; 1309 */ 1310 static int 1311 fuse_vnop_open(struct vop_open_args *ap) 1312 { 1313 struct vnode *vp = ap->a_vp; 1314 int a_mode = ap->a_mode; 1315 struct thread *td = ap->a_td; 1316 struct ucred *cred = ap->a_cred; 1317 pid_t pid = td->td_proc->p_pid; 1318 struct fuse_vnode_data *fvdat; 1319 1320 if (fuse_isdeadfs(vp)) 1321 return ENXIO; 1322 if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) 1323 return (EOPNOTSUPP); 1324 if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) 1325 return EINVAL; 1326 1327 fvdat = VTOFUD(vp); 1328 1329 if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { 1330 fuse_vnode_open(vp, 0, td); 1331 return 0; 1332 } 1333 1334 return fuse_filehandle_open(vp, a_mode, NULL, td, cred); 1335 } 1336 1337 static int 1338 fuse_vnop_pathconf(struct vop_pathconf_args *ap) 1339 { 1340 1341 switch (ap->a_name) { 1342 case _PC_FILESIZEBITS: 1343 *ap->a_retval = 64; 1344 return (0); 1345 case _PC_NAME_MAX: 1346 *ap->a_retval = NAME_MAX; 1347 return (0); 1348 case _PC_LINK_MAX: 1349 *ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX); 1350 return (0); 1351 case _PC_SYMLINK_MAX: 1352 *ap->a_retval = MAXPATHLEN; 1353 return (0); 1354 case _PC_NO_TRUNC: 1355 *ap->a_retval = 1; 1356 return (0); 1357 default: 1358 return (vop_stdpathconf(ap)); 1359 } 1360 } 1361 1362 /* 1363 struct vnop_read_args { 1364 struct vnode *a_vp; 1365 struct uio *a_uio; 1366 int a_ioflag; 1367 struct ucred *a_cred; 1368 }; 1369 */ 1370 static int 1371 fuse_vnop_read(struct vop_read_args *ap) 1372 { 1373 struct vnode *vp = ap->a_vp; 1374 struct uio *uio = ap->a_uio; 1375 int ioflag = ap->a_ioflag; 1376 struct ucred *cred = ap->a_cred; 1377 pid_t pid = curthread->td_proc->p_pid; 1378 1379 if (fuse_isdeadfs(vp)) { 1380 return ENXIO; 1381 } 1382 1383 if (VTOFUD(vp)->flag & FN_DIRECTIO) { 1384 ioflag |= IO_DIRECT; 1385 } 1386 1387 return fuse_io_dispatch(vp, uio, ioflag, cred, pid); 1388 } 1389 1390 /* 1391 struct vnop_readdir_args { 1392 struct vnode *a_vp; 1393 struct uio *a_uio; 1394 struct ucred *a_cred; 1395 int *a_eofflag; 1396 int *a_ncookies; 1397 u_long **a_cookies; 1398 }; 1399 */ 1400 static int 1401 fuse_vnop_readdir(struct vop_readdir_args *ap) 1402 { 1403 struct vnode *vp = ap->a_vp; 1404 struct uio *uio = ap->a_uio; 1405 struct ucred *cred = ap->a_cred; 1406 struct fuse_filehandle *fufh = NULL; 1407 struct fuse_iov cookediov; 1408 int err = 0; 1409 u_long *cookies; 1410 off_t startoff; 1411 ssize_t tresid; 1412 int ncookies; 1413 bool closefufh = false; 1414 pid_t pid = curthread->td_proc->p_pid; 1415 1416 if (ap->a_eofflag) 1417 *ap->a_eofflag = 0; 1418 if (fuse_isdeadfs(vp)) { 1419 return ENXIO; 1420 } 1421 if ( /* XXXIP ((uio_iovcnt(uio) > 1)) || */ 1422 (uio_resid(uio) < sizeof(struct dirent))) { 1423 return EINVAL; 1424 } 1425 1426 tresid = uio->uio_resid; 1427 startoff = uio->uio_offset; 1428 err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); 1429 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { 1430 /* 1431 * nfsd will do VOP_READDIR without first doing VOP_OPEN. We 1432 * must implicitly open the directory here 1433 */ 1434 err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); 1435 if (err == 0) { 1436 /* 1437 * When a directory is opened, it must be read from 1438 * the beginning. Hopefully, the "startoff" still 1439 * exists as an offset cookie for the directory. 1440 * If not, it will read the entire directory without 1441 * returning any entries and just return eof. 1442 */ 1443 uio->uio_offset = 0; 1444 } 1445 closefufh = true; 1446 } 1447 if (err) 1448 return (err); 1449 if (ap->a_ncookies != NULL) { 1450 ncookies = uio->uio_resid / 1451 (offsetof(struct dirent, d_name) + 4) + 1; 1452 cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); 1453 *ap->a_ncookies = ncookies; 1454 *ap->a_cookies = cookies; 1455 } else { 1456 ncookies = 0; 1457 cookies = NULL; 1458 } 1459 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1) 1460 fiov_init(&cookediov, DIRCOOKEDSIZE); 1461 1462 err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov, 1463 &ncookies, cookies); 1464 1465 fiov_teardown(&cookediov); 1466 if (closefufh) 1467 fuse_filehandle_close(vp, fufh, curthread, cred); 1468 1469 if (ap->a_ncookies != NULL) { 1470 if (err == 0) { 1471 *ap->a_ncookies -= ncookies; 1472 } else { 1473 free(*ap->a_cookies, M_TEMP); 1474 *ap->a_ncookies = 0; 1475 *ap->a_cookies = NULL; 1476 } 1477 } 1478 if (err == 0 && tresid == uio->uio_resid) 1479 *ap->a_eofflag = 1; 1480 1481 return err; 1482 } 1483 1484 /* 1485 struct vnop_readlink_args { 1486 struct vnode *a_vp; 1487 struct uio *a_uio; 1488 struct ucred *a_cred; 1489 }; 1490 */ 1491 static int 1492 fuse_vnop_readlink(struct vop_readlink_args *ap) 1493 { 1494 struct vnode *vp = ap->a_vp; 1495 struct uio *uio = ap->a_uio; 1496 struct ucred *cred = ap->a_cred; 1497 1498 struct fuse_dispatcher fdi; 1499 int err; 1500 1501 if (fuse_isdeadfs(vp)) { 1502 return ENXIO; 1503 } 1504 if (!vnode_islnk(vp)) { 1505 return EINVAL; 1506 } 1507 fdisp_init(&fdi, 0); 1508 err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred); 1509 if (err) { 1510 goto out; 1511 } 1512 if (((char *)fdi.answ)[0] == '/' && 1513 fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) { 1514 char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname; 1515 1516 err = uiomove(mpth, strlen(mpth), uio); 1517 } 1518 if (!err) { 1519 err = uiomove(fdi.answ, fdi.iosize, uio); 1520 } 1521 out: 1522 fdisp_destroy(&fdi); 1523 return err; 1524 } 1525 1526 /* 1527 struct vnop_reclaim_args { 1528 struct vnode *a_vp; 1529 }; 1530 */ 1531 static int 1532 fuse_vnop_reclaim(struct vop_reclaim_args *ap) 1533 { 1534 struct vnode *vp = ap->a_vp; 1535 struct thread *td = curthread; 1536 struct fuse_vnode_data *fvdat = VTOFUD(vp); 1537 struct fuse_filehandle *fufh, *fufh_tmp; 1538 1539 if (!fvdat) { 1540 panic("FUSE: no vnode data during recycling"); 1541 } 1542 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { 1543 printf("FUSE: vnode being reclaimed with open fufh " 1544 "(type=%#x)", fufh->fufh_type); 1545 fuse_filehandle_close(vp, fufh, td, NULL); 1546 } 1547 1548 if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) { 1549 fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp), 1550 fvdat->nlookup); 1551 } 1552 cache_purge(vp); 1553 vfs_hash_remove(vp); 1554 fuse_vnode_destroy(vp); 1555 1556 return 0; 1557 } 1558 1559 /* 1560 struct vnop_remove_args { 1561 struct vnode *a_dvp; 1562 struct vnode *a_vp; 1563 struct componentname *a_cnp; 1564 }; 1565 */ 1566 static int 1567 fuse_vnop_remove(struct vop_remove_args *ap) 1568 { 1569 struct vnode *dvp = ap->a_dvp; 1570 struct vnode *vp = ap->a_vp; 1571 struct componentname *cnp = ap->a_cnp; 1572 1573 int err; 1574 1575 if (fuse_isdeadfs(vp)) { 1576 return ENXIO; 1577 } 1578 if (vnode_isdir(vp)) { 1579 return EPERM; 1580 } 1581 1582 err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); 1583 1584 return err; 1585 } 1586 1587 /* 1588 struct vnop_rename_args { 1589 struct vnode *a_fdvp; 1590 struct vnode *a_fvp; 1591 struct componentname *a_fcnp; 1592 struct vnode *a_tdvp; 1593 struct vnode *a_tvp; 1594 struct componentname *a_tcnp; 1595 }; 1596 */ 1597 static int 1598 fuse_vnop_rename(struct vop_rename_args *ap) 1599 { 1600 struct vnode *fdvp = ap->a_fdvp; 1601 struct vnode *fvp = ap->a_fvp; 1602 struct componentname *fcnp = ap->a_fcnp; 1603 struct vnode *tdvp = ap->a_tdvp; 1604 struct vnode *tvp = ap->a_tvp; 1605 struct componentname *tcnp = ap->a_tcnp; 1606 struct fuse_data *data; 1607 bool newparent = fdvp != tdvp; 1608 bool isdir = fvp->v_type == VDIR; 1609 int err = 0; 1610 1611 if (fuse_isdeadfs(fdvp)) { 1612 return ENXIO; 1613 } 1614 if (fvp->v_mount != tdvp->v_mount || 1615 (tvp && fvp->v_mount != tvp->v_mount)) { 1616 SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename"); 1617 err = EXDEV; 1618 goto out; 1619 } 1620 cache_purge(fvp); 1621 1622 /* 1623 * FUSE library is expected to check if target directory is not 1624 * under the source directory in the file system tree. 1625 * Linux performs this check at VFS level. 1626 */ 1627 /* 1628 * If source is a directory, and it will get a new parent, user must 1629 * have write permission to it, so ".." can be modified. 1630 */ 1631 data = fuse_get_mpdata(vnode_mount(tdvp)); 1632 if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) { 1633 err = fuse_internal_access(fvp, VWRITE, 1634 tcnp->cn_thread, tcnp->cn_cred); 1635 if (err) 1636 goto out; 1637 } 1638 sx_xlock(&data->rename_lock); 1639 err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp); 1640 if (err == 0) { 1641 if (tdvp != fdvp) 1642 fuse_vnode_setparent(fvp, tdvp); 1643 if (tvp != NULL) 1644 fuse_vnode_setparent(tvp, NULL); 1645 } 1646 sx_unlock(&data->rename_lock); 1647 1648 if (tvp != NULL && tvp != fvp) { 1649 cache_purge(tvp); 1650 } 1651 if (vnode_isdir(fvp)) { 1652 if ((tvp != NULL) && vnode_isdir(tvp)) { 1653 cache_purge(tdvp); 1654 } 1655 cache_purge(fdvp); 1656 } 1657 out: 1658 if (tdvp == tvp) { 1659 vrele(tdvp); 1660 } else { 1661 vput(tdvp); 1662 } 1663 if (tvp != NULL) { 1664 vput(tvp); 1665 } 1666 vrele(fdvp); 1667 vrele(fvp); 1668 1669 return err; 1670 } 1671 1672 /* 1673 struct vnop_rmdir_args { 1674 struct vnode *a_dvp; 1675 struct vnode *a_vp; 1676 struct componentname *a_cnp; 1677 } *ap; 1678 */ 1679 static int 1680 fuse_vnop_rmdir(struct vop_rmdir_args *ap) 1681 { 1682 struct vnode *dvp = ap->a_dvp; 1683 struct vnode *vp = ap->a_vp; 1684 1685 int err; 1686 1687 if (fuse_isdeadfs(vp)) { 1688 return ENXIO; 1689 } 1690 if (VTOFUD(vp) == VTOFUD(dvp)) { 1691 return EINVAL; 1692 } 1693 err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); 1694 1695 return err; 1696 } 1697 1698 /* 1699 struct vnop_setattr_args { 1700 struct vnode *a_vp; 1701 struct vattr *a_vap; 1702 struct ucred *a_cred; 1703 struct thread *a_td; 1704 }; 1705 */ 1706 static int 1707 fuse_vnop_setattr(struct vop_setattr_args *ap) 1708 { 1709 struct vnode *vp = ap->a_vp; 1710 struct vattr *vap = ap->a_vap; 1711 struct ucred *cred = ap->a_cred; 1712 struct thread *td = curthread; 1713 struct mount *mp; 1714 struct fuse_data *data; 1715 struct vattr old_va; 1716 int dataflags; 1717 int err = 0, err2; 1718 accmode_t accmode = 0; 1719 bool checkperm; 1720 bool drop_suid = false; 1721 gid_t cr_gid; 1722 1723 mp = vnode_mount(vp); 1724 data = fuse_get_mpdata(mp); 1725 dataflags = data->dataflags; 1726 checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; 1727 if (cred->cr_ngroups > 0) 1728 cr_gid = cred->cr_groups[0]; 1729 else 1730 cr_gid = 0; 1731 1732 if (fuse_isdeadfs(vp)) { 1733 return ENXIO; 1734 } 1735 1736 if (vap->va_uid != (uid_t)VNOVAL) { 1737 if (checkperm) { 1738 /* Only root may change a file's owner */ 1739 err = priv_check_cred(cred, PRIV_VFS_CHOWN); 1740 if (err) { 1741 /* As a special case, allow the null chown */ 1742 err2 = fuse_internal_getattr(vp, &old_va, cred, 1743 td); 1744 if (err2) 1745 return (err2); 1746 if (vap->va_uid != old_va.va_uid) 1747 return err; 1748 else 1749 accmode |= VADMIN; 1750 drop_suid = true; 1751 } else 1752 accmode |= VADMIN; 1753 } else 1754 accmode |= VADMIN; 1755 } 1756 if (vap->va_gid != (gid_t)VNOVAL) { 1757 if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN)) 1758 drop_suid = true; 1759 if (checkperm && !groupmember(vap->va_gid, cred)) 1760 { 1761 /* 1762 * Non-root users may only chgrp to one of their own 1763 * groups 1764 */ 1765 err = priv_check_cred(cred, PRIV_VFS_CHOWN); 1766 if (err) { 1767 /* As a special case, allow the null chgrp */ 1768 err2 = fuse_internal_getattr(vp, &old_va, cred, 1769 td); 1770 if (err2) 1771 return (err2); 1772 if (vap->va_gid != old_va.va_gid) 1773 return err; 1774 accmode |= VADMIN; 1775 } else 1776 accmode |= VADMIN; 1777 } else 1778 accmode |= VADMIN; 1779 } 1780 if (vap->va_size != VNOVAL) { 1781 switch (vp->v_type) { 1782 case VDIR: 1783 return (EISDIR); 1784 case VLNK: 1785 case VREG: 1786 if (vfs_isrdonly(mp)) 1787 return (EROFS); 1788 break; 1789 default: 1790 /* 1791 * According to POSIX, the result is unspecified 1792 * for file types other than regular files, 1793 * directories and shared memory objects. We 1794 * don't support shared memory objects in the file 1795 * system, and have dubious support for truncating 1796 * symlinks. Just ignore the request in other cases. 1797 */ 1798 return (0); 1799 } 1800 /* Don't set accmode. Permission to trunc is checked upstack */ 1801 } 1802 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 1803 if (vap->va_vaflags & VA_UTIMES_NULL) 1804 accmode |= VWRITE; 1805 else 1806 accmode |= VADMIN; 1807 } 1808 if (drop_suid) { 1809 if (vap->va_mode != (mode_t)VNOVAL) 1810 vap->va_mode &= ~(S_ISUID | S_ISGID); 1811 else { 1812 err = fuse_internal_getattr(vp, &old_va, cred, td); 1813 if (err) 1814 return (err); 1815 vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID); 1816 } 1817 } 1818 if (vap->va_mode != (mode_t)VNOVAL) { 1819 /* Only root may set the sticky bit on non-directories */ 1820 if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT) 1821 && priv_check_cred(cred, PRIV_VFS_STICKYFILE)) 1822 return EFTYPE; 1823 if (checkperm && (vap->va_mode & S_ISGID)) { 1824 err = fuse_internal_getattr(vp, &old_va, cred, td); 1825 if (err) 1826 return (err); 1827 if (!groupmember(old_va.va_gid, cred)) { 1828 err = priv_check_cred(cred, PRIV_VFS_SETGID); 1829 if (err) 1830 return (err); 1831 } 1832 } 1833 accmode |= VADMIN; 1834 } 1835 1836 if (vfs_isrdonly(mp)) 1837 return EROFS; 1838 1839 if (checkperm) { 1840 err = fuse_internal_access(vp, accmode, td, cred); 1841 } else { 1842 err = 0; 1843 } 1844 if (err) 1845 return err; 1846 else 1847 return fuse_internal_setattr(vp, vap, td, cred); 1848 } 1849 1850 /* 1851 struct vnop_strategy_args { 1852 struct vnode *a_vp; 1853 struct buf *a_bp; 1854 }; 1855 */ 1856 static int 1857 fuse_vnop_strategy(struct vop_strategy_args *ap) 1858 { 1859 struct vnode *vp = ap->a_vp; 1860 struct buf *bp = ap->a_bp; 1861 1862 if (!vp || fuse_isdeadfs(vp)) { 1863 bp->b_ioflags |= BIO_ERROR; 1864 bp->b_error = ENXIO; 1865 bufdone(bp); 1866 return 0; 1867 } 1868 1869 /* 1870 * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags. 1871 * fuse_io_strategy sets bp's error fields 1872 */ 1873 (void)fuse_io_strategy(vp, bp); 1874 1875 return 0; 1876 } 1877 1878 /* 1879 struct vnop_symlink_args { 1880 struct vnode *a_dvp; 1881 struct vnode **a_vpp; 1882 struct componentname *a_cnp; 1883 struct vattr *a_vap; 1884 char *a_target; 1885 }; 1886 */ 1887 static int 1888 fuse_vnop_symlink(struct vop_symlink_args *ap) 1889 { 1890 struct vnode *dvp = ap->a_dvp; 1891 struct vnode **vpp = ap->a_vpp; 1892 struct componentname *cnp = ap->a_cnp; 1893 const char *target = ap->a_target; 1894 1895 struct fuse_dispatcher fdi; 1896 1897 int err; 1898 size_t len; 1899 1900 if (fuse_isdeadfs(dvp)) { 1901 return ENXIO; 1902 } 1903 /* 1904 * Unlike the other creator type calls, here we have to create a message 1905 * where the name of the new entry comes first, and the data describing 1906 * the entry comes second. 1907 * Hence we can't rely on our handy fuse_internal_newentry() routine, 1908 * but put together the message manually and just call the core part. 1909 */ 1910 1911 len = strlen(target) + 1; 1912 fdisp_init(&fdi, len + cnp->cn_namelen + 1); 1913 fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL); 1914 1915 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); 1916 ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; 1917 memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len); 1918 1919 err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi); 1920 fdisp_destroy(&fdi); 1921 return err; 1922 } 1923 1924 /* 1925 struct vnop_write_args { 1926 struct vnode *a_vp; 1927 struct uio *a_uio; 1928 int a_ioflag; 1929 struct ucred *a_cred; 1930 }; 1931 */ 1932 static int 1933 fuse_vnop_write(struct vop_write_args *ap) 1934 { 1935 struct vnode *vp = ap->a_vp; 1936 struct uio *uio = ap->a_uio; 1937 int ioflag = ap->a_ioflag; 1938 struct ucred *cred = ap->a_cred; 1939 pid_t pid = curthread->td_proc->p_pid; 1940 1941 if (fuse_isdeadfs(vp)) { 1942 return ENXIO; 1943 } 1944 1945 if (VTOFUD(vp)->flag & FN_DIRECTIO) { 1946 ioflag |= IO_DIRECT; 1947 } 1948 1949 return fuse_io_dispatch(vp, uio, ioflag, cred, pid); 1950 } 1951 1952 static daddr_t 1953 fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) 1954 { 1955 const int biosize = fuse_iosize(vp); 1956 1957 return (off / biosize); 1958 } 1959 1960 static int 1961 fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn) 1962 { 1963 off_t filesize; 1964 int blksz, err; 1965 const int biosize = fuse_iosize(vp); 1966 1967 err = fuse_vnode_size(vp, &filesize, NULL, NULL); 1968 KASSERT(err == 0, ("vfs_bio_getpages can't handle errors here")); 1969 if (err) 1970 return biosize; 1971 1972 if ((off_t)lbn * biosize >= filesize) { 1973 blksz = 0; 1974 } else if ((off_t)(lbn + 1) * biosize > filesize) { 1975 blksz = filesize - (off_t)lbn *biosize; 1976 } else { 1977 blksz = biosize; 1978 } 1979 return (blksz); 1980 } 1981 1982 /* 1983 struct vnop_getpages_args { 1984 struct vnode *a_vp; 1985 vm_page_t *a_m; 1986 int a_count; 1987 int a_reqpage; 1988 }; 1989 */ 1990 static int 1991 fuse_vnop_getpages(struct vop_getpages_args *ap) 1992 { 1993 struct vnode *vp = ap->a_vp; 1994 1995 if (!fsess_opt_mmap(vnode_mount(vp))) { 1996 SDT_PROBE2(fusefs, , vnops, trace, 1, 1997 "called on non-cacheable vnode??\n"); 1998 return (VM_PAGER_ERROR); 1999 } 2000 2001 return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, 2002 ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz)); 2003 } 2004 2005 static const char extattr_namespace_separator = '.'; 2006 2007 /* 2008 struct vop_getextattr_args { 2009 struct vop_generic_args a_gen; 2010 struct vnode *a_vp; 2011 int a_attrnamespace; 2012 const char *a_name; 2013 struct uio *a_uio; 2014 size_t *a_size; 2015 struct ucred *a_cred; 2016 struct thread *a_td; 2017 }; 2018 */ 2019 static int 2020 fuse_vnop_getextattr(struct vop_getextattr_args *ap) 2021 { 2022 struct vnode *vp = ap->a_vp; 2023 struct uio *uio = ap->a_uio; 2024 struct fuse_dispatcher fdi; 2025 struct fuse_getxattr_in *get_xattr_in; 2026 struct fuse_getxattr_out *get_xattr_out; 2027 struct mount *mp = vnode_mount(vp); 2028 struct thread *td = ap->a_td; 2029 struct ucred *cred = ap->a_cred; 2030 char *prefix; 2031 char *attr_str; 2032 size_t len; 2033 int err; 2034 2035 if (fuse_isdeadfs(vp)) 2036 return (ENXIO); 2037 2038 if (!fsess_isimpl(mp, FUSE_GETXATTR)) 2039 return EOPNOTSUPP; 2040 2041 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); 2042 if (err) 2043 return err; 2044 2045 /* Default to looking for user attributes. */ 2046 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2047 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2048 else 2049 prefix = EXTATTR_NAMESPACE_USER_STRING; 2050 2051 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 2052 strlen(ap->a_name) + 1; 2053 2054 fdisp_init(&fdi, len + sizeof(*get_xattr_in)); 2055 fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred); 2056 2057 get_xattr_in = fdi.indata; 2058 /* 2059 * Check to see whether we're querying the available size or 2060 * issuing the actual request. If we pass in 0, we get back struct 2061 * fuse_getxattr_out. If we pass in a non-zero size, we get back 2062 * that much data, without the struct fuse_getxattr_out header. 2063 */ 2064 if (uio == NULL) 2065 get_xattr_in->size = 0; 2066 else 2067 get_xattr_in->size = uio->uio_resid; 2068 2069 attr_str = (char *)fdi.indata + sizeof(*get_xattr_in); 2070 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 2071 ap->a_name); 2072 2073 err = fdisp_wait_answ(&fdi); 2074 if (err != 0) { 2075 if (err == ENOSYS) { 2076 fsess_set_notimpl(mp, FUSE_GETXATTR); 2077 err = EOPNOTSUPP; 2078 } 2079 goto out; 2080 } 2081 2082 get_xattr_out = fdi.answ; 2083 2084 if (ap->a_size != NULL) 2085 *ap->a_size = get_xattr_out->size; 2086 2087 if (uio != NULL) 2088 err = uiomove(fdi.answ, fdi.iosize, uio); 2089 2090 out: 2091 fdisp_destroy(&fdi); 2092 return (err); 2093 } 2094 2095 /* 2096 struct vop_setextattr_args { 2097 struct vop_generic_args a_gen; 2098 struct vnode *a_vp; 2099 int a_attrnamespace; 2100 const char *a_name; 2101 struct uio *a_uio; 2102 struct ucred *a_cred; 2103 struct thread *a_td; 2104 }; 2105 */ 2106 static int 2107 fuse_vnop_setextattr(struct vop_setextattr_args *ap) 2108 { 2109 struct vnode *vp = ap->a_vp; 2110 struct uio *uio = ap->a_uio; 2111 struct fuse_dispatcher fdi; 2112 struct fuse_setxattr_in *set_xattr_in; 2113 struct mount *mp = vnode_mount(vp); 2114 struct thread *td = ap->a_td; 2115 struct ucred *cred = ap->a_cred; 2116 char *prefix; 2117 size_t len; 2118 char *attr_str; 2119 int err; 2120 2121 if (fuse_isdeadfs(vp)) 2122 return (ENXIO); 2123 2124 if (!fsess_isimpl(mp, FUSE_SETXATTR)) 2125 return EOPNOTSUPP; 2126 2127 if (vfs_isrdonly(mp)) 2128 return EROFS; 2129 2130 /* Deleting xattrs must use VOP_DELETEEXTATTR instead */ 2131 if (ap->a_uio == NULL) { 2132 /* 2133 * If we got here as fallback from VOP_DELETEEXTATTR, then 2134 * return EOPNOTSUPP. 2135 */ 2136 if (!fsess_isimpl(mp, FUSE_REMOVEXATTR)) 2137 return (EOPNOTSUPP); 2138 else 2139 return (EINVAL); 2140 } 2141 2142 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, 2143 VWRITE); 2144 if (err) 2145 return err; 2146 2147 /* Default to looking for user attributes. */ 2148 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2149 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2150 else 2151 prefix = EXTATTR_NAMESPACE_USER_STRING; 2152 2153 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 2154 strlen(ap->a_name) + 1; 2155 2156 fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid); 2157 fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred); 2158 2159 set_xattr_in = fdi.indata; 2160 set_xattr_in->size = uio->uio_resid; 2161 2162 attr_str = (char *)fdi.indata + sizeof(*set_xattr_in); 2163 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 2164 ap->a_name); 2165 2166 err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len, 2167 uio->uio_resid, uio); 2168 if (err != 0) { 2169 goto out; 2170 } 2171 2172 err = fdisp_wait_answ(&fdi); 2173 2174 if (err == ENOSYS) { 2175 fsess_set_notimpl(mp, FUSE_SETXATTR); 2176 err = EOPNOTSUPP; 2177 } 2178 if (err == ERESTART) { 2179 /* Can't restart after calling uiomove */ 2180 err = EINTR; 2181 } 2182 2183 out: 2184 fdisp_destroy(&fdi); 2185 return (err); 2186 } 2187 2188 /* 2189 * The Linux / FUSE extended attribute list is simply a collection of 2190 * NUL-terminated strings. The FreeBSD extended attribute list is a single 2191 * byte length followed by a non-NUL terminated string. So, this allows 2192 * conversion of the Linux / FUSE format to the FreeBSD format in place. 2193 * Linux attribute names are reported with the namespace as a prefix (e.g. 2194 * "user.attribute_name"), but in FreeBSD they are reported without the 2195 * namespace prefix (e.g. "attribute_name"). So, we're going from: 2196 * 2197 * user.attr_name1\0user.attr_name2\0 2198 * 2199 * to: 2200 * 2201 * <num>attr_name1<num>attr_name2 2202 * 2203 * Where "<num>" is a single byte number of characters in the attribute name. 2204 * 2205 * Args: 2206 * prefix - exattr namespace prefix string 2207 * list, list_len - input list with namespace prefixes 2208 * bsd_list, bsd_list_len - output list compatible with bsd vfs 2209 */ 2210 static int 2211 fuse_xattrlist_convert(char *prefix, const char *list, int list_len, 2212 char *bsd_list, int *bsd_list_len) 2213 { 2214 int len, pos, dist_to_next, prefix_len; 2215 2216 pos = 0; 2217 *bsd_list_len = 0; 2218 prefix_len = strlen(prefix); 2219 2220 while (pos < list_len && list[pos] != '\0') { 2221 dist_to_next = strlen(&list[pos]) + 1; 2222 if (bcmp(&list[pos], prefix, prefix_len) == 0 && 2223 list[pos + prefix_len] == extattr_namespace_separator) { 2224 len = dist_to_next - 2225 (prefix_len + sizeof(extattr_namespace_separator)) - 1; 2226 if (len >= EXTATTR_MAXNAMELEN) 2227 return (ENAMETOOLONG); 2228 2229 bsd_list[*bsd_list_len] = len; 2230 memcpy(&bsd_list[*bsd_list_len + 1], 2231 &list[pos + prefix_len + 2232 sizeof(extattr_namespace_separator)], len); 2233 2234 *bsd_list_len += len + 1; 2235 } 2236 2237 pos += dist_to_next; 2238 } 2239 2240 return (0); 2241 } 2242 2243 /* 2244 * List extended attributes 2245 * 2246 * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which 2247 * has a number of differences compared to its FreeBSD equivalent, 2248 * extattr_list_file: 2249 * 2250 * - FUSE_LISTXATTR returns all extended attributes across all namespaces, 2251 * whereas listxattr(2) only returns attributes for a single namespace 2252 * - FUSE_LISTXATTR prepends each attribute name with "namespace." 2253 * - If the provided buffer is not large enough to hold the result, 2254 * FUSE_LISTXATTR should return ERANGE, whereas listxattr is expected to 2255 * return as many results as will fit. 2256 */ 2257 /* 2258 struct vop_listextattr_args { 2259 struct vop_generic_args a_gen; 2260 struct vnode *a_vp; 2261 int a_attrnamespace; 2262 struct uio *a_uio; 2263 size_t *a_size; 2264 struct ucred *a_cred; 2265 struct thread *a_td; 2266 }; 2267 */ 2268 static int 2269 fuse_vnop_listextattr(struct vop_listextattr_args *ap) 2270 { 2271 struct vnode *vp = ap->a_vp; 2272 struct uio *uio = ap->a_uio; 2273 struct fuse_dispatcher fdi; 2274 struct fuse_listxattr_in *list_xattr_in; 2275 struct fuse_listxattr_out *list_xattr_out; 2276 struct mount *mp = vnode_mount(vp); 2277 struct thread *td = ap->a_td; 2278 struct ucred *cred = ap->a_cred; 2279 char *prefix; 2280 char *bsd_list = NULL; 2281 char *linux_list; 2282 int bsd_list_len; 2283 int linux_list_len; 2284 int err; 2285 2286 if (fuse_isdeadfs(vp)) 2287 return (ENXIO); 2288 2289 if (!fsess_isimpl(mp, FUSE_LISTXATTR)) 2290 return EOPNOTSUPP; 2291 2292 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); 2293 if (err) 2294 return err; 2295 2296 /* 2297 * Add space for a NUL and the period separator if enabled. 2298 * Default to looking for user attributes. 2299 */ 2300 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2301 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2302 else 2303 prefix = EXTATTR_NAMESPACE_USER_STRING; 2304 2305 fdisp_init(&fdi, sizeof(*list_xattr_in)); 2306 fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); 2307 2308 /* 2309 * Retrieve Linux / FUSE compatible list size. 2310 */ 2311 list_xattr_in = fdi.indata; 2312 list_xattr_in->size = 0; 2313 2314 err = fdisp_wait_answ(&fdi); 2315 if (err != 0) { 2316 if (err == ENOSYS) { 2317 fsess_set_notimpl(mp, FUSE_LISTXATTR); 2318 err = EOPNOTSUPP; 2319 } 2320 goto out; 2321 } 2322 2323 list_xattr_out = fdi.answ; 2324 linux_list_len = list_xattr_out->size; 2325 if (linux_list_len == 0) { 2326 if (ap->a_size != NULL) 2327 *ap->a_size = linux_list_len; 2328 goto out; 2329 } 2330 2331 /* 2332 * Retrieve Linux / FUSE compatible list values. 2333 */ 2334 fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); 2335 list_xattr_in = fdi.indata; 2336 list_xattr_in->size = linux_list_len; 2337 2338 err = fdisp_wait_answ(&fdi); 2339 if (err == ERANGE) { 2340 /* 2341 * Race detected. The attribute list must've grown since the 2342 * first FUSE_LISTXATTR call. Start over. Go all the way back 2343 * to userland so we can process signals, if necessary, before 2344 * restarting. 2345 */ 2346 err = ERESTART; 2347 goto out; 2348 } else if (err != 0) 2349 goto out; 2350 2351 linux_list = fdi.answ; 2352 /* FUSE doesn't allow the server to return more data than requested */ 2353 if (fdi.iosize > linux_list_len) { 2354 printf("WARNING: FUSE protocol violation. Server returned " 2355 "more extended attribute data than requested; " 2356 "should've returned ERANGE instead"); 2357 } else { 2358 /* But returning less data is fine */ 2359 linux_list_len = fdi.iosize; 2360 } 2361 2362 /* 2363 * Retrieve the BSD compatible list values. 2364 * The Linux / FUSE attribute list format isn't the same 2365 * as FreeBSD's format. So we need to transform it into 2366 * FreeBSD's format before giving it to the user. 2367 */ 2368 bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK); 2369 err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len, 2370 bsd_list, &bsd_list_len); 2371 if (err != 0) 2372 goto out; 2373 2374 if (ap->a_size != NULL) 2375 *ap->a_size = bsd_list_len; 2376 2377 if (uio != NULL) 2378 err = uiomove(bsd_list, bsd_list_len, uio); 2379 2380 out: 2381 free(bsd_list, M_TEMP); 2382 fdisp_destroy(&fdi); 2383 return (err); 2384 } 2385 2386 /* 2387 struct vop_deleteextattr_args { 2388 struct vop_generic_args a_gen; 2389 struct vnode *a_vp; 2390 int a_attrnamespace; 2391 const char *a_name; 2392 struct ucred *a_cred; 2393 struct thread *a_td; 2394 }; 2395 */ 2396 static int 2397 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) 2398 { 2399 struct vnode *vp = ap->a_vp; 2400 struct fuse_dispatcher fdi; 2401 struct mount *mp = vnode_mount(vp); 2402 struct thread *td = ap->a_td; 2403 struct ucred *cred = ap->a_cred; 2404 char *prefix; 2405 size_t len; 2406 char *attr_str; 2407 int err; 2408 2409 if (fuse_isdeadfs(vp)) 2410 return (ENXIO); 2411 2412 if (!fsess_isimpl(mp, FUSE_REMOVEXATTR)) 2413 return EOPNOTSUPP; 2414 2415 if (vfs_isrdonly(mp)) 2416 return EROFS; 2417 2418 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, 2419 VWRITE); 2420 if (err) 2421 return err; 2422 2423 /* Default to looking for user attributes. */ 2424 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) 2425 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; 2426 else 2427 prefix = EXTATTR_NAMESPACE_USER_STRING; 2428 2429 len = strlen(prefix) + sizeof(extattr_namespace_separator) + 2430 strlen(ap->a_name) + 1; 2431 2432 fdisp_init(&fdi, len); 2433 fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred); 2434 2435 attr_str = fdi.indata; 2436 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, 2437 ap->a_name); 2438 2439 err = fdisp_wait_answ(&fdi); 2440 if (err == ENOSYS) { 2441 fsess_set_notimpl(mp, FUSE_REMOVEXATTR); 2442 err = EOPNOTSUPP; 2443 } 2444 2445 fdisp_destroy(&fdi); 2446 return (err); 2447 } 2448 2449 /* 2450 struct vnop_print_args { 2451 struct vnode *a_vp; 2452 }; 2453 */ 2454 static int 2455 fuse_vnop_print(struct vop_print_args *ap) 2456 { 2457 struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp); 2458 2459 printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n", 2460 (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid, 2461 (uintmax_t)fvdat->nlookup, 2462 fvdat->flag); 2463 2464 return 0; 2465 } 2466 2467 /* 2468 * Get an NFS filehandle for a FUSE file. 2469 * 2470 * This will only work for FUSE file systems that guarantee the uniqueness of 2471 * nodeid:generation, which most don't. 2472 */ 2473 /* 2474 vop_vptofh { 2475 IN struct vnode *a_vp; 2476 IN struct fid *a_fhp; 2477 }; 2478 */ 2479 static int 2480 fuse_vnop_vptofh(struct vop_vptofh_args *ap) 2481 { 2482 struct vnode *vp = ap->a_vp; 2483 struct fuse_vnode_data *fvdat = VTOFUD(vp); 2484 struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp); 2485 _Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid), 2486 "FUSE fid type is too big"); 2487 struct mount *mp = vnode_mount(vp); 2488 struct fuse_data *data = fuse_get_mpdata(mp); 2489 struct vattr va; 2490 int err; 2491 2492 if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) 2493 return EOPNOTSUPP; 2494 2495 err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); 2496 if (err) 2497 return err; 2498 2499 /*ip = VTOI(ap->a_vp);*/ 2500 /*ufhp = (struct ufid *)ap->a_fhp;*/ 2501 fhp->len = sizeof(struct fuse_fid); 2502 fhp->nid = fvdat->nid; 2503 if (fvdat->generation <= UINT32_MAX) 2504 fhp->gen = fvdat->generation; 2505 else 2506 return EOVERFLOW; 2507 return (0); 2508 } 2509