1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Google Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Copyright (C) 2005 Csaba Henk. 34 * All rights reserved. 35 * 36 * Copyright (c) 2019 The FreeBSD Foundation 37 * 38 * Portions of this software were developed by BFF Storage Systems, LLC under 39 * sponsorship from the FreeBSD Foundation. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 */ 62 63 #include <sys/cdefs.h> 64 __FBSDID("$FreeBSD$"); 65 66 #include <sys/param.h> 67 #include <sys/systm.h> 68 #include <sys/counter.h> 69 #include <sys/module.h> 70 #include <sys/errno.h> 71 #include <sys/kernel.h> 72 #include <sys/conf.h> 73 #include <sys/uio.h> 74 #include <sys/malloc.h> 75 #include <sys/queue.h> 76 #include <sys/lock.h> 77 #include <sys/mutex.h> 78 #include <sys/sdt.h> 79 #include <sys/sx.h> 80 #include <sys/proc.h> 81 #include <sys/mount.h> 82 #include <sys/vnode.h> 83 #include <sys/namei.h> 84 #include <sys/stat.h> 85 #include <sys/unistd.h> 86 #include <sys/filedesc.h> 87 #include <sys/file.h> 88 #include <sys/fcntl.h> 89 #include <sys/dirent.h> 90 #include <sys/bio.h> 91 #include <sys/buf.h> 92 #include <sys/sysctl.h> 93 #include <sys/priv.h> 94 95 #include "fuse.h" 96 #include "fuse_file.h" 97 #include "fuse_internal.h" 98 #include "fuse_io.h" 99 #include "fuse_ipc.h" 100 #include "fuse_node.h" 101 #include "fuse_file.h" 102 103 SDT_PROVIDER_DECLARE(fusefs); 104 /* 105 * Fuse trace probe: 106 * arg0: verbosity. 
Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*");

#ifdef ZERO_PAD_INCOMPLETE_BUFS
static int isbzero(void *buf, size_t len);

#endif

/* Lookup entry-cache hit/miss counters, exported read-only via sysctl. */
counter_u64_t fuse_lookup_cache_hits;
counter_u64_t fuse_lookup_cache_misses;

SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
    &fuse_lookup_cache_hits, "number of positive cache hits in lookup");

SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
    &fuse_lookup_cache_misses, "number of cache misses in lookup");

/*
 * Look up a vnode for FUSE inode number 'ino' in the vfs hash.
 *
 * On success returns 0 and sets *vpp to the locked vnode, or to NULL if the
 * vnode is not hashed or its FUSE entry-cache TTL has expired (in which case
 * the stale vnode is purged from the namecache and released).  A non-zero
 * return is the error from vfs_hash_get().
 */
int
fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags,
    struct vnode **vpp)
{
	struct bintime now;
	struct thread *td = curthread;
	uint64_t nodeid = ino;
	int error;

	*vpp = NULL;

	error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp,
	    fuse_vnode_cmp, &nodeid);
	if (error)
		return error;
	/*
	 * Check the entry cache timeout.  We have to do this within fusefs
	 * instead of by using cache_enter_time/cache_lookup because those
	 * routines are only intended to work with pathnames, not inodes
	 */
	if (*vpp != NULL) {
		getbinuptime(&now);
		if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)){
			counter_u64_add(fuse_lookup_cache_hits, 1);
			return 0;
		} else {
			/* Entry cache timeout */
			counter_u64_add(fuse_lookup_cache_misses, 1);
			cache_purge(*vpp);
			vput(*vpp);
			*vpp = NULL;
		}
	}
	return 0;
}

SDT_PROBE_DEFINE0(fusefs, , internal, access_vadmin);
/* Synchronously send a FUSE_ACCESS operation */
int
fuse_internal_access(struct vnode *vp,
    accmode_t mode,
    struct thread *td,
    struct ucred *cred)
{
	int err = 0;
	uint32_t mask = F_OK;
	int dataflags;
	int vtype;
	struct mount *mp;
	struct fuse_dispatcher fdi;
	struct fuse_access_in *fai;
	struct fuse_data *data;

	mp = vnode_mount(vp);
	vtype = vnode_vtype(vp);

	data = fuse_get_mpdata(mp);
	dataflags = data->dataflags;

	/* Nothing requested, nothing to check. */
	if (mode == 0)
		return 0;

	/*
	 * Write-type access on a read-only mount fails outright for the
	 * vnode types that live on the filesystem itself.
	 */
	if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) {
		switch (vp->v_type) {
		case VDIR:
			/* FALLTHROUGH */
		case VLNK:
			/* FALLTHROUGH */
		case VREG:
			return EROFS;
		default:
			break;
		}
	}

	/* Unless explicitly permitted, deny everyone except the fs owner. */
	if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
		if (fuse_match_cred(data->daemoncred, cred))
			return EPERM;
	}

	/*
	 * With -o default_permissions the kernel checks permissions itself
	 * from the (possibly cached) attributes instead of asking the daemon.
	 */
	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
		struct vattr va;

		fuse_internal_getattr(vp, &va, cred, td);
		return vaccess(vp->v_type, va.va_mode, va.va_uid,
		    va.va_gid, mode, cred, NULL);
	}

	if (mode & VADMIN) {
		/*
		 * The FUSE protocol doesn't have an equivalent of VADMIN, so
		 * it's a bug if we ever reach this point with that bit set.
		 */
		SDT_PROBE0(fusefs, , internal, access_vadmin);
	}

	if (!fsess_isimpl(mp, FUSE_ACCESS))
		return 0;

	/* Translate VFS accmode bits into the FUSE access mask. */
	if ((mode & (VWRITE | VAPPEND)) != 0)
		mask |= W_OK;
	if ((mode & VREAD) != 0)
		mask |= R_OK;
	if ((mode & VEXEC) != 0)
		mask |= X_OK;

	fdisp_init(&fdi, sizeof(*fai));
	fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);

	fai = fdi.indata;
	fai->mask = mask;

	err = fdisp_wait_answ(&fdi);
	fdisp_destroy(&fdi);

	/*
	 * ENOSYS from the daemon means FUSE_ACCESS is unimplemented: treat
	 * as success and remember not to ask again.
	 */
	if (err == ENOSYS) {
		fsess_set_notimpl(mp, FUSE_ACCESS);
		err = 0;
	}
	return err;
}

/*
 * Cache FUSE attributes from attr, in attribute cache associated with vnode
 * 'vp'.  Optionally, if argument 'vap' is not NULL, store a copy of the
 * converted attributes there as well.
 *
 * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do
 * return the result to the caller).
 */
void
fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
    uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap)
{
	struct mount *mp;
	struct fuse_vnode_data *fvdat;
	struct fuse_data *data;
	struct vattr *vp_cache_at;

	mp = vnode_mount(vp);
	fvdat = VTOFUD(vp);
	data = fuse_get_mpdata(mp);

	ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");

	fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
	    &fvdat->attr_cache_timeout);

	/* Fix our buffers if the filesize changed without us knowing */
	if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) {
		(void)fuse_vnode_setsize(vp, attr->size);
		fvdat->cached_attrs.va_size = attr->size;
	}

	/*
	 * With a zero TTL, skip the per-vnode cache and convert directly
	 * into the caller's buffer (or bail out if there isn't one).
	 */
	if (attr_valid > 0 || attr_valid_nsec > 0)
		vp_cache_at = &(fvdat->cached_attrs);
	else if (vap != NULL)
		vp_cache_at = vap;
	else
		return;

	vattr_null(vp_cache_at);
	vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0];
	vp_cache_at->va_fileid = attr->ino;
	vp_cache_at->va_mode = attr->mode & ~S_IFMT;
	vp_cache_at->va_nlink = attr->nlink;
	vp_cache_at->va_uid = attr->uid;
	vp_cache_at->va_gid = attr->gid;
	vp_cache_at->va_rdev = attr->rdev;
	vp_cache_at->va_size = attr->size;
	/* XXX on i386, seconds are truncated to 32 bits */
	vp_cache_at->va_atime.tv_sec = attr->atime;
	vp_cache_at->va_atime.tv_nsec = attr->atimensec;
	vp_cache_at->va_mtime.tv_sec = attr->mtime;
	vp_cache_at->va_mtime.tv_nsec = attr->mtimensec;
	vp_cache_at->va_ctime.tv_sec = attr->ctime;
	vp_cache_at->va_ctime.tv_nsec = attr->ctimensec;
	/* blksize is only valid for protocol 7.9 and later */
	if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0)
		vp_cache_at->va_blocksize = attr->blksize;
	else
		vp_cache_at->va_blocksize = PAGE_SIZE;
	vp_cache_at->va_type = IFTOVT(attr->mode);
	vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE;
	vp_cache_at->va_flags = 0;

	/* If we cached on the vnode, also copy out to the caller. */
	if (vap != vp_cache_at && vap != NULL)
		memcpy(vap, vp_cache_at, sizeof(*vap));
}


/* fsync */

/*
 * Completion callback for asynchronous FUSE_FSYNC/FUSE_FSYNCDIR tickets:
 * an ENOSYS reply marks the opcode as unimplemented for this session.
 */
int
fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
{
	if (tick->tk_aw_ohead.error == ENOSYS) {
		fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick));
	}
	return 0;
}

/*
 * Send FUSE_FSYNC (or FUSE_FSYNCDIR for directories) for vnode 'vp'.
 * 'waitfor' == MNT_WAIT makes each request synchronous; otherwise the
 * requests are queued with a completion callback.  'datasync' requests that
 * only the file data (not metadata) be flushed.
 */
int
fuse_internal_fsync(struct vnode *vp,
    struct thread *td,
    int waitfor,
    bool datasync)
{
	struct fuse_fsync_in *ffsi = NULL;
	struct fuse_dispatcher fdi;
	struct fuse_filehandle *fufh;
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	struct mount *mp = vnode_mount(vp);
	int op = FUSE_FSYNC;
	int err = 0;

	if (!fsess_isimpl(vnode_mount(vp),
	    (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
		return 0;
	}
	if (vnode_isdir(vp))
		op = FUSE_FSYNCDIR;

	if (!fsess_isimpl(mp, op))
		return 0;

	fdisp_init(&fdi, sizeof(*ffsi));
	/*
	 * fsync every open file handle for this file, because we can't be sure
	 * which file handle the caller is really referring to.
	 */
	LIST_FOREACH(fufh, &fvdat->handles, next) {
		fdi.iosize = sizeof(*ffsi);
		/* First iteration builds the request; later ones reuse it. */
		if (ffsi == NULL)
			fdisp_make_vp(&fdi, op, vp, td, NULL);
		else
			fdisp_refresh_vp(&fdi, op, vp, td, NULL);
		ffsi = fdi.indata;
		ffsi->fh = fufh->fh_id;
		ffsi->fsync_flags = 0;

		if (datasync)
			ffsi->fsync_flags = 1;

		if (waitfor == MNT_WAIT) {
			err = fdisp_wait_answ(&fdi);
		} else {
			fuse_insert_callback(fdi.tick,
			    fuse_internal_fsync_callback);
			fuse_insert_message(fdi.tick, false);
		}
		if (err == ENOSYS) {
			/* ENOSYS means "success, and don't call again" */
			fsess_set_notimpl(mp, op);
			err = 0;
			break;
		}
	}
	fdisp_destroy(&fdi);

	return err;
}

/* Asynchronous invalidation */
SDT_PROBE_DEFINE3(fusefs, , internal, invalidate_entry,
    "struct vnode*", "struct fuse_notify_inval_entry_out*", "char*");
/*
 * Handle a FUSE_NOTIFY_INVAL_ENTRY message from the daemon: invalidate the
 * namecache entry for 'name' under parent inode fnieo.parent.
 */
int
fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
{
	struct fuse_notify_inval_entry_out fnieo;
	struct componentname cn;
	struct vnode *dvp, *vp;
	char name[PATH_MAX];
	int err;

	if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
		return (err);

	/* Reject names too long to NUL-terminate in our buffer. */
	if (fnieo.namelen >= sizeof(name))
		return (EINVAL);

	if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
		return (err);
	name[fnieo.namelen] = '\0';
	/* fusefs does not cache "." or ".." entries */
	if (strncmp(name, ".", sizeof(".")) == 0 ||
	    strncmp(name, "..", sizeof("..")) == 0)
		return (0);

	if (fnieo.parent == FUSE_ROOT_ID)
		err = VFS_ROOT(mp, LK_SHARED, &dvp);
	else
		err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
		    LK_SHARED, &dvp);
	SDT_PROBE3(fusefs, , internal, invalidate_entry, dvp, &fnieo, name);
	/*
	 * If dvp is not in the cache, then it must've been reclaimed.  And
	 * since fuse_vnop_reclaim does a cache_purge, name's entry must've
	 * been invalidated already.
So we can safely return if dvp == NULL
	 */
	if (err != 0 || dvp == NULL)
		return (err);
	/*
	 * XXX we can't check dvp's generation because the FUSE invalidate
	 * entry message doesn't include it.  Worse case is that we invalidate
	 * an entry that didn't need to be invalidated.
	 */

	/* Look the name up without MAKEENTRY, which frees the cached entry. */
	cn.cn_nameiop = LOOKUP;
	cn.cn_flags = 0;	/* !MAKEENTRY means free cached entry */
	cn.cn_thread = curthread;
	cn.cn_cred = curthread->td_ucred;
	cn.cn_lkflags = LK_SHARED;
	cn.cn_pnbuf = NULL;
	cn.cn_nameptr = name;
	cn.cn_namelen = fnieo.namelen;
	err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
	MPASS(err == 0);
	fuse_vnode_clear_attr_cache(dvp);
	vput(dvp);
	return (0);
}

SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_inode,
    "struct vnode*", "struct fuse_notify_inval_inode_out *");
/*
 * Handle a FUSE_NOTIFY_INVAL_INODE message from the daemon: drop cached
 * buffers and attributes for the named inode, if we have it cached.
 */
int
fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
{
	struct fuse_notify_inval_inode_out fniio;
	struct vnode *vp;
	int err;

	if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
		return (err);

	if (fniio.ino == FUSE_ROOT_ID)
		err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
	else
		err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
		    &vp);
	SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
	/* Not cached means there is nothing to invalidate. */
	if (err != 0 || vp == NULL)
		return (err);
	/*
	 * XXX we can't check vp's generation because the FUSE invalidate
	 * entry message doesn't include it.  Worse case is that we invalidate
	 * an inode that didn't need to be invalidated.
	 */

	/*
	 * Flush and invalidate buffers if off >= 0.  Technically we only need
	 * to flush and invalidate the range of offsets [off, off + len), but
	 * for simplicity's sake we do everything.
	 */
	if (fniio.off >= 0)
		fuse_io_invalbuf(vp, curthread);
	fuse_vnode_clear_attr_cache(vp);
	vput(vp);
	return (0);
}

/* mknod */
/*
 * Create a device node, fifo, or socket via FUSE_MKNOD.  Servers older than
 * protocol 7.12 take a shorter fuse_mknod_in without the umask field.
 */
int
fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp, struct vattr *vap)
{
	struct fuse_data *data;
	struct fuse_mknod_in fmni;
	size_t insize;

	data = fuse_get_mpdata(dvp->v_mount);

	fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
	fmni.rdev = vap->va_rdev;
	if (fuse_libabi_geq(data, 7, 12)) {
		insize = sizeof(fmni);
		fmni.umask = curthread->td_proc->p_fd->fd_cmask;
	} else {
		insize = FUSE_COMPAT_MKNOD_IN_SIZE;
	}
	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
	    insize, vap->va_type));
}

/* readdir */

/*
 * Read directory entries from the daemon with FUSE_READDIR, starting at
 * directory offset 'startoff', and copy converted dirents into 'uio'.
 * 'cookediov' is scratch space for the converted entries; 'ncookies' and
 * 'cookies' (may be NULL) receive NFS-style directory cookies.
 */
int
fuse_internal_readdir(struct vnode *vp,
    struct uio *uio,
    off_t startoff,
    struct fuse_filehandle *fufh,
    struct fuse_iov *cookediov,
    int *ncookies,
    u_long *cookies)
{
	int err = 0;
	struct fuse_dispatcher fdi;
	struct fuse_read_in *fri = NULL;
	int fnd_start;

	if (uio_resid(uio) == 0)
		return 0;
	fdisp_init(&fdi, 0);

	/*
	 * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
	 * I/O).
	 */

	/*
	 * fnd_start is set non-zero once the offset in the directory gets
	 * to the startoff.  This is done because directories must be read
	 * from the beginning (offset == 0) when fuse_vnop_readdir() needs
	 * to do an open of the directory.
	 * If it is not set non-zero here, it will be set non-zero in
	 * fuse_internal_readdir_processdata() when uio_offset == startoff.
	 */
	fnd_start = 0;
	if (uio->uio_offset == startoff)
		fnd_start = 1;
	while (uio_resid(uio) > 0) {
		fdi.iosize = sizeof(*fri);
		/* First pass builds the request; later passes refresh it. */
		if (fri == NULL)
			fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
		else
			fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);

		fri = fdi.indata;
		fri->fh = fufh->fh_id;
		fri->offset = uio_offset(uio);
		fri->size = MIN(uio->uio_resid,
		    fuse_get_mpdata(vp->v_mount)->max_read);

		if ((err = fdisp_wait_answ(&fdi)))
			break;
		if ((err = fuse_internal_readdir_processdata(uio, startoff,
		    &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov,
		    ncookies, &cookies)))
			break;
	}

	fdisp_destroy(&fdi);
	/* -1 is the internal "done" marker, not an error. */
	return ((err == -1) ? 0 : err);
}

/*
 * Return -1 to indicate that this readdir is finished, 0 if it copied
 * all the directory data read in and it may be possible to read more
 * and greater than 0 for a failure.
 */
int
fuse_internal_readdir_processdata(struct uio *uio,
    off_t startoff,
    int *fnd_start,
    size_t reqsize,
    void *buf,
    size_t bufsize,
    struct fuse_iov *cookediov,
    int *ncookies,
    u_long **cookiesp)
{
	int err = 0;
	int bytesavail;
	size_t freclen;

	struct dirent *de;
	struct fuse_dirent *fudge;
	u_long *cookies;

	cookies = *cookiesp;
	if (bufsize < FUSE_NAME_OFFSET)
		return -1;
	for (;;) {
		if (bufsize < FUSE_NAME_OFFSET) {
			err = -1;
			break;
		}
		fudge = (struct fuse_dirent *)buf;
		freclen = FUSE_DIRENT_SIZE(fudge);

		if (bufsize < freclen) {
			/*
			 * This indicates a partial directory entry at the
			 * end of the directory data.
			 */
			err = -1;
			break;
		}
#ifdef ZERO_PAD_INCOMPLETE_BUFS
		if (isbzero(buf, FUSE_NAME_OFFSET)) {
			err = -1;
			break;
		}
#endif

		/* Reject entries with empty or over-long names. */
		if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
			err = EINVAL;
			break;
		}
		bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *)
		    &fudge->namelen);

		if (bytesavail > uio_resid(uio)) {
			/* Out of space for the dir so we are done. */
			err = -1;
			break;
		}
		/*
		 * Don't start to copy the directory entries out until
		 * the requested offset in the directory is found.
		 */
		if (*fnd_start != 0) {
			fiov_adjust(cookediov, bytesavail);
			bzero(cookediov->base, bytesavail);

			de = (struct dirent *)cookediov->base;
			de->d_fileno = fudge->ino;
			de->d_reclen = bytesavail;
			de->d_type = fudge->type;
			de->d_namlen = fudge->namelen;
			memcpy((char *)cookediov->base + sizeof(struct dirent) -
			    MAXNAMLEN - 1,
			    (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
			dirent_terminate(de);

			err = uiomove(cookediov->base, cookediov->len, uio);
			if (err)
				break;
			if (cookies != NULL) {
				/* No room for more cookies: stop here. */
				if (*ncookies == 0) {
					err = -1;
					break;
				}
				*cookies = fudge->off;
				cookies++;
				(*ncookies)--;
			}
		} else if (startoff == fudge->off)
			*fnd_start = 1;
		buf = (char *)buf + freclen;
		bufsize -= freclen;
		uio_setoffset(uio, fudge->off);
	}
	*cookiesp = cookies;

	return err;
}

/* remove */

/*
 * Remove the directory entry 'cnp' for vnode 'vp' from directory 'dvp' by
 * sending 'op' (FUSE_UNLINK or FUSE_RMDIR) to the daemon.
 */
int
fuse_internal_remove(struct vnode *dvp,
    struct vnode *vp,
    struct componentname *cnp,
    enum fuse_opcode op)
{
	struct fuse_dispatcher fdi;
	nlink_t nlink;
	int err = 0;

	fdisp_init(&fdi, cnp->cn_namelen + 1);
	fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred);

	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
	((char *)fdi.indata)[cnp->cn_namelen] = '\0';

	err = fdisp_wait_answ(&fdi);
	fdisp_destroy(&fdi);

	if (err)
		return (err);

	/*
	 * Access the cached nlink even if the attr cached has expired.  If
	 * it's inaccurate, the worst that will happen is:
	 * 1) We'll recycle the vnode even though the file has another link we
	 *    don't know about, costing a bit of cpu time, or
	 * 2) We won't recycle the vnode even though all of its links are gone.
	 *    It will linger around until vnlru reclaims it, costing a bit of
	 *    temporary memory.
	 */
	nlink = VTOFUD(vp)->cached_attrs.va_nlink--;

	/*
	 * Purge the parent's attribute cache because the daemon
	 * should've updated its mtime and ctime.
	 */
	fuse_vnode_clear_attr_cache(dvp);

	/* NB: nlink could be zero if it was never cached */
	if (nlink <= 1 || vnode_vtype(vp) == VDIR) {
		fuse_internal_vnode_disappear(vp);
	} else {
		cache_purge(vp);
		fuse_vnode_update(vp, FN_CTIMECHANGE);
	}

	return err;
}

/* rename */

/*
 * Send FUSE_RENAME to move 'fcnp' in 'fdvp' to 'tcnp' in 'tdvp'.  The
 * request payload is fuse_rename_in followed by both NUL-terminated names.
 */
int
fuse_internal_rename(struct vnode *fdvp,
    struct componentname *fcnp,
    struct vnode *tdvp,
    struct componentname *tcnp)
{
	struct fuse_dispatcher fdi;
	struct fuse_rename_in *fri;
	int err = 0;

	fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2);
	fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred);

	fri = fdi.indata;
	fri->newdir = VTOI(tdvp);
	/* Pack: header, "from" name, NUL, "to" name, NUL. */
	memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr,
	    fcnp->cn_namelen);
	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0';
	memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1,
	    tcnp->cn_nameptr, tcnp->cn_namelen);
	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen +
	    tcnp->cn_namelen + 1] = '\0';

	err = fdisp_wait_answ(&fdi);
	fdisp_destroy(&fdi);
	return err;
}

/* strategy */

/* entity creation */

/*
 * Build (but do not send) a creation request: opcode-specific payload 'buf'
 * followed by the NUL-terminated entry name from 'cnp'.
 */
void
fuse_internal_newentry_makerequest(struct mount *mp,
    uint64_t dnid,
    struct componentname *cnp,
    enum fuse_opcode op,
    void *buf,
    size_t bufsize,
    struct fuse_dispatcher *fdip)
{
	fdip->iosize = bufsize + cnp->cn_namelen + 1;

	fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred);
	memcpy(fdip->indata, buf, bufsize);
	memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen);
	((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0';
}

/*
 * Send a prepared creation request and turn the daemon's fuse_entry_out
 * reply into a new vnode in *vpp.  On failure after the entry was created,
 * tell the daemon to forget it again.
 */
int
fuse_internal_newentry_core(struct vnode *dvp,
    struct vnode **vpp,
    struct componentname *cnp,
    enum vtype vtyp,
    struct fuse_dispatcher *fdip)
{
	int err = 0;
	struct fuse_entry_out *feo;
	struct mount *mp = vnode_mount(dvp);

	if ((err = fdisp_wait_answ(fdip))) {
		return err;
	}
	feo = fdip->answ;

	if ((err = fuse_internal_checkentry(feo, vtyp))) {
		return err;
	}
	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp);
	if (err) {
		/* Balance the lookup count the daemon just recorded. */
		fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred,
		    feo->nodeid, 1);
		return err;
	}

	/*
	 * Purge the parent's attribute cache because the daemon should've
	 * updated its mtime and ctime
	 */
	fuse_vnode_clear_attr_cache(dvp);

	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
	    feo->attr_valid_nsec, NULL);

	return err;
}

/*
 * Create a new directory entry of type 'vtype' named by 'cnp' in 'dvp',
 * using creation opcode 'op' with opcode-specific payload 'buf'/'bufsize'.
 */
int
fuse_internal_newentry(struct vnode *dvp,
    struct vnode **vpp,
    struct componentname *cnp,
    enum fuse_opcode op,
    void *buf,
    size_t bufsize,
    enum vtype vtype)
{
	int err;
	struct fuse_dispatcher fdi;
	struct mount *mp = vnode_mount(dvp);

	fdisp_init(&fdi, 0);
	fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf,
	    bufsize, &fdi);
	err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi);
	fdisp_destroy(&fdi);

	return err;
}

/* entity destruction */

/*
 * Ticket callback: send a FUSE_FORGET for the nodeid of the request that
 * just completed.
 */
int
fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio)
{
	fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL,
	    ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1);

	return 0;
}

/*
 * Asynchronously send FUSE_FORGET for 'nodeid', decrementing the daemon's
 * lookup count by 'nlookup'.  FUSE_FORGET gets no reply, so the message is
 * queued without waiting.
 */
void
fuse_internal_forget_send(struct mount *mp,
    struct thread *td,
    struct ucred *cred,
    uint64_t nodeid,
    uint64_t nlookup)
{

	struct fuse_dispatcher fdi;
	struct fuse_forget_in *ffi;

	/*
	 * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu",
	 *         (long long unsigned) nodeid));
	 */

	fdisp_init(&fdi, sizeof(*ffi));
	fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred);

	ffi = fdi.indata;
	ffi->nlookup = nlookup;

	fuse_insert_message(fdi.tick, false);
	fdisp_destroy(&fdi);
}

SDT_PROBE_DEFINE2(fusefs, , internal, getattr_cache_incoherent,
    "struct vnode*", "struct fuse_attr_out*");

/* Fetch the vnode's attributes from the daemon*/
int
fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
    struct ucred *cred, struct thread *td)
{
	struct fuse_dispatcher fdi;
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	struct fuse_getattr_in *fgai;
	struct fuse_attr_out *fao;
	off_t old_filesize = fvdat->cached_attrs.va_size;
	struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
	struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
	enum vtype vtyp;
	int err;

	fdisp_init(&fdi, sizeof(*fgai));
	fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
	fgai = fdi.indata;
	/*
	 * We could look up a file handle and set it in fgai->fh, but that
	 * involves extra runtime work and I'm unaware of any file systems that
	 * care.
	 */
	fgai->getattr_flags = 0;
	if ((err = fdisp_wait_answ(&fdi))) {
		/* ENOENT from the daemon means the node no longer exists. */
		if (err == ENOENT)
			fuse_internal_vnode_disappear(vp);
		goto out;
	}

	fao = (struct fuse_attr_out *)fdi.answ;
	vtyp = IFTOVT(fao->attr.mode);
	/*
	 * If we have dirty size/ctime/mtime that the daemon hasn't seen yet,
	 * keep our local values in preference to the daemon's.
	 */
	if (fvdat->flag & FN_SIZECHANGE)
		fao->attr.size = old_filesize;
	if (fvdat->flag & FN_CTIMECHANGE) {
		fao->attr.ctime = old_ctime.tv_sec;
		fao->attr.ctimensec = old_ctime.tv_nsec;
	}
	if (fvdat->flag & FN_MTIMECHANGE) {
		fao->attr.mtime = old_mtime.tv_sec;
		fao->attr.mtimensec = old_mtime.tv_nsec;
	}
	if (vnode_isreg(vp) &&
	    fvdat->cached_attrs.va_size != VNOVAL &&
	    fao->attr.size != fvdat->cached_attrs.va_size) {
		/*
		 * The server changed the file's size even though we had it
		 * cached!  That's a server bug.
		 */
		SDT_PROBE2(fusefs, , internal, getattr_cache_incoherent, vp,
		    fao);
		printf("%s: cache incoherent on %s!  "
		    "Buggy FUSE server detected.  To prevent data corruption, "
		    "disable the data cache by mounting with -o direct_io, or "
		    "as directed otherwise by your FUSE server's "
		    "documentation\n", __func__,
		    vnode_mount(vp)->mnt_stat.f_mntonname);
		int iosize = fuse_iosize(vp);
		v_inval_buf_range(vp, 0, INT64_MAX, iosize);
	}
	fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
	    fao->attr_valid_nsec, vap);
	/* A type change means the old vnode is dead. */
	if (vtyp != vnode_vtype(vp)) {
		fuse_internal_vnode_disappear(vp);
		err = ENOENT;
	}

out:
	fdisp_destroy(&fdi);
	return err;
}

/* Read a vnode's attributes from cache or fetch them from the fuse daemon */
int
fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct thread *td)
{
	struct vattr *attrs;

	if ((attrs = VTOVA(vp)) != NULL) {
		*vap = *attrs;	/* struct copy */
		return 0;
	}

	return fuse_internal_do_getattr(vp, vap, cred, td);
}

/*
 * Mark a vnode as revoked (its backing inode is gone or changed type) and
 * purge all of its namecache entries.
 */
void
fuse_internal_vnode_disappear(struct vnode *vp)
{
struct fuse_vnode_data *fvdat = VTOFUD(vp); 967 968 ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear"); 969 fvdat->flag |= FN_REVOKED; 970 cache_purge(vp); 971 } 972 973 /* fuse start/stop */ 974 975 SDT_PROBE_DEFINE2(fusefs, , internal, init_done, 976 "struct fuse_data*", "struct fuse_init_out*"); 977 int 978 fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio) 979 { 980 int err = 0; 981 struct fuse_data *data = tick->tk_data; 982 struct fuse_init_out *fiio; 983 984 if ((err = tick->tk_aw_ohead.error)) { 985 goto out; 986 } 987 if ((err = fticket_pull(tick, uio))) { 988 goto out; 989 } 990 fiio = fticket_resp(tick)->base; 991 992 data->fuse_libabi_major = fiio->major; 993 data->fuse_libabi_minor = fiio->minor; 994 if (!fuse_libabi_geq(data, 7, 4)) { 995 /* 996 * With a little work we could support servers as old as 7.1. 997 * But there would be little payoff. 998 */ 999 SDT_PROBE2(fusefs, , internal, trace, 1, 1000 "userpace version too low"); 1001 err = EPROTONOSUPPORT; 1002 goto out; 1003 } 1004 1005 if (fuse_libabi_geq(data, 7, 5)) { 1006 if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) || 1007 fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) { 1008 data->max_write = fiio->max_write; 1009 if (fiio->flags & FUSE_ASYNC_READ) 1010 data->dataflags |= FSESS_ASYNC_READ; 1011 if (fiio->flags & FUSE_POSIX_LOCKS) 1012 data->dataflags |= FSESS_POSIX_LOCKS; 1013 if (fiio->flags & FUSE_EXPORT_SUPPORT) 1014 data->dataflags |= FSESS_EXPORT_SUPPORT; 1015 /* 1016 * Don't bother to check FUSE_BIG_WRITES, because it's 1017 * redundant with max_write 1018 */ 1019 /* 1020 * max_background and congestion_threshold are not 1021 * implemented 1022 */ 1023 } else { 1024 err = EINVAL; 1025 } 1026 } else { 1027 /* Old fixed values */ 1028 data->max_write = 4096; 1029 } 1030 1031 if (fuse_libabi_geq(data, 7, 6)) 1032 data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf; 1033 1034 if (!fuse_libabi_geq(data, 7, 7)) 1035 
fsess_set_notimpl(data->mp, FUSE_INTERRUPT); 1036 1037 if (!fuse_libabi_geq(data, 7, 8)) { 1038 fsess_set_notimpl(data->mp, FUSE_BMAP); 1039 fsess_set_notimpl(data->mp, FUSE_DESTROY); 1040 } 1041 1042 if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 && 1043 fiio->time_gran <= 1000000000) 1044 data->time_gran = fiio->time_gran; 1045 else 1046 data->time_gran = 1; 1047 1048 if (!fuse_libabi_geq(data, 7, 23)) 1049 data->cache_mode = fuse_data_cache_mode; 1050 else if (fiio->flags & FUSE_WRITEBACK_CACHE) 1051 data->cache_mode = FUSE_CACHE_WB; 1052 else 1053 data->cache_mode = FUSE_CACHE_WT; 1054 1055 out: 1056 if (err) { 1057 fdata_set_dead(data); 1058 } 1059 FUSE_LOCK(); 1060 data->dataflags |= FSESS_INITED; 1061 SDT_PROBE2(fusefs, , internal, init_done, data, fiio); 1062 wakeup(&data->ticketer); 1063 FUSE_UNLOCK(); 1064 1065 return 0; 1066 } 1067 1068 void 1069 fuse_internal_send_init(struct fuse_data *data, struct thread *td) 1070 { 1071 struct fuse_init_in *fiii; 1072 struct fuse_dispatcher fdi; 1073 1074 fdisp_init(&fdi, sizeof(*fiii)); 1075 fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL); 1076 fiii = fdi.indata; 1077 fiii->major = FUSE_KERNEL_VERSION; 1078 fiii->minor = FUSE_KERNEL_MINOR_VERSION; 1079 /* 1080 * fusefs currently reads ahead no more than one cache block at a time. 1081 * See fuse_read_biobackend 1082 */ 1083 fiii->max_readahead = maxbcachebuf; 1084 /* 1085 * Unsupported features: 1086 * FUSE_FILE_OPS: No known FUSE server or client supports it 1087 * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it 1088 * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even 1089 * when default ACLs are in use. 1090 * FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD 1091 * doesn't have splice(2). 
1092 * FUSE_FLOCK_LOCKS: not yet implemented 1093 * FUSE_HAS_IOCTL_DIR: not yet implemented 1094 * FUSE_AUTO_INVAL_DATA: not yet implemented 1095 * FUSE_DO_READDIRPLUS: not yet implemented 1096 * FUSE_READDIRPLUS_AUTO: not yet implemented 1097 * FUSE_ASYNC_DIO: not yet implemented 1098 * FUSE_NO_OPEN_SUPPORT: not yet implemented 1099 */ 1100 fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT 1101 | FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE; 1102 1103 fuse_insert_callback(fdi.tick, fuse_internal_init_callback); 1104 fuse_insert_message(fdi.tick, false); 1105 fdisp_destroy(&fdi); 1106 } 1107 1108 /* 1109 * Send a FUSE_SETATTR operation with no permissions checks. If cred is NULL, 1110 * send the request with root credentials 1111 */ 1112 int fuse_internal_setattr(struct vnode *vp, struct vattr *vap, 1113 struct thread *td, struct ucred *cred) 1114 { 1115 struct fuse_vnode_data *fvdat; 1116 struct fuse_dispatcher fdi; 1117 struct fuse_setattr_in *fsai; 1118 struct mount *mp; 1119 pid_t pid = td->td_proc->p_pid; 1120 struct fuse_data *data; 1121 int dataflags; 1122 int err = 0; 1123 enum vtype vtyp; 1124 int sizechanged = -1; 1125 uint64_t newsize = 0; 1126 1127 mp = vnode_mount(vp); 1128 fvdat = VTOFUD(vp); 1129 data = fuse_get_mpdata(mp); 1130 dataflags = data->dataflags; 1131 1132 fdisp_init(&fdi, sizeof(*fsai)); 1133 fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); 1134 if (!cred) { 1135 fdi.finh->uid = 0; 1136 fdi.finh->gid = 0; 1137 } 1138 fsai = fdi.indata; 1139 fsai->valid = 0; 1140 1141 if (vap->va_uid != (uid_t)VNOVAL) { 1142 fsai->uid = vap->va_uid; 1143 fsai->valid |= FATTR_UID; 1144 } 1145 if (vap->va_gid != (gid_t)VNOVAL) { 1146 fsai->gid = vap->va_gid; 1147 fsai->valid |= FATTR_GID; 1148 } 1149 if (vap->va_size != VNOVAL) { 1150 struct fuse_filehandle *fufh = NULL; 1151 1152 /*Truncate to a new value. 
*/ 1153 fsai->size = vap->va_size; 1154 sizechanged = 1; 1155 newsize = vap->va_size; 1156 fsai->valid |= FATTR_SIZE; 1157 1158 fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 1159 if (fufh) { 1160 fsai->fh = fufh->fh_id; 1161 fsai->valid |= FATTR_FH; 1162 } 1163 VTOFUD(vp)->flag &= ~FN_SIZECHANGE; 1164 } 1165 if (vap->va_atime.tv_sec != VNOVAL) { 1166 fsai->atime = vap->va_atime.tv_sec; 1167 fsai->atimensec = vap->va_atime.tv_nsec; 1168 fsai->valid |= FATTR_ATIME; 1169 if (vap->va_vaflags & VA_UTIMES_NULL) 1170 fsai->valid |= FATTR_ATIME_NOW; 1171 } 1172 if (vap->va_mtime.tv_sec != VNOVAL) { 1173 fsai->mtime = vap->va_mtime.tv_sec; 1174 fsai->mtimensec = vap->va_mtime.tv_nsec; 1175 fsai->valid |= FATTR_MTIME; 1176 if (vap->va_vaflags & VA_UTIMES_NULL) 1177 fsai->valid |= FATTR_MTIME_NOW; 1178 } else if (fvdat->flag & FN_MTIMECHANGE) { 1179 fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec; 1180 fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec; 1181 fsai->valid |= FATTR_MTIME; 1182 } 1183 if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) { 1184 fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec; 1185 fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec; 1186 fsai->valid |= FATTR_CTIME; 1187 } 1188 if (vap->va_mode != (mode_t)VNOVAL) { 1189 fsai->mode = vap->va_mode & ALLPERMS; 1190 fsai->valid |= FATTR_MODE; 1191 } 1192 if (!fsai->valid) { 1193 goto out; 1194 } 1195 1196 if ((err = fdisp_wait_answ(&fdi))) 1197 goto out; 1198 vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode); 1199 1200 if (vnode_vtype(vp) != vtyp) { 1201 if (vnode_vtype(vp) == VNON && vtyp != VNON) { 1202 SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! " 1203 "vnode_vtype is VNON and vtype isn't."); 1204 } else { 1205 /* 1206 * STALE vnode, ditch 1207 * 1208 * The vnode has changed its type "behind our back". 1209 * There's nothing really we can do, so let us just 1210 * force an internal revocation and tell the caller to 1211 * try again, if interested. 
1212 */ 1213 fuse_internal_vnode_disappear(vp); 1214 err = EAGAIN; 1215 } 1216 } 1217 if (err == 0) { 1218 struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ; 1219 fuse_vnode_undirty_cached_timestamps(vp); 1220 fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, 1221 fao->attr_valid_nsec, NULL); 1222 } 1223 1224 out: 1225 fdisp_destroy(&fdi); 1226 return err; 1227 } 1228 1229 #ifdef ZERO_PAD_INCOMPLETE_BUFS 1230 static int 1231 isbzero(void *buf, size_t len) 1232 { 1233 int i; 1234 1235 for (i = 0; i < len; i++) { 1236 if (((char *)buf)[i]) 1237 return (0); 1238 } 1239 1240 return (1); 1241 } 1242 1243 #endif 1244 1245 void 1246 fuse_internal_init(void) 1247 { 1248 fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK); 1249 fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK); 1250 } 1251 1252 void 1253 fuse_internal_destroy(void) 1254 { 1255 counter_u64_free(fuse_lookup_cache_hits); 1256 counter_u64_free(fuse_lookup_cache_misses); 1257 } 1258