/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2007-2009 Google Inc. and Amit Singh
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following disclaimer
 *   in the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of Google Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Copyright (C) 2005 Csaba Henk.
 * All rights reserved.
 *
 * Copyright (c) 2019 The FreeBSD Foundation
 *
 * Portions of this software were developed by BFF Storage Systems, LLC under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sdt.h>
#include <sys/sx.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/filedesc.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/dirent.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/sysctl.h>
#include <sys/priv.h>

#include "fuse.h"
#include "fuse_file.h"
#include "fuse_internal.h"
#include "fuse_io.h"
#include "fuse_ipc.h"
#include "fuse_node.h"

SDT_PROVIDER_DECLARE(fusefs);
/*
 * Fuse trace probe:
 * arg0: verbosity.  Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*");

#ifdef ZERO_PAD_INCOMPLETE_BUFS
static int isbzero(void *buf, size_t len);

#endif

counter_u64_t fuse_lookup_cache_hits;
counter_u64_t fuse_lookup_cache_misses;

SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
    &fuse_lookup_cache_hits, "number of positive cache hits in lookup");

SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
    &fuse_lookup_cache_misses, "number of cache misses in lookup");

int
fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags,
    struct vnode **vpp)
{
	struct bintime now;
	struct thread *td = curthread;
	uint64_t nodeid = ino;
	int error;

	*vpp = NULL;

	error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp,
	    fuse_vnode_cmp, &nodeid);
	if (error)
		return error;
	/*
	 * Check the entry cache timeout.  We have to do this within fusefs
	 * instead of by using cache_enter_time/cache_lookup because those
	 * routines are only intended to work with pathnames, not inodes
	 */
	if (*vpp != NULL) {
		getbinuptime(&now);
		if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)) {
			counter_u64_add(fuse_lookup_cache_hits, 1);
			return 0;
		} else {
			/* Entry cache timeout */
			counter_u64_add(fuse_lookup_cache_misses, 1);
			cache_purge(*vpp);
			vput(*vpp);
			*vpp = NULL;
		}
	}
	return 0;
}
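
/*
 * A "hit" above means the vnode was found in the vfs hash and its entry
 * cache TTL (entry_cache_timeout, presumably derived from the entry_valid
 * period in the daemon's lookup reply) had not yet expired.  On expiry the
 * vnode reference is dropped and its name cache entries are purged, so the
 * caller falls back to a fresh FUSE_LOOKUP round trip to the daemon.
 */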

/* Synchronously send a FUSE_ACCESS operation */
int
fuse_internal_access(struct vnode *vp,
    accmode_t mode,
    struct thread *td,
    struct ucred *cred)
{
	int err = 0;
	uint32_t mask = F_OK;
	int dataflags;
	int vtype;
	struct mount *mp;
	struct fuse_dispatcher fdi;
	struct fuse_access_in *fai;
	struct fuse_data *data;

	mp = vnode_mount(vp);
	vtype = vnode_vtype(vp);

	data = fuse_get_mpdata(mp);
	dataflags = data->dataflags;

	if (mode == 0)
		return 0;

	if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) {
		switch (vp->v_type) {
		case VDIR:
			/* FALLTHROUGH */
		case VLNK:
			/* FALLTHROUGH */
		case VREG:
			return EROFS;
		default:
			break;
		}
	}

	/* Unless explicitly permitted, deny everyone except the fs owner. */
	if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
		if (fuse_match_cred(data->daemoncred, cred))
			return EPERM;
	}

	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
		struct vattr va;

		fuse_internal_getattr(vp, &va, cred, td);
		return vaccess(vp->v_type, va.va_mode, va.va_uid,
		    va.va_gid, mode, cred, NULL);
	}

	if (!fsess_isimpl(mp, FUSE_ACCESS))
		return 0;

	if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0)
		mask |= W_OK;
	if ((mode & VREAD) != 0)
		mask |= R_OK;
	if ((mode & VEXEC) != 0)
		mask |= X_OK;

	fdisp_init(&fdi, sizeof(*fai));
	fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);

	fai = fdi.indata;
	fai->mask = mask;

	err = fdisp_wait_answ(&fdi);
	fdisp_destroy(&fdi);

	if (err == ENOSYS) {
		fsess_set_notimpl(mp, FUSE_ACCESS);
		err = 0;
	}
	return err;
}
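
/*
 * Note the ENOSYS convention used above and throughout this file: when the
 * daemon answers an operation with ENOSYS, the opcode is latched as
 * not-implemented via fsess_set_notimpl(), so it will not be sent again for
 * this mount, and the current operation is treated as a success.
 */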

/*
 * Cache FUSE attributes from attr, in attribute cache associated with vnode
 * 'vp'.  Optionally, if argument 'vap' is not NULL, store a copy of the
 * converted attributes there as well.
 *
 * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but
 * do return the result to the caller).
 */
void
fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
    uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap)
{
	struct mount *mp;
	struct fuse_vnode_data *fvdat;
	struct fuse_data *data;
	struct vattr *vp_cache_at;

	mp = vnode_mount(vp);
	fvdat = VTOFUD(vp);
	data = fuse_get_mpdata(mp);

	ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");

	fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
	    &fvdat->attr_cache_timeout);

	/* Fix our buffers if the filesize changed without us knowing */
	if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) {
		(void)fuse_vnode_setsize(vp, attr->size);
		fvdat->cached_attrs.va_size = attr->size;
	}

	if (attr_valid > 0 || attr_valid_nsec > 0)
		vp_cache_at = &(fvdat->cached_attrs);
	else if (vap != NULL)
		vp_cache_at = vap;
	else
		return;

	vattr_null(vp_cache_at);
	vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0];
	vp_cache_at->va_fileid = attr->ino;
	vp_cache_at->va_mode = attr->mode & ~S_IFMT;
	vp_cache_at->va_nlink = attr->nlink;
	vp_cache_at->va_uid = attr->uid;
	vp_cache_at->va_gid = attr->gid;
	vp_cache_at->va_rdev = attr->rdev;
	vp_cache_at->va_size = attr->size;
	/* XXX on i386, seconds are truncated to 32 bits */
	vp_cache_at->va_atime.tv_sec = attr->atime;
	vp_cache_at->va_atime.tv_nsec = attr->atimensec;
	vp_cache_at->va_mtime.tv_sec = attr->mtime;
	vp_cache_at->va_mtime.tv_nsec = attr->mtimensec;
	vp_cache_at->va_ctime.tv_sec = attr->ctime;
	vp_cache_at->va_ctime.tv_nsec = attr->ctimensec;
	if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0)
		vp_cache_at->va_blocksize = attr->blksize;
	else
		vp_cache_at->va_blocksize = PAGE_SIZE;
	vp_cache_at->va_type = IFTOVT(attr->mode);
	vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE;
	vp_cache_at->va_flags = 0;

	if (vap != vp_cache_at && vap != NULL)
		memcpy(vap, vp_cache_at, sizeof(*vap));
}


/* fsync */

int
fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
{
	if (tick->tk_aw_ohead.error == ENOSYS) {
		fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick));
	}
	return 0;
}

int
fuse_internal_fsync(struct vnode *vp,
    struct thread *td,
    int waitfor,
    bool datasync)
{
	struct fuse_fsync_in *ffsi = NULL;
	struct fuse_dispatcher fdi;
	struct fuse_filehandle *fufh;
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	struct mount *mp = vnode_mount(vp);
	int op = FUSE_FSYNC;
	int err = 0;

	if (!fsess_isimpl(vnode_mount(vp),
	    (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
		return 0;
	}
	if (vnode_isdir(vp))
		op = FUSE_FSYNCDIR;

	if (!fsess_isimpl(mp, op))
		return 0;

	fdisp_init(&fdi, sizeof(*ffsi));
	/*
	 * fsync every open file handle for this file, because we can't be
	 * sure which file handle the caller is really referring to.
	 */
	LIST_FOREACH(fufh, &fvdat->handles, next) {
		fdi.iosize = sizeof(*ffsi);
		if (ffsi == NULL)
			fdisp_make_vp(&fdi, op, vp, td, NULL);
		else
			fdisp_refresh_vp(&fdi, op, vp, td, NULL);
		ffsi = fdi.indata;
		ffsi->fh = fufh->fh_id;
		ffsi->fsync_flags = 0;

		if (datasync)
			ffsi->fsync_flags = 1;

		if (waitfor == MNT_WAIT) {
			err = fdisp_wait_answ(&fdi);
		} else {
			fuse_insert_callback(fdi.tick,
			    fuse_internal_fsync_callback);
			fuse_insert_message(fdi.tick, false);
		}
		if (err == ENOSYS) {
			/* ENOSYS means "success, and don't call again" */
			fsess_set_notimpl(mp, op);
			err = 0;
			break;
		}
	}
	fdisp_destroy(&fdi);

	return err;
}

/* Asynchronous invalidation */
SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_cache_hit,
    "struct vnode*", "struct vnode*");
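
/*
 * The next two functions handle cache invalidations that the daemon
 * initiates: they are dispatched from the /dev/fuse write path when the
 * daemon sends a FUSE_NOTIFY_INVAL_ENTRY or FUSE_NOTIFY_INVAL_INODE
 * notification, so the payload (fuse_notify_inval_entry_out or
 * fuse_notify_inval_inode_out, plus an optional name) is read here directly
 * from the uio.
 */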
int
fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
{
	struct fuse_notify_inval_entry_out fnieo;
	struct componentname cn;
	struct vnode *dvp, *vp;
	char name[PATH_MAX];
	int err;

	if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
		return (err);

	if (fnieo.namelen >= sizeof(name))
		return (EINVAL);

	if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
		return (err);
	name[fnieo.namelen] = '\0';
	/* fusefs does not cache "." or ".." entries */
	if (strncmp(name, ".", sizeof(".")) == 0 ||
	    strncmp(name, "..", sizeof("..")) == 0)
		return (0);

	if (fnieo.parent == FUSE_ROOT_ID)
		err = VFS_ROOT(mp, LK_SHARED, &dvp);
	else
		err = fuse_internal_get_cached_vnode(mp, fnieo.parent,
		    LK_SHARED, &dvp);
	/*
	 * If dvp is not in the cache, then it must've been reclaimed.  And
	 * since fuse_vnop_reclaim does a cache_purge, name's entry must've
	 * been invalidated already.  So we can safely return if dvp == NULL
	 */
	if (err != 0 || dvp == NULL)
		return (err);
	/*
	 * XXX we can't check dvp's generation because the FUSE invalidate
	 * entry message doesn't include it.  Worst case is that we invalidate
	 * an entry that didn't need to be invalidated.
	 */

	cn.cn_nameiop = LOOKUP;
	cn.cn_flags = 0;	/* !MAKEENTRY means free cached entry */
	cn.cn_thread = curthread;
	cn.cn_cred = curthread->td_ucred;
	cn.cn_lkflags = LK_SHARED;
	cn.cn_pnbuf = NULL;
	cn.cn_nameptr = name;
	cn.cn_namelen = fnieo.namelen;
	err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
	MPASS(err == 0);
	fuse_vnode_clear_attr_cache(dvp);
	vput(dvp);
	return (0);
}

int
fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
{
	struct fuse_notify_inval_inode_out fniio;
	struct vnode *vp;
	int err;

	if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
		return (err);

	if (fniio.ino == FUSE_ROOT_ID)
		err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
	else
		err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
		    &vp);
	if (err != 0 || vp == NULL)
		return (err);
	/*
	 * XXX we can't check vp's generation because the FUSE invalidate
	 * entry message doesn't include it.  Worst case is that we invalidate
	 * an inode that didn't need to be invalidated.
	 */

	/*
	 * Flush and invalidate buffers if off >= 0.  Technically we only need
	 * to flush and invalidate the range of offsets [off, off + len), but
	 * for simplicity's sake we do everything.
	 */
	if (fniio.off >= 0)
		fuse_io_invalbuf(vp, curthread);
	fuse_vnode_clear_attr_cache(vp);
	vput(vp);
	return (0);
}
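
/*
 * Note that a libfuse-based server would normally trigger the two functions
 * above via fuse_lowlevel_notify_inval_entry(3) and
 * fuse_lowlevel_notify_inval_inode(3), respectively.
 */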

/* mknod */
int
fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp, struct vattr *vap)
{
	struct fuse_data *data;
	struct fuse_mknod_in fmni;
	size_t insize;

	data = fuse_get_mpdata(dvp->v_mount);

	fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
	fmni.rdev = vap->va_rdev;
	if (fuse_libabi_geq(data, 7, 12)) {
		insize = sizeof(fmni);
		fmni.umask = curthread->td_proc->p_fd->fd_cmask;
	} else {
		insize = FUSE_COMPAT_MKNOD_IN_SIZE;
	}
	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
	    insize, vap->va_type));
}

/* readdir */

int
fuse_internal_readdir(struct vnode *vp,
    struct uio *uio,
    off_t startoff,
    struct fuse_filehandle *fufh,
    struct fuse_iov *cookediov,
    int *ncookies,
    u_long *cookies)
{
	int err = 0;
	struct fuse_dispatcher fdi;
	struct fuse_read_in *fri = NULL;
	int fnd_start;

	if (uio_resid(uio) == 0)
		return 0;
	fdisp_init(&fdi, 0);

	/*
	 * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
	 * I/O).
	 */

	/*
	 * fnd_start is set non-zero once the offset in the directory gets
	 * to the startoff.  This is done because directories must be read
	 * from the beginning (offset == 0) when fuse_vnop_readdir() needs
	 * to do an open of the directory.
	 * If it is not set non-zero here, it will be set non-zero in
	 * fuse_internal_readdir_processdata() when uio_offset == startoff.
	 */
	fnd_start = 0;
	if (uio->uio_offset == startoff)
		fnd_start = 1;
	while (uio_resid(uio) > 0) {
		fdi.iosize = sizeof(*fri);
		if (fri == NULL)
			fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
		else
			fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);

		fri = fdi.indata;
		fri->fh = fufh->fh_id;
		fri->offset = uio_offset(uio);
		fri->size = MIN(uio->uio_resid,
		    fuse_get_mpdata(vp->v_mount)->max_read);

		if ((err = fdisp_wait_answ(&fdi)))
			break;
		if ((err = fuse_internal_readdir_processdata(uio, startoff,
		    &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov,
		    ncookies, &cookies)))
			break;
	}

	fdisp_destroy(&fdi);
	return ((err == -1) ? 0 : err);
}
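
/*
 * The loop above reuses a single dispatcher for the whole directory: the
 * first FUSE_READDIR request is built with fdisp_make_vp() and subsequent
 * ones recycle it with fdisp_refresh_vp(), resuming at whatever offset
 * fuse_internal_readdir_processdata() left in the uio.
 */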

/*
 * Return -1 to indicate that this readdir is finished, 0 if it copied
 * all the directory data read in and it may be possible to read more
 * and greater than 0 for a failure.
 */
int
fuse_internal_readdir_processdata(struct uio *uio,
    off_t startoff,
    int *fnd_start,
    size_t reqsize,
    void *buf,
    size_t bufsize,
    struct fuse_iov *cookediov,
    int *ncookies,
    u_long **cookiesp)
{
	int err = 0;
	int bytesavail;
	size_t freclen;

	struct dirent *de;
	struct fuse_dirent *fudge;
	u_long *cookies;

	cookies = *cookiesp;
	if (bufsize < FUSE_NAME_OFFSET)
		return -1;
	for (;;) {
		if (bufsize < FUSE_NAME_OFFSET) {
			err = -1;
			break;
		}
		fudge = (struct fuse_dirent *)buf;
		freclen = FUSE_DIRENT_SIZE(fudge);

		if (bufsize < freclen) {
			/*
			 * This indicates a partial directory entry at the
			 * end of the directory data.
			 */
			err = -1;
			break;
		}
#ifdef ZERO_PAD_INCOMPLETE_BUFS
		if (isbzero(buf, FUSE_NAME_OFFSET)) {
			err = -1;
			break;
		}
#endif

		if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
			err = EINVAL;
			break;
		}
		bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *)
		    &fudge->namelen);

		if (bytesavail > uio_resid(uio)) {
			/* Out of space for the dir so we are done. */
			err = -1;
			break;
		}
		/*
		 * Don't start to copy the directory entries out until
		 * the requested offset in the directory is found.
		 */
		if (*fnd_start != 0) {
			fiov_adjust(cookediov, bytesavail);
			bzero(cookediov->base, bytesavail);

			de = (struct dirent *)cookediov->base;
			de->d_fileno = fudge->ino;
			de->d_reclen = bytesavail;
			de->d_type = fudge->type;
			de->d_namlen = fudge->namelen;
			memcpy((char *)cookediov->base +
			    sizeof(struct dirent) - MAXNAMLEN - 1,
			    (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
			dirent_terminate(de);

			err = uiomove(cookediov->base, cookediov->len, uio);
			if (err)
				break;
			if (cookies != NULL) {
				if (*ncookies == 0) {
					err = -1;
					break;
				}
				*cookies = fudge->off;
				cookies++;
				(*ncookies)--;
			}
		} else if (startoff == fudge->off)
			*fnd_start = 1;
		buf = (char *)buf + freclen;
		bufsize -= freclen;
		uio_setoffset(uio, fudge->off);
	}
	*cookiesp = cookies;

	return err;
}
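
/*
 * In the conversion loop above each wire-format fuse_dirent is rewritten as
 * a native struct dirent: the record consumes GENERIC_DIRSIZ() bytes of the
 * caller's buffer (a size based on the name length, not on the daemon's
 * FUSE_DIRENT_SIZE() record length), and the directory offset cookie handed
 * back to the caller is the daemon-supplied fudge->off.
 */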

/* remove */

int
fuse_internal_remove(struct vnode *dvp,
    struct vnode *vp,
    struct componentname *cnp,
    enum fuse_opcode op)
{
	struct fuse_dispatcher fdi;
	nlink_t nlink;
	int err = 0;

	fdisp_init(&fdi, cnp->cn_namelen + 1);
	fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred);

	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
	((char *)fdi.indata)[cnp->cn_namelen] = '\0';

	err = fdisp_wait_answ(&fdi);
	fdisp_destroy(&fdi);

	if (err)
		return (err);

	/*
	 * Access the cached nlink even if the attr cache has expired.  If
	 * it's inaccurate, the worst that will happen is:
	 * 1) We'll recycle the vnode even though the file has another link we
	 *    don't know about, costing a bit of cpu time, or
	 * 2) We won't recycle the vnode even though all of its links are
	 *    gone.  It will linger around until vnlru reclaims it, costing a
	 *    bit of temporary memory.
	 */
	nlink = VTOFUD(vp)->cached_attrs.va_nlink--;

	/*
	 * Purge the parent's attribute cache because the daemon
	 * should've updated its mtime and ctime.
	 */
	fuse_vnode_clear_attr_cache(dvp);

	/* NB: nlink could be zero if it was never cached */
	if (nlink <= 1 || vnode_vtype(vp) == VDIR) {
		fuse_internal_vnode_disappear(vp);
	} else {
		cache_purge(vp);
		fuse_vnode_update(vp, FN_CTIMECHANGE);
	}

	return err;
}

/* rename */

int
fuse_internal_rename(struct vnode *fdvp,
    struct componentname *fcnp,
    struct vnode *tdvp,
    struct componentname *tcnp)
{
	struct fuse_dispatcher fdi;
	struct fuse_rename_in *fri;
	int err = 0;

	fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen +
	    tcnp->cn_namelen + 2);
	fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread,
	    tcnp->cn_cred);

	fri = fdi.indata;
	fri->newdir = VTOI(tdvp);
	memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr,
	    fcnp->cn_namelen);
	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0';
	memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1,
	    tcnp->cn_nameptr, tcnp->cn_namelen);
	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen +
	    tcnp->cn_namelen + 1] = '\0';

	err = fdisp_wait_answ(&fdi);
	fdisp_destroy(&fdi);
	return err;
}

/* strategy */

/* entity creation */

void
fuse_internal_newentry_makerequest(struct mount *mp,
    uint64_t dnid,
    struct componentname *cnp,
    enum fuse_opcode op,
    void *buf,
    size_t bufsize,
    struct fuse_dispatcher *fdip)
{
	fdip->iosize = bufsize + cnp->cn_namelen + 1;

	fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred);
	memcpy(fdip->indata, buf, bufsize);
	memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr,
	    cnp->cn_namelen);
	((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0';
}

int
fuse_internal_newentry_core(struct vnode *dvp,
    struct vnode **vpp,
    struct componentname *cnp,
    enum vtype vtyp,
    struct fuse_dispatcher *fdip)
{
	int err = 0;
	struct fuse_entry_out *feo;
	struct mount *mp = vnode_mount(dvp);

	if ((err = fdisp_wait_answ(fdip))) {
		return err;
	}
	feo = fdip->answ;

	if ((err = fuse_internal_checkentry(feo, vtyp))) {
		return err;
	}
	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp);
	if (err) {
		fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred,
		    feo->nodeid, 1);
		return err;
	}

	/*
	 * Purge the parent's attribute cache because the daemon should've
	 * updated its mtime and ctime
	 */
	fuse_vnode_clear_attr_cache(dvp);

	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
	    feo->attr_valid_nsec, NULL);

	return err;
}

int
fuse_internal_newentry(struct vnode *dvp,
    struct vnode **vpp,
    struct componentname *cnp,
    enum fuse_opcode op,
    void *buf,
    size_t bufsize,
    enum vtype vtype)
{
	int err;
	struct fuse_dispatcher fdi;
	struct mount *mp = vnode_mount(dvp);

	fdisp_init(&fdi, 0);
	fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf,
	    bufsize, &fdi);
	err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi);
	fdisp_destroy(&fdi);

	return err;
}
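
/*
 * The entity-creation helpers above cooperate as follows:
 * fuse_internal_newentry() packs the opcode-specific input struct plus the
 * NUL-terminated name using fuse_internal_newentry_makerequest(), and
 * fuse_internal_newentry_core() then waits for the daemon's fuse_entry_out,
 * verifies the returned vnode type, instantiates the vnode, and caches the
 * new attributes.  If vnode instantiation fails, the daemon's lookup count
 * is rolled back with a FUSE_FORGET.
 */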

/* entity destruction */

int
fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio)
{
	fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL,
	    ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1);

	return 0;
}

void
fuse_internal_forget_send(struct mount *mp,
    struct thread *td,
    struct ucred *cred,
    uint64_t nodeid,
    uint64_t nlookup)
{

	struct fuse_dispatcher fdi;
	struct fuse_forget_in *ffi;

	/*
	 * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu",
	 *     (long long unsigned) nodeid));
	 */

	fdisp_init(&fdi, sizeof(*ffi));
	fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred);

	ffi = fdi.indata;
	ffi->nlookup = nlookup;

	fuse_insert_message(fdi.tick, false);
	fdisp_destroy(&fdi);
}

SDT_PROBE_DEFINE2(fusefs, , internal, getattr_cache_incoherent,
    "struct vnode*", "struct fuse_attr_out*");

/* Fetch the vnode's attributes from the daemon */
int
fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
    struct ucred *cred, struct thread *td)
{
	struct fuse_dispatcher fdi;
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	struct fuse_getattr_in *fgai;
	struct fuse_attr_out *fao;
	off_t old_filesize = fvdat->cached_attrs.va_size;
	struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
	struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
	enum vtype vtyp;
	int err;

	fdisp_init(&fdi, sizeof(*fgai));
	fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
	fgai = fdi.indata;
	/*
	 * We could look up a file handle and set it in fgai->fh, but that
	 * involves extra runtime work and I'm unaware of any file systems
	 * that care.
	 */
	fgai->getattr_flags = 0;
	if ((err = fdisp_wait_answ(&fdi))) {
		if (err == ENOENT)
			fuse_internal_vnode_disappear(vp);
		goto out;
	}

	fao = (struct fuse_attr_out *)fdi.answ;
	vtyp = IFTOVT(fao->attr.mode);
	if (fvdat->flag & FN_SIZECHANGE)
		fao->attr.size = old_filesize;
	if (fvdat->flag & FN_CTIMECHANGE) {
		fao->attr.ctime = old_ctime.tv_sec;
		fao->attr.ctimensec = old_ctime.tv_nsec;
	}
	if (fvdat->flag & FN_MTIMECHANGE) {
		fao->attr.mtime = old_mtime.tv_sec;
		fao->attr.mtimensec = old_mtime.tv_nsec;
	}
	if (vnode_isreg(vp) &&
	    fvdat->cached_attrs.va_size != VNOVAL &&
	    fao->attr.size != fvdat->cached_attrs.va_size) {
		/*
		 * The server changed the file's size even though we had it
		 * cached!  That's a server bug.
		 */
		SDT_PROBE2(fusefs, , internal, getattr_cache_incoherent, vp,
		    fao);
		printf("%s: cache incoherent on %s! "
		    "Buggy FUSE server detected. To prevent data corruption, "
		    "disable the data cache by mounting with -o direct_io, "
		    "or as directed otherwise by your FUSE server's "
		    "documentation\n", __func__,
		    vnode_mount(vp)->mnt_stat.f_mntonname);
		int iosize = fuse_iosize(vp);
		v_inval_buf_range(vp, 0, INT64_MAX, iosize);
	}
	fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
	    fao->attr_valid_nsec, vap);
	if (vtyp != vnode_vtype(vp)) {
		fuse_internal_vnode_disappear(vp);
		err = ENOENT;
	}

out:
	fdisp_destroy(&fdi);
	return err;
}
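
/*
 * Note how fuse_internal_do_getattr() above protects locally-dirtied
 * metadata: when FN_SIZECHANGE, FN_CTIMECHANGE, or FN_MTIMECHANGE is set,
 * the size or timestamps that we have modified but not yet pushed to the
 * daemon override whatever the daemon just reported, so a GETATTR cannot
 * clobber pending changes.
 */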

/* Read a vnode's attributes from cache or fetch them from the fuse daemon */
int
fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct thread *td)
{
	struct vattr *attrs;

	if ((attrs = VTOVA(vp)) != NULL) {
		*vap = *attrs;	/* struct copy */
		return 0;
	}

	return fuse_internal_do_getattr(vp, vap, cred, td);
}

void
fuse_internal_vnode_disappear(struct vnode *vp)
{
	struct fuse_vnode_data *fvdat = VTOFUD(vp);

	ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
	fvdat->flag |= FN_REVOKED;
	cache_purge(vp);
}

/* fuse start/stop */

int
fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
{
	int err = 0;
	struct fuse_data *data = tick->tk_data;
	struct fuse_init_out *fiio;

	if ((err = tick->tk_aw_ohead.error)) {
		goto out;
	}
	if ((err = fticket_pull(tick, uio))) {
		goto out;
	}
	fiio = fticket_resp(tick)->base;

	data->fuse_libabi_major = fiio->major;
	data->fuse_libabi_minor = fiio->minor;
	if (!fuse_libabi_geq(data, 7, 4)) {
		/*
		 * With a little work we could support servers as old as 7.1.
		 * But there would be little payoff.
		 */
		SDT_PROBE2(fusefs, , internal, trace, 1,
		    "userspace version too low");
		err = EPROTONOSUPPORT;
		goto out;
	}

	if (fuse_libabi_geq(data, 7, 5)) {
		if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
		    fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) {
			data->max_write = fiio->max_write;
			if (fiio->flags & FUSE_ASYNC_READ)
				data->dataflags |= FSESS_ASYNC_READ;
			if (fiio->flags & FUSE_POSIX_LOCKS)
				data->dataflags |= FSESS_POSIX_LOCKS;
			if (fiio->flags & FUSE_EXPORT_SUPPORT)
				data->dataflags |= FSESS_EXPORT_SUPPORT;
			/*
			 * Don't bother to check FUSE_BIG_WRITES, because it's
			 * redundant with max_write
			 */
			/*
			 * max_background and congestion_threshold are not
			 * implemented
			 */
		} else {
			err = EINVAL;
		}
	} else {
		/* Old fixed values */
		data->max_write = 4096;
	}

	if (fuse_libabi_geq(data, 7, 6))
		data->max_readahead_blocks = fiio->max_readahead /
		    maxbcachebuf;

	if (!fuse_libabi_geq(data, 7, 7))
		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);

	if (!fuse_libabi_geq(data, 7, 8)) {
		fsess_set_notimpl(data->mp, FUSE_BMAP);
		fsess_set_notimpl(data->mp, FUSE_DESTROY);
	}

	if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 &&
	    fiio->time_gran <= 1000000000)
		data->time_gran = fiio->time_gran;
	else
		data->time_gran = 1;

	if (!fuse_libabi_geq(data, 7, 23))
		data->cache_mode = fuse_data_cache_mode;
	else if (fiio->flags & FUSE_WRITEBACK_CACHE)
		data->cache_mode = FUSE_CACHE_WB;
	else
		data->cache_mode = FUSE_CACHE_WT;

out:
	if (err) {
		fdata_set_dead(data);
	}
	FUSE_LOCK();
	data->dataflags |= FSESS_INITED;
	wakeup(&data->ticketer);
	FUSE_UNLOCK();

	return 0;
}
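
/*
 * fuse_internal_send_init() below is the other half of the handshake: it is
 * sent when the file system is mounted, and the callback above runs when the
 * daemon answers.  FSESS_INITED is set (and sleepers on data->ticketer are
 * woken) even on failure so that waiting threads are not left hanging; a
 * failed handshake instead marks the whole session dead via
 * fdata_set_dead().
 */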

void
fuse_internal_send_init(struct fuse_data *data, struct thread *td)
{
	struct fuse_init_in *fiii;
	struct fuse_dispatcher fdi;

	fdisp_init(&fdi, sizeof(*fiii));
	fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL);
	fiii = fdi.indata;
	fiii->major = FUSE_KERNEL_VERSION;
	fiii->minor = FUSE_KERNEL_MINOR_VERSION;
	/*
	 * fusefs currently reads ahead no more than one cache block at a
	 * time.  See fuse_read_biobackend.
	 */
	fiii->max_readahead = maxbcachebuf;
	/*
	 * Unsupported features:
	 * FUSE_FILE_OPS: No known FUSE server or client supports it
	 * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it
	 * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask,
	 *	even when default ACLs are in use.
	 * FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD
	 *	doesn't have splice(2).
	 * FUSE_FLOCK_LOCKS: not yet implemented
	 * FUSE_HAS_IOCTL_DIR: not yet implemented
	 * FUSE_AUTO_INVAL_DATA: not yet implemented
	 * FUSE_DO_READDIRPLUS: not yet implemented
	 * FUSE_READDIRPLUS_AUTO: not yet implemented
	 * FUSE_ASYNC_DIO: not yet implemented
	 * FUSE_NO_OPEN_SUPPORT: not yet implemented
	 */
	fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT
	    | FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE;

	fuse_insert_callback(fdi.tick, fuse_internal_init_callback);
	fuse_insert_message(fdi.tick, false);
	fdisp_destroy(&fdi);
}
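
/*
 * The max_readahead advertised above (one maxbcachebuf-sized cache block) is
 * echoed back by the daemon in fuse_init_out; the init callback converts the
 * returned byte count into data->max_readahead_blocks.
 */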

/*
 * Send a FUSE_SETATTR operation with no permissions checks.  If cred is
 * NULL, send the request with root credentials.
 */
int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
    struct thread *td, struct ucred *cred)
{
	struct fuse_vnode_data *fvdat;
	struct fuse_dispatcher fdi;
	struct fuse_setattr_in *fsai;
	struct mount *mp;
	pid_t pid = td->td_proc->p_pid;
	struct fuse_data *data;
	int dataflags;
	int err = 0;
	enum vtype vtyp;
	int sizechanged = -1;
	uint64_t newsize = 0;

	mp = vnode_mount(vp);
	fvdat = VTOFUD(vp);
	data = fuse_get_mpdata(mp);
	dataflags = data->dataflags;

	fdisp_init(&fdi, sizeof(*fsai));
	fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
	if (!cred) {
		fdi.finh->uid = 0;
		fdi.finh->gid = 0;
	}
	fsai = fdi.indata;
	fsai->valid = 0;

	if (vap->va_uid != (uid_t)VNOVAL) {
		fsai->uid = vap->va_uid;
		fsai->valid |= FATTR_UID;
	}
	if (vap->va_gid != (gid_t)VNOVAL) {
		fsai->gid = vap->va_gid;
		fsai->valid |= FATTR_GID;
	}
	if (vap->va_size != VNOVAL) {
		struct fuse_filehandle *fufh = NULL;

		/* Truncate to a new value. */
		fsai->size = vap->va_size;
		sizechanged = 1;
		newsize = vap->va_size;
		fsai->valid |= FATTR_SIZE;

		fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
		if (fufh) {
			fsai->fh = fufh->fh_id;
			fsai->valid |= FATTR_FH;
		}
		VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
	}
	if (vap->va_atime.tv_sec != VNOVAL) {
		fsai->atime = vap->va_atime.tv_sec;
		fsai->atimensec = vap->va_atime.tv_nsec;
		fsai->valid |= FATTR_ATIME;
		if (vap->va_vaflags & VA_UTIMES_NULL)
			fsai->valid |= FATTR_ATIME_NOW;
	}
	if (vap->va_mtime.tv_sec != VNOVAL) {
		fsai->mtime = vap->va_mtime.tv_sec;
		fsai->mtimensec = vap->va_mtime.tv_nsec;
		fsai->valid |= FATTR_MTIME;
		if (vap->va_vaflags & VA_UTIMES_NULL)
			fsai->valid |= FATTR_MTIME_NOW;
	} else if (fvdat->flag & FN_MTIMECHANGE) {
		fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec;
		fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
		fsai->valid |= FATTR_MTIME;
	}
	if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) {
		fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec;
		fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
		fsai->valid |= FATTR_CTIME;
	}
	if (vap->va_mode != (mode_t)VNOVAL) {
		fsai->mode = vap->va_mode & ALLPERMS;
		fsai->valid |= FATTR_MODE;
	}
	if (!fsai->valid) {
		goto out;
	}

	if ((err = fdisp_wait_answ(&fdi)))
		goto out;
	vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);

	if (vnode_vtype(vp) != vtyp) {
		if (vnode_vtype(vp) == VNON && vtyp != VNON) {
			SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! "
			    "vnode_vtype is VNON and vtype isn't.");
		} else {
			/*
			 * STALE vnode, ditch
			 *
			 * The vnode has changed its type "behind our back".
			 * There's nothing really we can do, so let us just
			 * force an internal revocation and tell the caller to
			 * try again, if interested.
			 */
			fuse_internal_vnode_disappear(vp);
			err = EAGAIN;
		}
	}
	if (err == 0) {
		struct fuse_attr_out *fao = (struct fuse_attr_out *)fdi.answ;
		fuse_vnode_undirty_cached_timestamps(vp);
		fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
		    fao->attr_valid_nsec, NULL);
	}

out:
	fdisp_destroy(&fdi);
	return err;
}
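
/*
 * A note on the FATTR_ATIME_NOW/FATTR_MTIME_NOW bits set above: when the
 * caller passed VA_UTIMES_NULL (utimes(2) with a NULL times argument), the
 * daemon is asked to stamp the current server-side time instead of the
 * values copied into fuse_setattr_in.
 */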

#ifdef ZERO_PAD_INCOMPLETE_BUFS
static int
isbzero(void *buf, size_t len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (((char *)buf)[i])
			return (0);
	}

	return (1);
}

#endif

void
fuse_internal_init(void)
{
	fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK);
	fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK);
}

void
fuse_internal_destroy(void)
{
	counter_u64_free(fuse_lookup_cache_hits);
	counter_u64_free(fuse_lookup_cache_misses);
}