1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Google Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Copyright (C) 2005 Csaba Henk. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 45 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 */ 57 58 #include <sys/cdefs.h> 59 __FBSDID("$FreeBSD$"); 60 61 #include <sys/types.h> 62 #include <sys/module.h> 63 #include <sys/systm.h> 64 #include <sys/errno.h> 65 #include <sys/param.h> 66 #include <sys/kernel.h> 67 #include <sys/conf.h> 68 #include <sys/uio.h> 69 #include <sys/malloc.h> 70 #include <sys/queue.h> 71 #include <sys/lock.h> 72 #include <sys/sx.h> 73 #include <sys/mutex.h> 74 #include <sys/proc.h> 75 #include <sys/vnode.h> 76 #include <sys/namei.h> 77 #include <sys/mount.h> 78 #include <sys/sysctl.h> 79 #include <sys/fcntl.h> 80 #include <sys/fnv_hash.h> 81 #include <sys/priv.h> 82 #include <security/mac/mac_framework.h> 83 #include <vm/vm.h> 84 #include <vm/vm_extern.h> 85 86 #include "fuse.h" 87 #include "fuse_node.h" 88 #include "fuse_internal.h" 89 #include "fuse_io.h" 90 #include "fuse_ipc.h" 91 92 SDT_PROVIDER_DECLARE(fuse); 93 /* 94 * Fuse trace probe: 95 * arg0: verbosity. Higher numbers give more verbose messages 96 * arg1: Textual message 97 */ 98 SDT_PROBE_DEFINE2(fuse, , node, trace, "int", "char*"); 99 100 MALLOC_DEFINE(M_FUSEVN, "fuse_vnode", "fuse vnode private data"); 101 102 static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS); 103 104 static int fuse_node_count = 0; 105 106 SYSCTL_INT(_vfs_fusefs, OID_AUTO, node_count, CTLFLAG_RD, 107 &fuse_node_count, 0, "Count of FUSE vnodes"); 108 109 int fuse_data_cache_mode = FUSE_CACHE_WT; 110 111 SYSCTL_PROC(_vfs_fusefs, OID_AUTO, data_cache_mode, CTLTYPE_INT|CTLFLAG_RW, 112 &fuse_data_cache_mode, 0, sysctl_fuse_cache_mode, "I", 113 "Zero: disable caching of FUSE file data; One: write-through caching " 114 "(default); Two: write-back caching (generally unsafe)"); 115 116 int fuse_data_cache_invalidate = 0; 117 118 SYSCTL_INT(_vfs_fusefs, OID_AUTO, data_cache_invalidate, CTLFLAG_RW, 119 &fuse_data_cache_invalidate, 0, 120 "If non-zero, discard cached clean file data when there are no active file" 121 " users"); 122 123 int fuse_mmap_enable = 1; 124 125 SYSCTL_INT(_vfs_fusefs, OID_AUTO, mmap_enable, CTLFLAG_RW, 126 &fuse_mmap_enable, 0, 127 "If non-zero, and data_cache_mode is also non-zero, enable mmap(2) of " 128 "FUSE files"); 129 130 int fuse_refresh_size = 0; 131 132 SYSCTL_INT(_vfs_fusefs, OID_AUTO, refresh_size, CTLFLAG_RW, 133 &fuse_refresh_size, 0, 134 "If non-zero, and no dirty file extension data is buffered, fetch file " 135 "size before write operations"); 136 137 int fuse_sync_resize = 1; 138 139 SYSCTL_INT(_vfs_fusefs, OID_AUTO, sync_resize, CTLFLAG_RW, 140 &fuse_sync_resize, 0, 141 "If a cached write extended a file, inform FUSE filesystem of the changed" 142 "size immediately subsequent to the issued writes"); 143 144 int fuse_fix_broken_io = 0; 145 146 SYSCTL_INT(_vfs_fusefs, OID_AUTO, fix_broken_io, CTLFLAG_RW, 147 &fuse_fix_broken_io, 0, 148 "If non-zero, print a diagnostic warning if a userspace filesystem returns" 149 " EIO on reads of recently extended portions of files"); 150 151 static int 152 sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS) 153 { 154 int val, error; 155 156 val = *(int *)arg1; 157 error = sysctl_handle_int(oidp, &val, 0, req); 158 if (error || !req->newptr) 159 return (error); 160 161 switch (val) { 162 case FUSE_CACHE_UC: 163 case FUSE_CACHE_WT: 164 case FUSE_CACHE_WB: 165 *(int *)arg1 = val; 166 break; 167 default: 168 return (EDOM); 169 } 170 return (0); 171 } 172 173 static void 174 fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat, 175 uint64_t nodeid, enum vtype vtyp) 176 { 177 int i; 178 179 fvdat->nid = nodeid; 180 vattr_null(&fvdat->cached_attrs); 181 if (nodeid == FUSE_ROOT_ID) { 182 vp->v_vflag |= VV_ROOT; 183 } 184 vp->v_type = vtyp; 185 vp->v_data = fvdat; 186 187 for (i = 0; i < FUFH_MAXTYPE; i++) 188 fvdat->fufh[i].fh_type = FUFH_INVALID; 189 190 atomic_add_acq_int(&fuse_node_count, 1); 191 } 192 193 void 194 fuse_vnode_destroy(struct vnode *vp) 195 { 196 struct fuse_vnode_data *fvdat = vp->v_data; 197 198 vp->v_data = NULL; 199 free(fvdat, M_FUSEVN); 200 201 atomic_subtract_acq_int(&fuse_node_count, 1); 202 } 203 204 static int 205 fuse_vnode_cmp(struct vnode *vp, void *nidp) 206 { 207 return (VTOI(vp) != *((uint64_t *)nidp)); 208 } 209 210 static uint32_t inline 211 fuse_vnode_hash(uint64_t id) 212 { 213 return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT)); 214 } 215 216 static int 217 fuse_vnode_alloc(struct mount *mp, 218 struct thread *td, 219 uint64_t nodeid, 220 enum vtype vtyp, 221 struct vnode **vpp) 222 { 223 struct fuse_vnode_data *fvdat; 224 struct vnode *vp2; 225 int err = 0; 226 227 if (vtyp == VNON) { 228 return EINVAL; 229 } 230 *vpp = NULL; 231 err = vfs_hash_get(mp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, td, vpp, 232 fuse_vnode_cmp, &nodeid); 233 if (err) 234 return (err); 235 236 if (*vpp) { 237 MPASS((*vpp)->v_type == vtyp && (*vpp)->v_data != NULL); 238 SDT_PROBE2(fuse, , node, trace, 1, "vnode taken from hash"); 239 return (0); 240 } 241 fvdat = malloc(sizeof(*fvdat), M_FUSEVN, M_WAITOK | M_ZERO); 242 err = getnewvnode("fuse", mp, &fuse_vnops, vpp); 243 if (err) { 244 free(fvdat, M_FUSEVN); 245 return (err); 246 } 247 lockmgr((*vpp)->v_vnlock, LK_EXCLUSIVE, NULL); 248 fuse_vnode_init(*vpp, fvdat, nodeid, vtyp); 249 err = insmntque(*vpp, mp); 250 ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc"); 251 if (err) { 252 free(fvdat, M_FUSEVN); 253 *vpp = NULL; 254 return (err); 255 } 256 err = vfs_hash_insert(*vpp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, 257 td, &vp2, fuse_vnode_cmp, &nodeid); 258 if (err) 259 return (err); 260 if (vp2 != NULL) { 261 *vpp = vp2; 262 return (0); 263 } 264 265 ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc"); 266 267 return (0); 268 } 269 270 int 271 fuse_vnode_get(struct mount *mp, 272 struct fuse_entry_out *feo, 273 uint64_t nodeid, 274 struct vnode *dvp, 275 struct vnode **vpp, 276 struct componentname *cnp, 277 enum vtype vtyp) 278 { 279 struct thread *td = (cnp != NULL ? cnp->cn_thread : curthread); 280 int err = 0; 281 282 err = fuse_vnode_alloc(mp, td, nodeid, vtyp, vpp); 283 if (err) { 284 return err; 285 } 286 if (dvp != NULL) { 287 MPASS((cnp->cn_flags & ISDOTDOT) == 0); 288 MPASS(!(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')); 289 fuse_vnode_setparent(*vpp, dvp); 290 } 291 if (dvp != NULL && cnp != NULL && (cnp->cn_flags & MAKEENTRY) != 0 && 292 feo != NULL && 293 (feo->entry_valid != 0 || feo->entry_valid_nsec != 0)) { 294 ASSERT_VOP_LOCKED(*vpp, "fuse_vnode_get"); 295 ASSERT_VOP_LOCKED(dvp, "fuse_vnode_get"); 296 cache_enter(dvp, *vpp, cnp); 297 } 298 299 /* 300 * In userland, libfuse uses cached lookups for dot and dotdot entries, 301 * thus it does not really bump the nlookup counter for forget. 302 * Follow the same semantic and avoid tu bump it in order to keep 303 * nlookup counters consistent. 304 */ 305 if (cnp == NULL || ((cnp->cn_flags & ISDOTDOT) == 0 && 306 (cnp->cn_namelen != 1 || cnp->cn_nameptr[0] != '.'))) 307 VTOFUD(*vpp)->nlookup++; 308 309 return 0; 310 } 311 312 void 313 fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td) 314 { 315 /* 316 * Funcation is called for every vnode open. 317 * Merge fuse_open_flags it may be 0 318 */ 319 /* 320 * Ideally speaking, direct io should be enabled on 321 * fd's but do not see of any way of providing that 322 * this implementation. 323 * 324 * Also cannot think of a reason why would two 325 * different fd's on same vnode would like 326 * have DIRECT_IO turned on and off. But linux 327 * based implementation works on an fd not an 328 * inode and provides such a feature. 329 * 330 * XXXIP: Handle fd based DIRECT_IO 331 */ 332 if (fuse_open_flags & FOPEN_DIRECT_IO) { 333 ASSERT_VOP_ELOCKED(vp, __func__); 334 VTOFUD(vp)->flag |= FN_DIRECTIO; 335 fuse_io_invalbuf(vp, td); 336 } else { 337 if ((fuse_open_flags & FOPEN_KEEP_CACHE) == 0) 338 fuse_io_invalbuf(vp, td); 339 VTOFUD(vp)->flag &= ~FN_DIRECTIO; 340 } 341 342 if (vnode_vtype(vp) == VREG) { 343 /* XXXIP prevent getattr, by using cached node size */ 344 vnode_create_vobject(vp, 0, td); 345 } 346 } 347 348 int 349 fuse_vnode_savesize(struct vnode *vp, struct ucred *cred) 350 { 351 struct fuse_vnode_data *fvdat = VTOFUD(vp); 352 struct thread *td = curthread; 353 struct fuse_filehandle *fufh = NULL; 354 struct fuse_dispatcher fdi; 355 struct fuse_setattr_in *fsai; 356 int err = 0; 357 358 ASSERT_VOP_ELOCKED(vp, "fuse_io_extend"); 359 360 if (fuse_isdeadfs(vp)) { 361 return EBADF; 362 } 363 if (vnode_vtype(vp) == VDIR) { 364 return EISDIR; 365 } 366 if (vfs_isrdonly(vnode_mount(vp))) { 367 return EROFS; 368 } 369 if (cred == NULL) { 370 cred = td->td_ucred; 371 } 372 fdisp_init(&fdi, sizeof(*fsai)); 373 fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); 374 fsai = fdi.indata; 375 fsai->valid = 0; 376 377 /* Truncate to a new value. */ 378 fsai->size = fvdat->filesize; 379 fsai->valid |= FATTR_SIZE; 380 381 fuse_filehandle_getrw(vp, FUFH_WRONLY, &fufh); 382 if (fufh) { 383 fsai->fh = fufh->fh_id; 384 fsai->valid |= FATTR_FH; 385 } 386 err = fdisp_wait_answ(&fdi); 387 fdisp_destroy(&fdi); 388 if (err == 0) 389 fvdat->flag &= ~FN_SIZECHANGE; 390 391 return err; 392 } 393 394 void 395 fuse_vnode_refreshsize(struct vnode *vp, struct ucred *cred) 396 { 397 398 struct fuse_vnode_data *fvdat = VTOFUD(vp); 399 struct vattr va; 400 401 if ((fvdat->flag & FN_SIZECHANGE) != 0 || 402 fuse_data_cache_mode == FUSE_CACHE_UC || 403 (fuse_refresh_size == 0 && fvdat->filesize != 0)) 404 return; 405 406 VOP_GETATTR(vp, &va, cred); 407 SDT_PROBE2(fuse, , node, trace, 1, "refreshed file size"); 408 } 409 410 int 411 fuse_vnode_setsize(struct vnode *vp, struct ucred *cred, off_t newsize) 412 { 413 struct fuse_vnode_data *fvdat = VTOFUD(vp); 414 off_t oldsize; 415 int err = 0; 416 417 ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize"); 418 419 oldsize = fvdat->filesize; 420 fvdat->filesize = newsize; 421 fvdat->flag |= FN_SIZECHANGE; 422 423 if (newsize < oldsize) { 424 err = vtruncbuf(vp, cred, newsize, fuse_iosize(vp)); 425 } 426 vnode_pager_setsize(vp, newsize); 427 return err; 428 } 429