1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed 8 * to Berkeley by John Heidemann of the UCLA Ficus project. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/fnv_hash.h> 38 #include <sys/jail.h> 39 #include <sys/kernel.h> 40 #include <sys/linker.h> 41 #include <sys/mount.h> 42 #include <sys/proc.h> 43 #include <sys/sx.h> 44 #include <sys/syscallsubr.h> 45 #include <sys/sysctl.h> 46 #include <sys/vnode.h> 47 #include <sys/malloc.h> 48 49 static int vfs_register(struct vfsconf *); 50 static int vfs_unregister(struct vfsconf *); 51 52 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes"); 53 54 /* 55 * The highest defined VFS number. 56 */ 57 int maxvfsconf = VFS_GENERIC + 1; 58 59 /* 60 * Single-linked list of configured VFSes. 61 * New entries are added/deleted by vfs_register()/vfs_unregister() 62 */ 63 struct vfsconfhead vfsconf = TAILQ_HEAD_INITIALIZER(vfsconf); 64 struct sx vfsconf_sx; 65 SX_SYSINIT(vfsconf, &vfsconf_sx, "vfsconf"); 66 67 /* 68 * Loader.conf variable vfs.typenumhash enables setting vfc_typenum using a hash 69 * calculation on vfc_name, so that it doesn't change when file systems are 70 * loaded in a different order. This will avoid the NFS server file handles from 71 * changing for file systems that use vfc_typenum in their fsid. 72 */ 73 static int vfs_typenumhash = 1; 74 SYSCTL_INT(_vfs, OID_AUTO, typenumhash, CTLFLAG_RDTUN, &vfs_typenumhash, 0, 75 "Set vfc_typenum using a hash calculation on vfc_name, so that it does not" 76 " change when file systems are loaded in a different order."); 77 78 /* 79 * A Zen vnode attribute structure. 80 * 81 * Initialized when the first filesystem registers by vfs_register(). 82 */ 83 struct vattr va_null; 84 85 /* 86 * vfs_init.c 87 * 88 * Allocate and fill in operations vectors. 89 * 90 * An undocumented feature of this approach to defining operations is that 91 * there can be multiple entries in vfs_opv_descs for the same operations 92 * vector. This allows third parties to extend the set of operations 93 * supported by another layer in a binary compatibile way. For example, 94 * assume that NFS needed to be modified to support Ficus. NFS has an entry 95 * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by 96 * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_entensions) 97 * listing those new operations Ficus adds to NFS, all without modifying the 98 * NFS code. (Of couse, the OTW NFS protocol still needs to be munged, but 99 * that is a(whole)nother story.) This is a feature. 100 */ 101 102 /* 103 * Routines having to do with the management of the vnode table. 104 */ 105 106 static struct vfsconf * 107 vfs_byname_locked(const char *name) 108 { 109 struct vfsconf *vfsp; 110 111 sx_assert(&vfsconf_sx, SA_LOCKED); 112 if (!strcmp(name, "ffs")) 113 name = "ufs"; 114 TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { 115 if (!strcmp(name, vfsp->vfc_name)) 116 return (vfsp); 117 } 118 return (NULL); 119 } 120 121 struct vfsconf * 122 vfs_byname(const char *name) 123 { 124 struct vfsconf *vfsp; 125 126 vfsconf_slock(); 127 vfsp = vfs_byname_locked(name); 128 vfsconf_sunlock(); 129 return (vfsp); 130 } 131 132 struct vfsconf * 133 vfs_byname_kld(const char *fstype, struct thread *td, int *error) 134 { 135 struct vfsconf *vfsp; 136 int fileid, loaded; 137 138 vfsp = vfs_byname(fstype); 139 if (vfsp != NULL) 140 return (vfsp); 141 142 /* Try to load the respective module. */ 143 *error = kern_kldload(td, fstype, &fileid); 144 loaded = (*error == 0); 145 if (*error == EEXIST) 146 *error = 0; 147 if (*error) { 148 *error = ENODEV; 149 return (NULL); 150 } 151 152 /* Look up again to see if the VFS was loaded. */ 153 vfsp = vfs_byname(fstype); 154 if (vfsp == NULL) { 155 if (loaded) 156 (void)kern_kldunload(td, fileid, LINKER_UNLOAD_FORCE); 157 *error = ENODEV; 158 return (NULL); 159 } 160 return (vfsp); 161 } 162 163 static int 164 vfs_mount_sigdefer(struct mount *mp) 165 { 166 int prev_stops, rc; 167 168 TSRAW(curthread, TS_ENTER, "VFS_MOUNT", mp->mnt_vfc->vfc_name); 169 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 170 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_mount)(mp); 171 sigallowstop(prev_stops); 172 TSRAW(curthread, TS_EXIT, "VFS_MOUNT", mp->mnt_vfc->vfc_name); 173 return (rc); 174 } 175 176 static int 177 vfs_unmount_sigdefer(struct mount *mp, int mntflags) 178 { 179 int prev_stops, rc; 180 181 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 182 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_unmount)(mp, mntflags); 183 sigallowstop(prev_stops); 184 return (rc); 185 } 186 187 static int 188 vfs_root_sigdefer(struct mount *mp, int flags, struct vnode **vpp) 189 { 190 int prev_stops, rc; 191 192 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 193 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_root)(mp, flags, vpp); 194 sigallowstop(prev_stops); 195 return (rc); 196 } 197 198 static int 199 vfs_cachedroot_sigdefer(struct mount *mp, int flags, struct vnode **vpp) 200 { 201 int prev_stops, rc; 202 203 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 204 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_cachedroot)(mp, flags, vpp); 205 sigallowstop(prev_stops); 206 return (rc); 207 } 208 209 static int 210 vfs_quotactl_sigdefer(struct mount *mp, int cmd, uid_t uid, void *arg, 211 bool *mp_busy) 212 { 213 int prev_stops, rc; 214 215 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 216 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_quotactl)(mp, cmd, uid, arg, 217 mp_busy); 218 sigallowstop(prev_stops); 219 return (rc); 220 } 221 222 static int 223 vfs_statfs_sigdefer(struct mount *mp, struct statfs *sbp) 224 { 225 int prev_stops, rc; 226 227 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 228 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_statfs)(mp, sbp); 229 sigallowstop(prev_stops); 230 return (rc); 231 } 232 233 static int 234 vfs_sync_sigdefer(struct mount *mp, int waitfor) 235 { 236 int prev_stops, rc; 237 238 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 239 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_sync)(mp, waitfor); 240 sigallowstop(prev_stops); 241 return (rc); 242 } 243 244 static int 245 vfs_vget_sigdefer(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) 246 { 247 int prev_stops, rc; 248 249 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 250 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_vget)(mp, ino, flags, vpp); 251 sigallowstop(prev_stops); 252 return (rc); 253 } 254 255 static int 256 vfs_fhtovp_sigdefer(struct mount *mp, struct fid *fidp, int flags, 257 struct vnode **vpp) 258 { 259 int prev_stops, rc; 260 261 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 262 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_fhtovp)(mp, fidp, flags, vpp); 263 sigallowstop(prev_stops); 264 return (rc); 265 } 266 267 static int 268 vfs_checkexp_sigdefer(struct mount *mp, struct sockaddr *nam, uint64_t *exflg, 269 struct ucred **credp, int *numsecflavors, int *secflavors) 270 { 271 int prev_stops, rc; 272 273 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 274 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_checkexp)(mp, nam, exflg, credp, 275 numsecflavors, secflavors); 276 sigallowstop(prev_stops); 277 return (rc); 278 } 279 280 static int 281 vfs_extattrctl_sigdefer(struct mount *mp, int cmd, struct vnode *filename_vp, 282 int attrnamespace, const char *attrname) 283 { 284 int prev_stops, rc; 285 286 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 287 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_extattrctl)(mp, cmd, 288 filename_vp, attrnamespace, attrname); 289 sigallowstop(prev_stops); 290 return (rc); 291 } 292 293 static int 294 vfs_sysctl_sigdefer(struct mount *mp, fsctlop_t op, struct sysctl_req *req) 295 { 296 int prev_stops, rc; 297 298 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 299 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_sysctl)(mp, op, req); 300 sigallowstop(prev_stops); 301 return (rc); 302 } 303 304 static void 305 vfs_susp_clean_sigdefer(struct mount *mp) 306 { 307 int prev_stops; 308 309 if (*mp->mnt_vfc->vfc_vfsops_sd->vfs_susp_clean == NULL) 310 return; 311 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 312 (*mp->mnt_vfc->vfc_vfsops_sd->vfs_susp_clean)(mp); 313 sigallowstop(prev_stops); 314 } 315 316 static void 317 vfs_reclaim_lowervp_sigdefer(struct mount *mp, struct vnode *vp) 318 { 319 int prev_stops; 320 321 if (*mp->mnt_vfc->vfc_vfsops_sd->vfs_reclaim_lowervp == NULL) 322 return; 323 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 324 (*mp->mnt_vfc->vfc_vfsops_sd->vfs_reclaim_lowervp)(mp, vp); 325 sigallowstop(prev_stops); 326 } 327 328 static void 329 vfs_unlink_lowervp_sigdefer(struct mount *mp, struct vnode *vp) 330 { 331 int prev_stops; 332 333 if (*mp->mnt_vfc->vfc_vfsops_sd->vfs_unlink_lowervp == NULL) 334 return; 335 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 336 (*(mp)->mnt_vfc->vfc_vfsops_sd->vfs_unlink_lowervp)(mp, vp); 337 sigallowstop(prev_stops); 338 } 339 340 static void 341 vfs_purge_sigdefer(struct mount *mp) 342 { 343 int prev_stops; 344 345 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 346 (*mp->mnt_vfc->vfc_vfsops_sd->vfs_purge)(mp); 347 sigallowstop(prev_stops); 348 } 349 350 static int 351 vfs_report_lockf_sigdefer(struct mount *mp, struct sbuf *sb) 352 { 353 int prev_stops, rc; 354 355 prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); 356 rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_report_lockf)(mp, sb); 357 sigallowstop(prev_stops); 358 return (rc); 359 } 360 361 static struct vfsops vfsops_sigdefer = { 362 .vfs_mount = vfs_mount_sigdefer, 363 .vfs_unmount = vfs_unmount_sigdefer, 364 .vfs_root = vfs_root_sigdefer, 365 .vfs_cachedroot = vfs_cachedroot_sigdefer, 366 .vfs_quotactl = vfs_quotactl_sigdefer, 367 .vfs_statfs = vfs_statfs_sigdefer, 368 .vfs_sync = vfs_sync_sigdefer, 369 .vfs_vget = vfs_vget_sigdefer, 370 .vfs_fhtovp = vfs_fhtovp_sigdefer, 371 .vfs_checkexp = vfs_checkexp_sigdefer, 372 .vfs_extattrctl = vfs_extattrctl_sigdefer, 373 .vfs_sysctl = vfs_sysctl_sigdefer, 374 .vfs_susp_clean = vfs_susp_clean_sigdefer, 375 .vfs_reclaim_lowervp = vfs_reclaim_lowervp_sigdefer, 376 .vfs_unlink_lowervp = vfs_unlink_lowervp_sigdefer, 377 .vfs_purge = vfs_purge_sigdefer, 378 .vfs_report_lockf = vfs_report_lockf_sigdefer, 379 }; 380 381 /* Register a new filesystem type in the global table */ 382 static int 383 vfs_register(struct vfsconf *vfc) 384 { 385 struct sysctl_oid *oidp; 386 struct vfsops *vfsops; 387 static int once; 388 struct vfsconf *tvfc; 389 uint32_t hashval; 390 int secondpass; 391 392 if (!once) { 393 vattr_null(&va_null); 394 once = 1; 395 } 396 397 if (vfc->vfc_version != VFS_VERSION) { 398 printf("ERROR: filesystem %s, unsupported ABI version %x\n", 399 vfc->vfc_name, vfc->vfc_version); 400 return (EINVAL); 401 } 402 vfsconf_lock(); 403 if (vfs_byname_locked(vfc->vfc_name) != NULL) { 404 vfsconf_unlock(); 405 return (EEXIST); 406 } 407 408 if (vfs_typenumhash != 0) { 409 /* 410 * Calculate a hash on vfc_name to use for vfc_typenum. Unless 411 * all of 1<->255 are assigned, it is limited to 8bits since 412 * that is what ZFS uses from vfc_typenum and is also the 413 * preferred range for vfs_getnewfsid(). 414 */ 415 hashval = fnv_32_str(vfc->vfc_name, FNV1_32_INIT); 416 hashval &= 0xff; 417 secondpass = 0; 418 do { 419 /* Look for and fix any collision. */ 420 TAILQ_FOREACH(tvfc, &vfsconf, vfc_list) { 421 if (hashval == tvfc->vfc_typenum) { 422 if (hashval == 255 && secondpass == 0) { 423 hashval = 1; 424 secondpass = 1; 425 } else 426 hashval++; 427 break; 428 } 429 } 430 } while (tvfc != NULL); 431 vfc->vfc_typenum = hashval; 432 if (vfc->vfc_typenum >= maxvfsconf) 433 maxvfsconf = vfc->vfc_typenum + 1; 434 } else 435 vfc->vfc_typenum = maxvfsconf++; 436 TAILQ_INSERT_TAIL(&vfsconf, vfc, vfc_list); 437 438 /* 439 * Initialise unused ``struct vfsops'' fields, to use 440 * the vfs_std*() functions. Note, we need the mount 441 * and unmount operations, at the least. The check 442 * for vfsops available is just a debugging aid. 443 */ 444 KASSERT(vfc->vfc_vfsops != NULL, 445 ("Filesystem %s has no vfsops", vfc->vfc_name)); 446 /* 447 * Check the mount and unmount operations. 448 */ 449 vfsops = vfc->vfc_vfsops; 450 KASSERT(vfsops->vfs_mount != NULL, 451 ("Filesystem %s has no mount op", vfc->vfc_name)); 452 KASSERT(vfsops->vfs_unmount != NULL, 453 ("Filesystem %s has no unmount op", vfc->vfc_name)); 454 455 if (vfsops->vfs_root == NULL) 456 /* return file system's root vnode */ 457 vfsops->vfs_root = vfs_stdroot; 458 if (vfsops->vfs_quotactl == NULL) 459 /* quota control */ 460 vfsops->vfs_quotactl = vfs_stdquotactl; 461 if (vfsops->vfs_statfs == NULL) 462 /* return file system's status */ 463 vfsops->vfs_statfs = vfs_stdstatfs; 464 if (vfsops->vfs_sync == NULL) 465 /* 466 * flush unwritten data (nosync) 467 * file systems can use vfs_stdsync 468 * explicitly by setting it in the 469 * vfsop vector. 470 */ 471 vfsops->vfs_sync = vfs_stdnosync; 472 if (vfsops->vfs_vget == NULL) 473 /* convert an inode number to a vnode */ 474 vfsops->vfs_vget = vfs_stdvget; 475 if (vfsops->vfs_fhtovp == NULL) 476 /* turn an NFS file handle into a vnode */ 477 vfsops->vfs_fhtovp = vfs_stdfhtovp; 478 if (vfsops->vfs_checkexp == NULL) 479 /* check if file system is exported */ 480 vfsops->vfs_checkexp = vfs_stdcheckexp; 481 if (vfsops->vfs_init == NULL) 482 /* file system specific initialisation */ 483 vfsops->vfs_init = vfs_stdinit; 484 if (vfsops->vfs_uninit == NULL) 485 /* file system specific uninitialisation */ 486 vfsops->vfs_uninit = vfs_stduninit; 487 if (vfsops->vfs_extattrctl == NULL) 488 /* extended attribute control */ 489 vfsops->vfs_extattrctl = vfs_stdextattrctl; 490 if (vfsops->vfs_sysctl == NULL) 491 vfsops->vfs_sysctl = vfs_stdsysctl; 492 if (vfsops->vfs_report_lockf == NULL) 493 vfsops->vfs_report_lockf = vfs_report_lockf; 494 495 if ((vfc->vfc_flags & VFCF_SBDRY) != 0) { 496 vfc->vfc_vfsops_sd = vfc->vfc_vfsops; 497 vfc->vfc_vfsops = &vfsops_sigdefer; 498 } 499 500 if (vfc->vfc_flags & VFCF_JAIL) 501 prison_add_vfs(vfc); 502 503 /* 504 * Call init function for this VFS... 505 */ 506 if ((vfc->vfc_flags & VFCF_SBDRY) != 0) 507 vfc->vfc_vfsops_sd->vfs_init(vfc); 508 else 509 vfc->vfc_vfsops->vfs_init(vfc); 510 vfsconf_unlock(); 511 512 /* 513 * If this filesystem has a sysctl node under vfs 514 * (i.e. vfs.xxfs), then change the oid number of that node to 515 * match the filesystem's type number. This allows user code 516 * which uses the type number to read sysctl variables defined 517 * by the filesystem to continue working. Since the oids are 518 * in a sorted list, we need to make sure the order is 519 * preserved by re-registering the oid after modifying its 520 * number. 521 */ 522 sysctl_wlock(); 523 RB_FOREACH(oidp, sysctl_oid_list, SYSCTL_CHILDREN(&sysctl___vfs)) { 524 if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) { 525 sysctl_unregister_oid(oidp); 526 oidp->oid_number = vfc->vfc_typenum; 527 sysctl_register_oid(oidp); 528 break; 529 } 530 } 531 sysctl_wunlock(); 532 533 return (0); 534 } 535 536 /* Remove registration of a filesystem type */ 537 static int 538 vfs_unregister(struct vfsconf *vfc) 539 { 540 struct vfsconf *vfsp; 541 int error, maxtypenum; 542 543 vfsconf_lock(); 544 vfsp = vfs_byname_locked(vfc->vfc_name); 545 if (vfsp == NULL) { 546 vfsconf_unlock(); 547 return (EINVAL); 548 } 549 if (vfsp->vfc_refcount != 0) { 550 vfsconf_unlock(); 551 return (EBUSY); 552 } 553 error = 0; 554 if ((vfc->vfc_flags & VFCF_SBDRY) != 0) { 555 if (vfc->vfc_vfsops_sd->vfs_uninit != NULL) 556 error = vfc->vfc_vfsops_sd->vfs_uninit(vfsp); 557 } else { 558 if (vfc->vfc_vfsops->vfs_uninit != NULL) 559 error = vfc->vfc_vfsops->vfs_uninit(vfsp); 560 } 561 if (error != 0) { 562 vfsconf_unlock(); 563 return (error); 564 } 565 TAILQ_REMOVE(&vfsconf, vfsp, vfc_list); 566 maxtypenum = VFS_GENERIC; 567 TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) 568 if (maxtypenum < vfsp->vfc_typenum) 569 maxtypenum = vfsp->vfc_typenum; 570 maxvfsconf = maxtypenum + 1; 571 vfsconf_unlock(); 572 return (0); 573 } 574 575 /* 576 * Standard kernel module handling code for filesystem modules. 577 * Referenced from VFS_SET(). 578 */ 579 int 580 vfs_modevent(module_t mod, int type, void *data) 581 { 582 struct vfsconf *vfc; 583 int error = 0; 584 585 vfc = (struct vfsconf *)data; 586 587 switch (type) { 588 case MOD_LOAD: 589 if (vfc) 590 error = vfs_register(vfc); 591 break; 592 593 case MOD_UNLOAD: 594 if (vfc) 595 error = vfs_unregister(vfc); 596 break; 597 default: 598 error = EOPNOTSUPP; 599 break; 600 } 601 return (error); 602 } 603