/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_init.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * The highest defined VFS number.
 */
int maxvfsconf = VFS_GENERIC + 1;

/*
 * Single-linked list of configured VFSes.
 * New entries are added/deleted by vfs_register()/vfs_unregister().
 */
struct vfsconf *vfsconf;

/*
 * vfs_init.c
 *
 * Allocate and fill in operations vectors.
 *
 * An undocumented feature of this approach to defining operations is that
 * there can be multiple entries in vfs_opv_descs for the same operations
 * vector.  This allows third parties to extend the set of operations
 * supported by another layer in a binary compatible way.  For example,
 * assume that NFS needed to be modified to support Ficus.  NFS has an entry
 * (probably nfs_vnodeop_decls) declaring all the operations NFS supports by
 * default.  Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions)
 * listing those new operations Ficus adds to NFS, all without modifying the
 * NFS code.  (Of course, the OTW NFS protocol still needs to be munged, but
 * that is a(whole)nother story.)  This is a feature.
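 *
 * As an illustrative sketch (all names below are hypothetical, not
 * identifiers that exist in the tree), the two entries might look like:
 *
 *	static struct vnodeopv_entry_desc ficus_nfs_vnodeop_entries[] = {
 *		{ &vop_ficus_op_desc, (vop_t *) ficus_nfs_ficus_op },
 *		{ NULL, NULL }
 *	};
 *	static struct vnodeopv_desc ficus_nfs_vnodeop_opv_desc =
 *		{ &nfs_vnodeop_p, ficus_nfs_vnodeop_entries };
 *
 * Because this second entry names the same vector pointer (nfs_vnodeop_p)
 * as NFS's own vnodeopv_desc, vfs_add_vnodeops()/vfs_opv_recalc() below
 * fill both sets of operations into the one vector.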
 */

/* Table of known vnodeop vectors (list of VFS vnode vectors) */
static const struct vnodeopv_desc **vnodeopv_descs;
static int vnodeopv_num;

/* Table of known descs (list of vnode op handlers "vop_access_desc") */
static struct vnodeop_desc **vfs_op_descs;
/* Reference counts for vfs_op_descs */
static int *vfs_op_desc_refs;
/* Number of descriptions */
static int num_op_descs;
/* Number of entries in each description */
static int vfs_opv_numops = 64;

/* Allow this number to be tuned at boot */
TUNABLE_INT("vfs.opv_numops", &vfs_opv_numops);
SYSCTL_INT(_vfs, OID_AUTO, opv_numops, CTLFLAG_RD, &vfs_opv_numops,
	0, "Maximum number of operations in vop_t vector");

static int int_cmp(const void *a, const void *b);

static int
int_cmp(const void *a, const void *b)
{
	return (*(const int *)a - *(const int *)b);
}

/*
 * Recalculate the operations vector/description (those parts of it that
 * can be recalculated, that is).
 * Always allocate the operations vector large enough to hold vfs_opv_numops
 * entries.  The vector is never freed or deallocated once it is initialized,
 * so that vnodes can safely reference it through their v_op pointer without
 * the vector changing suddenly from under them.
 */
static void
vfs_opv_recalc(void)
{
	int i, j, k;
	int *vfs_op_offsets;
	vop_t ***opv_desc_vector_p;
	vop_t **opv_desc_vector;
	struct vnodeopv_entry_desc *opve_descp;
	const struct vnodeopv_desc *opv;

	if (vfs_op_descs == NULL)
		panic("vfs_opv_recalc called with null vfs_op_descs");

	/*
	 * Allocate and initialize a temporary array to store the
	 * offsets.  Sort it to put all uninitialized entries
	 * first and to make holes in the existing offset sequence
	 * detectable.
	 */
	MALLOC(vfs_op_offsets, int *,
		num_op_descs * sizeof(int), M_TEMP, M_WAITOK);
	if (vfs_op_offsets == NULL)
		panic("vfs_opv_recalc: no memory");
	for (i = 0; i < num_op_descs; i++)
		vfs_op_offsets[i] = vfs_op_descs[i]->vdesc_offset;
	qsort(vfs_op_offsets, num_op_descs, sizeof(int), int_cmp);

	/*
	 * Run through and make sure all known descs have an offset.
	 * Use vfs_op_offsets to locate holes in the offset sequence and
	 * reuse them.
	 * vop_default_desc is hardwired at offset 1, and offset 0
	 * is a panic sanity check.
	 */
	j = 1; k = 1;
	for (i = 0; i < num_op_descs; i++) {
		if (vfs_op_descs[i]->vdesc_offset != 0)
			continue;
		/*
		 * Look at the two adjacent entries vfs_op_offsets[j - 1] and
		 * vfs_op_offsets[j] and see if we can fit a new offset
		 * number in between.  If not, look at the next pair until
		 * a hole is found or the end of the vfs_op_offsets vector is
		 * reached.  j has been initialized to 1 above so that
		 * referencing the (j-1)-th element is safe and the loop will
		 * never execute if num_op_descs is 1.  For each new value
		 * of i the j loop picks up where the previous iteration
		 * left off.  When the last hole has been consumed or if no
		 * hole has been found, we will start allocating new numbers
		 * starting from the biggest already available offset + 1.
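		 *
		 * As a worked example (hypothetical numbers): with the
		 * sorted offsets { 0, 0, 1, 2, 4, 5 }, the two unassigned
		 * descs (the zeroes) receive offset 3 (the hole between
		 * 2 and 4) and then offset 6 (one past the largest offset
		 * already in use).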
		 */
		for (; j < num_op_descs; j++) {
			if (vfs_op_offsets[j - 1] < k && vfs_op_offsets[j] > k)
				break;
			k = vfs_op_offsets[j] + 1;
		}
		vfs_op_descs[i]->vdesc_offset = k++;
	}
	FREE(vfs_op_offsets, M_TEMP);

	/* Panic if new vops will cause vector overflow */
	if (k > vfs_opv_numops)
		panic("VFS: Ran out of vop_t vector entries. %d entries required, only %d available.\n", k, vfs_opv_numops);

	/*
	 * Allocate and fill in the vectors.
	 */
	for (i = 0; i < vnodeopv_num; i++) {
		opv = vnodeopv_descs[i];
		opv_desc_vector_p = opv->opv_desc_vector_p;
		if (*opv_desc_vector_p == NULL)
			MALLOC(*opv_desc_vector_p, vop_t **,
			    vfs_opv_numops * sizeof(vop_t *), M_VNODE,
			    M_WAITOK | M_ZERO);

		/* Fill in, with slot 0 being to return EOPNOTSUPP */
		opv_desc_vector = *opv_desc_vector_p;
		opv_desc_vector[0] = (vop_t *)vop_eopnotsupp;
		for (j = 0; opv->opv_desc_ops[j].opve_op; j++) {
			opve_descp = &(opv->opv_desc_ops[j]);
			opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
			    opve_descp->opve_impl;
		}

		/* Replace unfilled routines with their default (slot 1). */
		opv_desc_vector = *(opv->opv_desc_vector_p);
		if (opv_desc_vector[1] == NULL)
			panic("vfs_opv_recalc: vector without a default.");
		for (j = 0; j < vfs_opv_numops; j++)
			if (opv_desc_vector[j] == NULL)
				opv_desc_vector[j] = opv_desc_vector[1];
	}
}

/* Add a set of vnode operations (a description) to the table above. */
void
vfs_add_vnodeops(const void *data)
{
	const struct vnodeopv_desc *opv;
	const struct vnodeopv_desc **newopv;
	struct vnodeop_desc **newop;
	int *newref;
	vop_t **opv_desc_vector;
	struct vnodeop_desc *desc;
	int i, j;

	opv = (const struct vnodeopv_desc *)data;
	MALLOC(newopv, const struct vnodeopv_desc **,
	    (vnodeopv_num + 1) * sizeof(*newopv), M_VNODE, M_WAITOK);
	if (vnodeopv_descs) {
		bcopy(vnodeopv_descs, newopv, vnodeopv_num * sizeof(*newopv));
		FREE(vnodeopv_descs, M_VNODE);
	}
	newopv[vnodeopv_num] = opv;
	vnodeopv_descs = newopv;
	vnodeopv_num++;

	/* See if we have turned up a new vnode op desc */
	opv_desc_vector = *(opv->opv_desc_vector_p);
	for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) {
		for (j = 0; j < num_op_descs; j++) {
			if (desc == vfs_op_descs[j]) {
				/* found it, increase reference count */
				vfs_op_desc_refs[j]++;
				break;
			}
		}
		if (j == num_op_descs) {
			/* not found, new entry */
			MALLOC(newop, struct vnodeop_desc **,
			    (num_op_descs + 1) * sizeof(*newop),
			    M_VNODE, M_WAITOK);
			/* new reference count (for unload) */
			MALLOC(newref, int *,
			    (num_op_descs + 1) * sizeof(*newref),
			    M_VNODE, M_WAITOK);
			if (vfs_op_descs) {
				bcopy(vfs_op_descs, newop,
				    num_op_descs * sizeof(*newop));
				FREE(vfs_op_descs, M_VNODE);
			}
			if (vfs_op_desc_refs) {
				bcopy(vfs_op_desc_refs, newref,
				    num_op_descs * sizeof(*newref));
				FREE(vfs_op_desc_refs, M_VNODE);
			}
			newop[num_op_descs] = desc;
			newref[num_op_descs] = 1;
			vfs_op_descs = newop;
			vfs_op_desc_refs = newref;
			num_op_descs++;
		}
	}
	vfs_opv_recalc();
}

/* Remove a vnode type from the vnode description table above. */
void
vfs_rm_vnodeops(const void *data)
{
	const struct vnodeopv_desc *opv;
	const struct vnodeopv_desc **newopv;
	struct vnodeop_desc **newop;
	int *newref;
	vop_t **opv_desc_vector;
	struct vnodeop_desc *desc;
	int i, j, k;

	opv = (const struct vnodeopv_desc *)data;
	/* Lower ref counts on descs in the table and release if zero */
	for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) {
		for (j = 0; j < num_op_descs; j++) {
			if (desc == vfs_op_descs[j]) {
				/* found it, decrease reference count */
				vfs_op_desc_refs[j]--;
				break;
			}
		}
		for (j = 0; j < num_op_descs; j++) {
			if (vfs_op_desc_refs[j] > 0)
				continue;
			if (vfs_op_desc_refs[j] < 0)
				panic("vfs_remove_vnodeops: negative refcnt");
			/* Entry is going away - replace it with defaultop */
			for (k = 0; k < vnodeopv_num; k++) {
				opv_desc_vector =
				    *(vnodeopv_descs[k]->opv_desc_vector_p);
				if (opv_desc_vector != NULL)
					opv_desc_vector[desc->vdesc_offset] =
					    opv_desc_vector[1];
			}
			MALLOC(newop, struct vnodeop_desc **,
			    (num_op_descs - 1) * sizeof(*newop),
			    M_VNODE, M_WAITOK);
			/* new reference count (for unload) */
			MALLOC(newref, int *,
			    (num_op_descs - 1) * sizeof(*newref),
			    M_VNODE, M_WAITOK);
			for (k = j; k < (num_op_descs - 1); k++) {
				vfs_op_descs[k] = vfs_op_descs[k + 1];
				vfs_op_desc_refs[k] = vfs_op_desc_refs[k + 1];
			}
			bcopy(vfs_op_descs, newop,
			    (num_op_descs - 1) * sizeof(*newop));
			bcopy(vfs_op_desc_refs, newref,
			    (num_op_descs - 1) * sizeof(*newref));
			FREE(vfs_op_descs, M_VNODE);
			FREE(vfs_op_desc_refs, M_VNODE);
			vfs_op_descs = newop;
			vfs_op_desc_refs = newref;
			num_op_descs--;
		}
	}

	for (i = 0; i < vnodeopv_num; i++) {
		if (vnodeopv_descs[i] == opv) {
			for (j = i; j < (vnodeopv_num - 1); j++)
				vnodeopv_descs[j] = vnodeopv_descs[j + 1];
			break;
		}
	}
	if (i == vnodeopv_num)
		panic("vfs_remove_vnodeops: opv not found");
	opv_desc_vector = *(opv->opv_desc_vector_p);
	if (opv_desc_vector != NULL)
		FREE(opv_desc_vector, M_VNODE);
	MALLOC(newopv, const struct vnodeopv_desc **,
	    (vnodeopv_num - 1) * sizeof(*newopv), M_VNODE, M_WAITOK);
	bcopy(vnodeopv_descs, newopv, (vnodeopv_num - 1) * sizeof(*newopv));
	FREE(vnodeopv_descs, M_VNODE);
	vnodeopv_descs = newopv;
	vnodeopv_num--;

	vfs_opv_recalc();
}

/*
 * Routines having to do with the management of the vnode table.
 */
struct vattr va_null;

/*
 * Initialize the vnode structures and initialize each filesystem type.
 */
/* ARGSUSED*/
static void
vfsinit(void *dummy)
{

	vattr_null(&va_null);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vfsinit, NULL)

/* Register a new filesystem type in the global table */
int
vfs_register(struct vfsconf *vfc)
{
	struct sysctl_oid *oidp;
	struct vfsconf *vfsp;
	struct vfsops *vfsops;

	vfsp = NULL;
	if (vfsconf)
		for (vfsp = vfsconf; vfsp->vfc_next; vfsp = vfsp->vfc_next)
			if (strcmp(vfc->vfc_name, vfsp->vfc_name) == 0)
				return EEXIST;

	vfc->vfc_typenum = maxvfsconf++;
	if (vfsp)
		vfsp->vfc_next = vfc;
	else
		vfsconf = vfc;
	vfc->vfc_next = NULL;

	/*
	 * If this filesystem has a sysctl node under vfs
	 * (i.e. vfs.xxfs), then change the oid number of that node to
	 * match the filesystem's type number.
	 * This allows user code
	 * which uses the type number to read sysctl variables defined
	 * by the filesystem to continue working.  Since the oids are
	 * in a sorted list, we need to make sure the order is
	 * preserved by re-registering the oid after modifying its
	 * number.
	 */
	SLIST_FOREACH(oidp, &sysctl__vfs_children, oid_link)
		if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) {
			sysctl_unregister_oid(oidp);
			oidp->oid_number = vfc->vfc_typenum;
			sysctl_register_oid(oidp);
		}

	/*
	 * Initialise unused ``struct vfsops'' fields, to use
	 * the vfs_std*() functions.  Note, we need the mount
	 * and unmount operations, at the least.  The check
	 * for vfsops available is just a debugging aid.
	 */
	KASSERT(vfc->vfc_vfsops != NULL,
	    ("Filesystem %s has no vfsops", vfc->vfc_name));
	/*
	 * Check the mount and unmount operations.
	 */
	vfsops = vfc->vfc_vfsops;
	KASSERT(vfsops->vfs_mount != NULL || vfsops->vfs_nmount != NULL,
	    ("Filesystem %s has no (n)mount op", vfc->vfc_name));
	KASSERT(vfsops->vfs_unmount != NULL,
	    ("Filesystem %s has no unmount op", vfc->vfc_name));

	if (vfsops->vfs_start == NULL)
		/* make a file system operational */
		vfsops->vfs_start = vfs_stdstart;
	if (vfsops->vfs_root == NULL)
		/* return file system's root vnode */
		vfsops->vfs_root = vfs_stdroot;
	if (vfsops->vfs_quotactl == NULL)
		/* quota control */
		vfsops->vfs_quotactl = vfs_stdquotactl;
	if (vfsops->vfs_statfs == NULL)
		/* return file system's status */
		vfsops->vfs_statfs = vfs_stdstatfs;
	if (vfsops->vfs_sync == NULL)
		/*
		 * flush unwritten data (nosync);
		 * file systems can use vfs_stdsync
		 * explicitly by setting it in the
		 * vfsop vector.
		 */
		vfsops->vfs_sync = vfs_stdnosync;
	if (vfsops->vfs_vget == NULL)
		/* convert an inode number to a vnode */
		vfsops->vfs_vget = vfs_stdvget;
	if (vfsops->vfs_fhtovp == NULL)
		/* turn an NFS file handle into a vnode */
		vfsops->vfs_fhtovp = vfs_stdfhtovp;
	if (vfsops->vfs_checkexp == NULL)
		/* check if file system is exported */
		vfsops->vfs_checkexp = vfs_stdcheckexp;
	if (vfsops->vfs_vptofh == NULL)
		/* turn a vnode into an NFS file handle */
		vfsops->vfs_vptofh = vfs_stdvptofh;
	if (vfsops->vfs_init == NULL)
		/* file system specific initialisation */
		vfsops->vfs_init = vfs_stdinit;
	if (vfsops->vfs_uninit == NULL)
		/* file system specific uninitialisation */
		vfsops->vfs_uninit = vfs_stduninit;
	if (vfsops->vfs_extattrctl == NULL)
		/* extended attribute control */
		vfsops->vfs_extattrctl = vfs_stdextattrctl;

	/*
	 * Call init function for this VFS...
470 */ 471 (*(vfc->vfc_vfsops->vfs_init))(vfc); 472 473 return 0; 474 } 475 476 477 /* Remove registration of a filesystem type */ 478 int 479 vfs_unregister(struct vfsconf *vfc) 480 { 481 struct vfsconf *vfsp, *prev_vfsp; 482 int error, i, maxtypenum; 483 484 i = vfc->vfc_typenum; 485 486 prev_vfsp = NULL; 487 for (vfsp = vfsconf; vfsp; 488 prev_vfsp = vfsp, vfsp = vfsp->vfc_next) { 489 if (!strcmp(vfc->vfc_name, vfsp->vfc_name)) 490 break; 491 } 492 if (vfsp == NULL) 493 return EINVAL; 494 if (vfsp->vfc_refcount) 495 return EBUSY; 496 if (vfc->vfc_vfsops->vfs_uninit != NULL) { 497 error = (*vfc->vfc_vfsops->vfs_uninit)(vfsp); 498 if (error) 499 return (error); 500 } 501 if (prev_vfsp) 502 prev_vfsp->vfc_next = vfsp->vfc_next; 503 else 504 vfsconf = vfsp->vfc_next; 505 maxtypenum = VFS_GENERIC; 506 for (vfsp = vfsconf; vfsp != NULL; vfsp = vfsp->vfc_next) 507 if (maxtypenum < vfsp->vfc_typenum) 508 maxtypenum = vfsp->vfc_typenum; 509 maxvfsconf = maxtypenum + 1; 510 return 0; 511 } 512 513 /* 514 * Standard kernel module handling code for filesystem modules. 515 * Referenced from VFS_SET(). 516 */ 517 int 518 vfs_modevent(module_t mod, int type, void *data) 519 { 520 struct vfsconf *vfc; 521 int error = 0; 522 523 vfc = (struct vfsconf *)data; 524 525 switch (type) { 526 case MOD_LOAD: 527 if (vfc) 528 error = vfs_register(vfc); 529 break; 530 531 case MOD_UNLOAD: 532 if (vfc) 533 error = vfs_unregister(vfc); 534 break; 535 default: /* including MOD_SHUTDOWN */ 536 break; 537 } 538 return (error); 539 } 540