/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_init.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * The highest defined VFS number.
 */
int maxvfsconf = VFS_GENERIC + 1;

/*
 * Singly-linked list of configured VFSes.
 * New entries are added and deleted by vfs_register()/vfs_unregister().
 */
struct vfsconf *vfsconf;

/*
 * vfs_init.c
 *
 * Allocate and fill in operations vectors.
 *
 * An undocumented feature of this approach to defining operations is that
 * there can be multiple entries in vfs_opv_descs for the same operations
 * vector.  This allows third parties to extend the set of operations
 * supported by another layer in a binary compatible way.  For example,
 * assume that NFS needed to be modified to support Ficus.  NFS has an entry
 * (probably nfs_vnodeop_decls) declaring all the operations NFS supports by
 * default.  Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions)
 * listing those new operations Ficus adds to NFS, all without modifying the
 * NFS code.  (Of course, the OTW NFS protocol still needs to be munged, but
 * that is a(whole)nother story.)  This is a feature.
 */
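
/*
 * Illustrative sketch of the extension mechanism described above.  The names
 * below are hypothetical (they are not declared in this file); the sketch
 * assumes the usual struct vnodeopv_desc layout from <sys/vnode.h>, i.e. a
 * pointer to the vector pointer followed by a NULL-terminated entry table.
 * A third party could append operations to an existing vector by registering
 * a second descriptor that names the same vector pointer:
 *
 *	static struct vnodeopv_entry_desc ficus_nfs_extension_entries[] = {
 *		{ &vop_whiteout_desc, (vop_t *) ficus_nfs_whiteout },
 *		{ NULL, NULL }
 *	};
 *	static struct vnodeopv_desc ficus_nfs_extension_opv_desc =
 *		{ &nfs_vnodeop_p, ficus_nfs_extension_entries };
 *
 * Handing &ficus_nfs_extension_opv_desc to vfs_add_vnodeops() would merge
 * the extra entries into the vector already built for nfs_vnodeop_p, without
 * touching the NFS descriptor itself.
 */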

/* Table of known vnodeop vectors (list of VFS vnode vectors) */
static const struct vnodeopv_desc **vnodeopv_descs;
static int vnodeopv_num;

/* Table of known descs (list of vnode op handlers "vop_access_desc") */
static struct vnodeop_desc **vfs_op_descs;
/* Reference counts for vfs_op_descs */
static int *vfs_op_desc_refs;
/* Number of descriptions */
static int num_op_descs;
/* Number of entries in each description */
static int vfs_opv_numops = 64;

/* Allow this number to be tuned at boot */
TUNABLE_INT("vfs.opv_numops", &vfs_opv_numops);
SYSCTL_INT(_vfs, OID_AUTO, opv_numops, CTLFLAG_RDTUN, &vfs_opv_numops,
	0, "Maximum number of operations in vop_t vector");

static int int_cmp(const void *a, const void *b);

static int
int_cmp(const void *a, const void *b)
{
	return (*(const int *)a - *(const int *)b);
}

/*
 * Recalculate the operations vector/description (those parts of it that can
 * be recalculated, that is).
 * Always allocate the operations vector large enough to hold vfs_opv_numops
 * entries.  The vector is never freed or deallocated once it is initialized,
 * so that vnodes may safely reference it through their v_op pointer without
 * the vector changing suddenly from under them.
 */
static void
vfs_opv_recalc(void)
{
	int i, j, k;
	int *vfs_op_offsets;
	vop_t ***opv_desc_vector_p;
	vop_t **opv_desc_vector;
	struct vnodeopv_entry_desc *opve_descp;
	const struct vnodeopv_desc *opv;

	if (vfs_op_descs == NULL)
		panic("vfs_opv_recalc called with null vfs_op_descs");

	/*
	 * Allocate and initialize a temporary array to store the
	 * offsets.  Sort it to put all uninitialized entries
	 * first and to make holes in the existing offset sequence
	 * detectable.
	 */
	MALLOC(vfs_op_offsets, int *,
	    num_op_descs * sizeof(int), M_TEMP, M_WAITOK);
	if (vfs_op_offsets == NULL)
		panic("vfs_opv_recalc: no memory");
	for (i = 0; i < num_op_descs; i++)
		vfs_op_offsets[i] = vfs_op_descs[i]->vdesc_offset;
	qsort(vfs_op_offsets, num_op_descs, sizeof(int), int_cmp);

	/*
	 * Run through and make sure all known descs have an offset.
	 * Use vfs_op_offsets to locate holes in the offset sequence and
	 * reuse them.
	 * vop_default_desc is hardwired at offset 1, and offset 0
	 * is a panic sanity check.
	 */
	j = 1; k = 1;
	for (i = 0; i < num_op_descs; i++) {
		if (vfs_op_descs[i]->vdesc_offset != 0)
			continue;
		/*
		 * Look at the two adjacent entries vfs_op_offsets[j - 1] and
		 * vfs_op_offsets[j] and see if we can fit a new offset
		 * number in between.  If not, look at the next pair until
		 * a hole is found or the end of the vfs_op_offsets vector is
		 * reached.  j has been initialized to 1 above so that
		 * referencing the (j-1)-th element is safe and the loop will
		 * never execute if num_op_descs is 1.  For each new value of
		 * i the j loop picks up from where the previous iteration
		 * left off.  When the last hole has been consumed or if no
		 * hole has been found, we will start allocating new numbers
		 * starting from the biggest already available offset + 1.
		 */
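		/*
		 * Worked example (illustrative values only): with
		 * num_op_descs = 6 and sorted offsets { 0, 0, 1, 2, 4, 5 },
		 * there are two unassigned descs.  For the first one the
		 * scan below advances j until vfs_op_offsets[3] = 2 < k and
		 * vfs_op_offsets[4] = 4 > k hold with k = 3, so the hole at
		 * 3 is reused.  For the second one no further hole exists:
		 * j runs off the end with k = 5 + 1, and the desc is
		 * assigned 6, i.e. the biggest existing offset plus one.
		 */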
		for (; j < num_op_descs; j++) {
			if (vfs_op_offsets[j - 1] < k && vfs_op_offsets[j] > k)
				break;
			k = vfs_op_offsets[j] + 1;
		}
		vfs_op_descs[i]->vdesc_offset = k++;
	}
	FREE(vfs_op_offsets, M_TEMP);

	/* Panic if new vops will cause vector overflow */
	if (k > vfs_opv_numops)
		panic("VFS: Ran out of vop_t vector entries. %d entries required, only %d available.\n", k, vfs_opv_numops);

	/*
	 * Allocate and fill in the vectors
	 */
	for (i = 0; i < vnodeopv_num; i++) {
		opv = vnodeopv_descs[i];
		opv_desc_vector_p = opv->opv_desc_vector_p;
		if (*opv_desc_vector_p == NULL)
			MALLOC(*opv_desc_vector_p, vop_t **,
			    vfs_opv_numops * sizeof(vop_t *), M_VNODE,
			    M_WAITOK | M_ZERO);

		/* Fill in, with slot 0 being to return EOPNOTSUPP */
		opv_desc_vector = *opv_desc_vector_p;
		opv_desc_vector[0] = (vop_t *)vop_eopnotsupp;
		for (j = 0; opv->opv_desc_ops[j].opve_op; j++) {
			opve_descp = &(opv->opv_desc_ops[j]);
			opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
			    opve_descp->opve_impl;
		}

		/* Replace unfilled routines with their default (slot 1). */
		opv_desc_vector = *(opv->opv_desc_vector_p);
		if (opv_desc_vector[1] == NULL)
			panic("vfs_opv_recalc: vector without a default.");
		for (j = 0; j < vfs_opv_numops; j++)
			if (opv_desc_vector[j] == NULL)
				opv_desc_vector[j] = opv_desc_vector[1];
	}
}

/* Add a set of vnode operations (a description) to the table above. */
void
vfs_add_vnodeops(const void *data)
{
	const struct vnodeopv_desc *opv;
	const struct vnodeopv_desc **newopv;
	struct vnodeop_desc **newop;
	int *newref;
	struct vnodeop_desc *desc;
	int i, j;

	opv = (const struct vnodeopv_desc *)data;
	MALLOC(newopv, const struct vnodeopv_desc **,
	    (vnodeopv_num + 1) * sizeof(*newopv), M_VNODE, M_WAITOK);
	if (vnodeopv_descs) {
		bcopy(vnodeopv_descs, newopv, vnodeopv_num * sizeof(*newopv));
		FREE(vnodeopv_descs, M_VNODE);
	}
	newopv[vnodeopv_num] = opv;
	vnodeopv_descs = newopv;
	vnodeopv_num++;

	/* See if we have turned up a new vnode op desc */
	for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) {
		for (j = 0; j < num_op_descs; j++) {
			if (desc == vfs_op_descs[j]) {
				/* found it, increase reference count */
				vfs_op_desc_refs[j]++;
				break;
			}
		}
		if (j == num_op_descs) {
			/* not found, new entry */
			MALLOC(newop, struct vnodeop_desc **,
			    (num_op_descs + 1) * sizeof(*newop),
			    M_VNODE, M_WAITOK);
			/* new reference count (for unload) */
			MALLOC(newref, int *,
			    (num_op_descs + 1) * sizeof(*newref),
			    M_VNODE, M_WAITOK);
			if (vfs_op_descs) {
				bcopy(vfs_op_descs, newop,
				    num_op_descs * sizeof(*newop));
				FREE(vfs_op_descs, M_VNODE);
			}
			if (vfs_op_desc_refs) {
				bcopy(vfs_op_desc_refs, newref,
				    num_op_descs * sizeof(*newref));
				FREE(vfs_op_desc_refs, M_VNODE);
			}
			newop[num_op_descs] = desc;
			newref[num_op_descs] = 1;
			vfs_op_descs = newop;
			vfs_op_desc_refs = newref;
			num_op_descs++;
		}
	}
	vfs_opv_recalc();
}
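
/*
 * Illustrative sketch of how a filesystem normally reaches vfs_add_vnodeops().
 * The myfs_* names are hypothetical; the sketch assumes the usual structure
 * and macro declarations in <sys/vnode.h>.  A filesystem builds a
 * NULL-terminated entry table plus a vnodeopv_desc for it, and the
 * VNODEOP_SET() macro arranges for the descriptor to be added at load time
 * (and removed again via vfs_rm_vnodeops() below on unload):
 *
 *	vop_t **myfs_vnodeop_p;
 *	static struct vnodeopv_entry_desc myfs_vnodeop_entries[] = {
 *		{ &vop_default_desc, (vop_t *) vop_defaultop },
 *		{ &vop_lookup_desc, (vop_t *) myfs_lookup },
 *		{ NULL, NULL }
 *	};
 *	static struct vnodeopv_desc myfs_vnodeop_opv_desc =
 *		{ &myfs_vnodeop_p, myfs_vnodeop_entries };
 *	VNODEOP_SET(myfs_vnodeop_opv_desc);
 *
 * The vop_default_desc entry matters: vfs_opv_recalc() panics on a vector
 * whose default slot (slot 1) is left empty.
 */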
/* Remove a vnode type from the vnode description table above. */
void
vfs_rm_vnodeops(const void *data)
{
	const struct vnodeopv_desc *opv;
	const struct vnodeopv_desc **newopv;
	struct vnodeop_desc **newop;
	int *newref;
	vop_t **opv_desc_vector;
	struct vnodeop_desc *desc;
	int i, j, k;

	opv = (const struct vnodeopv_desc *)data;
	/* Lower ref counts on descs in the table and release if zero */
	for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) {
		for (j = 0; j < num_op_descs; j++) {
			if (desc == vfs_op_descs[j]) {
				/* found it, decrease reference count */
				vfs_op_desc_refs[j]--;
				break;
			}
		}
		for (j = 0; j < num_op_descs; j++) {
			if (vfs_op_desc_refs[j] > 0)
				continue;
			if (vfs_op_desc_refs[j] < 0)
				panic("vfs_remove_vnodeops: negative refcnt");
			/* Entry is going away - replace it with defaultop */
			for (k = 0; k < vnodeopv_num; k++) {
				opv_desc_vector =
				    *(vnodeopv_descs[k]->opv_desc_vector_p);
				if (opv_desc_vector != NULL)
					opv_desc_vector[desc->vdesc_offset] =
					    opv_desc_vector[1];
			}
			MALLOC(newop, struct vnodeop_desc **,
			    (num_op_descs - 1) * sizeof(*newop),
			    M_VNODE, M_WAITOK);
			/* new reference count (for unload) */
			MALLOC(newref, int *,
			    (num_op_descs - 1) * sizeof(*newref),
			    M_VNODE, M_WAITOK);
			for (k = j; k < (num_op_descs - 1); k++) {
				vfs_op_descs[k] = vfs_op_descs[k + 1];
				vfs_op_desc_refs[k] = vfs_op_desc_refs[k + 1];
			}
			bcopy(vfs_op_descs, newop,
			    (num_op_descs - 1) * sizeof(*newop));
			bcopy(vfs_op_desc_refs, newref,
			    (num_op_descs - 1) * sizeof(*newref));
			FREE(vfs_op_descs, M_VNODE);
			FREE(vfs_op_desc_refs, M_VNODE);
			vfs_op_descs = newop;
			vfs_op_desc_refs = newref;
			num_op_descs--;
		}
	}

	for (i = 0; i < vnodeopv_num; i++) {
		if (vnodeopv_descs[i] == opv) {
			for (j = i; j < (vnodeopv_num - 1); j++)
				vnodeopv_descs[j] = vnodeopv_descs[j + 1];
			break;
		}
	}
	if (i == vnodeopv_num)
		panic("vfs_remove_vnodeops: opv not found");
	opv_desc_vector = *(opv->opv_desc_vector_p);
	if (opv_desc_vector != NULL)
		FREE(opv_desc_vector, M_VNODE);
	MALLOC(newopv, const struct vnodeopv_desc **,
	    (vnodeopv_num - 1) * sizeof(*newopv), M_VNODE, M_WAITOK);
	bcopy(vnodeopv_descs, newopv, (vnodeopv_num - 1) * sizeof(*newopv));
	FREE(vnodeopv_descs, M_VNODE);
	vnodeopv_descs = newopv;
	vnodeopv_num--;

	vfs_opv_recalc();
}

/*
 * Routines having to do with the management of the vnode table.
 */
struct vattr va_null;

/*
 * Initialize the vnode structures and initialize each filesystem type.
 */
/* ARGSUSED */
static void
vfsinit(void *dummy)
{

	vattr_null(&va_null);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vfsinit, NULL)

/* Register a new filesystem type in the global table */
int
vfs_register(struct vfsconf *vfc)
{
	struct sysctl_oid *oidp;
	struct vfsconf *vfsp;
	struct vfsops *vfsops;

	vfsp = NULL;
	if (vfsconf)
		for (vfsp = vfsconf; vfsp->vfc_next; vfsp = vfsp->vfc_next)
			if (strcmp(vfc->vfc_name, vfsp->vfc_name) == 0)
				return EEXIST;

	vfc->vfc_typenum = maxvfsconf++;
	if (vfsp)
		vfsp->vfc_next = vfc;
	else
		vfsconf = vfc;
	vfc->vfc_next = NULL;

	/*
	 * If this filesystem has a sysctl node under vfs
	 * (i.e. vfs.xxfs), then change the oid number of that node to
	 * match the filesystem's type number.
	 * This allows user code
	 * which uses the type number to read sysctl variables defined
	 * by the filesystem to continue working.  Since the oids are
	 * in a sorted list, we need to make sure the order is
	 * preserved by re-registering the oid after modifying its
	 * number.
	 */
	SLIST_FOREACH(oidp, &sysctl__vfs_children, oid_link)
		if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) {
			sysctl_unregister_oid(oidp);
			oidp->oid_number = vfc->vfc_typenum;
			sysctl_register_oid(oidp);
		}

	/*
	 * Initialise unused ``struct vfsops'' fields to use the
	 * vfs_std*() functions.  Note that we need the mount and
	 * unmount operations, at the least.  The check that vfsops
	 * is available at all is just a debugging aid.
	 */
	KASSERT(vfc->vfc_vfsops != NULL,
	    ("Filesystem %s has no vfsops", vfc->vfc_name));
	/*
	 * Check the mount and unmount operations.
	 */
	vfsops = vfc->vfc_vfsops;
	KASSERT(vfsops->vfs_mount != NULL || vfsops->vfs_nmount != NULL,
	    ("Filesystem %s has no (n)mount op", vfc->vfc_name));
	KASSERT(vfsops->vfs_unmount != NULL,
	    ("Filesystem %s has no unmount op", vfc->vfc_name));

	if (vfsops->vfs_start == NULL)
		/* make a file system operational */
		vfsops->vfs_start = vfs_stdstart;
	if (vfsops->vfs_root == NULL)
		/* return the file system's root vnode */
		vfsops->vfs_root = vfs_stdroot;
	if (vfsops->vfs_quotactl == NULL)
		/* quota control */
		vfsops->vfs_quotactl = vfs_stdquotactl;
	if (vfsops->vfs_statfs == NULL)
		/* return the file system's status */
		vfsops->vfs_statfs = vfs_stdstatfs;
	if (vfsops->vfs_sync == NULL)
		/*
		 * Flush unwritten data: the default is a no-op (nosync).
		 * File systems can use vfs_stdsync explicitly by setting
		 * it in their vfsops vector.
		 */
		vfsops->vfs_sync = vfs_stdnosync;
	if (vfsops->vfs_vget == NULL)
		/* convert an inode number to a vnode */
		vfsops->vfs_vget = vfs_stdvget;
	if (vfsops->vfs_fhtovp == NULL)
		/* turn an NFS file handle into a vnode */
		vfsops->vfs_fhtovp = vfs_stdfhtovp;
	if (vfsops->vfs_checkexp == NULL)
		/* check if the file system is exported */
		vfsops->vfs_checkexp = vfs_stdcheckexp;
	if (vfsops->vfs_vptofh == NULL)
		/* turn a vnode into an NFS file handle */
		vfsops->vfs_vptofh = vfs_stdvptofh;
	if (vfsops->vfs_init == NULL)
		/* file system specific initialisation */
		vfsops->vfs_init = vfs_stdinit;
	if (vfsops->vfs_uninit == NULL)
		/* file system specific uninitialisation */
		vfsops->vfs_uninit = vfs_stduninit;
	if (vfsops->vfs_extattrctl == NULL)
		/* extended attribute control */
		vfsops->vfs_extattrctl = vfs_stdextattrctl;

	/*
	 * Call the init function for this VFS...
	 */
	(*(vfc->vfc_vfsops->vfs_init))(vfc);

	return 0;
}
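
/*
 * Illustrative sketch (hypothetical myfs_* names): because vfs_register()
 * fills in every missing vfsops member with a vfs_std*() default, a simple
 * filesystem only needs to supply the operations it actually implements,
 * for example:
 *
 *	static struct vfsops myfs_vfsops = {
 *		.vfs_mount =	myfs_mount,
 *		.vfs_unmount =	myfs_unmount,
 *		.vfs_root =	myfs_root,
 *		.vfs_statfs =	myfs_statfs,
 *	};
 *
 * Everything left NULL (vfs_vget, vfs_fhtovp, vfs_sync, ...) is pointed at
 * the corresponding vfs_std*() stub by the code above before the filesystem's
 * init function is called.
 */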
468 */ 469 (*(vfc->vfc_vfsops->vfs_init))(vfc); 470 471 return 0; 472 } 473 474 475 /* Remove registration of a filesystem type */ 476 int 477 vfs_unregister(struct vfsconf *vfc) 478 { 479 struct vfsconf *vfsp, *prev_vfsp; 480 int error, i, maxtypenum; 481 482 i = vfc->vfc_typenum; 483 484 prev_vfsp = NULL; 485 for (vfsp = vfsconf; vfsp; 486 prev_vfsp = vfsp, vfsp = vfsp->vfc_next) { 487 if (!strcmp(vfc->vfc_name, vfsp->vfc_name)) 488 break; 489 } 490 if (vfsp == NULL) 491 return EINVAL; 492 if (vfsp->vfc_refcount) 493 return EBUSY; 494 if (vfc->vfc_vfsops->vfs_uninit != NULL) { 495 error = (*vfc->vfc_vfsops->vfs_uninit)(vfsp); 496 if (error) 497 return (error); 498 } 499 if (prev_vfsp) 500 prev_vfsp->vfc_next = vfsp->vfc_next; 501 else 502 vfsconf = vfsp->vfc_next; 503 maxtypenum = VFS_GENERIC; 504 for (vfsp = vfsconf; vfsp != NULL; vfsp = vfsp->vfc_next) 505 if (maxtypenum < vfsp->vfc_typenum) 506 maxtypenum = vfsp->vfc_typenum; 507 maxvfsconf = maxtypenum + 1; 508 return 0; 509 } 510 511 /* 512 * Standard kernel module handling code for filesystem modules. 513 * Referenced from VFS_SET(). 514 */ 515 int 516 vfs_modevent(module_t mod, int type, void *data) 517 { 518 struct vfsconf *vfc; 519 int error = 0; 520 521 vfc = (struct vfsconf *)data; 522 523 switch (type) { 524 case MOD_LOAD: 525 if (vfc) 526 error = vfs_register(vfc); 527 break; 528 529 case MOD_UNLOAD: 530 if (vfc) 531 error = vfs_unregister(vfc); 532 break; 533 default: /* including MOD_SHUTDOWN */ 534 break; 535 } 536 return (error); 537 } 538