1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed 6 * to Berkeley by John Heidemann of the UCLA Ficus project. 7 * 8 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_init.c 8.3 (Berkeley) 1/4/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/mount.h> 44 #include <sys/sysctl.h> 45 #include <sys/vnode.h> 46 #include <sys/malloc.h> 47 48 49 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes"); 50 51 /* 52 * The highest defined VFS number. 53 */ 54 int maxvfsconf = VFS_GENERIC + 1; 55 56 /* 57 * Single-linked list of configured VFSes. 58 * New entries are added/deleted by vfs_register()/vfs_unregister() 59 */ 60 struct vfsconf *vfsconf; 61 62 /* 63 * vfs_init.c 64 * 65 * Allocate and fill in operations vectors. 66 * 67 * An undocumented feature of this approach to defining operations is that 68 * there can be multiple entries in vfs_opv_descs for the same operations 69 * vector. This allows third parties to extend the set of operations 70 * supported by another layer in a binary compatible way. For example, 71 * assume that NFS needed to be modified to support Ficus. NFS has an entry 72 * (probably nfs_vnodeop_decls) declaring all the operations NFS supports by 73 * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions) 74 * listing those new operations Ficus adds to NFS, all without modifying the 75 * NFS code. (Of course, the OTW NFS protocol still needs to be munged, but 76 * that is a(whole)nother story.) This is a feature. 
77 */ 78 79 /* Table of known vnodeop vectors (list of VFS vnode vectors) */ 80 static const struct vnodeopv_desc **vnodeopv_descs; 81 static int vnodeopv_num; 82 83 /* Table of known descs (list of vnode op handlers "vop_access_desc") */ 84 static struct vnodeop_desc **vfs_op_descs; 85 /* Reference counts for vfs_op_descs */ 86 static int *vfs_op_desc_refs; 87 /* Number of descriptions */ 88 static int num_op_descs; 89 /* Number of entries in each description */ 90 static int vfs_opv_numops = 64; 91 92 /* Allow this number to be tuned at boot */ 93 TUNABLE_INT("vfs.opv_numops", &vfs_opv_numops); 94 SYSCTL_INT(_vfs, OID_AUTO, opv_numops, CTLFLAG_RDTUN, &vfs_opv_numops, 95 0, "Maximum number of operations in vop_t vector"); 96 97 static int int_cmp(const void *a, const void *b); 98 99 static int 100 int_cmp(const void *a, const void *b) 101 { 102 return(*(const int *)a - *(const int *)b); 103 } 104 105 /* 106 * Recalculate the operations vector/description (those parts of it that can 107 * be recalculated, that is.) 108 * Always allocate operations vector large enough to hold vfs_opv_numops 109 * entries. The vector is never freed or deallocated once it is initialized, 110 * so that vnodes might safely reference it through their v_op pointer without 111 * vector changing suddenly from under them. 112 */ 113 static void 114 vfs_opv_recalc(void) 115 { 116 int i, j, k; 117 int *vfs_op_offsets; 118 vop_t ***opv_desc_vector_p; 119 vop_t **opv_desc_vector; 120 struct vnodeopv_entry_desc *opve_descp; 121 const struct vnodeopv_desc *opv; 122 123 if (vfs_op_descs == NULL) 124 panic("vfs_opv_recalc called with null vfs_op_descs"); 125 126 /* 127 * Allocate and initialize temporary array to store 128 * offsets. Sort it to put all uninitialized entries 129 * first and to make holes in existing offset sequence 130 * detectable. 
131 */ 132 MALLOC(vfs_op_offsets, int *, 133 num_op_descs * sizeof(int), M_TEMP, M_WAITOK); 134 if (vfs_op_offsets == NULL) 135 panic("vfs_opv_recalc: no memory"); 136 for (i = 0; i < num_op_descs; i++) 137 vfs_op_offsets[i] = vfs_op_descs[i]->vdesc_offset; 138 qsort(vfs_op_offsets, num_op_descs, sizeof(int), int_cmp); 139 140 /* 141 * Run through and make sure all known descs have an offset. 142 * Use vfs_op_offsets to locate holes in offset sequence and 143 * reuse them. 144 * vop_default_desc is hardwired at offset 1, and offset 0 145 * is a panic sanity check. 146 */ 147 j = 1; k = 1; 148 for (i = 0; i < num_op_descs; i++) { 149 if (vfs_op_descs[i]->vdesc_offset != 0) 150 continue; 151 /* 152 * Look at two adjacent entries vfs_op_offsets[j - 1] and 153 * vfs_op_offsets[j] and see if we can fit a new offset 154 * number in between. If not, look at the next pair until 155 * hole is found or the end of the vfs_op_offsets vector is 156 * reached. j has been initialized to 1 above so that 157 * referencing (j-1)-th element is safe and the loop will 158 * never execute if num_op_descs is 1. For each new value s 159 * of i the j loop pick up from where previous iteration has 160 * left off. When the last hole has been consumed or if no 161 * hole has been found, we will start allocating new numbers 162 * starting from the biggest already available offset + 1. 163 */ 164 for (; j < num_op_descs; j++) { 165 if (vfs_op_offsets[j - 1] < k && vfs_op_offsets[j] > k) 166 break; 167 k = vfs_op_offsets[j] + 1; 168 } 169 vfs_op_descs[i]->vdesc_offset = k++; 170 } 171 FREE(vfs_op_offsets, M_TEMP); 172 173 /* Panic if new vops will cause vector overflow */ 174 if (k > vfs_opv_numops) 175 panic("VFS: Ran out of vop_t vector entries. 
%d entries required, only %d available.\n", k, vfs_opv_numops); 176 177 /* 178 * Allocate and fill in the vectors 179 */ 180 for (i = 0; i < vnodeopv_num; i++) { 181 opv = vnodeopv_descs[i]; 182 opv_desc_vector_p = opv->opv_desc_vector_p; 183 if (*opv_desc_vector_p == NULL) 184 MALLOC(*opv_desc_vector_p, vop_t **, 185 vfs_opv_numops * sizeof(vop_t *), M_VNODE, 186 M_WAITOK | M_ZERO); 187 188 /* Fill in, with slot 0 being to return EOPNOTSUPP */ 189 opv_desc_vector = *opv_desc_vector_p; 190 opv_desc_vector[0] = (vop_t *)vop_eopnotsupp; 191 for (j = 0; opv->opv_desc_ops[j].opve_op; j++) { 192 opve_descp = &(opv->opv_desc_ops[j]); 193 opv_desc_vector[opve_descp->opve_op->vdesc_offset] = 194 opve_descp->opve_impl; 195 } 196 197 /* Replace unfilled routines with their default (slot 1). */ 198 opv_desc_vector = *(opv->opv_desc_vector_p); 199 if (opv_desc_vector[1] == NULL) 200 panic("vfs_opv_recalc: vector without a default."); 201 for (j = 0; j < vfs_opv_numops; j++) 202 if (opv_desc_vector[j] == NULL) 203 opv_desc_vector[j] = opv_desc_vector[1]; 204 } 205 } 206 207 /* Add a set of vnode operations (a description) to the table above. 
*/
void
vfs_add_vnodeops(const void *data)
{
	const struct vnodeopv_desc *vec = data;
	const struct vnodeopv_desc **grown_vecs;
	struct vnodeop_desc **grown_ops;
	struct vnodeop_desc *op;
	int *grown_refs;
	int entry, slot;

	/*
	 * Append the description to the table of known vectors: build a
	 * copy that is one element longer and drop the old table.
	 */
	MALLOC(grown_vecs, const struct vnodeopv_desc **,
	    (vnodeopv_num + 1) * sizeof(*grown_vecs), M_VNODE, M_WAITOK);
	if (vnodeopv_descs != NULL) {
		bcopy(vnodeopv_descs, grown_vecs,
		    vnodeopv_num * sizeof(*grown_vecs));
		FREE(vnodeopv_descs, M_VNODE);
	}
	grown_vecs[vnodeopv_num] = vec;
	vnodeopv_descs = grown_vecs;
	vnodeopv_num += 1;

	/*
	 * Walk the NULL-terminated list of operations this description
	 * implements.  An operation that is already known only gains a
	 * reference; an unknown one is appended to vfs_op_descs with a
	 * reference count of 1.
	 */
	for (entry = 0; (op = vec->opv_desc_ops[entry].opve_op) != NULL;
	    entry++) {
		slot = 0;
		while (slot < num_op_descs && vfs_op_descs[slot] != op)
			slot++;

		if (slot < num_op_descs) {
			/* Known operation: one more user. */
			vfs_op_desc_refs[slot]++;
			continue;
		}

		/* New operation: grow both parallel tables by one slot. */
		MALLOC(grown_ops, struct vnodeop_desc **,
		    (num_op_descs + 1) * sizeof(*grown_ops),
		    M_VNODE, M_WAITOK);
		/* The reference counts are consulted when unloading. */
		MALLOC(grown_refs, int *,
		    (num_op_descs + 1) * sizeof(*grown_refs),
		    M_VNODE, M_WAITOK);
		if (vfs_op_descs != NULL) {
			bcopy(vfs_op_descs, grown_ops,
			    num_op_descs * sizeof(*grown_ops));
			FREE(vfs_op_descs, M_VNODE);
		}
		if (vfs_op_desc_refs != NULL) {
			bcopy(vfs_op_desc_refs, grown_refs,
			    num_op_descs * sizeof(*grown_refs));
			FREE(vfs_op_desc_refs, M_VNODE);
		}
		grown_ops[num_op_descs] = op;
		grown_refs[num_op_descs] = 1;
		vfs_op_descs = grown_ops;
		vfs_op_desc_refs = grown_refs;
		num_op_descs += 1;
	}

	/* Assign offsets to new operations and (re)fill every vector. */
	vfs_opv_recalc();
}

/* Remove a vnode type from the vnode description table above.
*/ 268 void 269 vfs_rm_vnodeops(const void *data) 270 { 271 const struct vnodeopv_desc *opv; 272 const struct vnodeopv_desc **newopv; 273 struct vnodeop_desc **newop; 274 int *newref; 275 vop_t **opv_desc_vector; 276 struct vnodeop_desc *desc; 277 int i, j, k; 278 279 opv = (const struct vnodeopv_desc *)data; 280 /* Lower ref counts on descs in the table and release if zero */ 281 for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) { 282 for (j = 0; j < num_op_descs; j++) { 283 if (desc == vfs_op_descs[j]) { 284 /* found it, decrease reference count */ 285 vfs_op_desc_refs[j]--; 286 break; 287 } 288 } 289 for (j = 0; j < num_op_descs; j++) { 290 if (vfs_op_desc_refs[j] > 0) 291 continue; 292 if (vfs_op_desc_refs[j] < 0) 293 panic("vfs_remove_vnodeops: negative refcnt"); 294 /* Entry is going away - replace it with defaultop */ 295 for (k = 0; k < vnodeopv_num; k++) { 296 opv_desc_vector = 297 *(vnodeopv_descs[k]->opv_desc_vector_p); 298 if (opv_desc_vector != NULL) 299 opv_desc_vector[desc->vdesc_offset] = 300 opv_desc_vector[1]; 301 } 302 MALLOC(newop, struct vnodeop_desc **, 303 (num_op_descs - 1) * sizeof(*newop), 304 M_VNODE, M_WAITOK); 305 /* new reference count (for unload) */ 306 MALLOC(newref, int *, 307 (num_op_descs - 1) * sizeof(*newref), 308 M_VNODE, M_WAITOK); 309 for (k = j; k < (num_op_descs - 1); k++) { 310 vfs_op_descs[k] = vfs_op_descs[k + 1]; 311 vfs_op_desc_refs[k] = vfs_op_desc_refs[k + 1]; 312 } 313 bcopy(vfs_op_descs, newop, 314 (num_op_descs - 1) * sizeof(*newop)); 315 bcopy(vfs_op_desc_refs, newref, 316 (num_op_descs - 1) * sizeof(*newref)); 317 FREE(vfs_op_descs, M_VNODE); 318 FREE(vfs_op_desc_refs, M_VNODE); 319 vfs_op_descs = newop; 320 vfs_op_desc_refs = newref; 321 num_op_descs--; 322 } 323 } 324 325 for (i = 0; i < vnodeopv_num; i++) { 326 if (vnodeopv_descs[i] == opv) { 327 for (j = i; j < (vnodeopv_num - 1); j++) 328 vnodeopv_descs[j] = vnodeopv_descs[j + 1]; 329 break; 330 } 331 } 332 if (i == vnodeopv_num) 333 
panic("vfs_remove_vnodeops: opv not found"); 334 opv_desc_vector = *(opv->opv_desc_vector_p); 335 if (opv_desc_vector != NULL) 336 FREE(opv_desc_vector, M_VNODE); 337 MALLOC(newopv, const struct vnodeopv_desc **, 338 (vnodeopv_num - 1) * sizeof(*newopv), M_VNODE, M_WAITOK); 339 bcopy(vnodeopv_descs, newopv, (vnodeopv_num - 1) * sizeof(*newopv)); 340 FREE(vnodeopv_descs, M_VNODE); 341 vnodeopv_descs = newopv; 342 vnodeopv_num--; 343 344 vfs_opv_recalc(); 345 } 346 347 /* 348 * Routines having to do with the management of the vnode table. 349 */ 350 struct vattr va_null; 351 352 /* 353 * Initialize the vnode structures and initialize each filesystem type. 354 */ 355 /* ARGSUSED*/ 356 static void 357 vfsinit(void *dummy) 358 { 359 360 vattr_null(&va_null); 361 } 362 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vfsinit, NULL) 363 364 /* Register a new filesystem type in the global table */ 365 int 366 vfs_register(struct vfsconf *vfc) 367 { 368 struct sysctl_oid *oidp; 369 struct vfsconf *vfsp; 370 371 struct vfsops *vfsops; 372 373 vfsp = NULL; 374 if (vfsconf) 375 for (vfsp = vfsconf; vfsp->vfc_next; vfsp = vfsp->vfc_next) 376 if (strcmp(vfc->vfc_name, vfsp->vfc_name) == 0) 377 return EEXIST; 378 379 vfc->vfc_typenum = maxvfsconf++; 380 if (vfsp) 381 vfsp->vfc_next = vfc; 382 else 383 vfsconf = vfc; 384 vfc->vfc_next = NULL; 385 386 /* 387 * If this filesystem has a sysctl node under vfs 388 * (i.e. vfs.xxfs), then change the oid number of that node to 389 * match the filesystem's type number. This allows user code 390 * which uses the type number to read sysctl variables defined 391 * by the filesystem to continue working. Since the oids are 392 * in a sorted list, we need to make sure the order is 393 * preserved by re-registering the oid after modifying its 394 * number. 
395 */ 396 SLIST_FOREACH(oidp, &sysctl__vfs_children, oid_link) 397 if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) { 398 sysctl_unregister_oid(oidp); 399 oidp->oid_number = vfc->vfc_typenum; 400 sysctl_register_oid(oidp); 401 } 402 403 /* 404 * Initialise unused ``struct vfsops'' fields, to use 405 * the vfs_std*() functions. Note, we need the mount 406 * and unmount operations, at the least. The check 407 * for vfsops available is just a debugging aid. 408 */ 409 KASSERT(vfc->vfc_vfsops != NULL, 410 ("Filesystem %s has no vfsops", vfc->vfc_name)); 411 /* 412 * Check the mount and unmount operations. 413 */ 414 vfsops = vfc->vfc_vfsops; 415 KASSERT(vfsops->vfs_mount != NULL || vfsops->vfs_nmount != NULL, 416 ("Filesystem %s has no (n)mount op", vfc->vfc_name)); 417 KASSERT(vfsops->vfs_unmount != NULL, 418 ("Filesystem %s has no unmount op", vfc->vfc_name)); 419 420 if (vfsops->vfs_start == NULL) 421 /* make a file system operational */ 422 vfsops->vfs_start = vfs_stdstart; 423 if (vfsops->vfs_root == NULL) 424 /* return file system's root vnode */ 425 vfsops->vfs_root = vfs_stdroot; 426 if (vfsops->vfs_quotactl == NULL) 427 /* quota control */ 428 vfsops->vfs_quotactl = vfs_stdquotactl; 429 if (vfsops->vfs_statfs == NULL) 430 /* return file system's status */ 431 vfsops->vfs_statfs = vfs_stdstatfs; 432 if (vfsops->vfs_sync == NULL) 433 /* 434 * flush unwritten data (nosync) 435 * file systems can use vfs_stdsync 436 * explicitly by setting it in the 437 * vfsop vector. 
438 */ 439 vfsops->vfs_sync = vfs_stdnosync; 440 if (vfsops->vfs_vget == NULL) 441 /* convert an inode number to a vnode */ 442 vfsops->vfs_vget = vfs_stdvget; 443 if (vfsops->vfs_fhtovp == NULL) 444 /* turn an NFS file handle into a vnode */ 445 vfsops->vfs_fhtovp = vfs_stdfhtovp; 446 if (vfsops->vfs_checkexp == NULL) 447 /* check if file system is exported */ 448 vfsops->vfs_checkexp = vfs_stdcheckexp; 449 if (vfsops->vfs_vptofh == NULL) 450 /* turn a vnode into an NFS file handle */ 451 vfsops->vfs_vptofh = vfs_stdvptofh; 452 if (vfsops->vfs_init == NULL) 453 /* file system specific initialisation */ 454 vfsops->vfs_init = vfs_stdinit; 455 if (vfsops->vfs_uninit == NULL) 456 /* file system specific uninitialisation */ 457 vfsops->vfs_uninit = vfs_stduninit; 458 if (vfsops->vfs_extattrctl == NULL) 459 /* extended attribute control */ 460 vfsops->vfs_extattrctl = vfs_stdextattrctl; 461 462 /* 463 * Call init function for this VFS... 464 */ 465 (*(vfc->vfc_vfsops->vfs_init))(vfc); 466 467 return 0; 468 } 469 470 471 /* Remove registration of a filesystem type */ 472 int 473 vfs_unregister(struct vfsconf *vfc) 474 { 475 struct vfsconf *vfsp, *prev_vfsp; 476 int error, i, maxtypenum; 477 478 i = vfc->vfc_typenum; 479 480 prev_vfsp = NULL; 481 for (vfsp = vfsconf; vfsp; 482 prev_vfsp = vfsp, vfsp = vfsp->vfc_next) { 483 if (!strcmp(vfc->vfc_name, vfsp->vfc_name)) 484 break; 485 } 486 if (vfsp == NULL) 487 return EINVAL; 488 if (vfsp->vfc_refcount) 489 return EBUSY; 490 if (vfc->vfc_vfsops->vfs_uninit != NULL) { 491 error = (*vfc->vfc_vfsops->vfs_uninit)(vfsp); 492 if (error) 493 return (error); 494 } 495 if (prev_vfsp) 496 prev_vfsp->vfc_next = vfsp->vfc_next; 497 else 498 vfsconf = vfsp->vfc_next; 499 maxtypenum = VFS_GENERIC; 500 for (vfsp = vfsconf; vfsp != NULL; vfsp = vfsp->vfc_next) 501 if (maxtypenum < vfsp->vfc_typenum) 502 maxtypenum = vfsp->vfc_typenum; 503 maxvfsconf = maxtypenum + 1; 504 return 0; 505 } 506 507 /* 508 * Standard kernel module handling code 
for filesystem modules.
 * Referenced from VFS_SET().
 */
int
vfs_modevent(module_t mod, int type, void *data)
{
	struct vfsconf *conf = data;

	/*
	 * Without a vfsconf there is nothing to register or unregister,
	 * whatever the event type is.
	 */
	if (conf == NULL)
		return (0);

	/* Load registers the filesystem type, unload removes it again. */
	if (type == MOD_LOAD)
		return (vfs_register(conf));
	if (type == MOD_UNLOAD)
		return (vfs_unregister(conf));

	/* Every other event, MOD_SHUTDOWN included, is a successful no-op. */
	return (0);
}