1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Portions of this source code were derived from Berkeley 4.3 BSD 32 * under license from the Regents of the University of California. 33 */ 34 35 #ifndef _SYS_VFS_H 36 #define _SYS_VFS_H 37 38 #pragma ident "%Z%%M% %I% %E% SMI" 39 40 #include <sys/types.h> 41 #include <sys/t_lock.h> 42 #include <sys/cred.h> 43 #include <sys/vnode.h> 44 #include <sys/statvfs.h> 45 #include <sys/refstr.h> 46 47 #ifdef __cplusplus 48 extern "C" { 49 #endif 50 51 /* 52 * Data associated with mounted file systems. 53 */ 54 55 /* 56 * Operations vector. This is used internal to the kernel; file systems 57 * supply their list of operations via vfs_setfsops(). 58 */ 59 60 typedef struct vfsops vfsops_t; 61 62 /* 63 * File system identifier. Should be unique (at least per machine). 64 */ 65 typedef struct { 66 int val[2]; /* file system id type */ 67 } fsid_t; 68 69 /* 70 * File identifier. Should be unique per filesystem on a single 71 * machine. This is typically called by a stateless file server 72 * in order to generate "file handles". 73 * 74 * Do not change the definition of struct fid ... fid_t without 75 * letting the CacheFS group know about it! They will have to do at 76 * least two things, in the same change that changes this structure: 77 * 1. change CFSVERSION in usr/src/uts/common/sys/fs/cachefs_fs.h 78 * 2. put the old version # in the canupgrade array 79 * in cachfs_upgrade() in usr/src/cmd/fs.d/cachefs/fsck/fsck.c 80 * This is necessary because CacheFS stores FIDs on disk. 81 * 82 * Many underlying file systems cast a struct fid into other 83 * file system dependent structures which may require 4 byte alignment. 84 * Because a fid starts with a short it may not be 4 byte aligned, the 85 * fid_pad will force the alignment. 86 */ 87 #define MAXFIDSZ 64 88 #define OLD_MAXFIDSZ 16 89 90 typedef struct fid { 91 union { 92 long fid_pad; 93 struct { 94 ushort_t len; /* length of data in bytes */ 95 char data[MAXFIDSZ]; /* data (variable len) */ 96 } _fid; 97 } un; 98 } fid_t; 99 100 #ifdef _SYSCALL32 101 /* 102 * Solaris 64 - use old-style cache format with 32-bit aligned fid for on-disk 103 * struct compatibility. 104 */ 105 typedef struct fid32 { 106 union { 107 int32_t fid_pad; 108 struct { 109 uint16_t len; /* length of data in bytes */ 110 char data[MAXFIDSZ]; /* data (variable len) */ 111 } _fid; 112 } un; 113 } fid32_t; 114 #else /* not _SYSCALL32 */ 115 #define fid32 fid 116 typedef fid_t fid32_t; 117 #endif /* _SYSCALL32 */ 118 119 #define fid_len un._fid.len 120 #define fid_data un._fid.data 121 122 /* 123 * Structure defining a mount option for a filesystem. 124 * option names are found in mntent.h 125 */ 126 typedef struct mntopt { 127 char *mo_name; /* option name */ 128 char **mo_cancel; /* list of options cancelled by this one */ 129 char *mo_arg; /* argument string for this option */ 130 int mo_flags; /* flags for this mount option */ 131 void *mo_data; /* filesystem specific data */ 132 } mntopt_t; 133 134 /* 135 * Flags that apply to mount options 136 */ 137 138 #define MO_SET 0x01 /* option is set */ 139 #define MO_NODISPLAY 0x02 /* option not listed in mnttab */ 140 #define MO_HASVALUE 0x04 /* option takes a value */ 141 #define MO_IGNORE 0x08 /* option ignored by parser */ 142 #define MO_DEFAULT MO_SET /* option is on by default */ 143 #define MO_TAG 0x10 /* flags a tag set by user program */ 144 #define MO_EMPTY 0x20 /* empty space in option table */ 145 146 #define VFS_NOFORCEOPT 0x01 /* honor MO_IGNORE (don't set option) */ 147 #define VFS_DISPLAY 0x02 /* Turn off MO_NODISPLAY bit for opt */ 148 #define VFS_NODISPLAY 0x04 /* Turn on MO_NODISPLAY bit for opt */ 149 #define VFS_CREATEOPT 0x08 /* Create the opt if it's not there */ 150 151 /* 152 * Structure holding mount option strings for the mounted file system. 153 */ 154 typedef struct mntopts { 155 uint_t mo_count; /* number of entries in table */ 156 mntopt_t *mo_list; /* list of mount options */ 157 } mntopts_t; 158 159 /* 160 * Structure per mounted file system. Each mounted file system has 161 * an array of operations and an instance record. 162 * 163 * The file systems are kept on a doubly linked circular list headed by 164 * "rootvfs". 165 * File system implementations should not access this list; 166 * it's intended for use only in the kernel's vfs layer. 167 * 168 * Each zone also has its own list of mounts, containing filesystems mounted 169 * somewhere within the filesystem tree rooted at the zone's rootpath. The 170 * list is doubly linked to match the global list. 171 * 172 * mnttab locking: the in-kernel mnttab uses the vfs_mntpt, vfs_resource and 173 * vfs_mntopts fields in the vfs_t. mntpt and resource are refstr_ts that 174 * are set at mount time and can only be modified during a remount. 175 * It is safe to read these fields if you can prevent a remount on the vfs, 176 * or through the convenience funcs vfs_getmntpoint() and vfs_getresource(). 177 * The mntopts field may only be accessed through the provided convenience 178 * functions, as it is protected by the vfs list lock. Modifying a mount 179 * option requires grabbing the vfs list write lock, which can be a very 180 * high latency lock. 181 */ 182 struct zone; /* from zone.h */ 183 struct fem_head; /* from fem.h */ 184 185 typedef struct vfs { 186 struct vfs *vfs_next; /* next VFS in VFS list */ 187 struct vfs *vfs_prev; /* prev VFS in VFS list */ 188 189 /* vfs_op should not be used directly. Accessor functions are provided */ 190 vfsops_t *vfs_op; /* operations on VFS */ 191 192 struct vnode *vfs_vnodecovered; /* vnode mounted on */ 193 uint_t vfs_flag; /* flags */ 194 uint_t vfs_bsize; /* native block size */ 195 int vfs_fstype; /* file system type index */ 196 fsid_t vfs_fsid; /* file system id */ 197 void *vfs_data; /* private data */ 198 dev_t vfs_dev; /* device of mounted VFS */ 199 ulong_t vfs_bcount; /* I/O count (accounting) */ 200 struct vfs *vfs_list; /* sync list pointer */ 201 struct vfs *vfs_hash; /* hash list pointer */ 202 ksema_t vfs_reflock; /* mount/unmount/sync lock */ 203 uint_t vfs_count; /* vfs reference count */ 204 mntopts_t vfs_mntopts; /* options mounted with */ 205 refstr_t *vfs_resource; /* mounted resource name */ 206 refstr_t *vfs_mntpt; /* mount point name */ 207 time_t vfs_mtime; /* time we were mounted */ 208 struct fem_head *vfs_femhead; /* fs monitoring */ 209 /* 210 * Zones support. Note that the zone that "owns" the mount isn't 211 * necessarily the same as the zone in which the zone is visible. 212 * That is, vfs_zone and (vfs_zone_next|vfs_zone_prev) may refer to 213 * different zones. 214 */ 215 struct zone *vfs_zone; /* zone that owns the mount */ 216 struct vfs *vfs_zone_next; /* next VFS visible in zone */ 217 struct vfs *vfs_zone_prev; /* prev VFS visible in zone */ 218 } vfs_t; 219 220 /* 221 * VFS flags. 222 */ 223 #define VFS_RDONLY 0x01 /* read-only vfs */ 224 #define VFS_NOMNTTAB 0x02 /* vfs not seen in mnttab */ 225 #define VFS_NOSETUID 0x08 /* setuid disallowed */ 226 #define VFS_REMOUNT 0x10 /* modify mount options only */ 227 #define VFS_NOTRUNC 0x20 /* does not truncate long file names */ 228 #define VFS_UNLINKABLE 0x40 /* unlink(2) can be applied to root */ 229 #define VFS_PXFS 0x80 /* clustering: global fs proxy vfs */ 230 #define VFS_UNMOUNTED 0x100 /* file system has been unmounted */ 231 #define VFS_NBMAND 0x200 /* allow non-blocking mandatory locks */ 232 #define VFS_XATTR 0x400 /* fs supports extended attributes */ 233 #define VFS_NODEVICES 0x800 /* device-special files disallowed */ 234 #define VFS_NOEXEC 0x1000 /* executables disallowed */ 235 236 #define VFS_NORESOURCE "unspecified_resource" 237 #define VFS_NOMNTPT "unspecified_mountpoint" 238 239 /* 240 * Argument structure for mount(2). 241 * 242 * Flags are defined in <sys/mount.h>. 243 * 244 * Note that if the MS_SYSSPACE bit is set in flags, the pointer fields in 245 * this structure are to be interpreted as kernel addresses. File systems 246 * should be prepared for this possibility. 247 */ 248 struct mounta { 249 char *spec; 250 char *dir; 251 int flags; 252 char *fstype; 253 char *dataptr; 254 int datalen; 255 char *optptr; 256 int optlen; 257 }; 258 259 /* 260 * Reasons for calling the vfs_mountroot() operation. 261 */ 262 enum whymountroot { ROOT_INIT, ROOT_REMOUNT, ROOT_UNMOUNT}; 263 typedef enum whymountroot whymountroot_t; 264 265 /* 266 * Reasons for calling the VFS_VNSTATE(): 267 */ 268 enum vntrans { 269 VNTRANS_EXISTS, 270 VNTRANS_IDLED, 271 VNTRANS_RECLAIMED, 272 VNTRANS_DESTROYED 273 }; 274 typedef enum vntrans vntrans_t; 275 276 277 278 279 /* 280 * Operations supported on virtual file system. 281 */ 282 struct vfsops { 283 int (*vfs_mount)(vfs_t *, vnode_t *, struct mounta *, cred_t *); 284 int (*vfs_unmount)(vfs_t *, int, cred_t *); 285 int (*vfs_root)(vfs_t *, vnode_t **); 286 int (*vfs_statvfs)(vfs_t *, statvfs64_t *); 287 int (*vfs_sync)(vfs_t *, short, cred_t *); 288 int (*vfs_vget)(vfs_t *, vnode_t **, fid_t *); 289 int (*vfs_mountroot)(vfs_t *, enum whymountroot); 290 int (*vfs_freevfs)(vfs_t *); 291 int (*vfs_vnstate)(vfs_t *, vnode_t *, vntrans_t); 292 }; 293 294 extern int fsop_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 295 extern int fsop_unmount(vfs_t *, int, cred_t *); 296 extern int fsop_root(vfs_t *, vnode_t **); 297 extern int fsop_statfs(vfs_t *, statvfs64_t *); 298 extern int fsop_sync(vfs_t *, short, cred_t *); 299 extern int fsop_vget(vfs_t *, vnode_t **, fid_t *); 300 extern int fsop_mountroot(vfs_t *, enum whymountroot); 301 extern void fsop_freefs(vfs_t *); 302 extern int fsop_sync_by_kind(int, short, cred_t *); 303 extern int fsop_vnstate(vfs_t *, vnode_t *, vntrans_t); 304 305 #define VFS_MOUNT(vfsp, mvp, uap, cr) fsop_mount(vfsp, mvp, uap, cr) 306 #define VFS_UNMOUNT(vfsp, flag, cr) fsop_unmount(vfsp, flag, cr) 307 #define VFS_ROOT(vfsp, vpp) fsop_root(vfsp, vpp) 308 #define VFS_STATVFS(vfsp, sp) fsop_statfs(vfsp, sp) 309 #define VFS_SYNC(vfsp, flag, cr) fsop_sync(vfsp, flag, cr) 310 #define VFS_VGET(vfsp, vpp, fidp) fsop_vget(vfsp, vpp, fidp) 311 #define VFS_MOUNTROOT(vfsp, init) fsop_mountroot(vfsp, init) 312 #define VFS_FREEVFS(vfsp) fsop_freefs(vfsp) 313 #define VFS_VNSTATE(vfsp, vn, ns) fsop_vnstate(vfsp, vn, ns) 314 315 #define VFSNAME_MOUNT "mount" 316 #define VFSNAME_UNMOUNT "unmount" 317 #define VFSNAME_ROOT "root" 318 #define VFSNAME_STATVFS "statvfs" 319 #define VFSNAME_SYNC "sync" 320 #define VFSNAME_VGET "vget" 321 #define VFSNAME_MOUNTROOT "mountroot" 322 #define VFSNAME_FREEVFS "freevfs" 323 #define VFSNAME_VNSTATE "vnstate" 324 /* 325 * Filesystem type switch table. 326 */ 327 328 typedef struct vfssw { 329 char *vsw_name; /* type name -- max len _ST_FSTYPSZ */ 330 int (*vsw_init) (int, char *); 331 /* init routine (for non-loadable fs only) */ 332 int vsw_flag; /* flags */ 333 mntopts_t vsw_optproto; /* mount options table prototype */ 334 uint_t vsw_count; /* count of references */ 335 kmutex_t vsw_lock; /* lock to protect vsw_count */ 336 vfsops_t vsw_vfsops; /* filesystem operations vector */ 337 } vfssw_t; 338 339 /* 340 * Filesystem type definition record. All file systems must export a record 341 * of this type through their modlfs structure. 342 */ 343 344 typedef struct vfsdef_v3 { 345 int def_version; /* structure version, must be first */ 346 char *name; /* filesystem type name */ 347 int (*init) (int, char *); /* init routine */ 348 int flags; /* filesystem flags */ 349 mntopts_t *optproto; /* mount options table prototype */ 350 } vfsdef_v3; 351 352 typedef struct vfsdef_v3 vfsdef_t; 353 354 enum { 355 VFSDEF_VERSION = 3 356 }; 357 358 /* 359 * Generic operations vector types (used for vfs/vnode ops registration). 360 */ 361 362 extern int fs_default(); /* "default" function placeholder */ 363 extern int fs_error(); /* "error" function placeholder */ 364 365 int fs_build_vector(void *vector, int *unused_ops, 366 const fs_operation_trans_def_t *translation, 367 const fs_operation_def_t *operations); 368 369 /* 370 * flags for vfssw and vfsdef 371 */ 372 #define VSW_HASPROTO 0x01 /* struct has a mount options prototype */ 373 #define VSW_CANRWRO 0x02 /* file system can transition from rw to ro */ 374 #define VSW_CANREMOUNT 0x04 /* file system supports remounts */ 375 #define VSW_NOTZONESAFE 0x08 /* zone_enter(2) should fail for these files */ 376 #define VSW_VOLATILEDEV 0x10 /* vfs_dev can change each time fs is mounted */ 377 378 #define VSW_INSTALLED 0x8000 /* this vsw is associated with a file system */ 379 380 #if defined(_KERNEL) 381 /* 382 * Public operations. 383 */ 384 struct umounta; 385 struct statvfsa; 386 struct fstatvfsa; 387 388 int vfs_setfsops(int, const fs_operation_def_t *, vfsops_t **); 389 int vfs_makefsops(const fs_operation_def_t *, vfsops_t **); 390 void vfs_freevfsops(vfsops_t *); 391 int vfs_freevfsops_by_type(int); 392 void vfs_setops(vfs_t *, vfsops_t *); 393 vfsops_t *vfs_getops(vfs_t *vfsp); 394 int vfs_matchops(vfs_t *, vfsops_t *); 395 int vfs_can_sync(vfs_t *vfsp); 396 void vfs_init(vfs_t *vfsp, vfsops_t *, void *); 397 void vn_exists(vnode_t *); 398 void vn_idle(vnode_t *); 399 void vn_reclaim(vnode_t *); 400 void vn_invalid(vnode_t *); 401 402 int rootconf(void); 403 int svm_rootconf(void); 404 int domount(char *, struct mounta *, vnode_t *, struct cred *, 405 struct vfs **); 406 int dounmount(struct vfs *, int, cred_t *); 407 int vfs_lock(struct vfs *); 408 int vfs_rlock(struct vfs *); 409 void vfs_lock_wait(struct vfs *); 410 void vfs_rlock_wait(struct vfs *); 411 void vfs_unlock(struct vfs *); 412 int vfs_lock_held(struct vfs *); 413 struct _kthread *vfs_lock_owner(struct vfs *); 414 void sync(void); 415 void vfs_sync(int); 416 void vfs_mountroot(void); 417 void vfs_add(vnode_t *, struct vfs *, int); 418 void vfs_remove(struct vfs *); 419 420 /* The following functions are not for general use by filesystems */ 421 422 void vfs_createopttbl(mntopts_t *, const char *); 423 void vfs_copyopttbl(const mntopts_t *, mntopts_t *); 424 void vfs_mergeopttbl(const mntopts_t *, const mntopts_t *, mntopts_t *); 425 void vfs_freeopttbl(mntopts_t *); 426 void vfs_parsemntopts(mntopts_t *, char *, int); 427 int vfs_buildoptionstr(const mntopts_t *, char *, int); 428 struct mntopt *vfs_hasopt(const mntopts_t *, const char *); 429 void vfs_mnttab_modtimeupd(void); 430 431 void vfs_clearmntopt(struct vfs *, const char *); 432 void vfs_setmntopt(struct vfs *, const char *, const char *, int); 433 void vfs_setresource(struct vfs *, const char *); 434 void vfs_setmntpoint(struct vfs *, const char *); 435 refstr_t *vfs_getresource(const struct vfs *); 436 refstr_t *vfs_getmntpoint(const struct vfs *); 437 int vfs_optionisset(const struct vfs *, const char *, char **); 438 int vfs_settag(uint_t, uint_t, const char *, const char *, cred_t *); 439 int vfs_clrtag(uint_t, uint_t, const char *, const char *, cred_t *); 440 void vfs_syncall(void); 441 void vfs_syncprogress(void); 442 void vfsinit(void); 443 void vfs_unmountall(void); 444 void vfs_make_fsid(fsid_t *, dev_t, int); 445 void vfs_addmip(dev_t, struct vfs *); 446 void vfs_delmip(struct vfs *); 447 int vfs_devismounted(dev_t); 448 int vfs_devmounting(dev_t, struct vfs *); 449 int vfs_opsinuse(vfsops_t *); 450 struct vfs *getvfs(fsid_t *); 451 struct vfs *vfs_dev2vfsp(dev_t); 452 struct vfs *vfs_mntpoint2vfsp(const char *); 453 struct vfssw *allocate_vfssw(char *); 454 struct vfssw *vfs_getvfssw(char *); 455 struct vfssw *vfs_getvfsswbyname(char *); 456 struct vfssw *vfs_getvfsswbyvfsops(vfsops_t *); 457 void vfs_refvfssw(struct vfssw *); 458 void vfs_unrefvfssw(struct vfssw *); 459 uint_t vf_to_stf(uint_t); 460 void vfs_mnttab_modtime(timespec_t *); 461 void vfs_mnttab_poll(timespec_t *, struct pollhead **); 462 463 void vfs_list_lock(void); 464 void vfs_list_read_lock(void); 465 void vfs_list_unlock(void); 466 void vfs_list_add(struct vfs *); 467 void vfs_list_remove(struct vfs *); 468 void vfs_hold(vfs_t *vfsp); 469 void vfs_rele(vfs_t *vfsp); 470 void fs_freevfs(vfs_t *); 471 void vfs_root_redev(vfs_t *vfsp, dev_t ndev, int fstype); 472 473 int vfs_zone_change_safe(vfs_t *); 474 475 #define VFSHASH(maj, min) (((int)((maj)+(min))) & (vfshsz - 1)) 476 #define VFS_ON_LIST(vfsp) \ 477 ((vfsp)->vfs_next != (vfsp) && (vfsp)->vfs_next != NULL) 478 479 /* 480 * Globals. 481 */ 482 483 extern struct vfssw vfssw[]; /* table of filesystem types */ 484 extern krwlock_t vfssw_lock; 485 extern char rootfstype[]; /* name of root fstype */ 486 extern const int nfstype; /* # of elements in vfssw array */ 487 extern vfsops_t *EIO_vfsops; /* operations for vfs being torn-down */ 488 489 extern int vfs_vnode_path; /* tunable to control paths in vnodes */ 490 491 /* 492 * The following variables are private to the the kernel's vfs layer. File 493 * system implementations should not access them. 494 */ 495 extern struct vfs *rootvfs; /* ptr to root vfs structure */ 496 typedef struct { 497 struct vfs *rvfs_head; /* head vfs in chain */ 498 kmutex_t rvfs_lock; /* mutex protecting this chain */ 499 uint32_t rvfs_len; /* length of this chain */ 500 } rvfs_t; 501 extern rvfs_t *rvfs_list; 502 extern int vfshsz; /* # of elements in rvfs_head array */ 503 extern const mntopts_t vfs_mntopts; /* globally recognized options */ 504 505 #endif /* defined(_KERNEL) */ 506 507 #define VFS_HOLD(vfsp) { \ 508 vfs_hold(vfsp); \ 509 } 510 511 #define VFS_RELE(vfsp) { \ 512 vfs_rele(vfsp); \ 513 } 514 515 #define VFS_INIT(vfsp, op, data) vfs_init((vfsp), (op), (data)) 516 517 #define _VFS_INIT(vfsp, op, data) { \ 518 (vfsp)->vfs_count = 0; \ 519 (vfsp)->vfs_next = vfsp; \ 520 (vfsp)->vfs_prev = vfsp; \ 521 (vfsp)->vfs_zone_next = vfsp; \ 522 (vfsp)->vfs_zone_prev = vfsp; \ 523 vfs_setops((vfsp), (op)); \ 524 (vfsp)->vfs_flag = 0; \ 525 (vfsp)->vfs_data = (data); \ 526 (vfsp)->vfs_resource = NULL; \ 527 (vfsp)->vfs_mntpt = NULL; \ 528 (vfsp)->vfs_mntopts.mo_count = 0; \ 529 (vfsp)->vfs_mntopts.mo_list = NULL; \ 530 (vfsp)->vfs_femhead = NULL; \ 531 (vfsp)->vfs_zone = NULL; \ 532 sema_init(&(vfsp)->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL); \ 533 } 534 535 #define VFS_INSTALLED(vfsswp) (((vfsswp)->vsw_flag & VSW_INSTALLED) != 0) 536 #define ALLOCATED_VFSSW(vswp) ((vswp)->vsw_name[0] != '\0') 537 #define RLOCK_VFSSW() (rw_enter(&vfssw_lock, RW_READER)) 538 #define RUNLOCK_VFSSW() (rw_exit(&vfssw_lock)) 539 #define WLOCK_VFSSW() (rw_enter(&vfssw_lock, RW_WRITER)) 540 #define WUNLOCK_VFSSW() (rw_exit(&vfssw_lock)) 541 #define VFSSW_LOCKED() (RW_LOCK_HELD(&vfssw_lock)) 542 #define VFSSW_WRITE_LOCKED() (RW_WRITE_HELD(&vfssw_lock)) 543 /* 544 * VFS_SYNC flags. 545 */ 546 #define SYNC_ATTR 0x01 /* sync attributes only */ 547 #define SYNC_CLOSE 0x02 /* close open file */ 548 #define SYNC_ALL 0x04 /* force to sync all fs */ 549 550 #ifdef __cplusplus 551 } 552 #endif 553 554 #endif /* _SYS_VFS_H */ 555