/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * University Copyright- Copyright (c) 1982, 1986, 1988 * The Regents of the University of California * All Rights Reserved * * University Acknowledgment- Portions of this document are derived from * software developed by the University of California, Berkeley, and its * contributors. */ #ifndef _SYS_FS_UFS_INODE_H #define _SYS_FS_UFS_INODE_H #pragma ident "%Z%%M% %I% %E% SMI" #include <sys/isa_defs.h> #include <sys/fbuf.h> #include <sys/fdbuffer.h> #include <sys/fcntl.h> #include <sys/uio.h> #include <sys/t_lock.h> #include <sys/thread.h> #include <sys/cred.h> #include <sys/time.h> #include <sys/types32.h> #include <sys/fs/ufs_fs.h> #include <sys/fs/ufs_lockfs.h> #include <sys/fs/ufs_trans.h> #include <sys/kstat.h> #include <sys/fs/ufs_acl.h> #include <sys/fs/ufs_panic.h> #include <sys/dnlc.h> #ifdef __cplusplus extern "C" { #endif /* * The I node is the focus of all local file activity in UNIX. 
* There is a unique inode allocated for each active file, * each current directory, each mounted-on file, each mapping, * and the root. An inode is `named' by its dev/inumber pair. * Data in icommon is read in from permanent inode on volume. * * Each inode has 5 locks associated with it: * i_rwlock: Serializes ufs_write and ufs_setattr request * and allows ufs_read requests to proceed in parallel. * Serializes reads/updates to directories. * vfs_dqrwlock: Manages quota sub-system quiescence. See below. * i_contents: Protects almost all of the fields in the inode * except for those listed below. When held * in writer mode also protects those fields * listed under i_tlock. * i_tlock: When i_tlock is held with the i_contents reader * lock the i_atime, i_mtime, i_ctime, * i_delayoff, i_delaylen, i_nextrio, i_writes, i_flag * i_seq, i_writer & i_mapcnt fields are protected. * For more i_flag locking info see below. * ih_lock: Protects inode hash chain buckets * ifree_lock: Protects inode freelist * * Lock ordering: * i_rwlock > i_contents > i_tlock * i_rwlock > vfs_dqrwlock > i_contents(writer) > i_tlock * i_contents > i_tlock * vfs_dqrwlock > i_contents(writer) > i_tlock * ih_lock > i_contents > i_tlock * * Making major changes to quota sub-system state, while the file * system is mounted required the addition of another lock. The * primary lock in the quota sub-system is vfs_dqrwlock in the ufsvfs * structure. This lock is used to manage quota sub-system quiescence * for a particular file system. Major changes to quota sub-system * state (disabling quotas, enabling quotas, and setting new quota * limits) all require the file system to be quiescent and grabbing * vfs_dqrwlock as writer accomplishes this. On the other hand, * grabbing vfs_dqrwlock as reader makes the quota sub-system * non-quiescent and lets the quota sub-system know that now is not a * good time to change major quota sub-system state. 
Typically * vfs_dqrwlock is grabbed for reading before i_contents is grabbed for * writing. However, there are cases where vfs_dqrwlock is grabbed for * reading without a corresponding i_contents write grab because there * is no relevant inode. There are also cases where i_contents is * grabbed for writing when a vfs_dqrwlock read grab is not needed * because the inode changes do not affect quotas. * * Unfortunately, performance considerations have required that we be more * intelligent about using i_tlock when updating i_flag. Ideally, we would * have simply separated out several of the bits in i_flag into their own * ints to avoid problems. But, instead, we have implemented the following * rules: * * o You can update any i_flag field while holding the writer-contents, * or by holding the reader-contents AND holding i_tlock. * You can only call ITIMES_NOLOCK while holding the writer-contents, * or by holding the reader-contents AND holding i_tlock. * * o For a directory, holding the reader-rw_lock is sufficient for setting * IACC. * * o Races with IREF are avoided by holding the reader contents lock * and by holding i_tlock in ufs_rmidle, ufs_putapage, and ufs_getpage. * And by holding the writer-contents in ufs_iinactive. * * o The callers are no longer required to handle the calls to ITIMES * and ITIMES_NOLOCK. The functions that set the i_flag bits are * responsible for managing those calls. The exceptions are the * bmap routines. * * SVR4 Extended Fundamental Type (EFT) support: * The inode structure has been enhanced to support * 32-bit user-id, 32-bit group-id, and 32-bit device number. * Standard SVR4 ufs also supports 32-bit mode field. For the reason * of backward compatibility with the previous ufs disk format, * 32-bit mode field is not supported. 
*
 * The current inode structure is 100% backward compatible with
 * the previous inode structure if no user-id or group-id exceeds
 * USHRT_MAX, and no major or minor number of a device number
 * stored in an inode exceeds 255.
 *
 * Rules for managing i_seq:
 * o i_seq is locked under the same rules as i_flag
 * o The i_ctime or i_mtime MUST never change without increasing
 *   the value of i_seq.
 * o You may increase the value of i_seq without the timestamps
 *   changing, this may decrease the caller's performance but will
 *   be functionally correct.
 * o The common case is when IUPD or ICHG is set, increase i_seq
 *   and immediately call ITIMES* or ufs_iupdat to create a new timestamp.
 * o A less common case is the setting of IUPD or ICHG and while still
 *   holding the correct lock defer the timestamp and i_seq update
 *   until later, but it must still be done before the lock is released.
 *   bmap_write is an example of this, where the caller does the update.
 * o If multiple changes are being made with the timestamps being
 *   updated only at the end, a single increase of i_seq is allowed.
 * o If changes are made with IUPD or ICHG being set, but
 *   the controlling lock is being dropped before the timestamp is
 *   updated, there is a risk that another thread will also change
 *   the file, update i_flag, and push just one timestamp update.
 *   There is also the risk that another thread calls ITIMES or
 *   ufs_iupdat without setting IUPD|ICHG and thus not changing i_seq,
 *   this will cause ufs_imark to change the timestamps without changing
 *   i_seq. If the controlling lock is dropped, ISEQ must be set to
 *   force i_seq to be increased on next ufs_imark, but i_seq MUST still
 *   be increased by the original setting thread before its deferred
 *   call to ITIMES to ensure it is increased the correct number of times.
*/ #define UID_LONG (o_uid_t)65535 /* flag value to indicate uid is 32-bit long */ #define GID_LONG (o_uid_t)65535 /* flag value to indicate gid is 32-bit long */ #define NDADDR 12 /* direct addresses in inode */ #define NIADDR 3 /* indirect addresses in inode */ #define FSL_SIZE (NDADDR + NIADDR - 1) * sizeof (daddr32_t) /* max fast symbolic name length is 56 */ #define i_fs i_ufsvfs->vfs_bufp->b_un.b_fs #define i_vfs i_vnode->v_vfsp struct icommon { o_mode_t ic_smode; /* 0: mode and type of file */ short ic_nlink; /* 2: number of links to file */ o_uid_t ic_suid; /* 4: owner's user id */ o_gid_t ic_sgid; /* 6: owner's group id */ u_offset_t ic_lsize; /* 8: number of bytes in file */ #ifdef _KERNEL struct timeval32 ic_atime; /* 16: time last accessed */ struct timeval32 ic_mtime; /* 24: time last modified */ struct timeval32 ic_ctime; /* 32: last time inode changed */ #else time32_t ic_atime; /* 16: time last accessed */ int32_t ic_atspare; time32_t ic_mtime; /* 24: time last modified */ int32_t ic_mtspare; time32_t ic_ctime; /* 32: last time inode changed */ int32_t ic_ctspare; #endif daddr32_t ic_db[NDADDR]; /* 40: disk block addresses */ daddr32_t ic_ib[NIADDR]; /* 88: indirect blocks */ int32_t ic_flags; /* 100: cflags */ int32_t ic_blocks; /* 104: 512 byte blocks actually held */ int32_t ic_gen; /* 108: generation number */ int32_t ic_shadow; /* 112: shadow inode */ uid_t ic_uid; /* 116: long EFT version of uid */ gid_t ic_gid; /* 120: long EFT version of gid */ uint32_t ic_oeftflag; /* 124: extended attr directory ino, 0 = none */ }; /* * Large directories can be cached. Directory caching can take the following * states: */ typedef enum { CD_DISABLED_NOMEM = -2, CD_DISABLED_TOOBIG, CD_DISABLED, CD_ENABLED } cachedir_t; /* * Large Files: Note we use the inline functions load_double, store_double * to load and store the long long values of i_size. Therefore the * address of i_size must be eight byte aligned. 
Kmem_alloc of incore
 * inode structure makes sure that the structure is 8-byte aligned.
 *	XX64 - reorder this structure?
 */
typedef struct inode {
	/*
	 * The first three members must keep this order and position: the
	 * idle-queue head type declared later in this file mirrors them
	 * ("must match inode_t").
	 */
	struct	inode *i_chain[2];	/* must be first */
	struct	inode *i_freef;	/* free list forward - must be before i_ic */
	struct	inode *i_freeb;	/* free list back - must be before i_ic */
	struct 	icommon	i_ic;	/* Must be here */
	struct	vnode *i_vnode;	/* vnode associated with this inode */
	struct	vnode *i_devvp;	/* vnode for block I/O */
	dev_t	i_dev;		/* device where inode resides */
	ino_t	i_number;	/* i number, 1-to-1 with device address */
	off_t	i_diroff;	/* offset in dir, where we found last entry */
				/* just a hint - no locking needed */
	struct ufsvfs *i_ufsvfs; /* incore fs associated with inode */
	struct	dquot *i_dquot;	/* quota structure controlling this file */
	krwlock_t i_rwlock;	/* serializes write/setattr requests */
	krwlock_t i_contents;	/* protects (most of) inode contents */
	kmutex_t i_tlock;	/* protects time fields, i_flag */
	offset_t i_nextr;	/*					*/
				/* next byte read offset (read-ahead)	*/
				/*   No lock required			*/
				/*					*/
	uint_t	i_flag;		/* inode flags */
	uint_t	i_seq;		/* modification sequence number */
	cachedir_t i_cachedir;	/* Cache this directory on next lookup */
				/* - no locking needed  */
	long	i_mapcnt;	/* mappings to file pages */
	int	*i_map;		/* block list for the corresponding file */
	dev_t	i_rdev;		/* INCORE rdev from i_oldrdev by ufs_iget */
	size_t	i_delaylen;	/* delayed writes, units=bytes */
	offset_t i_delayoff;	/* where we started delaying */
	offset_t i_nextrio;	/* where to start the next clust */
	long	i_writes;	/* number of outstanding bytes in write q */
	kcondvar_t i_wrcv;	/* sleep/wakeup for write throttle */
	offset_t i_doff;	/* dinode byte offset in file system */
	si_t *i_ufs_acl;	/* pointer to acl entry */
	dcanchor_t i_danchor;	/* directory cache anchor */
	kthread_t *i_writer;	/* thread which is in window in wrip() */
} inode_t;

/*
 * On-disk inode wrapper: a struct icommon overlaid with a 128-byte
 * character array, so the union is at least 128 bytes in size.
 */
struct	dinode {
	union {
		struct	icommon di_icom;
		char	di_size[128];
	} di_un;
};
/*
 * Shorthand member names for the struct icommon embedded in the in-core
 * inode (i_ic).  Each name is meant to follow an inode dereference,
 * e.g. ip->i_mode.
 */
#define	i_mode		i_ic.ic_smode
#define	i_nlink		i_ic.ic_nlink
#define	i_uid		i_ic.ic_uid
#define	i_gid		i_ic.ic_gid
#define	i_smode		i_ic.ic_smode
#define	i_suid		i_ic.ic_suid
#define	i_sgid		i_ic.ic_sgid

#define	i_size		i_ic.ic_lsize
#define	i_db		i_ic.ic_db
#define	i_ib		i_ic.ic_ib

#define	i_atime		i_ic.ic_atime
#define	i_mtime		i_ic.ic_mtime
#define	i_ctime		i_ic.ic_ctime

#define	i_shadow	i_ic.ic_shadow
#define	i_oeftflag	i_ic.ic_oeftflag
#define	i_blocks	i_ic.ic_blocks
#define	i_cflags	i_ic.ic_flags
#ifdef _LITTLE_ENDIAN
/*
 * Originally done on x86, but carried on to all other little-endian
 * architectures, which provides for file system compatibility.
 */
#define	i_ordev		i_ic.ic_db[1]	/* USL SVR4 compatibility */
#else
#define	i_ordev		i_ic.ic_db[0]	/* was i_oldrdev */
#endif
#define	i_gen		i_ic.ic_gen
#define	i_forw		i_chain[0]
#define	i_back		i_chain[1]

/* EFT transition aids - obsolete */
#define	oEFT_MAGIC	0x90909090
#define	di_oeftflag	di_ic.ic_oeftflag

/*
 * The same shorthands for struct dinode; di_ic names the struct icommon
 * inside the di_un union.
 */
#define	di_ic		di_un.di_icom
#define	di_mode		di_ic.ic_smode
#define	di_nlink	di_ic.ic_nlink
#define	di_uid		di_ic.ic_uid
#define	di_gid		di_ic.ic_gid
#define	di_smode	di_ic.ic_smode
#define	di_suid		di_ic.ic_suid
#define	di_sgid		di_ic.ic_sgid

#define	di_size		di_ic.ic_lsize
#define	di_db		di_ic.ic_db
#define	di_ib		di_ic.ic_ib

#define	di_atime	di_ic.ic_atime
#define	di_mtime	di_ic.ic_mtime
#define	di_ctime	di_ic.ic_ctime
#define	di_cflags	di_ic.ic_flags

#ifdef _LITTLE_ENDIAN
#define	di_ordev	di_ic.ic_db[1]
#else
#define	di_ordev	di_ic.ic_db[0]
#endif
#define	di_shadow	di_ic.ic_shadow
#define	di_blocks	di_ic.ic_blocks
#define	di_gen		di_ic.ic_gen

/* flags (bits of i_flag) */
#define	IUPD		0x0001		/* file has been modified */
#define	IACC		0x0002		/* inode access time to be updated */
#define	IMOD		0x0004		/* inode has been modified */
#define	ICHG		0x0008		/* inode has been changed */
#define	INOACC		0x0010		/* no access time update in getpage */
#define	IMODTIME	0x0020		/* mod time already set */
#define	IREF		0x0040		/* inode is being referenced */
#define	ISYNC
0x0080		/* do all allocation synchronously */
#define	IFASTSYMLNK	0x0100		/* fast symbolic link */
#define	IMODACC		0x0200		/* only access time changed; */
					/*   filesystem won't become active */
#define	IATTCHG		0x0400		/* only size/blocks have changed */
#define	IBDWRITE	0x0800		/* the inode has been scheduled for */
					/* write operation asynchronously */
#define	ISTALE		0x1000		/* inode couldn't be read from disk */
#define	IDEL		0x2000		/* inode is being deleted */
#define	IDIRECTIO	0x4000		/* attempt directio */
#define	ISEQ		0x8000		/* deferred i_seq increase */
#define	IJUNKIQ		0x10000		/* on junk idle queue */
#define	IQUIET		0x20000		/* No file system full messages */

/* cflags (bits of i_cflags / ic_flags) */
#define	IXATTR		0x0001		/* extended attribute */
#define	IFALLOCATE	0x0002		/* fallocate'd file */

/* modes (octal) */
#define	IFMT		0170000		/* type of file */
#define	IFIFO		0010000		/* named pipe (fifo) */
#define	IFCHR		0020000		/* character special */
#define	IFDIR		0040000		/* directory */
#define	IFBLK		0060000		/* block special */
#define	IFREG		0100000		/* regular */
#define	IFLNK		0120000		/* symbolic link */
#define	IFSHAD		0130000		/* shadow inode */
#define	IFSOCK		0140000		/* socket */
#define	IFATTRDIR	0160000		/* Attribute directory */

#define	ISUID		04000		/* set user id on execution */
#define	ISGID		02000		/* set group id on execution */
#define	ISVTX		01000		/* save swapped text even after use */
#define	IREAD		0400		/* read, write, execute permissions */
#define	IWRITE		0200
#define	IEXEC		0100

/* specify how the inode info is written in ufs_syncip() */
#define	I_SYNC		1		/* wait for the inode written to disk */
#define	I_DSYNC		2		/* wait for the inode written to disk */
					/* only if IATTCHG is set */
#define	I_ASYNC		0		/* don't wait for the inode written */

/* flags passed to ufs_itrunc(), indirtrunc(), and free() */
#define	I_FREE	0x00000001		/* inode is being freed */
#define	I_DIR	0x00000002		/* inode is a directory */
#define	I_IBLK	0x00000004		/* indirect block */
#define	I_CHEAP	0x00000008		/* cheap free */
#define	I_SHAD
0x00000010		/* inode is a shadow inode */
#define	I_QUOTA	0x00000020		/* quota file */
#define	I_NOCANCEL	0x40		/* Don't cancel these fragments */
#define	I_ACCT	0x00000080		/* Update ufsvfs' unreclaimed_blocks */

/*
 * If ufs_dircheckforname() fails to find an entry with the given name,
 * this "slot" structure holds state for ufs_direnter_*() as to where
 * there is space to put an entry with that name.
 * If ufs_dircheckforname() finds an entry with the given name, this structure
 * holds state for ufs_dirrename() and ufs_dirremove() as to where the
 * entry is.  "status" indicates what ufs_dircheckforname() found:
 *      NONE            name not found, large enough free slot not found,
 *      FOUND           name not found, large enough free slot found
 *      EXIST           name found
 * If ufs_dircheckforname() fails due to an error, this structure is not
 * filled in.
 *
 * After ufs_dircheckforname() succeeds the values are:
 *      status  offset          size            fbp, ep
 *      ------  ------          ----            -------
 *      NONE    end of dir      needed          not valid
 *      FOUND   start of entry  of ent          both valid if fbp != NULL
 *      EXIST   start of entry  of prev ent     valid
 *
 * "endoff" is set to 0 if an entry with the given name is found, or if no
 * free slot could be found or made; this means that the directory should not
 * be truncated.  If the entry was found, the search terminates so
 * ufs_dircheckforname() didn't find out where the last valid entry in the
 * directory was, so it doesn't know where to cut the directory off; if no free
 * slot could be found or made, the directory has to be extended to make room
 * for the new entry, so there's nothing to cut off.
 * Otherwise, "endoff" is set to the larger of the offset of the last
 * non-empty entry in the directory and the offset at which the new entry
 * will be placed.
This is used by ufs_diraddentry(); if a new
 * entry is to be added to the directory, any complete directory blocks at the
 * end of the directory that contain no non-empty entries are lopped off the
 * end, thus shrinking the directory dynamically.
 */
typedef enum {NONE, FOUND, EXIST} slotstat_t;
struct ufs_slot {
	struct	direct *ep;	/* pointer to slot */
	struct	fbuf *fbp;	/* dir buf where slot is */
	off_t	offset;		/* offset of area with free space */
	off_t	endoff;		/* last useful location found in search */
	slotstat_t status;	/* status of slot */
	int	size;		/* size of area at slotoffset */
	int	cached;		/* cached directory */
};

/*
 * Statistics on inodes
 * Not protected by locks
 */
struct instats {
	kstat_named_t in_size;		/* current cache size */
	kstat_named_t in_maxsize;	/* maximum cache size */
	kstat_named_t in_hits;		/* cache hits */
	kstat_named_t in_misses;	/* cache misses */
	kstat_named_t in_malloc;	/* kmem_alloc'ed */
	kstat_named_t in_mfree;		/* kmem_free'd */
	kstat_named_t in_maxreached;	/* Largest size reached by cache */
	kstat_named_t in_frfront;	/* # put at front of freelist */
	kstat_named_t in_frback;	/* # put at back of freelist */
	kstat_named_t in_qfree;		/* q's to delete thread */
	kstat_named_t in_scan;		/* # inodes scanned */
	kstat_named_t in_tidles;	/* # inodes idled by idle thread */
	kstat_named_t in_lidles;	/* # inodes idled by ufs_lookup */
	kstat_named_t in_vidles;	/* # inodes idled by ufs_vget */
	kstat_named_t in_kcalloc;	/* # inodes kmem_cache_alloced */
	kstat_named_t in_kcfree;	/* # inodes kmem_cache_freed */
	kstat_named_t in_poc;		/* # push-on-close's */
};

#ifdef _KERNEL

/*
 * Extended attributes
 */

#define	XATTR_DIR_NAME	"/@/"
extern	int	ufs_ninode;		/* high-water mark for inode cache */

extern struct vnodeops *ufs_vnodeops;	/* vnode operations for ufs */
extern const struct fs_operation_def ufs_vnodeops_template[];

/*
 * Convert between inode pointers and vnode pointers
 * (VTOI reads the vnode's v_data, ITOV reads the inode's i_vnode).
 */
#define	VTOI(VP)	((struct inode *)(VP)->v_data)
#define	ITOV(IP)	((struct vnode
*)(IP)->i_vnode)

/*
 * convert to fs
 */
#define	ITOF(IP)	((struct fs *)(IP)->i_fs)

/*
 * Convert between vnode types and inode formats
 */
extern enum vtype	iftovt_tab[];

#ifdef notneeded

/* Look at sys/mode.h and os/vnode.c */

extern int		vttoif_tab[];

#endif

/*
 * Mark an inode with the current (unique) timestamp.
 * (Note that UFS's concept of time only keeps 32 bits of seconds
 * in the on-disk format).
 *
 * NOTE(review): iuniqtime is declared here without "extern", so every
 * translation unit that includes this header gets a tentative definition
 * of it.  That only links when the toolchain merges common symbols;
 * confirm where the object is meant to be defined before changing this.
 */
struct timeval32 iuniqtime;
extern kmutex_t ufs_iuniqtime_lock;

/*
 * ITIMES_NOLOCK calls ufs_itimes_nolock() directly; ITIMES wraps the same
 * call in mutex_enter/mutex_exit on the inode's i_tlock.
 *
 * NOTE(review): ITIMES expands to a bare brace block rather than
 * do { } while (0), so "if (x) ITIMES(ip); else ..." does not parse.
 */
#define	ITIMES_NOLOCK(ip) ufs_itimes_nolock(ip)

#define	ITIMES(ip) { \
	mutex_enter(&(ip)->i_tlock); \
	ITIMES_NOLOCK(ip); \
	mutex_exit(&(ip)->i_tlock); \
}

/*
 * The following interfaces are used to do atomic loads and stores
 * of an inode's i_size, which is a long long data type.
 *
 * For LP64, we just do a load or a store - atomicity and alignment
 * are 8-byte guaranteed.  For x86 there are no such instructions,
 * so we grab i_contents as reader to get the size; we already hold
 * it as writer when we're setting the size.
 *
 * Note that the LP64 forms expand to an expression while the non-LP64
 * forms expand to a brace block, so portable callers can only use these
 * as stand-alone statements.
 */
#ifdef _LP64

#define	UFS_GET_ISIZE(resultp, ip)	*(resultp) = (ip)->i_size
#define	UFS_SET_ISIZE(value, ip)	(ip)->i_size = (value)

#else	/* _LP64 */

#define	UFS_GET_ISIZE(resultp, ip)				\
	{							\
		rw_enter(&(ip)->i_contents, RW_READER);		\
		*(resultp) = (ip)->i_size;			\
		rw_exit(&(ip)->i_contents);			\
	}
#define	UFS_SET_ISIZE(value, ip)				\
	{							\
		ASSERT(RW_WRITE_HELD(&(ip)->i_contents));	\
		(ip)->i_size = (value);				\
	}

#endif	/* _LP64 */

/*
 * Allocate the specified block in the inode
 * and make sure any in-core pages are initialized.
*/
/* Expands to a bmap_write() call with BI_NORMAL and a NULL block pointer. */
#define	BMAPALLOC(ip, off, size, cr) \
	bmap_write((ip), (u_offset_t)(off), (size), BI_NORMAL, NULL, cr)

#define	ESAME	(-1)		/* trying to rename linked files (special) */

#define	UFS_HOLE	(daddr32_t)-1	/* value used when no block allocated */

/*
 * enums
 */

/* direnter ops */
enum de_op { DE_CREATE, DE_MKDIR, DE_LINK, DE_RENAME, DE_SYMLINK, DE_ATTRDIR};

/* dirremove ops */
enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME };

/*
 * block initialization type for bmap_write
 *
 * BI_NORMAL - allocate and zero fill pages in memory
 * BI_ALLOC_ONLY - only allocate the block, do not zero out pages in mem
 * BI_FALLOCATE - allocate only, do not zero out pages, and store as negative
 *                block number in inode block list
 */
enum bi_type { BI_NORMAL, BI_ALLOC_ONLY, BI_FALLOCATE };

/*
 * This overlays the fid structure (see vfs.h)
 *
 * LP64 note: we use int32_t instead of ino_t since UFS does not use
 * inode numbers larger than 32-bits and ufid's are passed to NFS
 * which expects them to not grow in size beyond 10 bytes (12 including
 * the length).
*/
struct ufid {
	ushort_t ufid_len;
	ushort_t ufid_flags;
	int32_t	ufid_ino;
	int32_t	ufid_gen;
};

/*
 * each ufs thread (see ufs_thread.c) is managed by this struct
 */
struct ufs_q {
	/* Queue head; the same pointer viewed as one of three types. */
	union uq_head {
		void		*_uq_generic;	/* first entry on q */
		struct inode	*_uq_i;
		ufs_failure_t	*_uq_uf;
	} _uq_head;
	int		uq_ne;		/* # of entries/failures found */
	int		uq_lowat;	/* thread runs when ne == lowat */
	int		uq_hiwat;	/* synchronous idle if ne >= hiwat */
	ushort_t	uq_flags;	/* flags (see below) */
	kcondvar_t	uq_cv;		/* for sleep/wakeup */
	kthread_id_t	uq_threadp;	/* thread managing this q */
	kmutex_t	uq_mutex;	/* protects this struct */
};

/* Shorthand names for the three views of the queue head union. */
#define	uq_head		_uq_head._uq_generic
#define	uq_ihead	_uq_head._uq_i
#define	uq_ufhead	_uq_head._uq_uf

/*
 * uq_flags
 */
#define	UQ_EXIT		(0x0001)	/* q server exits at its convenience */
#define	UQ_WAIT		(0x0002)	/* thread is waiting on q server */
#define	UQ_SUSPEND	(0x0004)	/* request for suspension */
#define	UQ_SUSPENDED	(0x0008)	/* thread has suspended itself */
#define	UQ_FASTCLIENTS	(0x0010)	/* fast clients in ufs_delq_info */

/*
 * When logging is enabled, statvfs must account for blocks and files that
 * may be on the delete queue.  Protected by ufsvfsp->vfs_delete.uq_mutex
 */
struct ufs_delq_info {
	kcondvar_t	delq_fast_cv;	/* for fast-operating clients */
	u_offset_t	delq_unreclaimed_blocks;
	ulong_t		delq_unreclaimed_files;
};


/*
 * global idle queues
 * The queues are sized dynamically in proportion to ufs_ninode
 * which, unless overridden, scales with the amount of memory.
 * The idle queue is halved whenever it hits the low water mark
 * (1/4 of ufs_ninode), but can burst to sizes much larger. The number
 * of hash queues is currently maintained to give on average IQHASHQLEN
 * entries when the idle queue is at the low water mark.
 * Note, we do not need to search along the hash queues, but use them
 * in order to batch together geographically local inodes to allow
 * their updates (via the log or buffer cache) to require less disk seeks.
* This gives an incredible performance boost for logging and a boost for * non logging file systems. */ typedef struct { inode_t *i_chain[2]; /* must match inode_t, but unused */ inode_t *i_freef; /* must match inode_t, idle list forward */ inode_t *i_freeb; /* must match inode_t, idle list back */ } iqhead_t; extern struct ufs_q ufs_idle_q; /* used by global ufs idle thread */ extern iqhead_t *ufs_junk_iq; /* junk idle queues */ extern iqhead_t *ufs_useful_iq; /* useful idle queues */ extern int ufs_njunk_iq; /* number of entries in junk iq */ extern int ufs_nuseful_iq; /* number of entries in useful iq */ extern int ufs_niqhash; /* number of iq hash qs - power of 2 */ extern int ufs_iqhashmask; /* iq hash mask = ufs_niqhash - 1 */ #define IQHASHQLEN 32 /* see comments above */ #define INOCGSHIFT 7 /* 128 inodes per cylinder group */ #define IQHASH(ip) (((ip)->i_number >> INOCGSHIFT) & ufs_iqhashmask) #define IQNEXT(i) ((i) + 1) & ufs_iqhashmask /* next idle queue */ extern struct ufs_q ufs_hlock; /* used by global ufs hlock thread */ /* * vfs_lfflags flags */ #define UFS_LARGEFILES ((ushort_t)0x1) /* set if mount allows largefiles */ /* * vfs_dfritime flags */ #define UFS_DFRATIME 0x1 /* deferred access time */ /* * UFS VFS private data. * * UFS file system instances may be linked on several lists. * * - The vfs_next field chains together every extant ufs instance; this * list is rooted at ufs_instances and should be used in preference to * the overall vfs list (which is properly the province of the generic * file system code, not of file system implementations). This same list * link is used during forcible unmounts to chain together instances that * can't yet be completely dismantled, * * - The vfs_wnext field is used within ufs_update to form a work list of * UFS instances to be synced out. 
*/
typedef struct ufsvfs {
	struct vfs	*vfs_vfs;	/* back link			*/
	struct ufsvfs	*vfs_next;	/* instance list link		*/
	struct ufsvfs	*vfs_wnext;	/* work list link		*/
	struct vnode	*vfs_root;	/* root vnode			*/
	struct buf	*vfs_bufp;	/* buffer containing superblock */
	struct vnode	*vfs_devvp;	/* block device vnode		*/
	ushort_t	vfs_lfflags;	/* Large files (set by mount)   */
	ushort_t	vfs_qflags;	/* QUOTA: filesystem flags	*/
	struct inode	*vfs_qinod;	/* QUOTA: pointer to quota file */
	uint_t		vfs_btimelimit;	/* QUOTA: block time limit	*/
	uint_t		vfs_ftimelimit;	/* QUOTA: file time limit	*/
	krwlock_t	vfs_dqrwlock;	/* QUOTA: protects quota fields */
	/*
	 * some fs local threads
	 */
	struct ufs_q	vfs_delete;	/* delayed inode delete */
	struct ufs_q	vfs_reclaim;	/* reclaim open, deleted files */

	/*
	 * This is copied from the super block at mount time.
	 */
	int		vfs_nrpos;	/* # rotational positions */
	/*
	 * This lock protects cg's and super block pointed at by
	 * vfs_bufp->b_fs.  Locks contents of fs and cg's and contents
	 * of vfs_dio.
	 */
	kmutex_t	vfs_lock;
	struct ulockfs	vfs_ulockfs;	/* ufs lockfs support */
	uint_t		vfs_dio;	/* delayed io (_FIODIO) */
	uint_t		vfs_nointr;	/* disallow lockfs interrupts */
	uint_t		vfs_nosetsec;	/* disallow ufs_setsecattr */
	uint_t		vfs_syncdir;	/* synchronous local directory ops */
	uint_t		vfs_dontblock;	/* don't block on forced umount */

	/*
	 * trans (logging ufs) stuff
	 */
	uint_t		vfs_domatamap;	/* set if matamap enabled */
	ulong_t		vfs_maxacl;	/* transaction stuff - max acl size */
	ulong_t		vfs_dirsize;	/* logspace for directory creation */
	ulong_t		vfs_avgbfree;	/* average free blks in cg (blkpref) */
	/*
	 * Some useful constants
	 */
	int	vfs_nindirshift;	/* calc. from fs_nindir */
	int	vfs_nindiroffset;	/* calc. from fs_nindir */
	int	vfs_ioclustsz;		/* bytes in read/write cluster */
	int	vfs_iotransz;		/* max device i/o transfer size  */

	vfs_ufsfx_t	vfs_fsfx;	/* lock/fix-on-panic support */
	/*
	 * More useful constants
	 */
	int	vfs_minfrags;		/* calc.
from fs_minfree */ /* * Force DirectIO on all files */ uint_t vfs_forcedirectio; /* * Deferred inode time related fields */ clock_t vfs_iotstamp; /* last I/O timestamp */ uint_t vfs_dfritime; /* deferred inode time flags */ /* * Some more useful info */ dev_t vfs_dev; /* device mounted from */ struct ml_unit *vfs_log; /* pointer to embedded log struct */ uint_t vfs_noatime; /* disable inode atime updates */ /* * snapshot stuff */ void *vfs_snapshot; /* snapshot handle */ /* * Controls logging "file system full" messages to messages file */ clock_t vfs_lastwhinetime; int vfs_nolog_si; /* not logging summary info */ int vfs_validfs; /* indicates mounted fs */ /* * Additional information about vfs_delete above */ struct ufs_delq_info vfs_delete_info; /* what's on the delete queue */ } ufsvfs_t; #define vfs_fs vfs_bufp->b_un.b_fs /* * values for vfs_validfs */ #define UT_UNMOUNTED 0 #define UT_MOUNTED 1 #define UT_HLOCKING 2 /* inohsz is guaranteed to be a power of 2 */ #define INOHASH(ino) (((int)ino) & (inohsz - 1)) #define ISFALLOCBLK(ip, bn) \ (((bn) < 0) && ((bn) % ip->i_fs->fs_bsize == 0) && \ ((ip)->i_cflags & IFALLOCATE && (bn) != UFS_HOLE)) union ihead { union ihead *ih_head[2]; struct inode *ih_chain[2]; }; extern union ihead *ihead; extern kmutex_t *ih_lock; extern int *ih_ne; extern int inohsz; extern clock_t ufs_iowait; #endif /* _KERNEL */ /* * ufs function prototypes */ #if defined(_KERNEL) && !defined(_BOOT) extern void ufs_iinit(void); extern int ufs_iget(struct vfs *, ino_t, struct inode **, cred_t *); extern int ufs_iget_alloced(struct vfs *, ino_t, struct inode **, cred_t *); extern void ufs_reset_vnode(vnode_t *); extern void ufs_iinactive(struct inode *); extern void ufs_iupdat(struct inode *, int); extern int ufs_rmidle(struct inode *); extern int ufs_itrunc(struct inode *, u_offset_t, int, cred_t *); extern int ufs_iaccess(void *, int, cred_t *); extern int rdip(struct inode *, struct uio *, int, struct cred *); extern int wrip(struct inode *, 
struct uio *, int, struct cred *); extern void ufs_imark(struct inode *); extern void ufs_itimes_nolock(struct inode *); extern int ufs_dirlook(struct inode *, char *, struct inode **, cred_t *, int); extern int ufs_direnter_cm(struct inode *, char *, enum de_op, struct vattr *, struct inode **, cred_t *, int); extern int ufs_direnter_lr(struct inode *, char *, enum de_op, struct inode *, struct inode *, cred_t *, vnode_t **); extern int ufs_dircheckpath(ino_t, struct inode *, struct inode *, struct cred *); extern int ufs_dirmakeinode(struct inode *, struct inode **, struct vattr *, enum de_op, cred_t *); extern int ufs_dirremove(struct inode *, char *, struct inode *, vnode_t *, enum dr_op, cred_t *, vnode_t **); extern int ufs_dircheckforname(struct inode *, char *, int, struct ufs_slot *, struct inode **, struct cred *, int); extern int ufs_xattrdirempty(struct inode *, ino_t, cred_t *); extern int blkatoff(struct inode *, off_t, char **, struct fbuf **); extern void sbupdate(struct vfs *); extern int ufs_ialloc(struct inode *, ino_t, mode_t, struct inode **, cred_t *); extern void ufs_ifree(struct inode *, ino_t, mode_t); extern void free(struct inode *, daddr_t, off_t, int); extern int alloc(struct inode *, daddr_t, int, daddr_t *, cred_t *); extern int realloccg(struct inode *, daddr_t, daddr_t, int, int, daddr_t *, cred_t *); extern int ufs_allocsp(struct vnode *, struct flock64 *, cred_t *); extern int ufs_freesp(struct vnode *, struct flock64 *, int, cred_t *); extern ino_t dirpref(inode_t *); extern daddr_t blkpref(struct inode *, daddr_t, int, daddr32_t *); extern daddr_t contigpref(ufsvfs_t *, size_t); extern int ufs_rdwri(enum uio_rw, int, struct inode *, caddr_t, ssize_t, offset_t, enum uio_seg, int *, cred_t *); extern int bmap_read(struct inode *, u_offset_t, daddr_t *, int *); extern int bmap_write(struct inode *, u_offset_t, int, enum bi_type, daddr_t *, struct cred *); extern int bmap_has_holes(struct inode *); extern int bmap_find(struct inode 
*, boolean_t, u_offset_t *); extern int bmap_set_bn(struct vnode *, u_offset_t, daddr32_t); extern void ufs_vfs_add(struct ufsvfs *); extern void ufs_vfs_remove(struct ufsvfs *); extern void ufs_sbwrite(struct ufsvfs *); extern void ufs_update(int); extern int ufs_getsummaryinfo(dev_t, struct ufsvfs *, struct fs *); extern int ufs_putsummaryinfo(dev_t, struct ufsvfs *, struct fs *); extern int ufs_syncip(struct inode *, int, int, top_t); extern int ufs_sync_indir(struct inode *); extern int ufs_indirblk_sync(struct inode *, offset_t); extern int ufs_badblock(struct inode *, daddr_t); extern int ufs_indir_badblock(struct inode *, daddr32_t *); extern void ufs_notclean(struct ufsvfs *); extern void ufs_checkclean(struct vfs *); extern int isblock(struct fs *, uchar_t *, daddr_t); extern void setblock(struct fs *, uchar_t *, daddr_t); extern void clrblock(struct fs *, uchar_t *, daddr_t); extern int isclrblock(struct fs *, uchar_t *, daddr_t); extern void fragacct(struct fs *, int, int32_t *, int); extern int skpc(char, uint_t, char *); extern int ufs_fbwrite(struct fbuf *, struct inode *); extern int ufs_fbiwrite(struct fbuf *, struct inode *, daddr_t, long); extern int ufs_putapage(struct vnode *, struct page *, u_offset_t *, size_t *, int, struct cred *); extern inode_t *ufs_alloc_inode(ufsvfs_t *, ino_t); extern void ufs_free_inode(inode_t *); /* * special stuff */ extern void ufs_setreclaim(struct inode *); extern int ufs_scan_inodes(int, int (*)(struct inode *, void *), void *, struct ufsvfs *); extern int ufs_sync_inode(struct inode *, void *); extern int ufs_sticky_remove_access(struct inode *, struct inode *, struct cred *); /* * quota */ extern int chkiq(struct ufsvfs *, int, struct inode *, uid_t, int, struct cred *, char **errp, size_t *lenp); /* * ufs thread stuff */ extern void ufs_thread_delete(struct vfs *); extern void ufs_delete_drain(struct vfs *, int, int); extern void ufs_delete(struct ufsvfs *, struct inode *, int); extern void 
ufs_inode_cache_reclaim(void *); extern void ufs_idle_drain(struct vfs *); extern void ufs_idle_some(int); extern void ufs_thread_idle(void); extern void ufs_thread_reclaim(struct vfs *); extern void ufs_thread_init(struct ufs_q *, int); extern void ufs_thread_start(struct ufs_q *, void (*)(), struct vfs *); extern void ufs_thread_exit(struct ufs_q *); extern void ufs_thread_suspend(struct ufs_q *); extern void ufs_thread_continue(struct ufs_q *); extern void ufs_thread_hlock(void *); extern void ufs_delete_init(struct ufsvfs *, int); extern void ufs_delete_adjust_stats(struct ufsvfs *, struct statvfs64 *); extern void ufs_delete_drain_wait(struct ufsvfs *, int); /* * ufs lockfs stuff */ struct seg; extern int ufs_reconcile_fs(struct vfs *, struct ufsvfs *, int); extern int ufs_quiesce(struct ulockfs *); extern int ufs_flush(struct vfs *); extern int ufs_fiolfs(struct vnode *, struct lockfs *, int); extern int ufs__fiolfs(struct vnode *, struct lockfs *, int, int); extern int ufs_fiolfss(struct vnode *, struct lockfs *); extern int ufs_fioffs(struct vnode *, char *, struct cred *); extern int ufs_check_lockfs(struct ufsvfs *, struct ulockfs *, ulong_t); extern int ufs_lockfs_begin(struct ufsvfs *, struct ulockfs **, ulong_t); extern int ufs_lockfs_begin_getpage(struct ufsvfs *, struct ulockfs **, struct seg *, int, uint_t *); extern void ufs_lockfs_end(struct ulockfs *); /* * ufs acl stuff */ extern int ufs_si_inherit(struct inode *, struct inode *, o_mode_t, cred_t *); extern void si_cache_init(void); extern int ufs_si_load(struct inode *, cred_t *); extern void ufs_si_del(struct inode *); extern int ufs_acl_access(struct inode *, int, cred_t *); extern void ufs_si_cache_flush(dev_t); extern int ufs_si_free(si_t *, struct vfs *, cred_t *); extern int ufs_acl_setattr(struct inode *, struct vattr *, cred_t *); extern int ufs_acl_get(struct inode *, vsecattr_t *, int, cred_t *); extern int ufs_acl_set(struct inode *, vsecattr_t *, int, cred_t *); /* * ufs directio 
stuff */ extern void ufs_directio_init(); extern int ufs_directio_write(struct inode *, uio_t *, int, int, cred_t *, int *); extern int ufs_directio_read(struct inode *, uio_t *, cred_t *, int *); #define DIRECTIO_FAILURE (0) #define DIRECTIO_SUCCESS (1) /* * ufs extensions for PXFS */ int ufs_rdwr_data(vnode_t *vp, u_offset_t offset, size_t len, fdbuffer_t *fdb, int flags, cred_t *cr); int ufs_alloc_data(vnode_t *vp, u_offset_t offset, size_t *len, fdbuffer_t *fdb, int flags, cred_t *cr); /* * prototypes to support the forced unmount */ void ufs_freeze(struct ulockfs *, struct lockfs *); int ufs_thaw(struct vfs *, struct ufsvfs *, struct ulockfs *); /* * extended attributes */ int ufs_xattrmkdir(inode_t *, inode_t **, int, struct cred *); int ufs_xattr_getattrdir(vnode_t *, inode_t **, int, struct cred *); void ufs_unhook_shadow(inode_t *, inode_t *); #endif /* defined(_KERNEL) && !defined(_BOOT) */ #ifdef __cplusplus } #endif #endif /* _SYS_FS_UFS_INODE_H */