1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * University Copyright- Copyright (c) 1982, 1986, 1988 32 * The Regents of the University of California 33 * All Rights Reserved 34 * 35 * University Acknowledgment- Portions of this document are derived from 36 * software developed by the University of California, Berkeley, and its 37 * contributors. 38 */ 39 40 #ifndef _SYS_FS_UFS_INODE_H 41 #define _SYS_FS_UFS_INODE_H 42 43 #pragma ident "%Z%%M% %I% %E% SMI" 44 45 #include <sys/isa_defs.h> 46 #include <sys/fbuf.h> 47 #include <sys/fdbuffer.h> 48 #include <sys/fcntl.h> 49 #include <sys/uio.h> 50 #include <sys/t_lock.h> 51 #include <sys/thread.h> 52 #include <sys/cred.h> 53 #include <sys/time.h> 54 #include <sys/types32.h> 55 #include <sys/fs/ufs_fs.h> 56 #include <sys/fs/ufs_lockfs.h> 57 #include <sys/fs/ufs_trans.h> 58 #include <sys/kstat.h> 59 #include <sys/fs/ufs_acl.h> 60 #include <sys/fs/ufs_panic.h> 61 #include <sys/dnlc.h> 62 63 #ifdef __cplusplus 64 extern "C" { 65 #endif 66 67 /* 68 * The I node is the focus of all local file activity in UNIX. 69 * There is a unique inode allocated for each active file, 70 * each current directory, each mounted-on file, each mapping, 71 * and the root. An inode is `named' by its dev/inumber pair. 72 * Data in icommon is read in from permanent inode on volume. 73 * 74 * Each inode has 5 locks associated with it: 75 * i_rwlock: Serializes ufs_write and ufs_setattr request 76 * and allows ufs_read requests to proceed in parallel. 77 * Serializes reads/updates to directories. 78 * vfs_dqrwlock: Manages quota sub-system quiescence. See below. 79 * i_contents: Protects almost all of the fields in the inode 80 * except for those listed below. When held 81 * in writer mode also protects those fields 82 * listed under i_tlock. 83 * i_tlock: When i_tlock is held with the i_contents reader 84 * lock the i_atime, i_mtime, i_ctime, 85 * i_delayoff, i_delaylen, i_nextrio, i_writes, i_flag 86 * i_seq, i_writer & i_mapcnt fields are protected. 87 * For more i_flag locking info see below. 88 * ih_lock: Protects inode hash chain buckets 89 * ifree_lock: Protects inode freelist 90 * 91 * Lock ordering: 92 * i_rwlock > i_contents > i_tlock 93 * i_rwlock > vfs_dqrwlock > i_contents(writer) > i_tlock 94 * i_contents > i_tlock 95 * vfs_dqrwlock > i_contents(writer) > i_tlock 96 * ih_lock > i_contents > i_tlock 97 * 98 * Making major changes to quota sub-system state, while the file 99 * system is mounted required the addition of another lock. The 100 * primary lock in the quota sub-system is vfs_dqrwlock in the ufsvfs 101 * structure. This lock is used to manage quota sub-system quiescence 102 * for a particular file system. Major changes to quota sub-system 103 * state (disabling quotas, enabling quotas, and setting new quota 104 * limits) all require the file system to be quiescent and grabbing 105 * vfs_dqrwlock as writer accomplishes this. On the other hand, 106 * grabbing vfs_dqrwlock as reader makes the quota sub-system 107 * non-quiescent and lets the quota sub-system know that now is not a 108 * good time to change major quota sub-system state. Typically 109 * vfs_dqrwlock is grabbed for reading before i_contents is grabbed for 110 * writing. However, there are cases where vfs_dqrwlock is grabbed for 111 * reading without a corresponding i_contents write grab because there 112 * is no relevant inode. There are also cases where i_contents is 113 * grabbed for writing when a vfs_dqrwlock read grab is not needed 114 * because the inode changes do not affect quotas. 115 * 116 * Unfortunately, performance considerations have required that we be more 117 * intelligent about using i_tlock when updating i_flag. Ideally, we would 118 * have simply separated out several of the bits in i_flag into their own 119 * ints to avoid problems. But, instead, we have implemented the following 120 * rules: 121 * 122 * o You can update any i_flag field while holding the writer-contents, 123 * or by holding the reader-contents AND holding i_tlock. 124 * You can only call ITIMES_NOLOCK while holding the writer-contents, 125 * or by holding the reader-contents AND holding i_tlock. 126 * 127 * o For a directory, holding the reader-rw_lock is sufficient for setting 128 * IACC. 129 * 130 * o Races with IREF are avoided by holding the reader contents lock 131 * and by holding i_tlock in ufs_rmidle, ufs_putapage, and ufs_getpage. 132 * And by holding the writer-contents in ufs_iinactive. 133 * 134 * o The callers are no longer required to handle the calls to ITIMES 135 * and ITIMES_NOLOCK. The functions that set the i_flag bits are 136 * responsible for managing those calls. The exceptions are the 137 * bmap routines. 138 * 139 * SVR4 Extended Fundamental Type (EFT) support: 140 * The inode structure has been enhanced to support 141 * 32-bit user-id, 32-bit group-id, and 32-bit device number. 142 * Standard SVR4 ufs also supports 32-bit mode field. For the reason 143 * of backward compatibility with the previous ufs disk format, 144 * 32-bit mode field is not supported. 145 * 146 * The current inode structure is 100% backward compatible with 147 * the previous inode structure if no user-id or group-id exceeds 148 * USHRT_MAX, and no major or minor number of a device number 149 * stored in an inode exceeds 255. 150 * 151 * Rules for managing i_seq: 152 * o i_seq is locked under the same rules as i_flag 153 * o The i_ctime or i_mtime MUST never change without increasing 154 * the value of i_seq. 155 * o You may increase the value of i_seq without the timestamps 156 * changing, this may decrease the callers performance but will 157 * be functionally correct. 158 * o The common case is when IUPD or ICHG is set, increase i_seq 159 * and immediately call ITIMES* or ufs_iupdat to create a new timestamp. 160 * o A less common case is the setting of IUPD or ICHG and while still 161 * holding the correct lock defer the timestamp and i_seq update 162 * until later, but it must still be done before the lock is released. 163 * bmap_write is an example of this, where the caller does the update. 164 * o If multiple changes are being made with the timestamps being 165 * updated only at the end, a single increase of i_seq is allowed. 166 * o If changes are made with IUPD or ICHG being set, but 167 * the controlling lock is being dropped before the timestamp is 168 * updated, there is a risk that another thread will also change 169 * the file, update i_flag, and push just one timestamp update. 170 * There is also the risk that another thread calls ITIMES or 171 * ufs_iupdat without setting IUPD|ICHG and thus not changing i_seq, 172 * this will cause ufs_imark to change the timestamps without changing 173 * i_seq. If the controlling lock is dropped, ISEQ must be set to 174 * force i_seq to be increased on next ufs_imark, but i_seq MUST still 175 * be increased by the original setting thread before its deferred 176 * call to ITIMES to insure it is increased the correct number of times. 177 */ 178 179 #define UID_LONG (o_uid_t)65535 180 /* flag value to indicate uid is 32-bit long */ 181 #define GID_LONG (o_uid_t)65535 182 /* flag value to indicate gid is 32-bit long */ 183 184 #define NDADDR 12 /* direct addresses in inode */ 185 #define NIADDR 3 /* indirect addresses in inode */ 186 #define FSL_SIZE (NDADDR + NIADDR - 1) * sizeof (daddr32_t) 187 /* max fast symbolic name length is 56 */ 188 189 #define i_fs i_ufsvfs->vfs_bufp->b_un.b_fs 190 #define i_vfs i_vnode->v_vfsp 191 192 struct icommon { 193 o_mode_t ic_smode; /* 0: mode and type of file */ 194 short ic_nlink; /* 2: number of links to file */ 195 o_uid_t ic_suid; /* 4: owner's user id */ 196 o_gid_t ic_sgid; /* 6: owner's group id */ 197 u_offset_t ic_lsize; /* 8: number of bytes in file */ 198 #ifdef _KERNEL 199 struct timeval32 ic_atime; /* 16: time last accessed */ 200 struct timeval32 ic_mtime; /* 24: time last modified */ 201 struct timeval32 ic_ctime; /* 32: last time inode changed */ 202 #else 203 time32_t ic_atime; /* 16: time last accessed */ 204 int32_t ic_atspare; 205 time32_t ic_mtime; /* 24: time last modified */ 206 int32_t ic_mtspare; 207 time32_t ic_ctime; /* 32: last time inode changed */ 208 int32_t ic_ctspare; 209 #endif 210 daddr32_t ic_db[NDADDR]; /* 40: disk block addresses */ 211 daddr32_t ic_ib[NIADDR]; /* 88: indirect blocks */ 212 int32_t ic_flags; /* 100: cflags */ 213 int32_t ic_blocks; /* 104: 512 byte blocks actually held */ 214 int32_t ic_gen; /* 108: generation number */ 215 int32_t ic_shadow; /* 112: shadow inode */ 216 uid_t ic_uid; /* 116: long EFT version of uid */ 217 gid_t ic_gid; /* 120: long EFT version of gid */ 218 uint32_t ic_oeftflag; /* 124: extended attr directory ino, 0 = none */ 219 }; 220 221 /* 222 * Large Files: Note we use the inline functions load_double, store_double 223 * to load and store the long long values of i_size. Therefore the 224 * address of i_size must be eight byte aligned. Kmem_alloc of incore 225 * inode structure makes sure that the structure is 8-byte aligned. 226 * XX64 - reorder this structure? 227 */ 228 typedef struct inode { 229 struct inode *i_chain[2]; /* must be first */ 230 struct inode *i_freef; /* free list forward - must be before i_ic */ 231 struct inode *i_freeb; /* free list back - must be before i_ic */ 232 struct icommon i_ic; /* Must be here */ 233 struct vnode *i_vnode; /* vnode associated with this inode */ 234 struct vnode *i_devvp; /* vnode for block I/O */ 235 dev_t i_dev; /* device where inode resides */ 236 ino_t i_number; /* i number, 1-to-1 with device address */ 237 off_t i_diroff; /* offset in dir, where we found last entry */ 238 /* just a hint - no locking needed */ 239 struct ufsvfs *i_ufsvfs; /* incore fs associated with inode */ 240 struct dquot *i_dquot; /* quota structure controlling this file */ 241 krwlock_t i_rwlock; /* serializes write/setattr requests */ 242 krwlock_t i_contents; /* protects (most of) inode contents */ 243 kmutex_t i_tlock; /* protects time fields, i_flag */ 244 offset_t i_nextr; /* */ 245 /* next byte read offset (read-ahead) */ 246 /* No lock required */ 247 /* */ 248 uint_t i_flag; /* inode flags */ 249 uint_t i_seq; /* modification sequence number */ 250 boolean_t i_cachedir; /* Cache this directory on next lookup */ 251 /* - no locking needed */ 252 long i_mapcnt; /* mappings to file pages */ 253 int *i_map; /* block list for the corresponding file */ 254 dev_t i_rdev; /* INCORE rdev from i_oldrdev by ufs_iget */ 255 size_t i_delaylen; /* delayed writes, units=bytes */ 256 offset_t i_delayoff; /* where we started delaying */ 257 offset_t i_nextrio; /* where to start the next clust */ 258 long i_writes; /* number of outstanding bytes in write q */ 259 kcondvar_t i_wrcv; /* sleep/wakeup for write throttle */ 260 offset_t i_doff; /* dinode byte offset in file system */ 261 si_t *i_ufs_acl; /* pointer to acl entry */ 262 dcanchor_t i_danchor; /* directory cache anchor */ 263 kthread_t *i_writer; /* thread which is in window in wrip() */ 264 } inode_t; 265 266 struct dinode { 267 union { 268 struct icommon di_icom; 269 char di_size[128]; 270 } di_un; 271 }; 272 273 #define i_mode i_ic.ic_smode 274 #define i_nlink i_ic.ic_nlink 275 #define i_uid i_ic.ic_uid 276 #define i_gid i_ic.ic_gid 277 #define i_smode i_ic.ic_smode 278 #define i_suid i_ic.ic_suid 279 #define i_sgid i_ic.ic_sgid 280 281 #define i_size i_ic.ic_lsize 282 #define i_db i_ic.ic_db 283 #define i_ib i_ic.ic_ib 284 285 #define i_atime i_ic.ic_atime 286 #define i_mtime i_ic.ic_mtime 287 #define i_ctime i_ic.ic_ctime 288 289 #define i_shadow i_ic.ic_shadow 290 #define i_oeftflag i_ic.ic_oeftflag 291 #define i_blocks i_ic.ic_blocks 292 #define i_cflags i_ic.ic_flags 293 #ifdef _LITTLE_ENDIAN 294 /* 295 * Originally done on x86, but carried on to all other little 296 * architectures, which provides for file system compatibility. 297 */ 298 #define i_ordev i_ic.ic_db[1] /* USL SVR4 compatibility */ 299 #else 300 #define i_ordev i_ic.ic_db[0] /* was i_oldrdev */ 301 #endif 302 #define i_gen i_ic.ic_gen 303 #define i_forw i_chain[0] 304 #define i_back i_chain[1] 305 306 /* EFT transition aids - obsolete */ 307 #define oEFT_MAGIC 0x90909090 308 #define di_oeftflag di_ic.ic_oeftflag 309 310 #define di_ic di_un.di_icom 311 #define di_mode di_ic.ic_smode 312 #define di_nlink di_ic.ic_nlink 313 #define di_uid di_ic.ic_uid 314 #define di_gid di_ic.ic_gid 315 #define di_smode di_ic.ic_smode 316 #define di_suid di_ic.ic_suid 317 #define di_sgid di_ic.ic_sgid 318 319 #define di_size di_ic.ic_lsize 320 #define di_db di_ic.ic_db 321 #define di_ib di_ic.ic_ib 322 323 #define di_atime di_ic.ic_atime 324 #define di_mtime di_ic.ic_mtime 325 #define di_ctime di_ic.ic_ctime 326 #define di_cflags di_ic.ic_flags 327 328 #ifdef _LITTLE_ENDIAN 329 #define di_ordev di_ic.ic_db[1] 330 #else 331 #define di_ordev di_ic.ic_db[0] 332 #endif 333 #define di_shadow di_ic.ic_shadow 334 #define di_blocks di_ic.ic_blocks 335 #define di_gen di_ic.ic_gen 336 337 /* flags */ 338 #define IUPD 0x0001 /* file has been modified */ 339 #define IACC 0x0002 /* inode access time to be updated */ 340 #define IMOD 0x0004 /* inode has been modified */ 341 #define ICHG 0x0008 /* inode has been changed */ 342 #define INOACC 0x0010 /* no access time update in getpage */ 343 #define IMODTIME 0x0020 /* mod time already set */ 344 #define IREF 0x0040 /* inode is being referenced */ 345 #define ISYNC 0x0080 /* do all allocation synchronously */ 346 #define IFASTSYMLNK 0x0100 /* fast symbolic link */ 347 #define IMODACC 0x0200 /* only access time changed; */ 348 /* filesystem won't become active */ 349 #define IATTCHG 0x0400 /* only size/blocks have changed */ 350 #define IBDWRITE 0x0800 /* the inode has been scheduled for */ 351 /* write operation asynchronously */ 352 #define ISTALE 0x1000 /* inode couldn't be read from disk */ 353 #define IDEL 0x2000 /* inode is being deleted */ 354 #define IDIRECTIO 0x4000 /* attempt directio */ 355 #define ISEQ 0x8000 /* deferred i_seq increase */ 356 #define IJUNKIQ 0x10000 /* on junk idle queue */ 357 #define IQUIET 0x20000 /* No file system full messages */ 358 359 /* cflags */ 360 #define IXATTR 0x0001 /* Extended attribute */ 361 362 /* modes */ 363 #define IFMT 0170000 /* type of file */ 364 #define IFIFO 0010000 /* named pipe (fifo) */ 365 #define IFCHR 0020000 /* character special */ 366 #define IFDIR 0040000 /* directory */ 367 #define IFBLK 0060000 /* block special */ 368 #define IFREG 0100000 /* regular */ 369 #define IFLNK 0120000 /* symbolic link */ 370 #define IFSHAD 0130000 /* shadow indode */ 371 #define IFSOCK 0140000 /* socket */ 372 #define IFATTRDIR 0160000 /* Attribute directory */ 373 374 #define ISUID 04000 /* set user id on execution */ 375 #define ISGID 02000 /* set group id on execution */ 376 #define ISVTX 01000 /* save swapped text even after use */ 377 #define IREAD 0400 /* read, write, execute permissions */ 378 #define IWRITE 0200 379 #define IEXEC 0100 380 381 /* specify how the inode info is written in ufs_syncip() */ 382 #define I_SYNC 1 /* wait for the inode written to disk */ 383 #define I_DSYNC 2 /* wait for the inode written to disk */ 384 /* only if IATTCHG is set */ 385 #define I_ASYNC 0 /* don't wait for the inode written */ 386 387 /* flags passed to ufs_itrunc(), indirtrunc(), and free() */ 388 #define I_FREE 0x00000001 /* inode is being freed */ 389 #define I_DIR 0x00000002 /* inode is a directory */ 390 #define I_IBLK 0x00000004 /* indirect block */ 391 #define I_CHEAP 0x00000008 /* cheap free */ 392 #define I_SHAD 0x00000010 /* inode is a shadow inode */ 393 #define I_QUOTA 0x00000020 /* quota file */ 394 #define I_NOCANCEL 0x40 /* Don't cancel these fragments */ 395 #define I_ACCT 0x00000080 /* Update ufsvfs' unreclaimed_blocks */ 396 /* 397 * Statistics on inodes 398 * Not protected by locks 399 */ 400 struct instats { 401 kstat_named_t in_size; /* current cache size */ 402 kstat_named_t in_maxsize; /* maximum cache size */ 403 kstat_named_t in_hits; /* cache hits */ 404 kstat_named_t in_misses; /* cache misses */ 405 kstat_named_t in_malloc; /* kmem_alloce'd */ 406 kstat_named_t in_mfree; /* kmem_free'd */ 407 kstat_named_t in_maxreached; /* Largest size reached by cache */ 408 kstat_named_t in_frfront; /* # put at front of freelist */ 409 kstat_named_t in_frback; /* # put at back of freelist */ 410 kstat_named_t in_qfree; /* q's to delete thread */ 411 kstat_named_t in_scan; /* # inodes scanned */ 412 kstat_named_t in_tidles; /* # inodes idled by idle thread */ 413 kstat_named_t in_lidles; /* # inodes idled by ufs_lookup */ 414 kstat_named_t in_vidles; /* # inodes idled by ufs_vget */ 415 kstat_named_t in_kcalloc; /* # inodes kmem_cache_alloced */ 416 kstat_named_t in_kcfree; /* # inodes kmem_cache_freed */ 417 kstat_named_t in_poc; /* # push-on-close's */ 418 }; 419 420 #ifdef _KERNEL 421 422 /* 423 * Extended attributes 424 */ 425 426 #define XATTR_DIR_NAME "/@/" 427 extern int ufs_ninode; /* high-water mark for inode cache */ 428 429 extern struct vnodeops *ufs_vnodeops; /* vnode operations for ufs */ 430 extern const struct fs_operation_def ufs_vnodeops_template[]; 431 432 /* 433 * Convert between inode pointers and vnode pointers 434 */ 435 #define VTOI(VP) ((struct inode *)(VP)->v_data) 436 #define ITOV(IP) ((struct vnode *)(IP)->i_vnode) 437 438 /* 439 * convert to fs 440 */ 441 #define ITOF(IP) ((struct fs *)(IP)->i_fs) 442 443 /* 444 * Convert between vnode types and inode formats 445 */ 446 extern enum vtype iftovt_tab[]; 447 448 #ifdef notneeded 449 450 /* Look at sys/mode.h and os/vnode.c */ 451 452 extern int vttoif_tab[]; 453 454 #endif 455 456 /* 457 * Mark an inode with the current (unique) timestamp. 458 * (Note that UFS's concept of time only keeps 32 bits of seconds 459 * in the on-disk format). 460 */ 461 struct timeval32 iuniqtime; 462 extern kmutex_t ufs_iuniqtime_lock; 463 464 #define ITIMES_NOLOCK(ip) ufs_itimes_nolock(ip) 465 466 #define ITIMES(ip) { \ 467 mutex_enter(&(ip)->i_tlock); \ 468 ITIMES_NOLOCK(ip); \ 469 mutex_exit(&(ip)->i_tlock); \ 470 } 471 472 /* 473 * The following interfaces are used to do atomic loads and stores 474 * of an inode's i_size, which is a long long data type. 475 * 476 * For LP64, we just to a load or a store - atomicity and alignment 477 * are 8-byte guaranteed. For x86 there are no such instructions, 478 * so we grab i_contents as reader to get the size; we already hold 479 * it as writer when we're setting the size. 480 */ 481 482 #ifdef _LP64 483 484 #define UFS_GET_ISIZE(resultp, ip) *(resultp) = (ip)->i_size 485 #define UFS_SET_ISIZE(value, ip) (ip)->i_size = (value) 486 487 #else /* _LP64 */ 488 489 #define UFS_GET_ISIZE(resultp, ip) \ 490 { \ 491 rw_enter(&(ip)->i_contents, RW_READER); \ 492 *(resultp) = (ip)->i_size; \ 493 rw_exit(&(ip)->i_contents); \ 494 } 495 #define UFS_SET_ISIZE(value, ip) \ 496 { \ 497 ASSERT(RW_WRITE_HELD(&(ip)->i_contents)); \ 498 (ip)->i_size = (value); \ 499 } 500 501 #endif /* _LP64 */ 502 503 /* 504 * Allocate the specified block in the inode 505 * and make sure any in-core pages are initialized. 506 */ 507 #define BMAPALLOC(ip, off, size, cr) \ 508 bmap_write((ip), (u_offset_t)(off), (size), 0, cr) 509 510 #define ESAME (-1) /* trying to rename linked files (special) */ 511 512 #define UFS_HOLE (daddr32_t)-1 /* value used when no block allocated */ 513 514 /* 515 * enums 516 */ 517 518 /* direnter ops */ 519 enum de_op { DE_CREATE, DE_MKDIR, DE_LINK, DE_RENAME, DE_SYMLINK, DE_ATTRDIR}; 520 521 /* dirremove ops */ 522 enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; 523 524 /* 525 * This overlays the fid structure (see vfs.h) 526 * 527 * LP64 note: we use int32_t instead of ino_t since UFS does not use 528 * inode numbers larger than 32-bits and ufid's are passed to NFS 529 * which expects them to not grow in size beyond 10 bytes (12 including 530 * the length). 531 */ 532 struct ufid { 533 ushort_t ufid_len; 534 ushort_t ufid_flags; 535 int32_t ufid_ino; 536 int32_t ufid_gen; 537 }; 538 539 /* 540 * each ufs thread (see ufs_thread.c) is managed by this struct 541 */ 542 struct ufs_q { 543 union uq_head { 544 void *_uq_generic; /* first entry on q */ 545 struct inode *_uq_i; 546 ufs_failure_t *_uq_uf; 547 } _uq_head; 548 int uq_ne; /* # of entries/failures found */ 549 int uq_lowat; /* thread runs when ne == lowat */ 550 int uq_hiwat; /* synchronous idle if ne >= hiwat */ 551 ushort_t uq_flags; /* flags (see below) */ 552 kcondvar_t uq_cv; /* for sleep/wakeup */ 553 kthread_id_t uq_threadp; /* thread managing this q */ 554 kmutex_t uq_mutex; /* protects this struct */ 555 }; 556 557 #define uq_head _uq_head._uq_generic 558 #define uq_ihead _uq_head._uq_i 559 #define uq_ufhead _uq_head._uq_uf 560 561 /* 562 * uq_flags 563 */ 564 #define UQ_EXIT (0x0001) /* q server exits at its convenience */ 565 #define UQ_WAIT (0x0002) /* thread is waiting on q server */ 566 #define UQ_SUSPEND (0x0004) /* request for suspension */ 567 #define UQ_SUSPENDED (0x0008) /* thread has suspended itself */ 568 #define UQ_FASTCLIENTS (0x0010) /* fast clients in ufs_delq_info */ 569 570 /* 571 * When logging is enabled, statvfs must account for blocks and files that 572 * may be on the delete queue. Protected by ufsvfsp->vfs_delete.uq_mutex 573 */ 574 struct ufs_delq_info { 575 kcondvar_t delq_fast_cv; /* for fast-operating clients */ 576 u_offset_t delq_unreclaimed_blocks; 577 ulong_t delq_unreclaimed_files; 578 }; 579 580 581 /* 582 * global idle queues 583 * The queues are sized dynamically in proportion to ufs_ninode 584 * which, unless overridden, scales with the amount of memory. 585 * The idle queue is halved whenever it hits the low water mark 586 * (1/4 of ufs_ninode), but can burst to sizes much larger. The number 587 * of hash queues is currently maintained to give on average IQHASHQLEN 588 * entries when the idle queue is at the low water mark. 589 * Note, we do not need to search along the hash queues, but use them 590 * in order to batch together geographically local inodes to allow 591 * their updates (via the log or buffer cache) to require less disk seeks. 592 * This gives an incredible performance boost for logging and a boost for 593 * non logging file systems. 594 */ 595 typedef struct { 596 inode_t *i_chain[2]; /* must match inode_t, but unused */ 597 inode_t *i_freef; /* must match inode_t, idle list forward */ 598 inode_t *i_freeb; /* must match inode_t, idle list back */ 599 } iqhead_t; 600 601 extern struct ufs_q ufs_idle_q; /* used by global ufs idle thread */ 602 extern iqhead_t *ufs_junk_iq; /* junk idle queues */ 603 extern iqhead_t *ufs_useful_iq; /* useful idle queues */ 604 extern int ufs_njunk_iq; /* number of entries in junk iq */ 605 extern int ufs_nuseful_iq; /* number of entries in useful iq */ 606 extern int ufs_niqhash; /* number of iq hash qs - power of 2 */ 607 extern int ufs_iqhashmask; /* iq hash mask = ufs_niqhash - 1 */ 608 609 #define IQHASHQLEN 32 /* see comments above */ 610 #define INOCGSHIFT 7 /* 128 inodes per cylinder group */ 611 #define IQHASH(ip) (((ip)->i_number >> INOCGSHIFT) & ufs_iqhashmask) 612 #define IQNEXT(i) ((i) + 1) & ufs_iqhashmask /* next idle queue */ 613 614 extern struct ufs_q ufs_hlock; /* used by global ufs hlock thread */ 615 616 /* 617 * vfs_lfflags flags 618 */ 619 #define UFS_LARGEFILES ((ushort_t)0x1) /* set if mount allows largefiles */ 620 621 /* 622 * vfs_dfritime flags 623 */ 624 #define UFS_DFRATIME 0x1 /* deferred access time */ 625 626 /* 627 * UFS VFS private data. 628 * 629 * UFS file system instances may be linked on several lists. 630 * 631 * - The vfs_next field chains together every extant ufs instance; this 632 * list is rooted at ufs_instances and should be used in preference to 633 * the overall vfs list (which is properly the province of the generic 634 * file system code, not of file system implementations). This same list 635 * link is used during forcible unmounts to chain together instances that 636 * can't yet be completely dismantled, 637 * 638 * - The vfs_wnext field is used within ufs_update to form a work list of 639 * UFS instances to be synced out. 640 */ 641 typedef struct ufsvfs { 642 struct vfs *vfs_vfs; /* back link */ 643 struct ufsvfs *vfs_next; /* instance list link */ 644 struct ufsvfs *vfs_wnext; /* work list link */ 645 struct vnode *vfs_root; /* root vnode */ 646 struct buf *vfs_bufp; /* buffer containing superblock */ 647 struct vnode *vfs_devvp; /* block device vnode */ 648 ushort_t vfs_lfflags; /* Large files (set by mount) */ 649 ushort_t vfs_qflags; /* QUOTA: filesystem flags */ 650 struct inode *vfs_qinod; /* QUOTA: pointer to quota file */ 651 uint_t vfs_btimelimit; /* QUOTA: block time limit */ 652 uint_t vfs_ftimelimit; /* QUOTA: file time limit */ 653 krwlock_t vfs_dqrwlock; /* QUOTA: protects quota fields */ 654 /* 655 * some fs local threads 656 */ 657 struct ufs_q vfs_delete; /* delayed inode delete */ 658 struct ufs_q vfs_reclaim; /* reclaim open, deleted files */ 659 660 /* 661 * This is copied from the super block at mount time. 662 */ 663 int vfs_nrpos; /* # rotational positions */ 664 /* 665 * This lock protects cg's and super block pointed at by 666 * vfs_bufp->b_fs. Locks contents of fs and cg's and contents 667 * of vfs_dio. 668 */ 669 kmutex_t vfs_lock; 670 struct ulockfs vfs_ulockfs; /* ufs lockfs support */ 671 uint_t vfs_dio; /* delayed io (_FIODIO) */ 672 uint_t vfs_nointr; /* disallow lockfs interrupts */ 673 uint_t vfs_nosetsec; /* disallow ufs_setsecattr */ 674 uint_t vfs_syncdir; /* synchronous local directory ops */ 675 uint_t vfs_dontblock; /* don't block on forced umount */ 676 677 /* 678 * trans (logging ufs) stuff 679 */ 680 uint_t vfs_domatamap; /* set if matamap enabled */ 681 ulong_t vfs_maxacl; /* transaction stuff - max acl size */ 682 ulong_t vfs_dirsize; /* logspace for directory creation */ 683 ulong_t vfs_avgbfree; /* average free blks in cg (blkpref) */ 684 /* 685 * Some useful constants 686 */ 687 int vfs_nindirshift; /* calc. from fs_nindir */ 688 int vfs_nindiroffset; /* calc. from fs_ninidr */ 689 int vfs_ioclustsz; /* bytes in read/write cluster */ 690 int vfs_iotransz; /* max device i/o transfer size */ 691 692 vfs_ufsfx_t vfs_fsfx; /* lock/fix-on-panic support */ 693 /* 694 * More useful constants 695 */ 696 int vfs_minfrags; /* calc. from fs_minfree */ 697 /* 698 * Force DirectIO on all files 699 */ 700 uint_t vfs_forcedirectio; 701 /* 702 * Deferred inode time related fields 703 */ 704 clock_t vfs_iotstamp; /* last I/O timestamp */ 705 uint_t vfs_dfritime; /* deferred inode time flags */ 706 /* 707 * Some more useful info 708 */ 709 dev_t vfs_dev; /* device mounted from */ 710 struct ml_unit *vfs_log; /* pointer to embedded log struct */ 711 uint_t vfs_noatime; /* disable inode atime updates */ 712 /* 713 * snapshot stuff 714 */ 715 void *vfs_snapshot; /* snapshot handle */ 716 /* 717 * Controls logging "file system full" messages to messages file 718 */ 719 clock_t vfs_lastwhinetime; 720 721 int vfs_nolog_si; /* not logging summary info */ 722 int vfs_validfs; /* indicates mounted fs */ 723 724 /* 725 * Additional information about vfs_delete above 726 */ 727 struct ufs_delq_info vfs_delete_info; /* what's on the delete queue */ 728 } ufsvfs_t; 729 730 #define vfs_fs vfs_bufp->b_un.b_fs 731 732 /* 733 * values for vfs_validfs 734 */ 735 #define UT_UNMOUNTED 0 736 #define UT_MOUNTED 1 737 #define UT_HLOCKING 2 738 739 /* inohsz is guaranteed to be a power of 2 */ 740 #define INOHASH(ino) (((int)ino) & (inohsz - 1)) 741 742 union ihead { 743 union ihead *ih_head[2]; 744 struct inode *ih_chain[2]; 745 }; 746 747 extern union ihead *ihead; 748 extern kmutex_t *ih_lock; 749 extern int *ih_ne; 750 extern int inohsz; 751 752 extern clock_t ufs_iowait; 753 754 #endif /* _KERNEL */ 755 756 /* 757 * ufs function prototypes 758 */ 759 #if defined(_KERNEL) && !defined(_BOOT) 760 761 extern void ufs_iinit(void); 762 extern int ufs_iget(struct vfs *, ino_t, struct inode **, cred_t *); 763 extern int ufs_iget_alloced(struct vfs *, ino_t, struct inode **, 764 cred_t *); 765 extern void ufs_reset_vnode(vnode_t *); 766 extern void ufs_iinactive(struct inode *); 767 extern void ufs_iupdat(struct inode *, int); 768 extern int ufs_rmidle(struct inode *); 769 extern int ufs_itrunc(struct inode *, u_offset_t, int, cred_t *); 770 extern int ufs_iaccess(void *, int, cred_t *); 771 extern int rdip(struct inode *, struct uio *, int, struct cred *); 772 extern int wrip(struct inode *, struct uio *, int, struct cred *); 773 774 extern void ufs_imark(struct inode *); 775 extern void ufs_itimes_nolock(struct inode *); 776 777 extern int ufs_dirlook(struct inode *, char *, struct inode **, 778 cred_t *, int); 779 extern int ufs_direnter_cm(struct inode *, char *, enum de_op, 780 struct vattr *, struct inode **, cred_t *, int); 781 extern int ufs_direnter_lr(struct inode *, char *, enum de_op, 782 struct inode *, struct inode *, cred_t *, vnode_t **); 783 extern int ufs_dircheckpath(ino_t, struct inode *, struct inode *, 784 struct cred *); 785 extern int ufs_dirmakeinode(struct inode *, struct inode **, 786 struct vattr *, enum de_op, cred_t *); 787 extern int ufs_dirremove(struct inode *, char *, struct inode *, 788 vnode_t *, enum dr_op, cred_t *, vnode_t **); 789 extern int ufs_xattrdirempty(struct inode *, ino_t, cred_t *); 790 extern int blkatoff(struct inode *, off_t, char **, struct fbuf **); 791 792 extern void sbupdate(struct vfs *); 793 794 extern int ufs_ialloc(struct inode *, ino_t, mode_t, struct inode **, 795 cred_t *); 796 extern void ufs_ifree(struct inode *, ino_t, mode_t); 797 extern void free(struct inode *, daddr_t, off_t, int); 798 extern int alloc(struct inode *, daddr_t, int, daddr_t *, cred_t *); 799 extern int realloccg(struct inode *, daddr_t, daddr_t, int, int, 800 daddr_t *, cred_t *); 801 extern int ufs_freesp(struct vnode *, struct flock64 *, int, cred_t *); 802 extern ino_t dirpref(inode_t *); 803 extern daddr_t blkpref(struct inode *, daddr_t, int, daddr32_t *); 804 extern daddr_t contigpref(ufsvfs_t *, size_t); 805 806 extern int ufs_rdwri(enum uio_rw, int, struct inode *, caddr_t, ssize_t, 807 offset_t, enum uio_seg, int *, cred_t *); 808 809 extern int bmap_read(struct inode *, u_offset_t, daddr_t *, int *); 810 extern int bmap_write(struct inode *, u_offset_t, int, int, struct cred *); 811 extern int bmap_has_holes(struct inode *); 812 extern int bmap_find(struct inode *, boolean_t, u_offset_t *); 813 814 extern void ufs_vfs_add(struct ufsvfs *); 815 extern void ufs_vfs_remove(struct ufsvfs *); 816 817 extern void ufs_sbwrite(struct ufsvfs *); 818 extern void ufs_update(int); 819 extern int ufs_getsummaryinfo(dev_t, struct ufsvfs *, struct fs *); 820 extern int ufs_putsummaryinfo(dev_t, struct ufsvfs *, struct fs *); 821 extern int ufs_syncip(struct inode *, int, int, top_t); 822 extern int ufs_sync_indir(struct inode *); 823 extern int ufs_indirblk_sync(struct inode *, offset_t); 824 extern int ufs_badblock(struct inode *, daddr_t); 825 extern int ufs_indir_badblock(struct inode *, daddr32_t *); 826 extern void ufs_notclean(struct ufsvfs *); 827 extern void ufs_checkclean(struct vfs *); 828 extern int isblock(struct fs *, uchar_t *, daddr_t); 829 extern void setblock(struct fs *, uchar_t *, daddr_t); 830 extern void clrblock(struct fs *, uchar_t *, daddr_t); 831 extern int isclrblock(struct fs *, uchar_t *, daddr_t); 832 extern void fragacct(struct fs *, int, int32_t *, int); 833 extern int skpc(char, uint_t, char *); 834 extern int ufs_fbwrite(struct fbuf *, struct inode *); 835 extern int ufs_fbiwrite(struct fbuf *, struct inode *, daddr_t, long); 836 extern int ufs_putapage(struct vnode *, struct page *, u_offset_t *, 837 size_t *, int, struct cred *); 838 extern inode_t *ufs_alloc_inode(ufsvfs_t *, ino_t); 839 extern void ufs_free_inode(inode_t *); 840 841 /* 842 * special stuff 843 */ 844 extern void ufs_setreclaim(struct inode *); 845 extern int ufs_scan_inodes(int, int (*)(struct inode *, void *), void *, 846 struct ufsvfs *); 847 extern int ufs_sync_inode(struct inode *, void *); 848 extern int ufs_sticky_remove_access(struct inode *, struct inode *, 849 struct cred *); 850 /* 851 * quota 852 */ 853 extern int chkiq(struct ufsvfs *, int, struct inode *, uid_t, int, 854 struct cred *, char **errp, size_t *lenp); 855 856 /* 857 * ufs thread stuff 858 */ 859 extern void ufs_thread_delete(struct vfs *); 860 extern void ufs_delete_drain(struct vfs *, int, int); 861 extern void ufs_delete(struct ufsvfs *, struct inode *, int); 862 extern void ufs_inode_cache_reclaim(void *); 863 extern void ufs_idle_drain(struct vfs *); 864 extern void ufs_idle_some(int); 865 extern void ufs_thread_idle(void); 866 extern void ufs_thread_reclaim(struct vfs *); 867 extern void ufs_thread_init(struct ufs_q *, int); 868 extern void ufs_thread_start(struct ufs_q *, void (*)(), struct vfs *); 869 extern void ufs_thread_exit(struct ufs_q *); 870 extern void ufs_thread_suspend(struct ufs_q *); 871 extern void ufs_thread_continue(struct ufs_q *); 872 extern void ufs_thread_hlock(void *); 873 extern void ufs_delete_init(struct ufsvfs *, int); 874 extern void ufs_delete_adjust_stats(struct ufsvfs *, struct statvfs64 *); 875 extern void ufs_delete_drain_wait(struct ufsvfs *, int); 876 877 /* 878 * ufs lockfs stuff 879 */ 880 struct seg; 881 extern int ufs_reconcile_fs(struct vfs *, struct ufsvfs *, int); 882 extern int ufs_quiesce(struct ulockfs *); 883 extern int ufs_flush(struct vfs *); 884 extern int ufs_fiolfs(struct vnode *, struct lockfs *, int); 885 extern int ufs__fiolfs(struct vnode *, struct lockfs *, int, int); 886 extern int ufs_fiolfss(struct vnode *, struct lockfs *); 887 extern int ufs_fioffs(struct vnode *, char *, struct cred *); 888 extern int ufs_check_lockfs(struct ufsvfs *, struct ulockfs *, ulong_t); 889 extern int ufs_lockfs_begin(struct ufsvfs *, struct ulockfs **, ulong_t); 890 extern int ufs_lockfs_begin_getpage(struct ufsvfs *, struct ulockfs **, 891 struct seg *, int, uint_t *); 892 extern void ufs_lockfs_end(struct ulockfs *); 893 /* 894 * ufs acl stuff 895 */ 896 extern int ufs_si_inherit(struct inode *, struct inode *, o_mode_t, cred_t *); 897 extern void si_cache_init(void); 898 extern int ufs_si_load(struct inode *, cred_t *); 899 extern void ufs_si_del(struct inode *); 900 extern int ufs_acl_access(struct inode *, int, cred_t *); 901 extern void ufs_si_cache_flush(dev_t); 902 extern int ufs_si_free(si_t *, struct vfs *, cred_t *); 903 extern int ufs_acl_setattr(struct inode *, struct vattr *, cred_t *); 904 extern int ufs_acl_get(struct inode *, vsecattr_t *, int, cred_t *); 905 extern int ufs_acl_set(struct inode *, vsecattr_t *, int, cred_t *); 906 /* 907 * ufs directio stuff 908 */ 909 extern void ufs_directio_init(); 910 extern int ufs_directio_write(struct inode *, uio_t *, int, int, cred_t *, 911 int *); 912 extern int ufs_directio_read(struct inode *, uio_t *, cred_t *, int *); 913 #define DIRECTIO_FAILURE (0) 914 #define DIRECTIO_SUCCESS (1) 915 916 /* 917 * ufs extensions for PXFS 918 */ 919 920 int ufs_rdwr_data(vnode_t *vp, u_offset_t offset, size_t len, fdbuffer_t *fdb, 921 int flags, cred_t *cr); 922 int ufs_alloc_data(vnode_t *vp, u_offset_t offset, size_t *len, fdbuffer_t *fdb, 923 int flags, cred_t *cr); 924 925 /* 926 * prototypes to support the forced unmount 927 */ 928 929 void ufs_freeze(struct ulockfs *, struct lockfs *); 930 int ufs_thaw(struct vfs *, struct ufsvfs *, struct ulockfs *); 931 932 /* 933 * extended attributes 934 */ 935 936 int ufs_xattrmkdir(inode_t *, inode_t **, int, struct cred *); 937 int ufs_xattr_getattrdir(vnode_t *, inode_t **, int, struct cred *); 938 void ufs_unhook_shadow(inode_t *, inode_t *); 939 940 #endif /* defined(_KERNEL) && !defined(_BOOT) */ 941 942 #ifdef __cplusplus 943 } 944 #endif 945 946 #endif /* _SYS_FS_UFS_INODE_H */ 947