17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 580d34432Sfrankho * Common Development and Distribution License (the "License"). 680d34432Sfrankho * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*134a1f4eSCasper H.S. Dik * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved. 237c478bd9Sstevel@tonic-gate */ 247c478bd9Sstevel@tonic-gate 257c478bd9Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 267c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 277c478bd9Sstevel@tonic-gate 287c478bd9Sstevel@tonic-gate /* 297c478bd9Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 307c478bd9Sstevel@tonic-gate * The Regents of the University of California 317c478bd9Sstevel@tonic-gate * All Rights Reserved 327c478bd9Sstevel@tonic-gate * 337c478bd9Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 347c478bd9Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 357c478bd9Sstevel@tonic-gate * contributors. 367c478bd9Sstevel@tonic-gate */ 377c478bd9Sstevel@tonic-gate 387c478bd9Sstevel@tonic-gate #include <sys/types.h> 397c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 407c478bd9Sstevel@tonic-gate #include <sys/param.h> 417c478bd9Sstevel@tonic-gate #include <sys/systm.h> 427c478bd9Sstevel@tonic-gate #include <sys/uio.h> 437c478bd9Sstevel@tonic-gate #include <sys/bitmap.h> 447c478bd9Sstevel@tonic-gate #include <sys/signal.h> 457c478bd9Sstevel@tonic-gate #include <sys/cred.h> 467c478bd9Sstevel@tonic-gate #include <sys/user.h> 477c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 487c478bd9Sstevel@tonic-gate #include <sys/stat.h> 497c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 507c478bd9Sstevel@tonic-gate #include <sys/buf.h> 517c478bd9Sstevel@tonic-gate #include <sys/proc.h> 527c478bd9Sstevel@tonic-gate #include <sys/disp.h> 537c478bd9Sstevel@tonic-gate #include <sys/dnlc.h> 547c478bd9Sstevel@tonic-gate #include <sys/mode.h> 557c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 567c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 577c478bd9Sstevel@tonic-gate #include <sys/acl.h> 587c478bd9Sstevel@tonic-gate #include <sys/var.h> 597c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 607c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h> 617c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_trans.h> 627c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_acl.h> 637c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 647c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_quota.h> 657c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 667c478bd9Sstevel@tonic-gate #include <vm/hat.h> 677c478bd9Sstevel@tonic-gate #include <vm/as.h> 687c478bd9Sstevel@tonic-gate #include <vm/pvn.h> 697c478bd9Sstevel@tonic-gate #include <vm/seg.h> 707c478bd9Sstevel@tonic-gate #include <sys/swap.h> 717c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 727c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 737c478bd9Sstevel@tonic-gate #include <sys/errno.h> 747c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 757c478bd9Sstevel@tonic-gate #include <sys/debug.h> 767c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h> 777c478bd9Sstevel@tonic-gate #include <sys/policy.h> 787c478bd9Sstevel@tonic-gate 797c478bd9Sstevel@tonic-gate struct kmem_cache *inode_cache; /* cache of free inodes */ 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate /* UFS Inode Cache Stats -- Not protected */ 827c478bd9Sstevel@tonic-gate struct instats ins = { 837c478bd9Sstevel@tonic-gate { "size", KSTAT_DATA_ULONG }, 847c478bd9Sstevel@tonic-gate { "maxsize", KSTAT_DATA_ULONG }, 857c478bd9Sstevel@tonic-gate { "hits", KSTAT_DATA_ULONG }, 867c478bd9Sstevel@tonic-gate { "misses", KSTAT_DATA_ULONG }, 877c478bd9Sstevel@tonic-gate { "kmem allocs", KSTAT_DATA_ULONG }, 887c478bd9Sstevel@tonic-gate { "kmem frees", KSTAT_DATA_ULONG }, 897c478bd9Sstevel@tonic-gate { "maxsize reached", KSTAT_DATA_ULONG }, 907c478bd9Sstevel@tonic-gate { "puts at frontlist", KSTAT_DATA_ULONG }, 917c478bd9Sstevel@tonic-gate { "puts at backlist", KSTAT_DATA_ULONG }, 927c478bd9Sstevel@tonic-gate { "queues to free", KSTAT_DATA_ULONG }, 937c478bd9Sstevel@tonic-gate { "scans", KSTAT_DATA_ULONG }, 947c478bd9Sstevel@tonic-gate { "thread idles", KSTAT_DATA_ULONG }, 957c478bd9Sstevel@tonic-gate { "lookup idles", KSTAT_DATA_ULONG }, 967c478bd9Sstevel@tonic-gate { "vget idles", KSTAT_DATA_ULONG }, 977c478bd9Sstevel@tonic-gate { "cache allocs", KSTAT_DATA_ULONG }, 987c478bd9Sstevel@tonic-gate { "cache frees", KSTAT_DATA_ULONG }, 997c478bd9Sstevel@tonic-gate { "pushes at close", KSTAT_DATA_ULONG } 1007c478bd9Sstevel@tonic-gate }; 1017c478bd9Sstevel@tonic-gate 1027c478bd9Sstevel@tonic-gate /* kstat data */ 1037c478bd9Sstevel@tonic-gate static kstat_t *ufs_inode_kstat = NULL; 1047c478bd9Sstevel@tonic-gate 1057c478bd9Sstevel@tonic-gate union ihead *ihead; /* inode LRU cache, Chris Maltby */ 1067c478bd9Sstevel@tonic-gate kmutex_t *ih_lock; /* protect inode cache hash table */ 1077c478bd9Sstevel@tonic-gate static int ino_hashlen = 4; /* desired average hash chain length */ 1087c478bd9Sstevel@tonic-gate int inohsz; /* number of buckets in the hash table */ 1097c478bd9Sstevel@tonic-gate 1107c478bd9Sstevel@tonic-gate kmutex_t ufs_scan_lock; /* stop racing multiple ufs_scan_inodes() */ 1117c478bd9Sstevel@tonic-gate kmutex_t ufs_iuniqtime_lock; /* protect iuniqtime */ 1127c478bd9Sstevel@tonic-gate kmutex_t ufsvfs_mutex; 1137c478bd9Sstevel@tonic-gate struct ufsvfs *oldufsvfslist, *ufsvfslist; 1147c478bd9Sstevel@tonic-gate 1157c478bd9Sstevel@tonic-gate /* 1167c478bd9Sstevel@tonic-gate * time to wait after ufsvfsp->vfs_iotstamp before declaring that no 1177c478bd9Sstevel@tonic-gate * I/Os are going on. 1187c478bd9Sstevel@tonic-gate */ 1197c478bd9Sstevel@tonic-gate clock_t ufs_iowait; 1207c478bd9Sstevel@tonic-gate 1217c478bd9Sstevel@tonic-gate /* 1227c478bd9Sstevel@tonic-gate * the threads that process idle inodes and free (deleted) inodes 1237c478bd9Sstevel@tonic-gate * have high water marks that are set in ufsinit(). 1247c478bd9Sstevel@tonic-gate * These values but can be no less then the minimum shown below 1257c478bd9Sstevel@tonic-gate */ 1267c478bd9Sstevel@tonic-gate int ufs_idle_max; /* # of allowable idle inodes */ 1277c478bd9Sstevel@tonic-gate ulong_t ufs_inode_max; /* hard limit of allowable idle inodes */ 1287c478bd9Sstevel@tonic-gate #define UFS_IDLE_MAX (16) /* min # of allowable idle inodes */ 1297c478bd9Sstevel@tonic-gate 1307c478bd9Sstevel@tonic-gate /* 1317c478bd9Sstevel@tonic-gate * Tunables for ufs write throttling. 1327c478bd9Sstevel@tonic-gate * These are validated in ufs_iinit() since improper settings 1337c478bd9Sstevel@tonic-gate * can lead to filesystem hangs. 1347c478bd9Sstevel@tonic-gate */ 1357c478bd9Sstevel@tonic-gate #define UFS_HW_DEFAULT (16 * 1024 * 1024) 1367c478bd9Sstevel@tonic-gate #define UFS_LW_DEFAULT (8 * 1024 * 1024) 1377c478bd9Sstevel@tonic-gate int ufs_HW = UFS_HW_DEFAULT; 1387c478bd9Sstevel@tonic-gate int ufs_LW = UFS_LW_DEFAULT; 1397c478bd9Sstevel@tonic-gate 1407c478bd9Sstevel@tonic-gate static void ihinit(void); 1417c478bd9Sstevel@tonic-gate extern int hash2ints(int, int); 1427c478bd9Sstevel@tonic-gate 1437c478bd9Sstevel@tonic-gate static int ufs_iget_internal(struct vfs *, ino_t, struct inode **, 1447c478bd9Sstevel@tonic-gate struct cred *, int); 1457c478bd9Sstevel@tonic-gate 1467c478bd9Sstevel@tonic-gate /* ARGSUSED */ 1477c478bd9Sstevel@tonic-gate static int 1487c478bd9Sstevel@tonic-gate ufs_inode_kstat_update(kstat_t *ksp, int rw) 1497c478bd9Sstevel@tonic-gate { 1507c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE) 1517c478bd9Sstevel@tonic-gate return (EACCES); 1527c478bd9Sstevel@tonic-gate 1537c478bd9Sstevel@tonic-gate ins.in_malloc.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1547c478bd9Sstevel@tonic-gate "slab_alloc"); 1557c478bd9Sstevel@tonic-gate ins.in_mfree.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1567c478bd9Sstevel@tonic-gate "slab_free"); 1577c478bd9Sstevel@tonic-gate ins.in_kcalloc.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1587c478bd9Sstevel@tonic-gate "alloc"); 1597c478bd9Sstevel@tonic-gate ins.in_kcfree.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1607c478bd9Sstevel@tonic-gate "free"); 1617c478bd9Sstevel@tonic-gate ins.in_size.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1627c478bd9Sstevel@tonic-gate "buf_inuse"); 1637c478bd9Sstevel@tonic-gate ins.in_maxreached.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1647c478bd9Sstevel@tonic-gate "buf_max"); 1657c478bd9Sstevel@tonic-gate ins.in_misses.value.ul = ins.in_kcalloc.value.ul; 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate return (0); 1687c478bd9Sstevel@tonic-gate } 1697c478bd9Sstevel@tonic-gate 1707c478bd9Sstevel@tonic-gate void 1717c478bd9Sstevel@tonic-gate ufs_iinit(void) 1727c478bd9Sstevel@tonic-gate { 1737c478bd9Sstevel@tonic-gate /* 1747c478bd9Sstevel@tonic-gate * Validate that ufs_HW > ufs_LW. 1757c478bd9Sstevel@tonic-gate * The default values for these two tunables have been increased. 1767c478bd9Sstevel@tonic-gate * There is now a range of values for ufs_HW that used to be 1777c478bd9Sstevel@tonic-gate * legal on previous Solaris versions but no longer is now. 1787c478bd9Sstevel@tonic-gate * Upgrading a machine which has an /etc/system setting for ufs_HW 1797c478bd9Sstevel@tonic-gate * from that range can lead to filesystem hangs unless the values 1807c478bd9Sstevel@tonic-gate * are checked here. 1817c478bd9Sstevel@tonic-gate */ 1827c478bd9Sstevel@tonic-gate if (ufs_HW <= ufs_LW) { 1837c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 1847c478bd9Sstevel@tonic-gate "ufs_HW (%d) <= ufs_LW (%d). Check /etc/system.", 1857c478bd9Sstevel@tonic-gate ufs_HW, ufs_LW); 1867c478bd9Sstevel@tonic-gate ufs_LW = UFS_LW_DEFAULT; 1877c478bd9Sstevel@tonic-gate ufs_HW = UFS_HW_DEFAULT; 1887c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "using defaults, ufs_HW = %d, ufs_LW = %d\n", 1897c478bd9Sstevel@tonic-gate ufs_HW, ufs_LW); 1907c478bd9Sstevel@tonic-gate } 1917c478bd9Sstevel@tonic-gate 1927c478bd9Sstevel@tonic-gate /* 1937c478bd9Sstevel@tonic-gate * Adjust the tunable `ufs_ninode' to a reasonable value 1947c478bd9Sstevel@tonic-gate */ 1957c478bd9Sstevel@tonic-gate if (ufs_ninode <= 0) 1967c478bd9Sstevel@tonic-gate ufs_ninode = ncsize; 1977c478bd9Sstevel@tonic-gate if (ufs_inode_max == 0) 19880d34432Sfrankho ufs_inode_max = 19980d34432Sfrankho (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct inode)); 2007c478bd9Sstevel@tonic-gate if (ufs_ninode > ufs_inode_max || (ufs_ninode == 0 && ncsize == 0)) { 2017c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "setting ufs_ninode to max value of %ld", 2027c478bd9Sstevel@tonic-gate ufs_inode_max); 2037c478bd9Sstevel@tonic-gate ufs_ninode = ufs_inode_max; 2047c478bd9Sstevel@tonic-gate } 2057c478bd9Sstevel@tonic-gate /* 2067c478bd9Sstevel@tonic-gate * Wait till third call of ufs_update to declare that no I/Os are 2077c478bd9Sstevel@tonic-gate * going on. This allows deferred access times to be flushed to disk. 2087c478bd9Sstevel@tonic-gate */ 2097c478bd9Sstevel@tonic-gate ufs_iowait = v.v_autoup * hz * 2; 2107c478bd9Sstevel@tonic-gate 2117c478bd9Sstevel@tonic-gate /* 2127c478bd9Sstevel@tonic-gate * idle thread runs when 25% of ufs_ninode entries are on the queue 2137c478bd9Sstevel@tonic-gate */ 2147c478bd9Sstevel@tonic-gate if (ufs_idle_max == 0) 2157c478bd9Sstevel@tonic-gate ufs_idle_max = ufs_ninode >> 2; 2167c478bd9Sstevel@tonic-gate if (ufs_idle_max < UFS_IDLE_MAX) 2177c478bd9Sstevel@tonic-gate ufs_idle_max = UFS_IDLE_MAX; 2187c478bd9Sstevel@tonic-gate if (ufs_idle_max > ufs_ninode) 2197c478bd9Sstevel@tonic-gate ufs_idle_max = ufs_ninode; 2207c478bd9Sstevel@tonic-gate /* 2217c478bd9Sstevel@tonic-gate * This is really a misnomer, it is ufs_queue_init 2227c478bd9Sstevel@tonic-gate */ 2237c478bd9Sstevel@tonic-gate ufs_thread_init(&ufs_idle_q, ufs_idle_max); 2247c478bd9Sstevel@tonic-gate ufs_thread_start(&ufs_idle_q, ufs_thread_idle, NULL); 2257c478bd9Sstevel@tonic-gate 2267c478bd9Sstevel@tonic-gate /* 2277c478bd9Sstevel@tonic-gate * global hlock thread 2287c478bd9Sstevel@tonic-gate */ 2297c478bd9Sstevel@tonic-gate ufs_thread_init(&ufs_hlock, 1); 2307c478bd9Sstevel@tonic-gate ufs_thread_start(&ufs_hlock, ufs_thread_hlock, NULL); 2317c478bd9Sstevel@tonic-gate 2327c478bd9Sstevel@tonic-gate ihinit(); 2337c478bd9Sstevel@tonic-gate qtinit(); 2347c478bd9Sstevel@tonic-gate ins.in_maxsize.value.ul = ufs_ninode; 2357c478bd9Sstevel@tonic-gate if ((ufs_inode_kstat = kstat_create("ufs", 0, "inode_cache", "ufs", 2367c478bd9Sstevel@tonic-gate KSTAT_TYPE_NAMED, sizeof (ins) / sizeof (kstat_named_t), 2377c478bd9Sstevel@tonic-gate KSTAT_FLAG_VIRTUAL)) != NULL) { 2387c478bd9Sstevel@tonic-gate ufs_inode_kstat->ks_data = (void *)&ins; 2397c478bd9Sstevel@tonic-gate ufs_inode_kstat->ks_update = ufs_inode_kstat_update; 2407c478bd9Sstevel@tonic-gate kstat_install(ufs_inode_kstat); 2417c478bd9Sstevel@tonic-gate } 2427c478bd9Sstevel@tonic-gate ufsfx_init(); /* fix-on-panic initialization */ 2437c478bd9Sstevel@tonic-gate si_cache_init(); 2447c478bd9Sstevel@tonic-gate ufs_directio_init(); 2457c478bd9Sstevel@tonic-gate lufs_init(); 2467c478bd9Sstevel@tonic-gate mutex_init(&ufs_iuniqtime_lock, NULL, MUTEX_DEFAULT, NULL); 2477c478bd9Sstevel@tonic-gate } 2487c478bd9Sstevel@tonic-gate 2497c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2507c478bd9Sstevel@tonic-gate static int 2517c478bd9Sstevel@tonic-gate ufs_inode_cache_constructor(void *buf, void *cdrarg, int kmflags) 2527c478bd9Sstevel@tonic-gate { 2537c478bd9Sstevel@tonic-gate struct inode *ip = buf; 2547c478bd9Sstevel@tonic-gate struct vnode *vp; 2557c478bd9Sstevel@tonic-gate 256b5fca8f8Stomee vp = ip->i_vnode = vn_alloc(kmflags); 257b5fca8f8Stomee if (vp == NULL) { 258b5fca8f8Stomee return (-1); 259b5fca8f8Stomee } 260b5fca8f8Stomee vn_setops(vp, ufs_vnodeops); 261b5fca8f8Stomee vp->v_data = ip; 262b5fca8f8Stomee 2637c478bd9Sstevel@tonic-gate rw_init(&ip->i_rwlock, NULL, RW_DEFAULT, NULL); 2647c478bd9Sstevel@tonic-gate rw_init(&ip->i_contents, NULL, RW_DEFAULT, NULL); 2657c478bd9Sstevel@tonic-gate mutex_init(&ip->i_tlock, NULL, MUTEX_DEFAULT, NULL); 2667c478bd9Sstevel@tonic-gate dnlc_dir_init(&ip->i_danchor); 2677c478bd9Sstevel@tonic-gate 2687c478bd9Sstevel@tonic-gate cv_init(&ip->i_wrcv, NULL, CV_DRIVER, NULL); 2697c478bd9Sstevel@tonic-gate 2707c478bd9Sstevel@tonic-gate return (0); 2717c478bd9Sstevel@tonic-gate } 2727c478bd9Sstevel@tonic-gate 2737c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2747c478bd9Sstevel@tonic-gate static void 2757c478bd9Sstevel@tonic-gate ufs_inode_cache_destructor(void *buf, void *cdrarg) 2767c478bd9Sstevel@tonic-gate { 2777c478bd9Sstevel@tonic-gate struct inode *ip = buf; 2787c478bd9Sstevel@tonic-gate struct vnode *vp; 2797c478bd9Sstevel@tonic-gate 2807c478bd9Sstevel@tonic-gate vp = ITOV(ip); 2817c478bd9Sstevel@tonic-gate 2827c478bd9Sstevel@tonic-gate rw_destroy(&ip->i_rwlock); 2837c478bd9Sstevel@tonic-gate rw_destroy(&ip->i_contents); 2847c478bd9Sstevel@tonic-gate mutex_destroy(&ip->i_tlock); 2857c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) { 2867c478bd9Sstevel@tonic-gate dnlc_dir_fini(&ip->i_danchor); 2877c478bd9Sstevel@tonic-gate } 2887c478bd9Sstevel@tonic-gate 2897c478bd9Sstevel@tonic-gate cv_destroy(&ip->i_wrcv); 2907c478bd9Sstevel@tonic-gate 2917c478bd9Sstevel@tonic-gate vn_free(vp); 2927c478bd9Sstevel@tonic-gate } 2937c478bd9Sstevel@tonic-gate 2947c478bd9Sstevel@tonic-gate /* 2957c478bd9Sstevel@tonic-gate * Initialize hash links for inodes 2967c478bd9Sstevel@tonic-gate * and build inode free list. 2977c478bd9Sstevel@tonic-gate */ 2987c478bd9Sstevel@tonic-gate void 2997c478bd9Sstevel@tonic-gate ihinit(void) 3007c478bd9Sstevel@tonic-gate { 3017c478bd9Sstevel@tonic-gate int i; 3027c478bd9Sstevel@tonic-gate union ihead *ih = ihead; 3037c478bd9Sstevel@tonic-gate 3047c478bd9Sstevel@tonic-gate mutex_init(&ufs_scan_lock, NULL, MUTEX_DEFAULT, NULL); 3057c478bd9Sstevel@tonic-gate 3067c478bd9Sstevel@tonic-gate inohsz = 1 << highbit(ufs_ninode / ino_hashlen); 3077c478bd9Sstevel@tonic-gate ihead = kmem_zalloc(inohsz * sizeof (union ihead), KM_SLEEP); 3087c478bd9Sstevel@tonic-gate ih_lock = kmem_zalloc(inohsz * sizeof (kmutex_t), KM_SLEEP); 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate for (i = 0, ih = ihead; i < inohsz; i++, ih++) { 3117c478bd9Sstevel@tonic-gate ih->ih_head[0] = ih; 3127c478bd9Sstevel@tonic-gate ih->ih_head[1] = ih; 3137c478bd9Sstevel@tonic-gate mutex_init(&ih_lock[i], NULL, MUTEX_DEFAULT, NULL); 3147c478bd9Sstevel@tonic-gate } 3157c478bd9Sstevel@tonic-gate inode_cache = kmem_cache_create("ufs_inode_cache", 3167c478bd9Sstevel@tonic-gate sizeof (struct inode), 0, ufs_inode_cache_constructor, 3177c478bd9Sstevel@tonic-gate ufs_inode_cache_destructor, ufs_inode_cache_reclaim, 3187c478bd9Sstevel@tonic-gate NULL, NULL, 0); 3197c478bd9Sstevel@tonic-gate } 3207c478bd9Sstevel@tonic-gate 3217c478bd9Sstevel@tonic-gate /* 3227c478bd9Sstevel@tonic-gate * Free an inode structure 3237c478bd9Sstevel@tonic-gate */ 3247c478bd9Sstevel@tonic-gate void 3257c478bd9Sstevel@tonic-gate ufs_free_inode(struct inode *ip) 3267c478bd9Sstevel@tonic-gate { 3277c478bd9Sstevel@tonic-gate vn_invalid(ITOV(ip)); 3287c478bd9Sstevel@tonic-gate kmem_cache_free(inode_cache, ip); 3297c478bd9Sstevel@tonic-gate } 3307c478bd9Sstevel@tonic-gate 3317c478bd9Sstevel@tonic-gate /* 3327c478bd9Sstevel@tonic-gate * Allocate an inode structure 3337c478bd9Sstevel@tonic-gate */ 3347c478bd9Sstevel@tonic-gate struct inode * 3357c478bd9Sstevel@tonic-gate ufs_alloc_inode(ufsvfs_t *ufsvfsp, ino_t ino) 3367c478bd9Sstevel@tonic-gate { 3377c478bd9Sstevel@tonic-gate struct inode *ip; 3387c478bd9Sstevel@tonic-gate vnode_t *vp; 3397c478bd9Sstevel@tonic-gate 3407c478bd9Sstevel@tonic-gate ip = kmem_cache_alloc(inode_cache, KM_SLEEP); 3417c478bd9Sstevel@tonic-gate /* 3427c478bd9Sstevel@tonic-gate * at this point we have a newly allocated inode 3437c478bd9Sstevel@tonic-gate */ 3447c478bd9Sstevel@tonic-gate ip->i_freef = ip; 3457c478bd9Sstevel@tonic-gate ip->i_freeb = ip; 3467c478bd9Sstevel@tonic-gate ip->i_flag = IREF; 3477c478bd9Sstevel@tonic-gate ip->i_seq = 0xFF; /* Unique initial value */ 3487c478bd9Sstevel@tonic-gate ip->i_dev = ufsvfsp->vfs_dev; 3497c478bd9Sstevel@tonic-gate ip->i_ufsvfs = ufsvfsp; 3507c478bd9Sstevel@tonic-gate ip->i_devvp = ufsvfsp->vfs_devvp; 3517c478bd9Sstevel@tonic-gate ip->i_number = ino; 3527c478bd9Sstevel@tonic-gate ip->i_diroff = 0; 3537c478bd9Sstevel@tonic-gate ip->i_nextr = 0; 3547c478bd9Sstevel@tonic-gate ip->i_map = NULL; 3557c478bd9Sstevel@tonic-gate ip->i_rdev = 0; 3567c478bd9Sstevel@tonic-gate ip->i_writes = 0; 3577c478bd9Sstevel@tonic-gate ip->i_mode = 0; 3587c478bd9Sstevel@tonic-gate ip->i_delaylen = 0; 3597c478bd9Sstevel@tonic-gate ip->i_delayoff = 0; 3607c478bd9Sstevel@tonic-gate ip->i_nextrio = 0; 3617c478bd9Sstevel@tonic-gate ip->i_ufs_acl = NULL; 3627c478bd9Sstevel@tonic-gate ip->i_cflags = 0; 3637c478bd9Sstevel@tonic-gate ip->i_mapcnt = 0; 3647c478bd9Sstevel@tonic-gate ip->i_dquot = NULL; 3657f63b8c3Svsakar ip->i_cachedir = CD_ENABLED; 3667c478bd9Sstevel@tonic-gate ip->i_writer = NULL; 3677c478bd9Sstevel@tonic-gate 3687c478bd9Sstevel@tonic-gate /* 3697c478bd9Sstevel@tonic-gate * the vnode for this inode was allocated by the constructor 3707c478bd9Sstevel@tonic-gate */ 3717c478bd9Sstevel@tonic-gate vp = ITOV(ip); 3727c478bd9Sstevel@tonic-gate vn_reinit(vp); 3737c478bd9Sstevel@tonic-gate if (ino == (ino_t)UFSROOTINO) 3747c478bd9Sstevel@tonic-gate vp->v_flag = VROOT; 3757c478bd9Sstevel@tonic-gate vp->v_vfsp = ufsvfsp->vfs_vfs; 3767c478bd9Sstevel@tonic-gate vn_exists(vp); 3777c478bd9Sstevel@tonic-gate return (ip); 3787c478bd9Sstevel@tonic-gate } 3797c478bd9Sstevel@tonic-gate 3807c478bd9Sstevel@tonic-gate /* 3817c478bd9Sstevel@tonic-gate * Look up an inode by device, inumber. If it is in core (in the 3827c478bd9Sstevel@tonic-gate * inode structure), honor the locking protocol. If it is not in 3837c478bd9Sstevel@tonic-gate * core, read it in from the specified device after freeing any pages. 3847c478bd9Sstevel@tonic-gate * In all cases, a pointer to a VN_HELD inode structure is returned. 3857c478bd9Sstevel@tonic-gate */ 3867c478bd9Sstevel@tonic-gate int 3877c478bd9Sstevel@tonic-gate ufs_iget(struct vfs *vfsp, ino_t ino, struct inode **ipp, struct cred *cr) 3887c478bd9Sstevel@tonic-gate { 3897c478bd9Sstevel@tonic-gate return (ufs_iget_internal(vfsp, ino, ipp, cr, 0)); 3907c478bd9Sstevel@tonic-gate } 3917c478bd9Sstevel@tonic-gate 3927c478bd9Sstevel@tonic-gate /* 3937c478bd9Sstevel@tonic-gate * A version of ufs_iget which returns only allocated, linked inodes. 3947c478bd9Sstevel@tonic-gate * This is appropriate for any callers who do not expect a free inode. 3957c478bd9Sstevel@tonic-gate */ 3967c478bd9Sstevel@tonic-gate int 3977c478bd9Sstevel@tonic-gate ufs_iget_alloced(struct vfs *vfsp, ino_t ino, struct inode **ipp, 3987c478bd9Sstevel@tonic-gate struct cred *cr) 3997c478bd9Sstevel@tonic-gate { 4007c478bd9Sstevel@tonic-gate return (ufs_iget_internal(vfsp, ino, ipp, cr, 1)); 4017c478bd9Sstevel@tonic-gate } 4027c478bd9Sstevel@tonic-gate 4037c478bd9Sstevel@tonic-gate /* 4047c478bd9Sstevel@tonic-gate * Set vnode attributes based on v_type, this should be called whenever 4057c478bd9Sstevel@tonic-gate * an inode's i_mode is changed. 4067c478bd9Sstevel@tonic-gate */ 4077c478bd9Sstevel@tonic-gate void 4087c478bd9Sstevel@tonic-gate ufs_reset_vnode(vnode_t *vp) 4097c478bd9Sstevel@tonic-gate { 4107c478bd9Sstevel@tonic-gate /* 4117c478bd9Sstevel@tonic-gate * an old DBE hack 4127c478bd9Sstevel@tonic-gate */ 4137c478bd9Sstevel@tonic-gate if ((VTOI(vp)->i_mode & (ISVTX | IEXEC | IFDIR)) == ISVTX) 4147c478bd9Sstevel@tonic-gate vp->v_flag |= VSWAPLIKE; 4157c478bd9Sstevel@tonic-gate else 4167c478bd9Sstevel@tonic-gate vp->v_flag &= ~VSWAPLIKE; 4177c478bd9Sstevel@tonic-gate 4187c478bd9Sstevel@tonic-gate /* 4197c478bd9Sstevel@tonic-gate * if not swap like and it's just a regular file, we want 4207c478bd9Sstevel@tonic-gate * to maintain the vnode's pages sorted by clean/modified 4217c478bd9Sstevel@tonic-gate * for faster sync'ing to disk 4227c478bd9Sstevel@tonic-gate */ 4237c478bd9Sstevel@tonic-gate if (vp->v_type == VREG) 4247c478bd9Sstevel@tonic-gate vp->v_flag |= VMODSORT; 4257c478bd9Sstevel@tonic-gate else 4267c478bd9Sstevel@tonic-gate vp->v_flag &= ~VMODSORT; 4277c478bd9Sstevel@tonic-gate 4287c478bd9Sstevel@tonic-gate /* 4297c478bd9Sstevel@tonic-gate * Is this an attribute hidden dir? 4307c478bd9Sstevel@tonic-gate */ 4317c478bd9Sstevel@tonic-gate if ((VTOI(vp)->i_mode & IFMT) == IFATTRDIR) 4327c478bd9Sstevel@tonic-gate vp->v_flag |= V_XATTRDIR; 4337c478bd9Sstevel@tonic-gate else 4347c478bd9Sstevel@tonic-gate vp->v_flag &= ~V_XATTRDIR; 4357c478bd9Sstevel@tonic-gate } 4367c478bd9Sstevel@tonic-gate 4377c478bd9Sstevel@tonic-gate /* 4387c478bd9Sstevel@tonic-gate * Shared implementation of ufs_iget and ufs_iget_alloced. The 'validate' 4397c478bd9Sstevel@tonic-gate * flag is used to distinguish the two; when true, we validate that the inode 4407c478bd9Sstevel@tonic-gate * being retrieved looks like a linked and allocated inode. 4417c478bd9Sstevel@tonic-gate */ 4427c478bd9Sstevel@tonic-gate /* ARGSUSED */ 4437c478bd9Sstevel@tonic-gate static int 4447c478bd9Sstevel@tonic-gate ufs_iget_internal(struct vfs *vfsp, ino_t ino, struct inode **ipp, 4457c478bd9Sstevel@tonic-gate struct cred *cr, int validate) 4467c478bd9Sstevel@tonic-gate { 4477c478bd9Sstevel@tonic-gate struct inode *ip, *sp; 4487c478bd9Sstevel@tonic-gate union ihead *ih; 4497c478bd9Sstevel@tonic-gate kmutex_t *ihm; 4507c478bd9Sstevel@tonic-gate struct buf *bp; 4517c478bd9Sstevel@tonic-gate struct dinode *dp; 4527c478bd9Sstevel@tonic-gate struct vnode *vp; 4537c478bd9Sstevel@tonic-gate extern vfs_t EIO_vfs; 4547c478bd9Sstevel@tonic-gate int error; 4557c478bd9Sstevel@tonic-gate int ftype; /* XXX - Remove later on */ 4567c478bd9Sstevel@tonic-gate dev_t vfs_dev; 4577c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 4587c478bd9Sstevel@tonic-gate struct fs *fs; 4597c478bd9Sstevel@tonic-gate int hno; 4607c478bd9Sstevel@tonic-gate daddr_t bno; 4617c478bd9Sstevel@tonic-gate ulong_t ioff; 4627c478bd9Sstevel@tonic-gate 4637c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(sys, ufsiget, 1); 4647c478bd9Sstevel@tonic-gate 4657c478bd9Sstevel@tonic-gate /* 4667c478bd9Sstevel@tonic-gate * Lookup inode in cache. 4677c478bd9Sstevel@tonic-gate */ 4687c478bd9Sstevel@tonic-gate vfs_dev = vfsp->vfs_dev; 4697c478bd9Sstevel@tonic-gate hno = INOHASH(ino); 4707c478bd9Sstevel@tonic-gate ih = &ihead[hno]; 4717c478bd9Sstevel@tonic-gate ihm = &ih_lock[hno]; 4727c478bd9Sstevel@tonic-gate 4737c478bd9Sstevel@tonic-gate again: 4747c478bd9Sstevel@tonic-gate mutex_enter(ihm); 4757c478bd9Sstevel@tonic-gate for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) { 4767c478bd9Sstevel@tonic-gate if (ino != ip->i_number || vfs_dev != ip->i_dev || 4777c478bd9Sstevel@tonic-gate (ip->i_flag & ISTALE)) 4787c478bd9Sstevel@tonic-gate continue; 4797c478bd9Sstevel@tonic-gate 4807c478bd9Sstevel@tonic-gate /* 4817c478bd9Sstevel@tonic-gate * Found the interesting inode; hold it and drop the cache lock 4827c478bd9Sstevel@tonic-gate */ 4837c478bd9Sstevel@tonic-gate vp = ITOV(ip); /* for locknest */ 4847c478bd9Sstevel@tonic-gate VN_HOLD(vp); 4857c478bd9Sstevel@tonic-gate mutex_exit(ihm); 4867c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 4877c478bd9Sstevel@tonic-gate 4887c478bd9Sstevel@tonic-gate /* 4897c478bd9Sstevel@tonic-gate * if necessary, remove from idle list 4907c478bd9Sstevel@tonic-gate */ 4917c478bd9Sstevel@tonic-gate if ((ip->i_flag & IREF) == 0) { 4927c478bd9Sstevel@tonic-gate if (ufs_rmidle(ip)) 4937c478bd9Sstevel@tonic-gate VN_RELE(vp); 4947c478bd9Sstevel@tonic-gate } 4957c478bd9Sstevel@tonic-gate 4967c478bd9Sstevel@tonic-gate /* 4977c478bd9Sstevel@tonic-gate * Could the inode be read from disk? 4987c478bd9Sstevel@tonic-gate */ 4997c478bd9Sstevel@tonic-gate if (ip->i_flag & ISTALE) { 5007c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5017c478bd9Sstevel@tonic-gate VN_RELE(vp); 5027c478bd9Sstevel@tonic-gate goto again; 5037c478bd9Sstevel@tonic-gate } 5047c478bd9Sstevel@tonic-gate 5057c478bd9Sstevel@tonic-gate ins.in_hits.value.ul++; 5067c478bd9Sstevel@tonic-gate *ipp = ip; 5077c478bd9Sstevel@tonic-gate 5087c478bd9Sstevel@tonic-gate /* 5097c478bd9Sstevel@tonic-gate * Reset the vnode's attribute flags 5107c478bd9Sstevel@tonic-gate */ 5117c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 5127c478bd9Sstevel@tonic-gate ufs_reset_vnode(vp); 5137c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 5147c478bd9Sstevel@tonic-gate 5157c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5167c478bd9Sstevel@tonic-gate 5177c478bd9Sstevel@tonic-gate return (0); 5187c478bd9Sstevel@tonic-gate } 5197c478bd9Sstevel@tonic-gate mutex_exit(ihm); 5207c478bd9Sstevel@tonic-gate 5217c478bd9Sstevel@tonic-gate /* 5227c478bd9Sstevel@tonic-gate * Inode was not in cache. 5237c478bd9Sstevel@tonic-gate * 5247c478bd9Sstevel@tonic-gate * Allocate a new entry 5257c478bd9Sstevel@tonic-gate */ 5267c478bd9Sstevel@tonic-gate ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 5277c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 5287c478bd9Sstevel@tonic-gate 5297c478bd9Sstevel@tonic-gate ip = ufs_alloc_inode(ufsvfsp, ino); 5307c478bd9Sstevel@tonic-gate vp = ITOV(ip); 5317c478bd9Sstevel@tonic-gate 5327c478bd9Sstevel@tonic-gate bno = fsbtodb(fs, itod(fs, ino)); 5337c478bd9Sstevel@tonic-gate ioff = (sizeof (struct dinode)) * (itoo(fs, ino)); 5347c478bd9Sstevel@tonic-gate ip->i_doff = (offset_t)ioff + ldbtob(bno); 5357c478bd9Sstevel@tonic-gate 5367c478bd9Sstevel@tonic-gate /* 5377c478bd9Sstevel@tonic-gate * put a place holder in the cache (if not already there) 5387c478bd9Sstevel@tonic-gate */ 5397c478bd9Sstevel@tonic-gate mutex_enter(ihm); 5407c478bd9Sstevel@tonic-gate for (sp = ih->ih_chain[0]; sp != (struct inode *)ih; sp = sp->i_forw) 5417c478bd9Sstevel@tonic-gate if (ino == sp->i_number && vfs_dev == sp->i_dev && 5427c478bd9Sstevel@tonic-gate ((sp->i_flag & ISTALE) == 0)) { 5437c478bd9Sstevel@tonic-gate mutex_exit(ihm); 5447c478bd9Sstevel@tonic-gate ufs_free_inode(ip); 5457c478bd9Sstevel@tonic-gate goto again; 5467c478bd9Sstevel@tonic-gate } 5477c478bd9Sstevel@tonic-gate /* 5487c478bd9Sstevel@tonic-gate * It would be nice to ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock)) 5497c478bd9Sstevel@tonic-gate * here, but if we do, then shadow inode allocations panic the 5507c478bd9Sstevel@tonic-gate * system. We don't have to hold vfs_dqrwlock for shadow inodes 5517c478bd9Sstevel@tonic-gate * and the ufs_iget() parameters don't tell us what we are getting 5527c478bd9Sstevel@tonic-gate * so we have no way of knowing this is a ufs_iget() call from 5537c478bd9Sstevel@tonic-gate * a ufs_ialloc() call for a shadow inode. 5547c478bd9Sstevel@tonic-gate */ 5557c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 5567c478bd9Sstevel@tonic-gate insque(ip, ih); 5577c478bd9Sstevel@tonic-gate mutex_exit(ihm); 5587c478bd9Sstevel@tonic-gate /* 5597c478bd9Sstevel@tonic-gate * read the dinode 5607c478bd9Sstevel@tonic-gate */ 5617c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, bno, (int)fs->fs_bsize); 5627c478bd9Sstevel@tonic-gate 5637c478bd9Sstevel@tonic-gate /* 5647c478bd9Sstevel@tonic-gate * Check I/O errors 5657c478bd9Sstevel@tonic-gate */ 5667c478bd9Sstevel@tonic-gate error = ((bp->b_flags & B_ERROR) ? geterror(bp) : 0); 5677c478bd9Sstevel@tonic-gate if (error) { 5687c478bd9Sstevel@tonic-gate brelse(bp); 5697c478bd9Sstevel@tonic-gate ip->i_flag |= ISTALE; /* in case someone is looking it up */ 5707c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5717c478bd9Sstevel@tonic-gate vp->v_vfsp = &EIO_vfs; 5727c478bd9Sstevel@tonic-gate VN_RELE(vp); 5737c478bd9Sstevel@tonic-gate return (error); 5747c478bd9Sstevel@tonic-gate } 5757c478bd9Sstevel@tonic-gate /* 5767c478bd9Sstevel@tonic-gate * initialize the inode's dinode 5777c478bd9Sstevel@tonic-gate */ 5787c478bd9Sstevel@tonic-gate dp = (struct dinode *)(ioff + bp->b_un.b_addr); 5797c478bd9Sstevel@tonic-gate ip->i_ic = dp->di_ic; /* structure assignment */ 5807c478bd9Sstevel@tonic-gate brelse(bp); 5817c478bd9Sstevel@tonic-gate 5827c478bd9Sstevel@tonic-gate /* 5837c478bd9Sstevel@tonic-gate * Maintain compatibility with Solaris 1.x UFS 5847c478bd9Sstevel@tonic-gate */ 5857c478bd9Sstevel@tonic-gate if (ip->i_suid != UID_LONG) 5867c478bd9Sstevel@tonic-gate ip->i_uid = ip->i_suid; 5877c478bd9Sstevel@tonic-gate if (ip->i_sgid != GID_LONG) 5887c478bd9Sstevel@tonic-gate ip->i_gid = ip->i_sgid; 5897c478bd9Sstevel@tonic-gate 5907c478bd9Sstevel@tonic-gate ftype = ip->i_mode & IFMT; 5917c478bd9Sstevel@tonic-gate if (ftype == IFBLK || ftype == IFCHR) { 5927c478bd9Sstevel@tonic-gate dev_t dv; 5937c478bd9Sstevel@tonic-gate uint_t top16 = ip->i_ordev & 0xffff0000u; 5947c478bd9Sstevel@tonic-gate 5957c478bd9Sstevel@tonic-gate if (top16 == 0 || top16 == 0xffff0000u) 5967c478bd9Sstevel@tonic-gate dv = expdev(ip->i_ordev); 5977c478bd9Sstevel@tonic-gate else 5987c478bd9Sstevel@tonic-gate dv = expldev(ip->i_ordev); 5997c478bd9Sstevel@tonic-gate vp->v_rdev = ip->i_rdev = dv; 6007c478bd9Sstevel@tonic-gate } 6017c478bd9Sstevel@tonic-gate 6027c478bd9Sstevel@tonic-gate /* 6037c478bd9Sstevel@tonic-gate * if our caller only expects allocated inodes, verify that 6047c478bd9Sstevel@tonic-gate * this inode looks good; throw it out if it's bad. 6057c478bd9Sstevel@tonic-gate */ 6067c478bd9Sstevel@tonic-gate if (validate) { 6077c478bd9Sstevel@tonic-gate if ((ftype == 0) || (ip->i_nlink <= 0)) { 6087c478bd9Sstevel@tonic-gate ip->i_flag |= ISTALE; 6097c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 6107c478bd9Sstevel@tonic-gate vp->v_vfsp = &EIO_vfs; 6117c478bd9Sstevel@tonic-gate VN_RELE(vp); 6127c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, 6137c478bd9Sstevel@tonic-gate "%s: unexpected free inode %d, run fsck(1M)%s", 6147c478bd9Sstevel@tonic-gate fs->fs_fsmnt, (int)ino, 6157c478bd9Sstevel@tonic-gate (TRANS_ISTRANS(ufsvfsp) ? " -o f" : "")); 6167c478bd9Sstevel@tonic-gate return (EIO); 6177c478bd9Sstevel@tonic-gate } 6187c478bd9Sstevel@tonic-gate } 6197c478bd9Sstevel@tonic-gate 6207c478bd9Sstevel@tonic-gate /* 62152d54943Sbatschul * Finish initializing the vnode, special handling for shadow inodes 62252d54943Sbatschul * because IFTOVT() will produce a v_type of VNON which is not what we 62352d54943Sbatschul * want, set v_type to VREG explicitly in that case. 6247c478bd9Sstevel@tonic-gate */ 62552d54943Sbatschul if (ftype == IFSHAD) { 62652d54943Sbatschul vp->v_type = VREG; 62752d54943Sbatschul } else { 6287c478bd9Sstevel@tonic-gate vp->v_type = IFTOVT((mode_t)ip->i_mode); 62952d54943Sbatschul } 6307c478bd9Sstevel@tonic-gate 6317c478bd9Sstevel@tonic-gate ufs_reset_vnode(vp); 6327c478bd9Sstevel@tonic-gate 6337c478bd9Sstevel@tonic-gate /* 6347c478bd9Sstevel@tonic-gate * read the shadow 6357c478bd9Sstevel@tonic-gate */ 6367c478bd9Sstevel@tonic-gate if (ftype != 0 && ip->i_shadow != 0) { 6377c478bd9Sstevel@tonic-gate if ((error = ufs_si_load(ip, cr)) != 0) { 6387c478bd9Sstevel@tonic-gate ip->i_flag |= ISTALE; 6397c478bd9Sstevel@tonic-gate ip->i_ufs_acl = NULL; 6407c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 6417c478bd9Sstevel@tonic-gate vp->v_vfsp = &EIO_vfs; 6427c478bd9Sstevel@tonic-gate VN_RELE(vp); 6437c478bd9Sstevel@tonic-gate return (error); 6447c478bd9Sstevel@tonic-gate } 6457c478bd9Sstevel@tonic-gate } 6467c478bd9Sstevel@tonic-gate 6477c478bd9Sstevel@tonic-gate /* 6487c478bd9Sstevel@tonic-gate * Only attach quota information if the inode has a type and if 6497c478bd9Sstevel@tonic-gate * that type is not a shadow inode. 6507c478bd9Sstevel@tonic-gate */ 6517c478bd9Sstevel@tonic-gate if (ip->i_mode && ((ip->i_mode & IFMT) != IFSHAD) && 6527c478bd9Sstevel@tonic-gate ((ip->i_mode & IFMT) != IFATTRDIR)) { 6537c478bd9Sstevel@tonic-gate ip->i_dquot = getinoquota(ip); 6547c478bd9Sstevel@tonic-gate } 6557c478bd9Sstevel@tonic-gate TRANS_MATA_IGET(ufsvfsp, ip); 6567c478bd9Sstevel@tonic-gate *ipp = ip; 6577c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 6587c478bd9Sstevel@tonic-gate 6597c478bd9Sstevel@tonic-gate return (0); 6607c478bd9Sstevel@tonic-gate } 6617c478bd9Sstevel@tonic-gate 6627c478bd9Sstevel@tonic-gate /* 6637c478bd9Sstevel@tonic-gate * Vnode is no longer referenced, write the inode out 6647c478bd9Sstevel@tonic-gate * and if necessary, truncate and deallocate the file. 6657c478bd9Sstevel@tonic-gate */ 6667c478bd9Sstevel@tonic-gate void 6677c478bd9Sstevel@tonic-gate ufs_iinactive(struct inode *ip) 6687c478bd9Sstevel@tonic-gate { 6697c478bd9Sstevel@tonic-gate int front; 6707c478bd9Sstevel@tonic-gate struct inode *iq; 6717c478bd9Sstevel@tonic-gate struct inode *hip; 6727c478bd9Sstevel@tonic-gate struct ufs_q *uq; 6737c478bd9Sstevel@tonic-gate struct vnode *vp = ITOV(ip); 674121be23bSjkennedy struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 675121be23bSjkennedy struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info; 6767c478bd9Sstevel@tonic-gate 6777c478bd9Sstevel@tonic-gate /* 6787c478bd9Sstevel@tonic-gate * Because the vnode type might have been changed, 6797c478bd9Sstevel@tonic-gate * the dnlc_dir_purge must be called unconditionally. 6807c478bd9Sstevel@tonic-gate */ 6817c478bd9Sstevel@tonic-gate dnlc_dir_purge(&ip->i_danchor); 6827c478bd9Sstevel@tonic-gate 6837c478bd9Sstevel@tonic-gate /* 6847c478bd9Sstevel@tonic-gate * Get exclusive access to inode data. 6857c478bd9Sstevel@tonic-gate */ 6867c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 6877c478bd9Sstevel@tonic-gate ASSERT(ip->i_flag & IREF); 6887c478bd9Sstevel@tonic-gate 6897c478bd9Sstevel@tonic-gate /* 6907c478bd9Sstevel@tonic-gate * Make sure no one reclaimed the inode before we put it on 6917c478bd9Sstevel@tonic-gate * the freelist or destroy it. We keep our 'hold' on the vnode 6927c478bd9Sstevel@tonic-gate * from vn_rele until we are ready to do something with the inode. 6937c478bd9Sstevel@tonic-gate * 6947c478bd9Sstevel@tonic-gate * Pageout may put a VN_HOLD/VN_RELE at anytime during this 6957c478bd9Sstevel@tonic-gate * operation via an async putpage, so we must make sure 6967c478bd9Sstevel@tonic-gate * we don't free/destroy the inode more than once. ufs_iget 6977c478bd9Sstevel@tonic-gate * may also put a VN_HOLD on the inode before it grabs 6987c478bd9Sstevel@tonic-gate * the i_contents lock. This is done so we don't free 6997c478bd9Sstevel@tonic-gate * an inode that a thread is waiting on. 7007c478bd9Sstevel@tonic-gate */ 7017c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 7027c478bd9Sstevel@tonic-gate 7037c478bd9Sstevel@tonic-gate if (vp->v_count > 1) { 7047c478bd9Sstevel@tonic-gate vp->v_count--; /* release our hold from vn_rele */ 7057c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7067c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 7077c478bd9Sstevel@tonic-gate return; 7087c478bd9Sstevel@tonic-gate } 7097c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7107c478bd9Sstevel@tonic-gate 7117c478bd9Sstevel@tonic-gate /* 7127c478bd9Sstevel@tonic-gate * For umount case: if ufsvfs ptr is NULL, the inode is unhashed 7137c478bd9Sstevel@tonic-gate * and clean. It can be safely destroyed (cyf). 7147c478bd9Sstevel@tonic-gate */ 7157c478bd9Sstevel@tonic-gate if (ip->i_ufsvfs == NULL) { 7167c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 7177c478bd9Sstevel@tonic-gate ufs_si_del(ip); 7187c478bd9Sstevel@tonic-gate ASSERT((vp->v_type == VCHR) || !vn_has_cached_data(vp)); 7197c478bd9Sstevel@tonic-gate ufs_free_inode(ip); 7207c478bd9Sstevel@tonic-gate return; 7217c478bd9Sstevel@tonic-gate } 7227c478bd9Sstevel@tonic-gate 7237c478bd9Sstevel@tonic-gate /* 7247c478bd9Sstevel@tonic-gate * queue idle inode to appropriate thread. Will check v_count == 1 7257c478bd9Sstevel@tonic-gate * prior to putting this on the appropriate queue. 7267c478bd9Sstevel@tonic-gate * Stale inodes will be unhashed and freed by the ufs idle thread 7277c478bd9Sstevel@tonic-gate * in ufs_idle_free() 7287c478bd9Sstevel@tonic-gate */ 7297c478bd9Sstevel@tonic-gate front = 1; 7307c478bd9Sstevel@tonic-gate if ((ip->i_flag & ISTALE) == 0 && ip->i_fs->fs_ronly == 0 && 7317c478bd9Sstevel@tonic-gate ip->i_mode && ip->i_nlink <= 0) { 7327c478bd9Sstevel@tonic-gate /* 7337c478bd9Sstevel@tonic-gate * Mark the i_flag to indicate that inode is being deleted. 7347c478bd9Sstevel@tonic-gate * This flag will be cleared when the deletion is complete. 7357c478bd9Sstevel@tonic-gate * This prevents nfs from sneaking in via ufs_vget() while 7367c478bd9Sstevel@tonic-gate * the delete is in progress (bugid 1242481). 7377c478bd9Sstevel@tonic-gate */ 7387c478bd9Sstevel@tonic-gate ip->i_flag |= IDEL; 7397c478bd9Sstevel@tonic-gate 7407c478bd9Sstevel@tonic-gate /* 7417c478bd9Sstevel@tonic-gate * NOIDEL means that deletes are not allowed at this time; 7427c478bd9Sstevel@tonic-gate * whoever resets NOIDEL will also send this inode back 7437c478bd9Sstevel@tonic-gate * through ufs_iinactive. IREF remains set. 7447c478bd9Sstevel@tonic-gate */ 7457c478bd9Sstevel@tonic-gate if (ULOCKFS_IS_NOIDEL(ITOUL(ip))) { 7467c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 7477c478bd9Sstevel@tonic-gate vp->v_count--; 7487c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7497c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 7507c478bd9Sstevel@tonic-gate return; 7517c478bd9Sstevel@tonic-gate } 7527c478bd9Sstevel@tonic-gate if (!TRANS_ISTRANS(ip->i_ufsvfs)) { 7537c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 7547c478bd9Sstevel@tonic-gate ufs_delete(ip->i_ufsvfs, ip, 0); 7557c478bd9Sstevel@tonic-gate return; 7567c478bd9Sstevel@tonic-gate } 7577c478bd9Sstevel@tonic-gate 7587c478bd9Sstevel@tonic-gate /* queue to delete thread; IREF remains set */ 7597c478bd9Sstevel@tonic-gate ins.in_qfree.value.ul++; 7607c478bd9Sstevel@tonic-gate uq = &ip->i_ufsvfs->vfs_delete; 7617c478bd9Sstevel@tonic-gate 7627c478bd9Sstevel@tonic-gate mutex_enter(&uq->uq_mutex); 7637c478bd9Sstevel@tonic-gate 7647c478bd9Sstevel@tonic-gate /* add to q */ 7657c478bd9Sstevel@tonic-gate if ((iq = uq->uq_ihead) != 0) { 7667c478bd9Sstevel@tonic-gate ip->i_freef = iq; 7677c478bd9Sstevel@tonic-gate ip->i_freeb = iq->i_freeb; 7687c478bd9Sstevel@tonic-gate iq->i_freeb->i_freef = ip; 7697c478bd9Sstevel@tonic-gate iq->i_freeb = ip; 7707c478bd9Sstevel@tonic-gate if (front) 7717c478bd9Sstevel@tonic-gate uq->uq_ihead = ip; 7727c478bd9Sstevel@tonic-gate } else { 7737c478bd9Sstevel@tonic-gate uq->uq_ihead = ip; 7747c478bd9Sstevel@tonic-gate ip->i_freef = ip; 7757c478bd9Sstevel@tonic-gate ip->i_freeb = ip; 7767c478bd9Sstevel@tonic-gate } 777121be23bSjkennedy 778121be23bSjkennedy delq_info->delq_unreclaimed_files += 1; 779121be23bSjkennedy delq_info->delq_unreclaimed_blocks += ip->i_blocks; 7807c478bd9Sstevel@tonic-gate } else { 7817c478bd9Sstevel@tonic-gate /* 7827c478bd9Sstevel@tonic-gate * queue to idle thread 7837c478bd9Sstevel@tonic-gate * Check the v_count == 1 again. 7847c478bd9Sstevel@tonic-gate * 7857c478bd9Sstevel@tonic-gate */ 7867c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 7877c478bd9Sstevel@tonic-gate if (vp->v_count > 1) { 7887c478bd9Sstevel@tonic-gate vp->v_count--; /* release our hold from vn_rele */ 7897c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7907c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 7917c478bd9Sstevel@tonic-gate return; 7927c478bd9Sstevel@tonic-gate } 7937c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7947c478bd9Sstevel@tonic-gate uq = &ufs_idle_q; 7957c478bd9Sstevel@tonic-gate 7967c478bd9Sstevel@tonic-gate /* 7977c478bd9Sstevel@tonic-gate * useful iff it has pages or is a fastsymlink; otherwise junk 7987c478bd9Sstevel@tonic-gate */ 7997c478bd9Sstevel@tonic-gate mutex_enter(&uq->uq_mutex); 8007c478bd9Sstevel@tonic-gate 8017c478bd9Sstevel@tonic-gate /* clear IREF means `on idle list' */ 8027c478bd9Sstevel@tonic-gate ip->i_flag &= ~(IREF | IDIRECTIO); 8037c478bd9Sstevel@tonic-gate 8047c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp) || ip->i_flag & IFASTSYMLNK) { 8057c478bd9Sstevel@tonic-gate ins.in_frback.value.ul++; 8067c478bd9Sstevel@tonic-gate hip = (inode_t *)&ufs_useful_iq[IQHASH(ip)]; 8077c478bd9Sstevel@tonic-gate ufs_nuseful_iq++; 8087c478bd9Sstevel@tonic-gate } else { 8097c478bd9Sstevel@tonic-gate ins.in_frfront.value.ul++; 8107c478bd9Sstevel@tonic-gate hip = (inode_t *)&ufs_junk_iq[IQHASH(ip)]; 8117c478bd9Sstevel@tonic-gate ip->i_flag |= IJUNKIQ; 8127c478bd9Sstevel@tonic-gate ufs_njunk_iq++; 8137c478bd9Sstevel@tonic-gate } 8147c478bd9Sstevel@tonic-gate ip->i_freef = hip; 8157c478bd9Sstevel@tonic-gate ip->i_freeb = hip->i_freeb; 8167c478bd9Sstevel@tonic-gate hip->i_freeb->i_freef = ip; 8177c478bd9Sstevel@tonic-gate hip->i_freeb = ip; 8187c478bd9Sstevel@tonic-gate } 8197c478bd9Sstevel@tonic-gate 8207c478bd9Sstevel@tonic-gate /* wakeup thread(s) if q is overfull */ 8217c478bd9Sstevel@tonic-gate if (++uq->uq_ne == uq->uq_lowat) 8227c478bd9Sstevel@tonic-gate cv_broadcast(&uq->uq_cv); 8237c478bd9Sstevel@tonic-gate 8247c478bd9Sstevel@tonic-gate /* all done, release the q and inode */ 8257c478bd9Sstevel@tonic-gate mutex_exit(&uq->uq_mutex); 8267c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 8277c478bd9Sstevel@tonic-gate } 8287c478bd9Sstevel@tonic-gate 8297c478bd9Sstevel@tonic-gate /* 8307c478bd9Sstevel@tonic-gate * Check accessed and update flags on an inode structure. 8317c478bd9Sstevel@tonic-gate * If any are on, update the inode with the (unique) current time. 8327c478bd9Sstevel@tonic-gate * If waitfor is given, insure I/O order so wait for write to complete. 8337c478bd9Sstevel@tonic-gate */ 8347c478bd9Sstevel@tonic-gate void 8357c478bd9Sstevel@tonic-gate ufs_iupdat(struct inode *ip, int waitfor) 8367c478bd9Sstevel@tonic-gate { 8377c478bd9Sstevel@tonic-gate struct buf *bp; 8387c478bd9Sstevel@tonic-gate struct fs *fp; 8397c478bd9Sstevel@tonic-gate struct dinode *dp; 8407c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 8417c478bd9Sstevel@tonic-gate int i; 8427c478bd9Sstevel@tonic-gate int do_trans_times; 8437c478bd9Sstevel@tonic-gate ushort_t flag; 8447c478bd9Sstevel@tonic-gate o_uid_t suid; 8457c478bd9Sstevel@tonic-gate o_gid_t sgid; 8467c478bd9Sstevel@tonic-gate 8477c478bd9Sstevel@tonic-gate /* 8487c478bd9Sstevel@tonic-gate * This function is now safe to be called with either the reader 8497c478bd9Sstevel@tonic-gate * or writer i_contents lock. 8507c478bd9Sstevel@tonic-gate */ 8517c478bd9Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&ip->i_contents)); 8527c478bd9Sstevel@tonic-gate 8537c478bd9Sstevel@tonic-gate /* 8547c478bd9Sstevel@tonic-gate * Return if file system has been forcibly umounted. 8557c478bd9Sstevel@tonic-gate */ 8567c478bd9Sstevel@tonic-gate if (ufsvfsp == NULL) 8577c478bd9Sstevel@tonic-gate return; 8587c478bd9Sstevel@tonic-gate 8597c478bd9Sstevel@tonic-gate flag = ip->i_flag; /* Atomic read */ 8607c478bd9Sstevel@tonic-gate /* 8617c478bd9Sstevel@tonic-gate * We better not update the disk inode from a stale inode. 8627c478bd9Sstevel@tonic-gate */ 8637c478bd9Sstevel@tonic-gate if (flag & ISTALE) 8647c478bd9Sstevel@tonic-gate return; 8657c478bd9Sstevel@tonic-gate 8667c478bd9Sstevel@tonic-gate fp = ip->i_fs; 8677c478bd9Sstevel@tonic-gate 8687c478bd9Sstevel@tonic-gate if ((flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) != 0) { 8697c478bd9Sstevel@tonic-gate if (fp->fs_ronly) { 8707c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 8717c478bd9Sstevel@tonic-gate ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG); 8727c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 8737c478bd9Sstevel@tonic-gate return; 8747c478bd9Sstevel@tonic-gate } 8757c478bd9Sstevel@tonic-gate /* 8767c478bd9Sstevel@tonic-gate * fs is active while metadata is being written 8777c478bd9Sstevel@tonic-gate */ 8787c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 8797c478bd9Sstevel@tonic-gate ufs_notclean(ufsvfsp); 8807c478bd9Sstevel@tonic-gate /* 8817c478bd9Sstevel@tonic-gate * get the dinode 8827c478bd9Sstevel@tonic-gate */ 8837c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, 8847c478bd9Sstevel@tonic-gate (daddr_t)fsbtodb(fp, itod(fp, ip->i_number)), 8857c478bd9Sstevel@tonic-gate (int)fp->fs_bsize); 8867c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 8877c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 8887c478bd9Sstevel@tonic-gate ip->i_flag &= 8897c478bd9Sstevel@tonic-gate ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG); 8907c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 8917c478bd9Sstevel@tonic-gate brelse(bp); 8927c478bd9Sstevel@tonic-gate return; 8937c478bd9Sstevel@tonic-gate } 8947c478bd9Sstevel@tonic-gate /* 8957c478bd9Sstevel@tonic-gate * munge inode fields 8967c478bd9Sstevel@tonic-gate */ 8977c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 8987c478bd9Sstevel@tonic-gate ITIMES_NOLOCK(ip); 8997c478bd9Sstevel@tonic-gate do_trans_times = ((ip->i_flag & (IMOD|IMODACC)) == IMODACC); 9007c478bd9Sstevel@tonic-gate ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG); 9017c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 9027c478bd9Sstevel@tonic-gate 9037c478bd9Sstevel@tonic-gate /* 9047c478bd9Sstevel@tonic-gate * For reads and concurrent re-writes, no deltas were 9057c478bd9Sstevel@tonic-gate * entered for the access time changes - do it now. 9067c478bd9Sstevel@tonic-gate */ 9077c478bd9Sstevel@tonic-gate if (do_trans_times) { 9087c478bd9Sstevel@tonic-gate TRANS_INODE_TIMES(ufsvfsp, ip); 9097c478bd9Sstevel@tonic-gate } 9107c478bd9Sstevel@tonic-gate 9117c478bd9Sstevel@tonic-gate /* 9127c478bd9Sstevel@tonic-gate * For SunOS 5.0->5.4, these lines below read: 9137c478bd9Sstevel@tonic-gate * 9147c478bd9Sstevel@tonic-gate * suid = (ip->i_uid > MAXUID) ? UID_LONG : ip->i_uid; 9157c478bd9Sstevel@tonic-gate * sgid = (ip->i_gid > MAXUID) ? GID_LONG : ip->i_gid; 9167c478bd9Sstevel@tonic-gate * 9177c478bd9Sstevel@tonic-gate * where MAXUID was set to 60002. This was incorrect - 9187c478bd9Sstevel@tonic-gate * the uids should have been constrained to what fitted into 9197c478bd9Sstevel@tonic-gate * a 16-bit word. 9207c478bd9Sstevel@tonic-gate * 9217c478bd9Sstevel@tonic-gate * This means that files from 4.x filesystems that have an 9227c478bd9Sstevel@tonic-gate * i_suid field larger than 60002 will have that field 9237c478bd9Sstevel@tonic-gate * changed to 65535. 9247c478bd9Sstevel@tonic-gate * 9257c478bd9Sstevel@tonic-gate * Security note: 4.x UFS could never create a i_suid of 9267c478bd9Sstevel@tonic-gate * UID_LONG since that would've corresponded to -1. 9277c478bd9Sstevel@tonic-gate */ 9287c478bd9Sstevel@tonic-gate suid = (ulong_t)ip->i_uid > (ulong_t)USHRT_MAX ? 9297c478bd9Sstevel@tonic-gate UID_LONG : ip->i_uid; 9307c478bd9Sstevel@tonic-gate sgid = (ulong_t)ip->i_gid > (ulong_t)USHRT_MAX ? 9317c478bd9Sstevel@tonic-gate GID_LONG : ip->i_gid; 9327c478bd9Sstevel@tonic-gate 9337c478bd9Sstevel@tonic-gate if ((ip->i_suid != suid) || (ip->i_sgid != sgid)) { 9347c478bd9Sstevel@tonic-gate ip->i_suid = suid; 9357c478bd9Sstevel@tonic-gate ip->i_sgid = sgid; 9367c478bd9Sstevel@tonic-gate TRANS_INODE(ufsvfsp, ip); 9377c478bd9Sstevel@tonic-gate } 9387c478bd9Sstevel@tonic-gate 9397c478bd9Sstevel@tonic-gate if ((ip->i_mode & IFMT) == IFBLK || 9407c478bd9Sstevel@tonic-gate (ip->i_mode & IFMT) == IFCHR) { 9417c478bd9Sstevel@tonic-gate dev_t d = ip->i_rdev; 9427c478bd9Sstevel@tonic-gate dev32_t dev32; 9437c478bd9Sstevel@tonic-gate 9447c478bd9Sstevel@tonic-gate /* 9457c478bd9Sstevel@tonic-gate * load first direct block only if special device 9467c478bd9Sstevel@tonic-gate */ 9477c478bd9Sstevel@tonic-gate if (!cmpldev(&dev32, d)) { 9487c478bd9Sstevel@tonic-gate /* 9497c478bd9Sstevel@tonic-gate * We panic here because there's "no way" 9507c478bd9Sstevel@tonic-gate * we should have been able to create a large 9517c478bd9Sstevel@tonic-gate * inode with a large dev_t. Earlier layers 9527c478bd9Sstevel@tonic-gate * should've caught this. 9537c478bd9Sstevel@tonic-gate */ 9547c478bd9Sstevel@tonic-gate panic("ip %p: i_rdev too big", (void *)ip); 9557c478bd9Sstevel@tonic-gate } 9567c478bd9Sstevel@tonic-gate 9577c478bd9Sstevel@tonic-gate if (dev32 & ~((O_MAXMAJ << L_BITSMINOR32) | O_MAXMIN)) { 9587c478bd9Sstevel@tonic-gate ip->i_ordev = dev32; /* can't use old fmt. */ 9597c478bd9Sstevel@tonic-gate } else { 9607c478bd9Sstevel@tonic-gate ip->i_ordev = cmpdev(d); 9617c478bd9Sstevel@tonic-gate } 9627c478bd9Sstevel@tonic-gate } 9637c478bd9Sstevel@tonic-gate 9647c478bd9Sstevel@tonic-gate /* 9657c478bd9Sstevel@tonic-gate * copy inode to dinode (zero fastsymlnk in dinode) 9667c478bd9Sstevel@tonic-gate */ 9677c478bd9Sstevel@tonic-gate dp = (struct dinode *)bp->b_un.b_addr + itoo(fp, ip->i_number); 9687c478bd9Sstevel@tonic-gate dp->di_ic = ip->i_ic; /* structure assignment */ 9697c478bd9Sstevel@tonic-gate if (flag & IFASTSYMLNK) { 9707c478bd9Sstevel@tonic-gate for (i = 1; i < NDADDR; i++) 9717c478bd9Sstevel@tonic-gate dp->di_db[i] = 0; 9727c478bd9Sstevel@tonic-gate for (i = 0; i < NIADDR; i++) 9737c478bd9Sstevel@tonic-gate dp->di_ib[i] = 0; 9747c478bd9Sstevel@tonic-gate } 9757c478bd9Sstevel@tonic-gate if (TRANS_ISTRANS(ufsvfsp)) { 9767c478bd9Sstevel@tonic-gate /* 9777c478bd9Sstevel@tonic-gate * Pass only a sector size buffer containing 9787c478bd9Sstevel@tonic-gate * the inode, otherwise when the buffer is copied 9797c478bd9Sstevel@tonic-gate * into a cached roll buffer then too much memory 9807c478bd9Sstevel@tonic-gate * gets consumed if 8KB inode buffers are passed. 9817c478bd9Sstevel@tonic-gate */ 9827c478bd9Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (caddr_t)dp, ip->i_doff, 9837c478bd9Sstevel@tonic-gate sizeof (struct dinode), 9847c478bd9Sstevel@tonic-gate (caddr_t)P2ALIGN((uintptr_t)dp, DEV_BSIZE), 9857c478bd9Sstevel@tonic-gate DEV_BSIZE); 9867c478bd9Sstevel@tonic-gate 9877c478bd9Sstevel@tonic-gate brelse(bp); 9887c478bd9Sstevel@tonic-gate } else if (waitfor && (ip->i_ufsvfs->vfs_dio == 0)) { 9897c478bd9Sstevel@tonic-gate UFS_BRWRITE(ufsvfsp, bp); 9907c478bd9Sstevel@tonic-gate 9917c478bd9Sstevel@tonic-gate /* 9927c478bd9Sstevel@tonic-gate * Synchronous write has guaranteed that inode 9937c478bd9Sstevel@tonic-gate * has been written on disk so clear the flag 9947c478bd9Sstevel@tonic-gate */ 9957c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 9967c478bd9Sstevel@tonic-gate ip->i_flag &= ~IBDWRITE; 9977c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 9987c478bd9Sstevel@tonic-gate } else { 9997c478bd9Sstevel@tonic-gate bdrwrite(bp); 10007c478bd9Sstevel@tonic-gate 10017c478bd9Sstevel@tonic-gate /* 10027c478bd9Sstevel@tonic-gate * This write hasn't guaranteed that inode has been 10037c478bd9Sstevel@tonic-gate * written on the disk. 10047c478bd9Sstevel@tonic-gate * Since, all updat flags on inode are cleared, we must 10057c478bd9Sstevel@tonic-gate * remember the condition in case inode is to be updated 10067c478bd9Sstevel@tonic-gate * synchronously later (e.g.- fsync()/fdatasync()) 10077c478bd9Sstevel@tonic-gate * and inode has not been modified yet. 10087c478bd9Sstevel@tonic-gate */ 10097c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 10107c478bd9Sstevel@tonic-gate ip->i_flag |= IBDWRITE; 10117c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 10127c478bd9Sstevel@tonic-gate } 10137c478bd9Sstevel@tonic-gate } else { 10147c478bd9Sstevel@tonic-gate /* 10157c478bd9Sstevel@tonic-gate * In case previous inode update was done asynchronously 10167c478bd9Sstevel@tonic-gate * (IBDWRITE) and this inode update request wants guaranteed 10177c478bd9Sstevel@tonic-gate * (synchronous) disk update, flush the inode. 10187c478bd9Sstevel@tonic-gate */ 10197c478bd9Sstevel@tonic-gate if (waitfor && (flag & IBDWRITE)) { 10207c478bd9Sstevel@tonic-gate blkflush(ip->i_dev, 10217c478bd9Sstevel@tonic-gate (daddr_t)fsbtodb(fp, itod(fp, ip->i_number))); 10227c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 10237c478bd9Sstevel@tonic-gate ip->i_flag &= ~IBDWRITE; 10247c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 10257c478bd9Sstevel@tonic-gate } 10267c478bd9Sstevel@tonic-gate } 10277c478bd9Sstevel@tonic-gate } 10287c478bd9Sstevel@tonic-gate 10297c478bd9Sstevel@tonic-gate #define SINGLE 0 /* index of single indirect block */ 10307c478bd9Sstevel@tonic-gate #define DOUBLE 1 /* index of double indirect block */ 10317c478bd9Sstevel@tonic-gate #define TRIPLE 2 /* index of triple indirect block */ 10327c478bd9Sstevel@tonic-gate 10337c478bd9Sstevel@tonic-gate /* 10347c478bd9Sstevel@tonic-gate * Release blocks associated with the inode ip and 10357c478bd9Sstevel@tonic-gate * stored in the indirect block bn. Blocks are free'd 10367c478bd9Sstevel@tonic-gate * in LIFO order up to (but not including) lastbn. If 10377c478bd9Sstevel@tonic-gate * level is greater than SINGLE, the block is an indirect 10387c478bd9Sstevel@tonic-gate * block and recursive calls to indirtrunc must be used to 10397c478bd9Sstevel@tonic-gate * cleanse other indirect blocks. 10407c478bd9Sstevel@tonic-gate * 10417c478bd9Sstevel@tonic-gate * N.B.: triple indirect blocks are untested. 10427c478bd9Sstevel@tonic-gate */ 10437c478bd9Sstevel@tonic-gate static long 10447c478bd9Sstevel@tonic-gate indirtrunc(struct inode *ip, daddr_t bn, daddr_t lastbn, int level, int flags) 10457c478bd9Sstevel@tonic-gate { 10467c478bd9Sstevel@tonic-gate int i; 10477c478bd9Sstevel@tonic-gate struct buf *bp, *copy; 10487c478bd9Sstevel@tonic-gate daddr32_t *bap; 10497c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 10507c478bd9Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 10517c478bd9Sstevel@tonic-gate daddr_t nb, last; 10527c478bd9Sstevel@tonic-gate long factor; 10537c478bd9Sstevel@tonic-gate int blocksreleased = 0, nblocks; 10547c478bd9Sstevel@tonic-gate 10557c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&ip->i_contents)); 10567c478bd9Sstevel@tonic-gate /* 10577c478bd9Sstevel@tonic-gate * Calculate index in current block of last 10587c478bd9Sstevel@tonic-gate * block to be kept. -1 indicates the entire 10597c478bd9Sstevel@tonic-gate * block so we need not calculate the index. 10607c478bd9Sstevel@tonic-gate */ 10617c478bd9Sstevel@tonic-gate factor = 1; 10627c478bd9Sstevel@tonic-gate for (i = SINGLE; i < level; i++) 10637c478bd9Sstevel@tonic-gate factor *= NINDIR(fs); 10647c478bd9Sstevel@tonic-gate last = lastbn; 10657c478bd9Sstevel@tonic-gate if (lastbn > 0) 10667c478bd9Sstevel@tonic-gate last /= factor; 10677c478bd9Sstevel@tonic-gate nblocks = btodb(fs->fs_bsize); 10687c478bd9Sstevel@tonic-gate /* 10697c478bd9Sstevel@tonic-gate * Get buffer of block pointers, zero those 10707c478bd9Sstevel@tonic-gate * entries corresponding to blocks to be free'd, 10717c478bd9Sstevel@tonic-gate * and update on disk copy first. 10727c478bd9Sstevel@tonic-gate * *Unless* the root pointer has been synchronously 10737c478bd9Sstevel@tonic-gate * written to disk. If nothing points to this 10747c478bd9Sstevel@tonic-gate * indirect block then don't bother zero'ing and 10757c478bd9Sstevel@tonic-gate * writing it. 10767c478bd9Sstevel@tonic-gate */ 10777c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, 10787c478bd9Sstevel@tonic-gate ip->i_dev, (daddr_t)fsbtodb(fs, bn), (int)fs->fs_bsize); 10797c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 10807c478bd9Sstevel@tonic-gate brelse(bp); 10817c478bd9Sstevel@tonic-gate return (0); 10827c478bd9Sstevel@tonic-gate } 10837c478bd9Sstevel@tonic-gate bap = bp->b_un.b_daddr; 10847c478bd9Sstevel@tonic-gate if ((flags & I_CHEAP) == 0) { 10857c478bd9Sstevel@tonic-gate uint_t zb; 10867c478bd9Sstevel@tonic-gate 10877c478bd9Sstevel@tonic-gate zb = (uint_t)((NINDIR(fs) - (last + 1)) * sizeof (daddr32_t)); 10887c478bd9Sstevel@tonic-gate 10897c478bd9Sstevel@tonic-gate if (zb) { 10907c478bd9Sstevel@tonic-gate /* 10917c478bd9Sstevel@tonic-gate * push any data into the log before we zero it 10927c478bd9Sstevel@tonic-gate */ 10937c478bd9Sstevel@tonic-gate if (bp->b_flags & B_DELWRI) 10947c478bd9Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (caddr_t)bap, 10957c478bd9Sstevel@tonic-gate ldbtob(bp->b_blkno), bp->b_bcount, 10967c478bd9Sstevel@tonic-gate bp->b_un.b_addr, bp->b_bcount); 10977c478bd9Sstevel@tonic-gate copy = ngeteblk(fs->fs_bsize); 10987c478bd9Sstevel@tonic-gate bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, 10997c478bd9Sstevel@tonic-gate (uint_t)fs->fs_bsize); 11007c478bd9Sstevel@tonic-gate bzero((caddr_t)&bap[last + 1], zb); 11017c478bd9Sstevel@tonic-gate 11027c478bd9Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 11037c478bd9Sstevel@tonic-gate (caddr_t)&bap[last + 1] - (caddr_t)bap, 11047c478bd9Sstevel@tonic-gate zb, bp, DT_ABZERO); 11057c478bd9Sstevel@tonic-gate 11067c478bd9Sstevel@tonic-gate UFS_BRWRITE(ufsvfsp, bp); 11077c478bd9Sstevel@tonic-gate bp = copy, bap = bp->b_un.b_daddr; 11087c478bd9Sstevel@tonic-gate } 11097c478bd9Sstevel@tonic-gate } else { 11107c478bd9Sstevel@tonic-gate /* make sure write retries are also cleared */ 11117c478bd9Sstevel@tonic-gate bp->b_flags &= ~(B_DELWRI | B_RETRYWRI); 11127c478bd9Sstevel@tonic-gate bp->b_flags |= B_STALE | B_AGE; 11137c478bd9Sstevel@tonic-gate } 11147c478bd9Sstevel@tonic-gate 11157c478bd9Sstevel@tonic-gate /* 11167c478bd9Sstevel@tonic-gate * Recursively free totally unused blocks. 11177c478bd9Sstevel@tonic-gate */ 11187c478bd9Sstevel@tonic-gate flags |= I_CHEAP; 11197c478bd9Sstevel@tonic-gate for (i = NINDIR(fs) - 1; i > last; i--) { 11207c478bd9Sstevel@tonic-gate nb = bap[i]; 11217c478bd9Sstevel@tonic-gate if (nb == 0) 11227c478bd9Sstevel@tonic-gate continue; 11237c478bd9Sstevel@tonic-gate if (level > SINGLE) { 11247c478bd9Sstevel@tonic-gate blocksreleased += 11257c478bd9Sstevel@tonic-gate indirtrunc(ip, nb, (daddr_t)-1, level - 1, flags); 11267c478bd9Sstevel@tonic-gate free(ip, nb, (off_t)fs->fs_bsize, flags | I_IBLK); 11277c478bd9Sstevel@tonic-gate } else 11287c478bd9Sstevel@tonic-gate free(ip, nb, (off_t)fs->fs_bsize, flags); 11297c478bd9Sstevel@tonic-gate blocksreleased += nblocks; 11307c478bd9Sstevel@tonic-gate } 11317c478bd9Sstevel@tonic-gate flags &= ~I_CHEAP; 11327c478bd9Sstevel@tonic-gate 11337c478bd9Sstevel@tonic-gate /* 11347c478bd9Sstevel@tonic-gate * Recursively free last partial block. 11357c478bd9Sstevel@tonic-gate */ 11367c478bd9Sstevel@tonic-gate if (level > SINGLE && lastbn >= 0) { 11377c478bd9Sstevel@tonic-gate last = lastbn % factor; 11387c478bd9Sstevel@tonic-gate nb = bap[i]; 11397c478bd9Sstevel@tonic-gate if (nb != 0) 114080d34432Sfrankho blocksreleased += 114180d34432Sfrankho indirtrunc(ip, nb, last, level - 1, flags); 11427c478bd9Sstevel@tonic-gate } 11437c478bd9Sstevel@tonic-gate brelse(bp); 11447c478bd9Sstevel@tonic-gate return (blocksreleased); 11457c478bd9Sstevel@tonic-gate } 11467c478bd9Sstevel@tonic-gate 11477c478bd9Sstevel@tonic-gate /* 11487c478bd9Sstevel@tonic-gate * Truncate the inode ip to at most length size. 11497c478bd9Sstevel@tonic-gate * Free affected disk blocks -- the blocks of the 11507c478bd9Sstevel@tonic-gate * file are removed in reverse order. 11517c478bd9Sstevel@tonic-gate * 11527c478bd9Sstevel@tonic-gate * N.B.: triple indirect blocks are untested. 11537c478bd9Sstevel@tonic-gate */ 11547c478bd9Sstevel@tonic-gate static int i_genrand = 1234; 11557c478bd9Sstevel@tonic-gate int 11567c478bd9Sstevel@tonic-gate ufs_itrunc(struct inode *oip, u_offset_t length, int flags, cred_t *cr) 11577c478bd9Sstevel@tonic-gate { 11587c478bd9Sstevel@tonic-gate struct fs *fs = oip->i_fs; 11597c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = oip->i_ufsvfs; 11607c478bd9Sstevel@tonic-gate struct inode *ip; 11617c478bd9Sstevel@tonic-gate daddr_t lastblock; 11627c478bd9Sstevel@tonic-gate off_t bsize; 11637c478bd9Sstevel@tonic-gate int boff; 11647c478bd9Sstevel@tonic-gate daddr_t bn, lastiblock[NIADDR]; 11657c478bd9Sstevel@tonic-gate int level; 11667c478bd9Sstevel@tonic-gate long nblocks, blocksreleased = 0; 11677c478bd9Sstevel@tonic-gate int i; 11687c478bd9Sstevel@tonic-gate ushort_t mode; 11697c478bd9Sstevel@tonic-gate struct inode tip; 11707c478bd9Sstevel@tonic-gate int err; 11717c478bd9Sstevel@tonic-gate u_offset_t maxoffset = (ufsvfsp->vfs_lfflags & UFS_LARGEFILES) ? 11727c478bd9Sstevel@tonic-gate (UFS_MAXOFFSET_T) : (MAXOFF32_T); 11737c478bd9Sstevel@tonic-gate 11747c478bd9Sstevel@tonic-gate /* 11757c478bd9Sstevel@tonic-gate * Shadow inodes do not need to hold the vfs_dqrwlock lock. Most 11767c478bd9Sstevel@tonic-gate * other uses need the reader lock. opendq() holds the writer lock. 11777c478bd9Sstevel@tonic-gate */ 11787c478bd9Sstevel@tonic-gate ASSERT((oip->i_mode & IFMT) == IFSHAD || 11797c478bd9Sstevel@tonic-gate RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)); 11807c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&oip->i_contents)); 11817c478bd9Sstevel@tonic-gate /* 11827c478bd9Sstevel@tonic-gate * We only allow truncation of regular files and directories 11837c478bd9Sstevel@tonic-gate * to arbitrary lengths here. In addition, we allow symbolic 11847c478bd9Sstevel@tonic-gate * links to be truncated only to zero length. Other inode 11857c478bd9Sstevel@tonic-gate * types cannot have their length set here. Disk blocks are 11867c478bd9Sstevel@tonic-gate * being dealt with - especially device inodes where 11877c478bd9Sstevel@tonic-gate * ip->i_ordev is actually being stored in ip->i_db[0]! 11887c478bd9Sstevel@tonic-gate */ 11897c478bd9Sstevel@tonic-gate TRANS_INODE(ufsvfsp, oip); 11907c478bd9Sstevel@tonic-gate mode = oip->i_mode & IFMT; 11917c478bd9Sstevel@tonic-gate if (flags & I_FREE) { 11927c478bd9Sstevel@tonic-gate i_genrand *= 16843009; /* turns into shift and adds */ 11937c478bd9Sstevel@tonic-gate i_genrand++; 1194d3d50737SRafael Vanoni oip->i_gen += ((i_genrand + ddi_get_lbolt()) & 0xffff) + 1; 11957c478bd9Sstevel@tonic-gate oip->i_flag |= ICHG |IUPD; 11967c478bd9Sstevel@tonic-gate oip->i_seq++; 11977c478bd9Sstevel@tonic-gate if (length == oip->i_size) 11987c478bd9Sstevel@tonic-gate return (0); 11997c478bd9Sstevel@tonic-gate flags |= I_CHEAP; 12007c478bd9Sstevel@tonic-gate } 12017c478bd9Sstevel@tonic-gate if (mode == IFIFO) 12027c478bd9Sstevel@tonic-gate return (0); 12037c478bd9Sstevel@tonic-gate if (mode != IFREG && mode != IFDIR && mode != IFATTRDIR && 12047c478bd9Sstevel@tonic-gate !(mode == IFLNK && length == (offset_t)0) && mode != IFSHAD) 12057c478bd9Sstevel@tonic-gate return (EINVAL); 12067c478bd9Sstevel@tonic-gate if (length > maxoffset) 12077c478bd9Sstevel@tonic-gate return (EFBIG); 12087c478bd9Sstevel@tonic-gate if ((mode == IFDIR) || (mode == IFATTRDIR)) 12097c478bd9Sstevel@tonic-gate flags |= I_DIR; 12107c478bd9Sstevel@tonic-gate if (mode == IFSHAD) 12117c478bd9Sstevel@tonic-gate flags |= I_SHAD; 12127c478bd9Sstevel@tonic-gate if (oip == ufsvfsp->vfs_qinod) 12137c478bd9Sstevel@tonic-gate flags |= I_QUOTA; 12147c478bd9Sstevel@tonic-gate if (length == oip->i_size) { 12157c478bd9Sstevel@tonic-gate /* update ctime and mtime to please POSIX tests */ 12167c478bd9Sstevel@tonic-gate oip->i_flag |= ICHG |IUPD; 12177c478bd9Sstevel@tonic-gate oip->i_seq++; 12187c478bd9Sstevel@tonic-gate if (length == 0) { 12197c478bd9Sstevel@tonic-gate /* nothing to cache so clear the flag */ 12207c478bd9Sstevel@tonic-gate oip->i_flag &= ~IFASTSYMLNK; 12217c478bd9Sstevel@tonic-gate } 12227c478bd9Sstevel@tonic-gate return (0); 12237c478bd9Sstevel@tonic-gate } 12247c478bd9Sstevel@tonic-gate /* wipe out fast symlink till next access */ 12257c478bd9Sstevel@tonic-gate if (oip->i_flag & IFASTSYMLNK) { 12267c478bd9Sstevel@tonic-gate int j; 12277c478bd9Sstevel@tonic-gate 12287c478bd9Sstevel@tonic-gate ASSERT(ITOV(oip)->v_type == VLNK); 12297c478bd9Sstevel@tonic-gate 12307c478bd9Sstevel@tonic-gate oip->i_flag &= ~IFASTSYMLNK; 12317c478bd9Sstevel@tonic-gate 12327c478bd9Sstevel@tonic-gate for (j = 1; j < NDADDR; j++) 12337c478bd9Sstevel@tonic-gate oip->i_db[j] = 0; 12347c478bd9Sstevel@tonic-gate for (j = 0; j < NIADDR; j++) 12357c478bd9Sstevel@tonic-gate oip->i_ib[j] = 0; 12367c478bd9Sstevel@tonic-gate } 12377c478bd9Sstevel@tonic-gate 12387c478bd9Sstevel@tonic-gate boff = (int)blkoff(fs, length); 12397c478bd9Sstevel@tonic-gate 12407c478bd9Sstevel@tonic-gate if (length > oip->i_size) { 12417c478bd9Sstevel@tonic-gate /* 12427c478bd9Sstevel@tonic-gate * Trunc up case. BMAPALLOC will insure that the right blocks 12437c478bd9Sstevel@tonic-gate * are allocated. This includes extending the old frag to a 12447c478bd9Sstevel@tonic-gate * full block (if needed) in addition to doing any work 12457c478bd9Sstevel@tonic-gate * needed for allocating the last block. 12467c478bd9Sstevel@tonic-gate */ 12477c478bd9Sstevel@tonic-gate if (boff == 0) 12487c478bd9Sstevel@tonic-gate err = BMAPALLOC(oip, length - 1, (int)fs->fs_bsize, cr); 12497c478bd9Sstevel@tonic-gate else 12507c478bd9Sstevel@tonic-gate err = BMAPALLOC(oip, length - 1, boff, cr); 12517c478bd9Sstevel@tonic-gate 12527c478bd9Sstevel@tonic-gate if (err == 0) { 12537c478bd9Sstevel@tonic-gate /* 12547c478bd9Sstevel@tonic-gate * Save old size and set inode's size now 12557c478bd9Sstevel@tonic-gate * so that we don't cause too much of the 12567c478bd9Sstevel@tonic-gate * file to be zero'd and pushed. 12577c478bd9Sstevel@tonic-gate */ 12587c478bd9Sstevel@tonic-gate u_offset_t osize = oip->i_size; 12597c478bd9Sstevel@tonic-gate oip->i_size = length; 12607c478bd9Sstevel@tonic-gate /* 12617c478bd9Sstevel@tonic-gate * Make sure we zero out the remaining bytes of 12627c478bd9Sstevel@tonic-gate * the page in case a mmap scribbled on it. We 12637c478bd9Sstevel@tonic-gate * can't prevent a mmap from writing beyond EOF 12647c478bd9Sstevel@tonic-gate * on the last page of a file. 12657c478bd9Sstevel@tonic-gate * 12667c478bd9Sstevel@tonic-gate */ 12677c478bd9Sstevel@tonic-gate if ((boff = (int)blkoff(fs, osize)) != 0) { 12687c478bd9Sstevel@tonic-gate bsize = (int)lblkno(fs, osize - 1) >= NDADDR ? 12697c478bd9Sstevel@tonic-gate fs->fs_bsize : fragroundup(fs, boff); 12707c478bd9Sstevel@tonic-gate pvn_vpzero(ITOV(oip), osize, 12717c478bd9Sstevel@tonic-gate (size_t)(bsize - boff)); 12727c478bd9Sstevel@tonic-gate } 12737c478bd9Sstevel@tonic-gate oip->i_flag |= ICHG|IATTCHG; 12747c478bd9Sstevel@tonic-gate oip->i_seq++; 12757c478bd9Sstevel@tonic-gate ITIMES_NOLOCK(oip); 12767c478bd9Sstevel@tonic-gate /* 12777c478bd9Sstevel@tonic-gate * MAXOFF32_T is old 2GB size limit. If 12787c478bd9Sstevel@tonic-gate * this operation caused a large file to be 12797c478bd9Sstevel@tonic-gate * created, turn on the superblock flag 12807c478bd9Sstevel@tonic-gate * and update the superblock, if the flag 12817c478bd9Sstevel@tonic-gate * is not already on. 12827c478bd9Sstevel@tonic-gate */ 12837c478bd9Sstevel@tonic-gate if ((length > (u_offset_t)MAXOFF32_T) && 12847c478bd9Sstevel@tonic-gate !(fs->fs_flags & FSLARGEFILES)) { 12857c478bd9Sstevel@tonic-gate ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES); 12867c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 12877c478bd9Sstevel@tonic-gate fs->fs_flags |= FSLARGEFILES; 12887c478bd9Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 12897c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 12907c478bd9Sstevel@tonic-gate } 12917c478bd9Sstevel@tonic-gate } 12927c478bd9Sstevel@tonic-gate 12937c478bd9Sstevel@tonic-gate return (err); 12947c478bd9Sstevel@tonic-gate } 12957c478bd9Sstevel@tonic-gate 12967c478bd9Sstevel@tonic-gate /* 12977c478bd9Sstevel@tonic-gate * Update the pages of the file. If the file is not being 12987c478bd9Sstevel@tonic-gate * truncated to a block boundary, the contents of the 12997c478bd9Sstevel@tonic-gate * pages following the end of the file must be zero'ed 13007c478bd9Sstevel@tonic-gate * in case it ever become accessible again because 13017c478bd9Sstevel@tonic-gate * of subsequent file growth. 13027c478bd9Sstevel@tonic-gate */ 13037c478bd9Sstevel@tonic-gate if (boff == 0) { 13047c478bd9Sstevel@tonic-gate (void) pvn_vplist_dirty(ITOV(oip), length, ufs_putapage, 13057c478bd9Sstevel@tonic-gate B_INVAL | B_TRUNC, CRED()); 13067c478bd9Sstevel@tonic-gate } else { 13077c478bd9Sstevel@tonic-gate /* 13087c478bd9Sstevel@tonic-gate * Make sure that the last block is properly allocated. 13097c478bd9Sstevel@tonic-gate * We only really have to do this if the last block is 13107c478bd9Sstevel@tonic-gate * actually allocated since ufs_bmap will now handle the case 13117c478bd9Sstevel@tonic-gate * of an fragment which has no block allocated. Just to 13127c478bd9Sstevel@tonic-gate * be sure, we do it now independent of current allocation. 13137c478bd9Sstevel@tonic-gate */ 13147c478bd9Sstevel@tonic-gate err = BMAPALLOC(oip, length - 1, boff, cr); 13157c478bd9Sstevel@tonic-gate if (err) 13167c478bd9Sstevel@tonic-gate return (err); 13177c478bd9Sstevel@tonic-gate 13187c478bd9Sstevel@tonic-gate /* 13197c478bd9Sstevel@tonic-gate * BMAPALLOC will call bmap_write which defers i_seq 13207c478bd9Sstevel@tonic-gate * processing. If the timestamps were changed, update 13217c478bd9Sstevel@tonic-gate * i_seq before rdip drops i_contents or syncs the inode. 13227c478bd9Sstevel@tonic-gate */ 13237c478bd9Sstevel@tonic-gate if (oip->i_flag & (ICHG|IUPD)) 13247c478bd9Sstevel@tonic-gate oip->i_seq++; 13257c478bd9Sstevel@tonic-gate 13267c478bd9Sstevel@tonic-gate /* 13277c478bd9Sstevel@tonic-gate * BugId 4069932 13287c478bd9Sstevel@tonic-gate * Make sure that the relevant partial page appears in 13297c478bd9Sstevel@tonic-gate * the v_pages list, so that pvn_vpzero() will do its 13307c478bd9Sstevel@tonic-gate * job. Since doing this correctly requires everything 13317c478bd9Sstevel@tonic-gate * in rdip() except for the uiomove(), it's easier and 13327c478bd9Sstevel@tonic-gate * safer to do the uiomove() rather than duplicate the 13337c478bd9Sstevel@tonic-gate * rest of rdip() here. 13347c478bd9Sstevel@tonic-gate * 13357c478bd9Sstevel@tonic-gate * To get here, we know that length indicates a byte 13367c478bd9Sstevel@tonic-gate * that is not the first byte of a block. (length - 1) 13377c478bd9Sstevel@tonic-gate * is the last actual byte known to exist. Deduction 13387c478bd9Sstevel@tonic-gate * shows it is in the same block as byte (length). 13397c478bd9Sstevel@tonic-gate * Thus, this rdip() invocation should always succeed 13407c478bd9Sstevel@tonic-gate * except in the face of i/o errors, and give us the 13417c478bd9Sstevel@tonic-gate * block we care about. 13427c478bd9Sstevel@tonic-gate * 13437c478bd9Sstevel@tonic-gate * rdip() makes the same locking assertions and 13447c478bd9Sstevel@tonic-gate * assumptions as we do. We do not acquire any locks 13457c478bd9Sstevel@tonic-gate * before calling it, so we have not changed the locking 13467c478bd9Sstevel@tonic-gate * situation. Finally, there do not appear to be any 13477c478bd9Sstevel@tonic-gate * paths whereby rdip() ends up invoking us again. 13487c478bd9Sstevel@tonic-gate * Thus, infinite recursion is avoided. 13497c478bd9Sstevel@tonic-gate */ 13507c478bd9Sstevel@tonic-gate { 13517c478bd9Sstevel@tonic-gate uio_t uio; 13527c478bd9Sstevel@tonic-gate iovec_t iov[1]; 13537c478bd9Sstevel@tonic-gate char buffer; 13547c478bd9Sstevel@tonic-gate 13557c478bd9Sstevel@tonic-gate uio.uio_iov = iov; 13567c478bd9Sstevel@tonic-gate uio.uio_iovcnt = 1; 13577c478bd9Sstevel@tonic-gate uio.uio_loffset = length - 1; 13587c478bd9Sstevel@tonic-gate uio.uio_resid = 1; 13597c478bd9Sstevel@tonic-gate uio.uio_segflg = UIO_SYSSPACE; 13607c478bd9Sstevel@tonic-gate uio.uio_extflg = UIO_COPY_CACHED; 13617c478bd9Sstevel@tonic-gate 13627c478bd9Sstevel@tonic-gate iov[0].iov_base = &buffer; 13637c478bd9Sstevel@tonic-gate iov[0].iov_len = 1; 13647c478bd9Sstevel@tonic-gate 13657c478bd9Sstevel@tonic-gate err = rdip(oip, &uio, UIO_READ, NULL); 13667c478bd9Sstevel@tonic-gate if (err) 13677c478bd9Sstevel@tonic-gate return (err); 13687c478bd9Sstevel@tonic-gate } 13697c478bd9Sstevel@tonic-gate 13707c478bd9Sstevel@tonic-gate bsize = (int)lblkno(fs, length - 1) >= NDADDR ? 13717c478bd9Sstevel@tonic-gate fs->fs_bsize : fragroundup(fs, boff); 13727c478bd9Sstevel@tonic-gate pvn_vpzero(ITOV(oip), length, (size_t)(bsize - boff)); 13737c478bd9Sstevel@tonic-gate /* 13747c478bd9Sstevel@tonic-gate * Ensure full fs block is marked as dirty. 13757c478bd9Sstevel@tonic-gate */ 13767c478bd9Sstevel@tonic-gate (void) pvn_vplist_dirty(ITOV(oip), length + (bsize - boff), 13777c478bd9Sstevel@tonic-gate ufs_putapage, B_INVAL | B_TRUNC, CRED()); 13787c478bd9Sstevel@tonic-gate } 13797c478bd9Sstevel@tonic-gate 13807c478bd9Sstevel@tonic-gate /* 13817c478bd9Sstevel@tonic-gate * Calculate index into inode's block list of 13827c478bd9Sstevel@tonic-gate * last direct and indirect blocks (if any) 13837c478bd9Sstevel@tonic-gate * which we want to keep. Lastblock is -1 when 13847c478bd9Sstevel@tonic-gate * the file is truncated to 0. 13857c478bd9Sstevel@tonic-gate */ 13867c478bd9Sstevel@tonic-gate lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 13877c478bd9Sstevel@tonic-gate lastiblock[SINGLE] = lastblock - NDADDR; 13887c478bd9Sstevel@tonic-gate lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 13897c478bd9Sstevel@tonic-gate lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 13907c478bd9Sstevel@tonic-gate nblocks = btodb(fs->fs_bsize); 13917c478bd9Sstevel@tonic-gate 13927c478bd9Sstevel@tonic-gate /* 13937c478bd9Sstevel@tonic-gate * Update file and block pointers 13947c478bd9Sstevel@tonic-gate * on disk before we start freeing blocks. 13957c478bd9Sstevel@tonic-gate * If we crash before free'ing blocks below, 13967c478bd9Sstevel@tonic-gate * the blocks will be returned to the free list. 13977c478bd9Sstevel@tonic-gate * lastiblock values are also normalized to -1 13987c478bd9Sstevel@tonic-gate * for calls to indirtrunc below. 13997c478bd9Sstevel@tonic-gate */ 14007c478bd9Sstevel@tonic-gate tip = *oip; /* structure copy */ 14017c478bd9Sstevel@tonic-gate ip = &tip; 14027c478bd9Sstevel@tonic-gate 14037c478bd9Sstevel@tonic-gate for (level = TRIPLE; level >= SINGLE; level--) 14047c478bd9Sstevel@tonic-gate if (lastiblock[level] < 0) { 14057c478bd9Sstevel@tonic-gate oip->i_ib[level] = 0; 14067c478bd9Sstevel@tonic-gate lastiblock[level] = -1; 14077c478bd9Sstevel@tonic-gate } 14087c478bd9Sstevel@tonic-gate for (i = NDADDR - 1; i > lastblock; i--) { 14097c478bd9Sstevel@tonic-gate oip->i_db[i] = 0; 14107c478bd9Sstevel@tonic-gate flags |= I_CHEAP; 14117c478bd9Sstevel@tonic-gate } 14127c478bd9Sstevel@tonic-gate oip->i_size = length; 14137c478bd9Sstevel@tonic-gate oip->i_flag |= ICHG|IUPD|IATTCHG; 14147c478bd9Sstevel@tonic-gate oip->i_seq++; 14157c478bd9Sstevel@tonic-gate if (!TRANS_ISTRANS(ufsvfsp)) 14167c478bd9Sstevel@tonic-gate ufs_iupdat(oip, I_SYNC); /* do sync inode update */ 14177c478bd9Sstevel@tonic-gate 14187c478bd9Sstevel@tonic-gate /* 14197c478bd9Sstevel@tonic-gate * Indirect blocks first. 14207c478bd9Sstevel@tonic-gate */ 14217c478bd9Sstevel@tonic-gate for (level = TRIPLE; level >= SINGLE; level--) { 14227c478bd9Sstevel@tonic-gate bn = ip->i_ib[level]; 14237c478bd9Sstevel@tonic-gate if (bn != 0) { 14247c478bd9Sstevel@tonic-gate blocksreleased += 14257c478bd9Sstevel@tonic-gate indirtrunc(ip, bn, lastiblock[level], level, flags); 14267c478bd9Sstevel@tonic-gate if (lastiblock[level] < 0) { 14277c478bd9Sstevel@tonic-gate ip->i_ib[level] = 0; 14287c478bd9Sstevel@tonic-gate free(ip, bn, (off_t)fs->fs_bsize, 14297c478bd9Sstevel@tonic-gate flags | I_IBLK); 14307c478bd9Sstevel@tonic-gate blocksreleased += nblocks; 14317c478bd9Sstevel@tonic-gate } 14327c478bd9Sstevel@tonic-gate } 14337c478bd9Sstevel@tonic-gate if (lastiblock[level] >= 0) 14347c478bd9Sstevel@tonic-gate goto done; 14357c478bd9Sstevel@tonic-gate } 14367c478bd9Sstevel@tonic-gate 14377c478bd9Sstevel@tonic-gate /* 14387c478bd9Sstevel@tonic-gate * All whole direct blocks or frags. 14397c478bd9Sstevel@tonic-gate */ 14407c478bd9Sstevel@tonic-gate for (i = NDADDR - 1; i > lastblock; i--) { 14417c478bd9Sstevel@tonic-gate bn = ip->i_db[i]; 14427c478bd9Sstevel@tonic-gate if (bn == 0) 14437c478bd9Sstevel@tonic-gate continue; 14447c478bd9Sstevel@tonic-gate ip->i_db[i] = 0; 14457c478bd9Sstevel@tonic-gate bsize = (off_t)blksize(fs, ip, i); 14467c478bd9Sstevel@tonic-gate free(ip, bn, bsize, flags); 14477c478bd9Sstevel@tonic-gate blocksreleased += btodb(bsize); 14487c478bd9Sstevel@tonic-gate } 14497c478bd9Sstevel@tonic-gate if (lastblock < 0) 14507c478bd9Sstevel@tonic-gate goto done; 14517c478bd9Sstevel@tonic-gate 14527c478bd9Sstevel@tonic-gate /* 14537c478bd9Sstevel@tonic-gate * Finally, look for a change in size of the 14547c478bd9Sstevel@tonic-gate * last direct block; release any frags. 14557c478bd9Sstevel@tonic-gate */ 14567c478bd9Sstevel@tonic-gate bn = ip->i_db[lastblock]; 14577c478bd9Sstevel@tonic-gate if (bn != 0) { 14587c478bd9Sstevel@tonic-gate off_t oldspace, newspace; 14597c478bd9Sstevel@tonic-gate 14607c478bd9Sstevel@tonic-gate /* 14617c478bd9Sstevel@tonic-gate * Calculate amount of space we're giving 14627c478bd9Sstevel@tonic-gate * back as old block size minus new block size. 14637c478bd9Sstevel@tonic-gate */ 14647c478bd9Sstevel@tonic-gate oldspace = blksize(fs, ip, lastblock); 14657c478bd9Sstevel@tonic-gate UFS_SET_ISIZE(length, ip); 14667c478bd9Sstevel@tonic-gate newspace = blksize(fs, ip, lastblock); 14677c478bd9Sstevel@tonic-gate if (newspace == 0) { 14687c478bd9Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_itrunc: newspace == 0"); 14697c478bd9Sstevel@tonic-gate return (err); 14707c478bd9Sstevel@tonic-gate } 14717c478bd9Sstevel@tonic-gate if (oldspace - newspace > 0) { 14727c478bd9Sstevel@tonic-gate /* 14737c478bd9Sstevel@tonic-gate * Block number of space to be free'd is 14747c478bd9Sstevel@tonic-gate * the old block # plus the number of frags 14757c478bd9Sstevel@tonic-gate * required for the storage we're keeping. 14767c478bd9Sstevel@tonic-gate */ 14777c478bd9Sstevel@tonic-gate bn += numfrags(fs, newspace); 14787c478bd9Sstevel@tonic-gate free(ip, bn, oldspace - newspace, flags); 14797c478bd9Sstevel@tonic-gate blocksreleased += btodb(oldspace - newspace); 14807c478bd9Sstevel@tonic-gate } 14817c478bd9Sstevel@tonic-gate } 14827c478bd9Sstevel@tonic-gate done: 14837c478bd9Sstevel@tonic-gate /* BEGIN PARANOIA */ 14847c478bd9Sstevel@tonic-gate for (level = SINGLE; level <= TRIPLE; level++) 14857c478bd9Sstevel@tonic-gate if (ip->i_ib[level] != oip->i_ib[level]) { 14867c478bd9Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_itrunc: indirect block"); 14877c478bd9Sstevel@tonic-gate return (err); 14887c478bd9Sstevel@tonic-gate } 14897c478bd9Sstevel@tonic-gate 14907c478bd9Sstevel@tonic-gate for (i = 0; i < NDADDR; i++) 14917c478bd9Sstevel@tonic-gate if (ip->i_db[i] != oip->i_db[i]) { 14927c478bd9Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_itrunc: direct block"); 14937c478bd9Sstevel@tonic-gate return (err); 14947c478bd9Sstevel@tonic-gate } 14957c478bd9Sstevel@tonic-gate /* END PARANOIA */ 14967c478bd9Sstevel@tonic-gate oip->i_blocks -= blocksreleased; 14977c478bd9Sstevel@tonic-gate 14987c478bd9Sstevel@tonic-gate if (oip->i_blocks < 0) { /* sanity */ 14997c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, 15007c478bd9Sstevel@tonic-gate "ufs_itrunc: %s/%d new size = %lld, blocks = %d\n", 15017c478bd9Sstevel@tonic-gate fs->fs_fsmnt, (int)oip->i_number, oip->i_size, 15027c478bd9Sstevel@tonic-gate (int)oip->i_blocks); 15037c478bd9Sstevel@tonic-gate oip->i_blocks = 0; 15047c478bd9Sstevel@tonic-gate } 15057c478bd9Sstevel@tonic-gate oip->i_flag |= ICHG|IATTCHG; 15067c478bd9Sstevel@tonic-gate oip->i_seq++; 15077c478bd9Sstevel@tonic-gate /* blocksreleased is >= zero, so this can not fail */ 15087c478bd9Sstevel@tonic-gate (void) chkdq(oip, -blocksreleased, 0, cr, (char **)NULL, 15097c478bd9Sstevel@tonic-gate (size_t *)NULL); 15107c478bd9Sstevel@tonic-gate return (0); 15117c478bd9Sstevel@tonic-gate } 15127c478bd9Sstevel@tonic-gate 15137c478bd9Sstevel@tonic-gate /* 15147c478bd9Sstevel@tonic-gate * Check mode permission on inode. Mode is READ, WRITE or EXEC. 15157c478bd9Sstevel@tonic-gate * In the case of WRITE, the read-only status of the file system 15167c478bd9Sstevel@tonic-gate * is checked. Depending on the calling user, the appropriate 15177c478bd9Sstevel@tonic-gate * mode bits are selected; privileges to override missing permission 15187c478bd9Sstevel@tonic-gate * bits are checked through secpolicy_vnode_access(). 151960c8e821SFrank Batschulat * The i_contens lock must be held as reader here to prevent racing with 152060c8e821SFrank Batschulat * the acl subsystem removing/setting/changing acls on this inode. 152160c8e821SFrank Batschulat * The caller is responsible for indicating whether or not the i_contents 152260c8e821SFrank Batschulat * lock needs to be acquired here or if already held. 15237c478bd9Sstevel@tonic-gate */ 15247c478bd9Sstevel@tonic-gate int 152560c8e821SFrank Batschulat ufs_iaccess(struct inode *ip, int mode, struct cred *cr, int dolock) 15267c478bd9Sstevel@tonic-gate { 15277c478bd9Sstevel@tonic-gate int shift = 0; 152860c8e821SFrank Batschulat int ret = 0; 152960c8e821SFrank Batschulat 153060c8e821SFrank Batschulat if (dolock) 153160c8e821SFrank Batschulat rw_enter(&ip->i_contents, RW_READER); 153260c8e821SFrank Batschulat ASSERT(RW_LOCK_HELD(&ip->i_contents)); 15337c478bd9Sstevel@tonic-gate 15347c478bd9Sstevel@tonic-gate if (mode & IWRITE) { 15357c478bd9Sstevel@tonic-gate /* 15367c478bd9Sstevel@tonic-gate * Disallow write attempts on read-only 15377c478bd9Sstevel@tonic-gate * file systems, unless the file is a block 15387c478bd9Sstevel@tonic-gate * or character device or a FIFO. 15397c478bd9Sstevel@tonic-gate */ 15407c478bd9Sstevel@tonic-gate if (ip->i_fs->fs_ronly != 0) { 15417c478bd9Sstevel@tonic-gate if ((ip->i_mode & IFMT) != IFCHR && 15427c478bd9Sstevel@tonic-gate (ip->i_mode & IFMT) != IFBLK && 15437c478bd9Sstevel@tonic-gate (ip->i_mode & IFMT) != IFIFO) { 154460c8e821SFrank Batschulat ret = EROFS; 154560c8e821SFrank Batschulat goto out; 15467c478bd9Sstevel@tonic-gate } 15477c478bd9Sstevel@tonic-gate } 15487c478bd9Sstevel@tonic-gate } 15497c478bd9Sstevel@tonic-gate /* 155060c8e821SFrank Batschulat * If there is an acl, check the acl and return. 15517c478bd9Sstevel@tonic-gate */ 155260c8e821SFrank Batschulat if (ip->i_ufs_acl && ip->i_ufs_acl->aowner) { 155360c8e821SFrank Batschulat ret = ufs_acl_access(ip, mode, cr); 155460c8e821SFrank Batschulat goto out; 155560c8e821SFrank Batschulat } 15567c478bd9Sstevel@tonic-gate 15577c478bd9Sstevel@tonic-gate /* 155860c8e821SFrank Batschulat * Access check is based on only one of owner, group, public. 15597c478bd9Sstevel@tonic-gate * If not owner, then check group. 156060c8e821SFrank Batschulat * If not a member of the group, then check public access. 15617c478bd9Sstevel@tonic-gate */ 15627c478bd9Sstevel@tonic-gate if (crgetuid(cr) != ip->i_uid) { 15637c478bd9Sstevel@tonic-gate shift += 3; 15647c478bd9Sstevel@tonic-gate if (!groupmember((uid_t)ip->i_gid, cr)) 15657c478bd9Sstevel@tonic-gate shift += 3; 15667c478bd9Sstevel@tonic-gate } 15677c478bd9Sstevel@tonic-gate 15687c478bd9Sstevel@tonic-gate /* test missing privilege bits */ 1569*134a1f4eSCasper H.S. Dik ret = secpolicy_vnode_access2(cr, ITOV(ip), ip->i_uid, 1570*134a1f4eSCasper H.S. Dik ip->i_mode << shift, mode); 157160c8e821SFrank Batschulat out: 157260c8e821SFrank Batschulat if (dolock) 157360c8e821SFrank Batschulat rw_exit(&ip->i_contents); 157460c8e821SFrank Batschulat return (ret); 15757c478bd9Sstevel@tonic-gate } 15767c478bd9Sstevel@tonic-gate 15777c478bd9Sstevel@tonic-gate /* 15787c478bd9Sstevel@tonic-gate * if necessary, remove an inode from the free list 15797c478bd9Sstevel@tonic-gate * i_contents is held except at unmount 15807c478bd9Sstevel@tonic-gate * 15817c478bd9Sstevel@tonic-gate * Return 1 if the inode is taken off of the ufs_idle_q, 15827c478bd9Sstevel@tonic-gate * and the caller is expected to call VN_RELE. 15837c478bd9Sstevel@tonic-gate * 15847c478bd9Sstevel@tonic-gate * Return 0 otherwise. 15857c478bd9Sstevel@tonic-gate */ 15867c478bd9Sstevel@tonic-gate int 15877c478bd9Sstevel@tonic-gate ufs_rmidle(struct inode *ip) 15887c478bd9Sstevel@tonic-gate { 15897c478bd9Sstevel@tonic-gate int rval = 0; 15907c478bd9Sstevel@tonic-gate 15917c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 15927c478bd9Sstevel@tonic-gate if ((ip->i_flag & IREF) == 0) { 15937c478bd9Sstevel@tonic-gate mutex_enter(&ufs_idle_q.uq_mutex); 15947c478bd9Sstevel@tonic-gate ip->i_freef->i_freeb = ip->i_freeb; 15957c478bd9Sstevel@tonic-gate ip->i_freeb->i_freef = ip->i_freef; 15967c478bd9Sstevel@tonic-gate ip->i_freef = ip; 15977c478bd9Sstevel@tonic-gate ip->i_freeb = ip; 15987c478bd9Sstevel@tonic-gate ip->i_flag |= IREF; 15997c478bd9Sstevel@tonic-gate ufs_idle_q.uq_ne--; 16007c478bd9Sstevel@tonic-gate if (ip->i_flag & IJUNKIQ) { 16017c478bd9Sstevel@tonic-gate ufs_njunk_iq--; 16027c478bd9Sstevel@tonic-gate ip->i_flag &= ~IJUNKIQ; 16037c478bd9Sstevel@tonic-gate } else { 16047c478bd9Sstevel@tonic-gate ufs_nuseful_iq--; 16057c478bd9Sstevel@tonic-gate } 16067c478bd9Sstevel@tonic-gate mutex_exit(&ufs_idle_q.uq_mutex); 16077c478bd9Sstevel@tonic-gate rval = 1; 16087c478bd9Sstevel@tonic-gate } 16097c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 16107c478bd9Sstevel@tonic-gate return (rval); 16117c478bd9Sstevel@tonic-gate } 16127c478bd9Sstevel@tonic-gate 16137c478bd9Sstevel@tonic-gate /* 16147c478bd9Sstevel@tonic-gate * scan the hash of inodes and call func with the inode locked 16157c478bd9Sstevel@tonic-gate */ 16167c478bd9Sstevel@tonic-gate int 16177c478bd9Sstevel@tonic-gate ufs_scan_inodes(int rwtry, int (*func)(struct inode *, void *), void *arg, 16187c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp) 16197c478bd9Sstevel@tonic-gate { 16207c478bd9Sstevel@tonic-gate struct inode *ip; /* current inode */ 16217c478bd9Sstevel@tonic-gate struct inode *lip = NULL; /* last/previous inode */ 16227c478bd9Sstevel@tonic-gate union ihead *ih; /* current hash chain */ 16237c478bd9Sstevel@tonic-gate int error, i; 16247c478bd9Sstevel@tonic-gate int saverror = 0; 16257c478bd9Sstevel@tonic-gate int lip_held; /* lip needs a VN_RELE() */ 16267c478bd9Sstevel@tonic-gate 16277c478bd9Sstevel@tonic-gate /* 16287c478bd9Sstevel@tonic-gate * If ufsvfsp is NULL, then our caller should be holding 16297c478bd9Sstevel@tonic-gate * ufs_scan_lock to avoid conflicts between ufs_unmount() and 16307c478bd9Sstevel@tonic-gate * ufs_update(). Otherwise, to avoid false-positives in 16317c478bd9Sstevel@tonic-gate * ufs_unmount()'s v_count-based EBUSY check, we only hold 16327c478bd9Sstevel@tonic-gate * those inodes that are in the file system our caller cares 16337c478bd9Sstevel@tonic-gate * about. 16347c478bd9Sstevel@tonic-gate * 16357c478bd9Sstevel@tonic-gate * We know that ip is a valid inode in the hash chain (and thus 16367c478bd9Sstevel@tonic-gate * we can trust i_ufsvfs) because the inode we chained from 16377c478bd9Sstevel@tonic-gate * (lip) is still in the hash chain. This is true because either: 16387c478bd9Sstevel@tonic-gate * 16397c478bd9Sstevel@tonic-gate * 1. We did not drop the hash chain lock since the last 16407c478bd9Sstevel@tonic-gate * iteration (because we were not interested in the last inode), 16417c478bd9Sstevel@tonic-gate * or 16427c478bd9Sstevel@tonic-gate * 2. We maintained a hold on the last inode while we 16437c478bd9Sstevel@tonic-gate * we were processing it, so it could not be removed 16447c478bd9Sstevel@tonic-gate * from the hash chain. 16457c478bd9Sstevel@tonic-gate * 16467c478bd9Sstevel@tonic-gate * The whole reason we're dropping and re-grabbing the chain 16477c478bd9Sstevel@tonic-gate * lock on every inode is so that we don't present a major 16487c478bd9Sstevel@tonic-gate * choke point on throughput, particularly when we've been 16497c478bd9Sstevel@tonic-gate * called on behalf of fsflush. 16507c478bd9Sstevel@tonic-gate */ 16517c478bd9Sstevel@tonic-gate 16527c478bd9Sstevel@tonic-gate for (i = 0, ih = ihead; i < inohsz; i++, ih++) { 16537c478bd9Sstevel@tonic-gate mutex_enter(&ih_lock[i]); 16547c478bd9Sstevel@tonic-gate for (ip = ih->ih_chain[0], lip_held = 0; 16557c478bd9Sstevel@tonic-gate ip != (struct inode *)ih; 16567c478bd9Sstevel@tonic-gate ip = lip->i_forw) { 16577c478bd9Sstevel@tonic-gate 16587c478bd9Sstevel@tonic-gate ins.in_scan.value.ul++; 16597c478bd9Sstevel@tonic-gate 16607c478bd9Sstevel@tonic-gate /* 16617c478bd9Sstevel@tonic-gate * Undo the previous iteration's VN_HOLD(), but 16627c478bd9Sstevel@tonic-gate * only if one was done. 16637c478bd9Sstevel@tonic-gate */ 16647c478bd9Sstevel@tonic-gate if (lip_held) 16657c478bd9Sstevel@tonic-gate VN_RELE(ITOV(lip)); 16667c478bd9Sstevel@tonic-gate 16677c478bd9Sstevel@tonic-gate lip = ip; 16687c478bd9Sstevel@tonic-gate if (ufsvfsp != NULL && ip->i_ufsvfs != ufsvfsp) { 16697c478bd9Sstevel@tonic-gate /* 16707c478bd9Sstevel@tonic-gate * We're not processing all inodes, and 16717c478bd9Sstevel@tonic-gate * this inode is not in the filesystem of 16727c478bd9Sstevel@tonic-gate * interest, so skip it. No need to do a 16737c478bd9Sstevel@tonic-gate * VN_HOLD() since we're not dropping the 16747c478bd9Sstevel@tonic-gate * hash chain lock until after we've 16757c478bd9Sstevel@tonic-gate * done the i_forw traversal above. 16767c478bd9Sstevel@tonic-gate */ 16777c478bd9Sstevel@tonic-gate lip_held = 0; 16787c478bd9Sstevel@tonic-gate continue; 16797c478bd9Sstevel@tonic-gate } 16807c478bd9Sstevel@tonic-gate VN_HOLD(ITOV(ip)); 16817c478bd9Sstevel@tonic-gate lip_held = 1; 16827c478bd9Sstevel@tonic-gate mutex_exit(&ih_lock[i]); 16837c478bd9Sstevel@tonic-gate 16847c478bd9Sstevel@tonic-gate /* 16857c478bd9Sstevel@tonic-gate * Acquire the contents lock as writer to make 16867c478bd9Sstevel@tonic-gate * sure that the inode has been initialized in 16877c478bd9Sstevel@tonic-gate * the cache or removed from the idle list by 16887c478bd9Sstevel@tonic-gate * ufs_iget(). This works because ufs_iget() 16897c478bd9Sstevel@tonic-gate * acquires the contents lock before putting 16907c478bd9Sstevel@tonic-gate * the inode into the cache. If we can lock 16917c478bd9Sstevel@tonic-gate * it, then he's done with it. 16927c478bd9Sstevel@tonic-gate */ 16937c478bd9Sstevel@tonic-gate 16947c478bd9Sstevel@tonic-gate if (rwtry) { 16957c478bd9Sstevel@tonic-gate if (!rw_tryenter(&ip->i_contents, RW_WRITER)) { 16967c478bd9Sstevel@tonic-gate mutex_enter(&ih_lock[i]); 16977c478bd9Sstevel@tonic-gate continue; 16987c478bd9Sstevel@tonic-gate } 16997c478bd9Sstevel@tonic-gate } else { 17007c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 17017c478bd9Sstevel@tonic-gate } 17027c478bd9Sstevel@tonic-gate 17037c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 17047c478bd9Sstevel@tonic-gate 17057c478bd9Sstevel@tonic-gate /* 17067c478bd9Sstevel@tonic-gate * ISTALE means the inode couldn't be read 17077c478bd9Sstevel@tonic-gate * 17087c478bd9Sstevel@tonic-gate * We don't have to hold the i_contents lock 17097c478bd9Sstevel@tonic-gate * for this check for a couple of 17107c478bd9Sstevel@tonic-gate * reasons. First, if ISTALE is set then the 17117c478bd9Sstevel@tonic-gate * flag cannot be cleared until the inode is 17127c478bd9Sstevel@tonic-gate * removed from the cache and that cannot 17137c478bd9Sstevel@tonic-gate * happen until after we VN_RELE() it. 17147c478bd9Sstevel@tonic-gate * Second, if ISTALE is not set, then the 17157c478bd9Sstevel@tonic-gate * inode is in the cache and does not need to 17167c478bd9Sstevel@tonic-gate * be read from disk so ISTALE cannot be set 17177c478bd9Sstevel@tonic-gate * while we are not looking. 17187c478bd9Sstevel@tonic-gate */ 17197c478bd9Sstevel@tonic-gate if ((ip->i_flag & ISTALE) == 0) { 17207c478bd9Sstevel@tonic-gate if ((error = (*func)(ip, arg)) != 0) 17217c478bd9Sstevel@tonic-gate saverror = error; 17227c478bd9Sstevel@tonic-gate } 17237c478bd9Sstevel@tonic-gate 17247c478bd9Sstevel@tonic-gate mutex_enter(&ih_lock[i]); 17257c478bd9Sstevel@tonic-gate } 17267c478bd9Sstevel@tonic-gate if (lip_held) 17277c478bd9Sstevel@tonic-gate VN_RELE(ITOV(lip)); 17287c478bd9Sstevel@tonic-gate mutex_exit(&ih_lock[i]); 17297c478bd9Sstevel@tonic-gate } 17307c478bd9Sstevel@tonic-gate return (saverror); 17317c478bd9Sstevel@tonic-gate } 17327c478bd9Sstevel@tonic-gate 17337c478bd9Sstevel@tonic-gate /* 17347c478bd9Sstevel@tonic-gate * Mark inode with the current time, plus a unique increment. 17357c478bd9Sstevel@tonic-gate * 17367c478bd9Sstevel@tonic-gate * Since we only keep 32-bit time on disk, if UFS is still alive 17377c478bd9Sstevel@tonic-gate * beyond 2038, filesystem times will simply stick at the last 17387c478bd9Sstevel@tonic-gate * possible second of 32-bit time. Not ideal, but probably better 17397c478bd9Sstevel@tonic-gate * than going into the remote past, or confusing applications with 17407c478bd9Sstevel@tonic-gate * negative time. 17417c478bd9Sstevel@tonic-gate */ 17427c478bd9Sstevel@tonic-gate void 17437c478bd9Sstevel@tonic-gate ufs_imark(struct inode *ip) 17447c478bd9Sstevel@tonic-gate { 17457c478bd9Sstevel@tonic-gate timestruc_t now; 17467c478bd9Sstevel@tonic-gate int32_t usec, nsec; 17477c478bd9Sstevel@tonic-gate 17487c478bd9Sstevel@tonic-gate /* 17497c478bd9Sstevel@tonic-gate * The update of i_seq may have been deferred, increase i_seq here 17507c478bd9Sstevel@tonic-gate * to make sure it is in sync with the timestamps. 17517c478bd9Sstevel@tonic-gate */ 17527c478bd9Sstevel@tonic-gate if (ip->i_flag & ISEQ) { 17537c478bd9Sstevel@tonic-gate ASSERT(ip->i_flag & (IUPD|ICHG)); 17547c478bd9Sstevel@tonic-gate ip->i_seq++; 17557c478bd9Sstevel@tonic-gate ip->i_flag &= ~ISEQ; 17567c478bd9Sstevel@tonic-gate } 17577c478bd9Sstevel@tonic-gate 17587c478bd9Sstevel@tonic-gate gethrestime(&now); 17597c478bd9Sstevel@tonic-gate 17607c478bd9Sstevel@tonic-gate /* 17617c478bd9Sstevel@tonic-gate * Fast algorithm to convert nsec to usec -- see hrt2ts() 17627c478bd9Sstevel@tonic-gate * in common/os/timers.c for a full description. 17637c478bd9Sstevel@tonic-gate */ 17647c478bd9Sstevel@tonic-gate nsec = now.tv_nsec; 17657c478bd9Sstevel@tonic-gate usec = nsec + (nsec >> 2); 17667c478bd9Sstevel@tonic-gate usec = nsec + (usec >> 1); 17677c478bd9Sstevel@tonic-gate usec = nsec + (usec >> 2); 17687c478bd9Sstevel@tonic-gate usec = nsec + (usec >> 4); 17697c478bd9Sstevel@tonic-gate usec = nsec - (usec >> 3); 17707c478bd9Sstevel@tonic-gate usec = nsec + (usec >> 2); 17717c478bd9Sstevel@tonic-gate usec = nsec + (usec >> 3); 17727c478bd9Sstevel@tonic-gate usec = nsec + (usec >> 4); 17737c478bd9Sstevel@tonic-gate usec = nsec + (usec >> 1); 17747c478bd9Sstevel@tonic-gate usec = nsec + (usec >> 6); 17757c478bd9Sstevel@tonic-gate usec = usec >> 10; 17767c478bd9Sstevel@tonic-gate 17777c478bd9Sstevel@tonic-gate mutex_enter(&ufs_iuniqtime_lock); 17787c478bd9Sstevel@tonic-gate if (now.tv_sec > (time_t)iuniqtime.tv_sec || 17797c478bd9Sstevel@tonic-gate usec > iuniqtime.tv_usec) { 17807c478bd9Sstevel@tonic-gate if (now.tv_sec < TIME32_MAX) { 17817c478bd9Sstevel@tonic-gate iuniqtime.tv_sec = (time32_t)now.tv_sec; 17827c478bd9Sstevel@tonic-gate iuniqtime.tv_usec = usec; 17837c478bd9Sstevel@tonic-gate } 17847c478bd9Sstevel@tonic-gate } else { 17857c478bd9Sstevel@tonic-gate if (iuniqtime.tv_sec < TIME32_MAX) { 17867c478bd9Sstevel@tonic-gate iuniqtime.tv_usec++; 17877c478bd9Sstevel@tonic-gate /* Check for usec overflow */ 17887c478bd9Sstevel@tonic-gate if (iuniqtime.tv_usec >= MICROSEC) { 17897c478bd9Sstevel@tonic-gate iuniqtime.tv_sec++; 17907c478bd9Sstevel@tonic-gate iuniqtime.tv_usec = 0; 17917c478bd9Sstevel@tonic-gate } 17927c478bd9Sstevel@tonic-gate } 17937c478bd9Sstevel@tonic-gate } 17947c478bd9Sstevel@tonic-gate 17957c478bd9Sstevel@tonic-gate if ((ip->i_flag & IACC) && !(ip->i_ufsvfs->vfs_noatime)) { 17967c478bd9Sstevel@tonic-gate ip->i_atime = iuniqtime; 17977c478bd9Sstevel@tonic-gate } 17987c478bd9Sstevel@tonic-gate if (ip->i_flag & IUPD) { 17997c478bd9Sstevel@tonic-gate ip->i_mtime = iuniqtime; 18007c478bd9Sstevel@tonic-gate ip->i_flag |= IMODTIME; 18017c478bd9Sstevel@tonic-gate } 18027c478bd9Sstevel@tonic-gate if (ip->i_flag & ICHG) { 18037c478bd9Sstevel@tonic-gate ip->i_diroff = 0; 18047c478bd9Sstevel@tonic-gate ip->i_ctime = iuniqtime; 18057c478bd9Sstevel@tonic-gate } 18067c478bd9Sstevel@tonic-gate mutex_exit(&ufs_iuniqtime_lock); 18077c478bd9Sstevel@tonic-gate } 18087c478bd9Sstevel@tonic-gate 18097c478bd9Sstevel@tonic-gate /* 18107c478bd9Sstevel@tonic-gate * Update timestamps in inode. 18117c478bd9Sstevel@tonic-gate */ 18127c478bd9Sstevel@tonic-gate void 18137c478bd9Sstevel@tonic-gate ufs_itimes_nolock(struct inode *ip) 18147c478bd9Sstevel@tonic-gate { 18157c478bd9Sstevel@tonic-gate 18167c478bd9Sstevel@tonic-gate /* 18177c478bd9Sstevel@tonic-gate * if noatime is set and the inode access time is the only field that 18187c478bd9Sstevel@tonic-gate * must be changed, exit immediately. 18197c478bd9Sstevel@tonic-gate */ 18207c478bd9Sstevel@tonic-gate if (((ip->i_flag & (IUPD|IACC|ICHG)) == IACC) && 18217c478bd9Sstevel@tonic-gate (ip->i_ufsvfs->vfs_noatime)) { 18227c478bd9Sstevel@tonic-gate return; 18237c478bd9Sstevel@tonic-gate } 18247c478bd9Sstevel@tonic-gate 18257c478bd9Sstevel@tonic-gate if (ip->i_flag & (IUPD|IACC|ICHG)) { 18267c478bd9Sstevel@tonic-gate if (ip->i_flag & ICHG) 18277c478bd9Sstevel@tonic-gate ip->i_flag |= IMOD; 18287c478bd9Sstevel@tonic-gate else 18297c478bd9Sstevel@tonic-gate ip->i_flag |= IMODACC; 18307c478bd9Sstevel@tonic-gate ufs_imark(ip); 18317c478bd9Sstevel@tonic-gate ip->i_flag &= ~(IACC|IUPD|ICHG); 18327c478bd9Sstevel@tonic-gate } 18337c478bd9Sstevel@tonic-gate } 1834