17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 533c22cb3Smishra * Common Development and Distribution License (the "License"). 633c22cb3Smishra * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22e7da395aSOwen Roberts * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 277c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate /* 307c478bd9Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 317c478bd9Sstevel@tonic-gate * The Regents of the University of California 327c478bd9Sstevel@tonic-gate * All Rights Reserved 337c478bd9Sstevel@tonic-gate * 347c478bd9Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 357c478bd9Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 367c478bd9Sstevel@tonic-gate * contributors. 377c478bd9Sstevel@tonic-gate */ 387c478bd9Sstevel@tonic-gate 39303bf60bSsdebnath #include <sys/condvar_impl.h> 407c478bd9Sstevel@tonic-gate #include <sys/types.h> 417c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 427c478bd9Sstevel@tonic-gate #include <sys/debug.h> 437c478bd9Sstevel@tonic-gate #include <sys/param.h> 447c478bd9Sstevel@tonic-gate #include <sys/systm.h> 457c478bd9Sstevel@tonic-gate #include <sys/signal.h> 467c478bd9Sstevel@tonic-gate #include <sys/cred.h> 477c478bd9Sstevel@tonic-gate #include <sys/proc.h> 487c478bd9Sstevel@tonic-gate #include <sys/disp.h> 497c478bd9Sstevel@tonic-gate #include <sys/user.h> 507c478bd9Sstevel@tonic-gate #include <sys/buf.h> 517c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 527c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 537c478bd9Sstevel@tonic-gate #include <sys/acl.h> 547c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h> 557c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 567c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_acl.h> 577c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 587c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_quota.h> 597c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 607c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_trans.h> 617c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_panic.h> 627c478bd9Sstevel@tonic-gate #include <sys/errno.h> 637c478bd9Sstevel@tonic-gate #include <sys/time.h> 647c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 657c478bd9Sstevel@tonic-gate #include <sys/file.h> 667c478bd9Sstevel@tonic-gate #include <sys/fcntl.h> 677c478bd9Sstevel@tonic-gate #include <sys/flock.h> 687c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h> 697c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 707c478bd9Sstevel@tonic-gate #include <sys/policy.h> 71e7da395aSOwen Roberts #include <sys/fs/ufs_log.h> 727c478bd9Sstevel@tonic-gate 737c478bd9Sstevel@tonic-gate static ino_t hashalloc(); 747c478bd9Sstevel@tonic-gate static daddr_t fragextend(); 757c478bd9Sstevel@tonic-gate static daddr_t alloccg(); 767c478bd9Sstevel@tonic-gate static daddr_t alloccgblk(); 777c478bd9Sstevel@tonic-gate static ino_t ialloccg(); 787c478bd9Sstevel@tonic-gate static daddr_t mapsearch(); 79e7da395aSOwen Roberts static int findlogstartcg(); 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate extern int inside[], around[]; 827c478bd9Sstevel@tonic-gate extern uchar_t *fragtbl[]; 837c478bd9Sstevel@tonic-gate void delay(); 847c478bd9Sstevel@tonic-gate 857c478bd9Sstevel@tonic-gate /* 867c478bd9Sstevel@tonic-gate * Allocate a block in the file system. 877c478bd9Sstevel@tonic-gate * 887c478bd9Sstevel@tonic-gate * The size of the requested block is given, which must be some 897c478bd9Sstevel@tonic-gate * multiple of fs_fsize and <= fs_bsize. 907c478bd9Sstevel@tonic-gate * A preference may be optionally specified. If a preference is given 917c478bd9Sstevel@tonic-gate * the following hierarchy is used to allocate a block: 927c478bd9Sstevel@tonic-gate * 1) allocate the requested block. 937c478bd9Sstevel@tonic-gate * 2) allocate a rotationally optimal block in the same cylinder. 947c478bd9Sstevel@tonic-gate * 3) allocate a block in the same cylinder group. 957c478bd9Sstevel@tonic-gate * 4) quadratically rehash into other cylinder groups, until an 967c478bd9Sstevel@tonic-gate * available block is located. 977c478bd9Sstevel@tonic-gate * If no block preference is given the following hierarchy is used 987c478bd9Sstevel@tonic-gate * to allocate a block: 997c478bd9Sstevel@tonic-gate * 1) allocate a block in the cylinder group that contains the 1007c478bd9Sstevel@tonic-gate * inode for the file. 1017c478bd9Sstevel@tonic-gate * 2) quadratically rehash into other cylinder groups, until an 1027c478bd9Sstevel@tonic-gate * available block is located. 1037c478bd9Sstevel@tonic-gate */ 1047c478bd9Sstevel@tonic-gate int 1057c478bd9Sstevel@tonic-gate alloc(struct inode *ip, daddr_t bpref, int size, daddr_t *bnp, cred_t *cr) 1067c478bd9Sstevel@tonic-gate { 1077c478bd9Sstevel@tonic-gate struct fs *fs; 1087c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 1097c478bd9Sstevel@tonic-gate daddr_t bno; 1107c478bd9Sstevel@tonic-gate int cg; 1117c478bd9Sstevel@tonic-gate int err; 1127c478bd9Sstevel@tonic-gate char *errmsg = NULL; 1137c478bd9Sstevel@tonic-gate size_t len; 114*d3d50737SRafael Vanoni clock_t now; 1157c478bd9Sstevel@tonic-gate 1167c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 1177c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 1187c478bd9Sstevel@tonic-gate if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) { 119303bf60bSsdebnath err = ufs_fault(ITOV(ip), "alloc: bad size, dev = 0x%lx," 120303bf60bSsdebnath " bsize = %d, size = %d, fs = %s\n", 1217c478bd9Sstevel@tonic-gate ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); 1227c478bd9Sstevel@tonic-gate return (err); 1237c478bd9Sstevel@tonic-gate } 1247c478bd9Sstevel@tonic-gate if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 1257c478bd9Sstevel@tonic-gate goto nospace; 1267c478bd9Sstevel@tonic-gate if (freespace(fs, ufsvfsp) <= 0 && 1277c478bd9Sstevel@tonic-gate secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0) 1287c478bd9Sstevel@tonic-gate goto nospace; 1297c478bd9Sstevel@tonic-gate err = chkdq(ip, (long)btodb(size), 0, cr, &errmsg, &len); 1307c478bd9Sstevel@tonic-gate /* Note that may not have err, but may have errmsg */ 1317c478bd9Sstevel@tonic-gate if (errmsg != NULL) { 1327c478bd9Sstevel@tonic-gate uprintf(errmsg); 1337c478bd9Sstevel@tonic-gate kmem_free(errmsg, len); 1347c478bd9Sstevel@tonic-gate errmsg = NULL; 1357c478bd9Sstevel@tonic-gate } 1367c478bd9Sstevel@tonic-gate if (err) 1377c478bd9Sstevel@tonic-gate return (err); 1387c478bd9Sstevel@tonic-gate if (bpref >= fs->fs_size) 1397c478bd9Sstevel@tonic-gate bpref = 0; 1407c478bd9Sstevel@tonic-gate if (bpref == 0) 1417c478bd9Sstevel@tonic-gate cg = (int)itog(fs, ip->i_number); 1427c478bd9Sstevel@tonic-gate else 1437c478bd9Sstevel@tonic-gate cg = dtog(fs, bpref); 1447c478bd9Sstevel@tonic-gate 1457c478bd9Sstevel@tonic-gate bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size, 1467c478bd9Sstevel@tonic-gate (ulong_t (*)())alloccg); 1477c478bd9Sstevel@tonic-gate if (bno > 0) { 1487c478bd9Sstevel@tonic-gate *bnp = bno; 1497c478bd9Sstevel@tonic-gate return (0); 1507c478bd9Sstevel@tonic-gate } 1517c478bd9Sstevel@tonic-gate 1527c478bd9Sstevel@tonic-gate /* 1537c478bd9Sstevel@tonic-gate * hashalloc() failed because some other thread grabbed 1547c478bd9Sstevel@tonic-gate * the last block so unwind the quota operation. We can 1557c478bd9Sstevel@tonic-gate * ignore the return because subtractions don't fail and 1567c478bd9Sstevel@tonic-gate * size is guaranteed to be >= zero by our caller. 1577c478bd9Sstevel@tonic-gate */ 1587c478bd9Sstevel@tonic-gate (void) chkdq(ip, -(long)btodb(size), 0, cr, (char **)NULL, 1597c478bd9Sstevel@tonic-gate (size_t *)NULL); 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate nospace: 162*d3d50737SRafael Vanoni now = ddi_get_lbolt(); 1637c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 164*d3d50737SRafael Vanoni if ((now - ufsvfsp->vfs_lastwhinetime) > (hz << 2) && 1657c478bd9Sstevel@tonic-gate (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) { 166*d3d50737SRafael Vanoni ufsvfsp->vfs_lastwhinetime = now; 1677c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "alloc: %s: file system full", fs->fs_fsmnt); 1687c478bd9Sstevel@tonic-gate } 1697c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 1707c478bd9Sstevel@tonic-gate return (ENOSPC); 1717c478bd9Sstevel@tonic-gate } 1727c478bd9Sstevel@tonic-gate 1737c478bd9Sstevel@tonic-gate /* 1747c478bd9Sstevel@tonic-gate * Reallocate a fragment to a bigger size 1757c478bd9Sstevel@tonic-gate * 1767c478bd9Sstevel@tonic-gate * The number and size of the old block is given, and a preference 1777c478bd9Sstevel@tonic-gate * and new size is also specified. The allocator attempts to extend 1787c478bd9Sstevel@tonic-gate * the original block. Failing that, the regular block allocator is 1797c478bd9Sstevel@tonic-gate * invoked to get an appropriate block. 1807c478bd9Sstevel@tonic-gate */ 1817c478bd9Sstevel@tonic-gate int 1827c478bd9Sstevel@tonic-gate realloccg(struct inode *ip, daddr_t bprev, daddr_t bpref, int osize, 1837c478bd9Sstevel@tonic-gate int nsize, daddr_t *bnp, cred_t *cr) 1847c478bd9Sstevel@tonic-gate { 1857c478bd9Sstevel@tonic-gate daddr_t bno; 1867c478bd9Sstevel@tonic-gate struct fs *fs; 1877c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 1887c478bd9Sstevel@tonic-gate int cg, request; 1897c478bd9Sstevel@tonic-gate int err; 1907c478bd9Sstevel@tonic-gate char *errmsg = NULL; 1917c478bd9Sstevel@tonic-gate size_t len; 192*d3d50737SRafael Vanoni clock_t now; 1937c478bd9Sstevel@tonic-gate 1947c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 1957c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 1967c478bd9Sstevel@tonic-gate if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 1977c478bd9Sstevel@tonic-gate (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 1987c478bd9Sstevel@tonic-gate err = ufs_fault(ITOV(ip), 199303bf60bSsdebnath "realloccg: bad size, dev=0x%lx, bsize=%d, " 200303bf60bSsdebnath "osize=%d, nsize=%d, fs=%s\n", 201303bf60bSsdebnath ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); 2027c478bd9Sstevel@tonic-gate return (err); 2037c478bd9Sstevel@tonic-gate } 2047c478bd9Sstevel@tonic-gate if (freespace(fs, ufsvfsp) <= 0 && 2057c478bd9Sstevel@tonic-gate secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0) 2067c478bd9Sstevel@tonic-gate goto nospace; 2077c478bd9Sstevel@tonic-gate if (bprev == 0) { 2087c478bd9Sstevel@tonic-gate err = ufs_fault(ITOV(ip), 209303bf60bSsdebnath "realloccg: bad bprev, dev = 0x%lx, bsize = %d," 210303bf60bSsdebnath " bprev = %ld, fs = %s\n", ip->i_dev, fs->fs_bsize, bprev, 2117c478bd9Sstevel@tonic-gate fs->fs_fsmnt); 2127c478bd9Sstevel@tonic-gate return (err); 2137c478bd9Sstevel@tonic-gate } 2147c478bd9Sstevel@tonic-gate err = chkdq(ip, (long)btodb(nsize - osize), 0, cr, &errmsg, &len); 2157c478bd9Sstevel@tonic-gate /* Note that may not have err, but may have errmsg */ 2167c478bd9Sstevel@tonic-gate if (errmsg != NULL) { 2177c478bd9Sstevel@tonic-gate uprintf(errmsg); 2187c478bd9Sstevel@tonic-gate kmem_free(errmsg, len); 2197c478bd9Sstevel@tonic-gate errmsg = NULL; 2207c478bd9Sstevel@tonic-gate } 2217c478bd9Sstevel@tonic-gate if (err) 2227c478bd9Sstevel@tonic-gate return (err); 2237c478bd9Sstevel@tonic-gate cg = dtog(fs, bprev); 2247c478bd9Sstevel@tonic-gate bno = fragextend(ip, cg, (long)bprev, osize, nsize); 2257c478bd9Sstevel@tonic-gate if (bno != 0) { 2267c478bd9Sstevel@tonic-gate *bnp = bno; 2277c478bd9Sstevel@tonic-gate return (0); 2287c478bd9Sstevel@tonic-gate } 2297c478bd9Sstevel@tonic-gate if (bpref >= fs->fs_size) 2307c478bd9Sstevel@tonic-gate bpref = 0; 2317c478bd9Sstevel@tonic-gate 2327c478bd9Sstevel@tonic-gate /* 2337c478bd9Sstevel@tonic-gate * When optimizing for time we allocate a full block and 2347c478bd9Sstevel@tonic-gate * then only use the upper portion for this request. When 2357c478bd9Sstevel@tonic-gate * this file grows again it will grow into the unused portion 2367c478bd9Sstevel@tonic-gate * of the block (See fragextend() above). This saves time 2377c478bd9Sstevel@tonic-gate * because an extra disk write would be needed if the frags 2387c478bd9Sstevel@tonic-gate * following the current allocation were not free. The extra 2397c478bd9Sstevel@tonic-gate * disk write is needed to move the data from its current 2407c478bd9Sstevel@tonic-gate * location into the newly allocated position. 2417c478bd9Sstevel@tonic-gate * 2427c478bd9Sstevel@tonic-gate * When optimizing for space we allocate a run of frags 2437c478bd9Sstevel@tonic-gate * that is just the right size for this request. 2447c478bd9Sstevel@tonic-gate */ 2457c478bd9Sstevel@tonic-gate request = (fs->fs_optim == FS_OPTTIME) ? fs->fs_bsize : nsize; 2467c478bd9Sstevel@tonic-gate bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request, 2477c478bd9Sstevel@tonic-gate (ulong_t (*)())alloccg); 2487c478bd9Sstevel@tonic-gate if (bno > 0) { 2497c478bd9Sstevel@tonic-gate *bnp = bno; 2507c478bd9Sstevel@tonic-gate if (nsize < request) 2517c478bd9Sstevel@tonic-gate (void) free(ip, bno + numfrags(fs, nsize), 2527c478bd9Sstevel@tonic-gate (off_t)(request - nsize), I_NOCANCEL); 2537c478bd9Sstevel@tonic-gate return (0); 2547c478bd9Sstevel@tonic-gate } 2557c478bd9Sstevel@tonic-gate 2567c478bd9Sstevel@tonic-gate /* 2577c478bd9Sstevel@tonic-gate * hashalloc() failed because some other thread grabbed 2587c478bd9Sstevel@tonic-gate * the last block so unwind the quota operation. We can 2597c478bd9Sstevel@tonic-gate * ignore the return because subtractions don't fail, and 2607c478bd9Sstevel@tonic-gate * our caller guarantees nsize >= osize. 2617c478bd9Sstevel@tonic-gate */ 2627c478bd9Sstevel@tonic-gate (void) chkdq(ip, -(long)btodb(nsize - osize), 0, cr, (char **)NULL, 2637c478bd9Sstevel@tonic-gate (size_t *)NULL); 2647c478bd9Sstevel@tonic-gate 2657c478bd9Sstevel@tonic-gate nospace: 266*d3d50737SRafael Vanoni now = ddi_get_lbolt(); 2677c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 268*d3d50737SRafael Vanoni if ((now - ufsvfsp->vfs_lastwhinetime) > (hz << 2) && 2697c478bd9Sstevel@tonic-gate (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) { 270*d3d50737SRafael Vanoni ufsvfsp->vfs_lastwhinetime = now; 2717c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, 2727c478bd9Sstevel@tonic-gate "realloccg %s: file system full", fs->fs_fsmnt); 2737c478bd9Sstevel@tonic-gate } 2747c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 2757c478bd9Sstevel@tonic-gate return (ENOSPC); 2767c478bd9Sstevel@tonic-gate } 2777c478bd9Sstevel@tonic-gate 2787c478bd9Sstevel@tonic-gate /* 2797c478bd9Sstevel@tonic-gate * Allocate an inode in the file system. 2807c478bd9Sstevel@tonic-gate * 2817c478bd9Sstevel@tonic-gate * A preference may be optionally specified. If a preference is given 2827c478bd9Sstevel@tonic-gate * the following hierarchy is used to allocate an inode: 2837c478bd9Sstevel@tonic-gate * 1) allocate the requested inode. 2847c478bd9Sstevel@tonic-gate * 2) allocate an inode in the same cylinder group. 2857c478bd9Sstevel@tonic-gate * 3) quadratically rehash into other cylinder groups, until an 2867c478bd9Sstevel@tonic-gate * available inode is located. 2877c478bd9Sstevel@tonic-gate * If no inode preference is given the following hierarchy is used 2887c478bd9Sstevel@tonic-gate * to allocate an inode: 2897c478bd9Sstevel@tonic-gate * 1) allocate an inode in cylinder group 0. 2907c478bd9Sstevel@tonic-gate * 2) quadratically rehash into other cylinder groups, until an 2917c478bd9Sstevel@tonic-gate * available inode is located. 2927c478bd9Sstevel@tonic-gate */ 2937c478bd9Sstevel@tonic-gate int 2947c478bd9Sstevel@tonic-gate ufs_ialloc(struct inode *pip, 2957c478bd9Sstevel@tonic-gate ino_t ipref, mode_t mode, struct inode **ipp, cred_t *cr) 2967c478bd9Sstevel@tonic-gate { 2977c478bd9Sstevel@tonic-gate struct inode *ip; 2987c478bd9Sstevel@tonic-gate struct fs *fs; 2997c478bd9Sstevel@tonic-gate int cg; 3007c478bd9Sstevel@tonic-gate ino_t ino; 3017c478bd9Sstevel@tonic-gate int err; 3027c478bd9Sstevel@tonic-gate int nifree; 3037c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = pip->i_ufsvfs; 3047c478bd9Sstevel@tonic-gate char *errmsg = NULL; 3057c478bd9Sstevel@tonic-gate size_t len; 3067c478bd9Sstevel@tonic-gate 3077c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&pip->i_rwlock)); 3087c478bd9Sstevel@tonic-gate fs = pip->i_fs; 3097c478bd9Sstevel@tonic-gate loop: 3107c478bd9Sstevel@tonic-gate nifree = fs->fs_cstotal.cs_nifree; 3117c478bd9Sstevel@tonic-gate 3127c478bd9Sstevel@tonic-gate if (nifree == 0) 3137c478bd9Sstevel@tonic-gate goto noinodes; 3147c478bd9Sstevel@tonic-gate /* 3157c478bd9Sstevel@tonic-gate * Shadow inodes don't count against a user's inode allocation. 3167c478bd9Sstevel@tonic-gate * They are an implementation method and not a resource. 3177c478bd9Sstevel@tonic-gate */ 3187c478bd9Sstevel@tonic-gate if ((mode != IFSHAD) && (mode != IFATTRDIR)) { 3197c478bd9Sstevel@tonic-gate err = chkiq((struct ufsvfs *)ITOV(pip)->v_vfsp->vfs_data, 3207c478bd9Sstevel@tonic-gate /* change */ 1, (struct inode *)NULL, crgetuid(cr), 0, 3217c478bd9Sstevel@tonic-gate cr, &errmsg, &len); 3227c478bd9Sstevel@tonic-gate /* 3237c478bd9Sstevel@tonic-gate * As we haven't acquired any locks yet, dump the message 3247c478bd9Sstevel@tonic-gate * now. 3257c478bd9Sstevel@tonic-gate */ 3267c478bd9Sstevel@tonic-gate if (errmsg != NULL) { 3277c478bd9Sstevel@tonic-gate uprintf(errmsg); 3287c478bd9Sstevel@tonic-gate kmem_free(errmsg, len); 3297c478bd9Sstevel@tonic-gate errmsg = NULL; 3307c478bd9Sstevel@tonic-gate } 3317c478bd9Sstevel@tonic-gate if (err) 3327c478bd9Sstevel@tonic-gate return (err); 3337c478bd9Sstevel@tonic-gate } 3347c478bd9Sstevel@tonic-gate 3357c478bd9Sstevel@tonic-gate if (ipref >= (ulong_t)(fs->fs_ncg * fs->fs_ipg)) 3367c478bd9Sstevel@tonic-gate ipref = 0; 3377c478bd9Sstevel@tonic-gate cg = (int)itog(fs, ipref); 3387c478bd9Sstevel@tonic-gate ino = (ino_t)hashalloc(pip, cg, (long)ipref, (int)mode, 3397c478bd9Sstevel@tonic-gate (ulong_t (*)())ialloccg); 3407c478bd9Sstevel@tonic-gate if (ino == 0) { 3417c478bd9Sstevel@tonic-gate if ((mode != IFSHAD) && (mode != IFATTRDIR)) { 3427c478bd9Sstevel@tonic-gate /* 3437c478bd9Sstevel@tonic-gate * We can safely ignore the return from chkiq() 3447c478bd9Sstevel@tonic-gate * because deallocations can only fail if we 3457c478bd9Sstevel@tonic-gate * can't get the user's quota info record off 3467c478bd9Sstevel@tonic-gate * the disk due to an I/O error. In that case, 3477c478bd9Sstevel@tonic-gate * the quota subsystem is already messed up. 3487c478bd9Sstevel@tonic-gate */ 3497c478bd9Sstevel@tonic-gate (void) chkiq(ufsvfsp, /* change */ -1, 3507c478bd9Sstevel@tonic-gate (struct inode *)NULL, crgetuid(cr), 0, cr, 3517c478bd9Sstevel@tonic-gate (char **)NULL, (size_t *)NULL); 3527c478bd9Sstevel@tonic-gate } 3537c478bd9Sstevel@tonic-gate goto noinodes; 3547c478bd9Sstevel@tonic-gate } 3557c478bd9Sstevel@tonic-gate err = ufs_iget(pip->i_vfs, ino, ipp, cr); 3567c478bd9Sstevel@tonic-gate if (err) { 3577c478bd9Sstevel@tonic-gate if ((mode != IFSHAD) && (mode != IFATTRDIR)) { 3587c478bd9Sstevel@tonic-gate /* 3597c478bd9Sstevel@tonic-gate * See above comment about why it is safe to ignore an 3607c478bd9Sstevel@tonic-gate * error return here. 3617c478bd9Sstevel@tonic-gate */ 3627c478bd9Sstevel@tonic-gate (void) chkiq(ufsvfsp, /* change */ -1, 3637c478bd9Sstevel@tonic-gate (struct inode *)NULL, crgetuid(cr), 0, cr, 3647c478bd9Sstevel@tonic-gate (char **)NULL, (size_t *)NULL); 3657c478bd9Sstevel@tonic-gate } 3667c478bd9Sstevel@tonic-gate ufs_ifree(pip, ino, 0); 3677c478bd9Sstevel@tonic-gate return (err); 3687c478bd9Sstevel@tonic-gate } 3697c478bd9Sstevel@tonic-gate ip = *ipp; 3707c478bd9Sstevel@tonic-gate ASSERT(!ip->i_ufs_acl); 3717c478bd9Sstevel@tonic-gate ASSERT(!ip->i_dquot); 3727c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 3737c478bd9Sstevel@tonic-gate 3747c478bd9Sstevel@tonic-gate /* 3757c478bd9Sstevel@tonic-gate * Check if we really got a free inode, if not then complain 3767c478bd9Sstevel@tonic-gate * and mark the inode ISTALE so that it will be freed by the 3777c478bd9Sstevel@tonic-gate * ufs idle thread eventually and will not be sent to ufs_delete(). 3787c478bd9Sstevel@tonic-gate */ 3797c478bd9Sstevel@tonic-gate if (ip->i_mode || (ip->i_nlink > 0)) { 3807c478bd9Sstevel@tonic-gate ip->i_flag |= ISTALE; 3817c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 3827c478bd9Sstevel@tonic-gate VN_RELE(ITOV(ip)); 3837c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 3847c478bd9Sstevel@tonic-gate "%s: unexpected allocated inode %d, run fsck(1M)%s", 3857c478bd9Sstevel@tonic-gate fs->fs_fsmnt, (int)ino, 3867c478bd9Sstevel@tonic-gate (TRANS_ISTRANS(ufsvfsp) ? " -o f" : "")); 3877c478bd9Sstevel@tonic-gate goto loop; 3887c478bd9Sstevel@tonic-gate } 3897c478bd9Sstevel@tonic-gate 3907c478bd9Sstevel@tonic-gate /* 3917c478bd9Sstevel@tonic-gate * Check the inode has no size or data blocks. 3927c478bd9Sstevel@tonic-gate * This could have happened if the truncation failed when 3937c478bd9Sstevel@tonic-gate * deleting the inode. It used to be possible for this to occur 3947c478bd9Sstevel@tonic-gate * if a block allocation failed when iteratively truncating a 3957c478bd9Sstevel@tonic-gate * large file using logging and with a full file system. 3967c478bd9Sstevel@tonic-gate * This was fixed with bug fix 4348738. However, truncation may 3977c478bd9Sstevel@tonic-gate * still fail on an IO error. So in all cases for safety and 3987c478bd9Sstevel@tonic-gate * security we clear out the size; the blocks allocated; and 3997c478bd9Sstevel@tonic-gate * pointers to the blocks. This will ultimately cause a fsck 4007c478bd9Sstevel@tonic-gate * error of un-accounted for blocks, but its a fairly benign error, 4017c478bd9Sstevel@tonic-gate * and possibly the correct thing to do anyway as accesssing those 4027c478bd9Sstevel@tonic-gate * blocks agains may lead to more IO errors. 4037c478bd9Sstevel@tonic-gate */ 4047c478bd9Sstevel@tonic-gate if (ip->i_size || ip->i_blocks) { 4057c478bd9Sstevel@tonic-gate int i; 4067c478bd9Sstevel@tonic-gate 4077c478bd9Sstevel@tonic-gate if (ip->i_size) { 4087c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 4097c478bd9Sstevel@tonic-gate "%s: free inode %d had size 0x%llx, run fsck(1M)%s", 4107c478bd9Sstevel@tonic-gate fs->fs_fsmnt, (int)ino, ip->i_size, 4117c478bd9Sstevel@tonic-gate (TRANS_ISTRANS(ufsvfsp) ? " -o f" : "")); 4127c478bd9Sstevel@tonic-gate } 4137c478bd9Sstevel@tonic-gate /* 4147c478bd9Sstevel@tonic-gate * Clear any garbage left behind. 4157c478bd9Sstevel@tonic-gate */ 4167c478bd9Sstevel@tonic-gate ip->i_size = (u_offset_t)0; 4177c478bd9Sstevel@tonic-gate ip->i_blocks = 0; 4187c478bd9Sstevel@tonic-gate for (i = 0; i < NDADDR; i++) 4197c478bd9Sstevel@tonic-gate ip->i_db[i] = 0; 4207c478bd9Sstevel@tonic-gate for (i = 0; i < NIADDR; i++) 4217c478bd9Sstevel@tonic-gate ip->i_ib[i] = 0; 4227c478bd9Sstevel@tonic-gate } 4237c478bd9Sstevel@tonic-gate 4247c478bd9Sstevel@tonic-gate /* 4257c478bd9Sstevel@tonic-gate * Initialize the link count 4267c478bd9Sstevel@tonic-gate */ 4277c478bd9Sstevel@tonic-gate ip->i_nlink = 0; 4287c478bd9Sstevel@tonic-gate 4297c478bd9Sstevel@tonic-gate /* 4307c478bd9Sstevel@tonic-gate * Clear the old flags 4317c478bd9Sstevel@tonic-gate */ 4327c478bd9Sstevel@tonic-gate ip->i_flag &= IREF; 4337c478bd9Sstevel@tonic-gate 4347c478bd9Sstevel@tonic-gate /* 4357c478bd9Sstevel@tonic-gate * Access times are not really defined if the fs is mounted 4367c478bd9Sstevel@tonic-gate * with 'noatime'. But it can cause nfs clients to fail 4377c478bd9Sstevel@tonic-gate * open() if the atime is not a legal value. Set a legal value 4387c478bd9Sstevel@tonic-gate * here when the inode is allocated. 4397c478bd9Sstevel@tonic-gate */ 4407c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_noatime) { 4417c478bd9Sstevel@tonic-gate mutex_enter(&ufs_iuniqtime_lock); 4427c478bd9Sstevel@tonic-gate ip->i_atime = iuniqtime; 4437c478bd9Sstevel@tonic-gate mutex_exit(&ufs_iuniqtime_lock); 4447c478bd9Sstevel@tonic-gate } 4457c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 4467c478bd9Sstevel@tonic-gate return (0); 4477c478bd9Sstevel@tonic-gate noinodes: 4487c478bd9Sstevel@tonic-gate if (!(TRANS_ISTRANS(ufsvfsp)) || !(pip->i_flag & IQUIET)) 4497c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "%s: out of inodes\n", fs->fs_fsmnt); 4507c478bd9Sstevel@tonic-gate return (ENOSPC); 4517c478bd9Sstevel@tonic-gate } 4527c478bd9Sstevel@tonic-gate 4537c478bd9Sstevel@tonic-gate /* 4547c478bd9Sstevel@tonic-gate * Find a cylinder group to place a directory. 4557c478bd9Sstevel@tonic-gate * Returns an inumber within the selected cylinder group. 4567c478bd9Sstevel@tonic-gate * Note, the vfs_lock is not needed as we don't require exact cg summary info. 4577c478bd9Sstevel@tonic-gate * 4587c478bd9Sstevel@tonic-gate * If the switch ufs_close_dirs is set, then the policy is to use 4597c478bd9Sstevel@tonic-gate * the current cg if it has more than 25% free inodes and more 4607c478bd9Sstevel@tonic-gate * than 25% free blocks. Otherwise the cgs are searched from 4617c478bd9Sstevel@tonic-gate * the beginning and the first cg with the same criteria is 4627c478bd9Sstevel@tonic-gate * used. If that is also null then we revert to the old algorithm. 4637c478bd9Sstevel@tonic-gate * This tends to cluster files at the beginning of the disk 4647c478bd9Sstevel@tonic-gate * until the disk gets full. 4657c478bd9Sstevel@tonic-gate * 4667c478bd9Sstevel@tonic-gate * Otherwise if ufs_close_dirs is not set then the original policy is 4677c478bd9Sstevel@tonic-gate * used which is to select from among those cylinder groups with 4687c478bd9Sstevel@tonic-gate * above the average number of free inodes, the one with the smallest 4697c478bd9Sstevel@tonic-gate * number of directories. 4707c478bd9Sstevel@tonic-gate */ 4717c478bd9Sstevel@tonic-gate 4727c478bd9Sstevel@tonic-gate int ufs_close_dirs = 1; /* allocate directories close as possible */ 4737c478bd9Sstevel@tonic-gate 4747c478bd9Sstevel@tonic-gate ino_t 4757c478bd9Sstevel@tonic-gate dirpref(inode_t *dp) 4767c478bd9Sstevel@tonic-gate { 4777c478bd9Sstevel@tonic-gate int cg, minndir, mincg, avgifree, mininode, minbpg, ifree; 4787c478bd9Sstevel@tonic-gate struct fs *fs = dp->i_fs; 4797c478bd9Sstevel@tonic-gate 4807c478bd9Sstevel@tonic-gate cg = itog(fs, dp->i_number); 4817c478bd9Sstevel@tonic-gate mininode = fs->fs_ipg >> 2; 4827c478bd9Sstevel@tonic-gate minbpg = fs->fs_maxbpg >> 2; 4837c478bd9Sstevel@tonic-gate if (ufs_close_dirs && 4847c478bd9Sstevel@tonic-gate (fs->fs_cs(fs, cg).cs_nifree > mininode) && 4857c478bd9Sstevel@tonic-gate (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) { 4867c478bd9Sstevel@tonic-gate return (dp->i_number); 4877c478bd9Sstevel@tonic-gate } 4887c478bd9Sstevel@tonic-gate 4897c478bd9Sstevel@tonic-gate avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; 4907c478bd9Sstevel@tonic-gate minndir = fs->fs_ipg; 4917c478bd9Sstevel@tonic-gate mincg = 0; 4927c478bd9Sstevel@tonic-gate for (cg = 0; cg < fs->fs_ncg; cg++) { 4937c478bd9Sstevel@tonic-gate ifree = fs->fs_cs(fs, cg).cs_nifree; 4947c478bd9Sstevel@tonic-gate if (ufs_close_dirs && 4957c478bd9Sstevel@tonic-gate (ifree > mininode) && 4967c478bd9Sstevel@tonic-gate (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) { 4977c478bd9Sstevel@tonic-gate return ((ino_t)(fs->fs_ipg * cg)); 4987c478bd9Sstevel@tonic-gate } 4997c478bd9Sstevel@tonic-gate if ((fs->fs_cs(fs, cg).cs_ndir < minndir) && 5007c478bd9Sstevel@tonic-gate (ifree >= avgifree)) { 5017c478bd9Sstevel@tonic-gate mincg = cg; 5027c478bd9Sstevel@tonic-gate minndir = fs->fs_cs(fs, cg).cs_ndir; 5037c478bd9Sstevel@tonic-gate } 5047c478bd9Sstevel@tonic-gate } 5057c478bd9Sstevel@tonic-gate return ((ino_t)(fs->fs_ipg * mincg)); 5067c478bd9Sstevel@tonic-gate } 5077c478bd9Sstevel@tonic-gate 5087c478bd9Sstevel@tonic-gate /* 5097c478bd9Sstevel@tonic-gate * Select the desired position for the next block in a file. The file is 5107c478bd9Sstevel@tonic-gate * logically divided into sections. The first section is composed of the 5117c478bd9Sstevel@tonic-gate * direct blocks. Each additional section contains fs_maxbpg blocks. 5127c478bd9Sstevel@tonic-gate * 5137c478bd9Sstevel@tonic-gate * If no blocks have been allocated in the first section, the policy is to 5147c478bd9Sstevel@tonic-gate * request a block in the same cylinder group as the inode that describes 5157c478bd9Sstevel@tonic-gate * the file. If no blocks have been allocated in any other section, the 5167c478bd9Sstevel@tonic-gate * policy is to place the section in a cylinder group with a greater than 5177c478bd9Sstevel@tonic-gate * average number of free blocks. An appropriate cylinder group is found 5187c478bd9Sstevel@tonic-gate * by using a rotor that sweeps the cylinder groups. When a new group of 5197c478bd9Sstevel@tonic-gate * blocks is needed, the sweep begins in the cylinder group following the 5207c478bd9Sstevel@tonic-gate * cylinder group from which the previous allocation was made. The sweep 5217c478bd9Sstevel@tonic-gate * continues until a cylinder group with greater than the average number 5227c478bd9Sstevel@tonic-gate * of free blocks is found. If the allocation is for the first block in an 5237c478bd9Sstevel@tonic-gate * indirect block, the information on the previous allocation is unavailable; 5247c478bd9Sstevel@tonic-gate * here a best guess is made based upon the logical block number being 5257c478bd9Sstevel@tonic-gate * allocated. 5267c478bd9Sstevel@tonic-gate * 5277c478bd9Sstevel@tonic-gate * If a section is already partially allocated, the policy is to 5287c478bd9Sstevel@tonic-gate * contiguously allocate fs_maxcontig blocks. The end of one of these 5297c478bd9Sstevel@tonic-gate * contiguous blocks and the beginning of the next is physically separated 5307c478bd9Sstevel@tonic-gate * so that the disk head will be in transit between them for at least 5317c478bd9Sstevel@tonic-gate * fs_rotdelay milliseconds. This is to allow time for the processor to 5327c478bd9Sstevel@tonic-gate * schedule another I/O transfer. 5337c478bd9Sstevel@tonic-gate */ 5347c478bd9Sstevel@tonic-gate daddr_t 5357c478bd9Sstevel@tonic-gate blkpref(struct inode *ip, daddr_t lbn, int indx, daddr32_t *bap) 5367c478bd9Sstevel@tonic-gate { 5377c478bd9Sstevel@tonic-gate struct fs *fs; 5387c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 5397c478bd9Sstevel@tonic-gate int cg; 5407c478bd9Sstevel@tonic-gate int avgbfree, startcg; 5417c478bd9Sstevel@tonic-gate daddr_t nextblk; 5427c478bd9Sstevel@tonic-gate 5437c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 5447c478bd9Sstevel@tonic-gate fs = ip->i_fs; 5457c478bd9Sstevel@tonic-gate if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 5467c478bd9Sstevel@tonic-gate if (lbn < NDADDR) { 5477c478bd9Sstevel@tonic-gate cg = itog(fs, ip->i_number); 5487c478bd9Sstevel@tonic-gate return (fs->fs_fpg * cg + fs->fs_frag); 5497c478bd9Sstevel@tonic-gate } 5507c478bd9Sstevel@tonic-gate /* 5517c478bd9Sstevel@tonic-gate * Find a cylinder with greater than average 5527c478bd9Sstevel@tonic-gate * number of unused data blocks. 5537c478bd9Sstevel@tonic-gate */ 5547c478bd9Sstevel@tonic-gate if (indx == 0 || bap[indx - 1] == 0) 5557c478bd9Sstevel@tonic-gate startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg; 5567c478bd9Sstevel@tonic-gate else 5577c478bd9Sstevel@tonic-gate startcg = dtog(fs, bap[indx - 1]) + 1; 5587c478bd9Sstevel@tonic-gate startcg %= fs->fs_ncg; 5597c478bd9Sstevel@tonic-gate 5607c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 5617c478bd9Sstevel@tonic-gate avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 5627c478bd9Sstevel@tonic-gate /* 5637c478bd9Sstevel@tonic-gate * used for computing log space for writes/truncs 5647c478bd9Sstevel@tonic-gate */ 5657c478bd9Sstevel@tonic-gate ufsvfsp->vfs_avgbfree = avgbfree; 5667c478bd9Sstevel@tonic-gate for (cg = startcg; cg < fs->fs_ncg; cg++) 5677c478bd9Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 5687c478bd9Sstevel@tonic-gate fs->fs_cgrotor = cg; 5697c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 5707c478bd9Sstevel@tonic-gate return (fs->fs_fpg * cg + fs->fs_frag); 5717c478bd9Sstevel@tonic-gate } 5727c478bd9Sstevel@tonic-gate for (cg = 0; cg <= startcg; cg++) 5737c478bd9Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 5747c478bd9Sstevel@tonic-gate fs->fs_cgrotor = cg; 5757c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 5767c478bd9Sstevel@tonic-gate return (fs->fs_fpg * cg + fs->fs_frag); 5777c478bd9Sstevel@tonic-gate } 5787c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 5797c478bd9Sstevel@tonic-gate return (NULL); 5807c478bd9Sstevel@tonic-gate } 5817c478bd9Sstevel@tonic-gate /* 5827c478bd9Sstevel@tonic-gate * One or more previous blocks have been laid out. If less 5837c478bd9Sstevel@tonic-gate * than fs_maxcontig previous blocks are contiguous, the 5847c478bd9Sstevel@tonic-gate * next block is requested contiguously, otherwise it is 5857c478bd9Sstevel@tonic-gate * requested rotationally delayed by fs_rotdelay milliseconds. 5867c478bd9Sstevel@tonic-gate */ 587303bf60bSsdebnath 588303bf60bSsdebnath nextblk = bap[indx - 1]; 589303bf60bSsdebnath /* 590303bf60bSsdebnath * Provision for fallocate to return positive 591303bf60bSsdebnath * blk preference based on last allocation 592303bf60bSsdebnath */ 593303bf60bSsdebnath if (nextblk < 0 && nextblk != UFS_HOLE) { 594303bf60bSsdebnath nextblk = (-bap[indx - 1]) + fs->fs_frag; 595303bf60bSsdebnath } else { 5967c478bd9Sstevel@tonic-gate nextblk = bap[indx - 1] + fs->fs_frag; 597303bf60bSsdebnath } 598303bf60bSsdebnath 599303bf60bSsdebnath if (indx > fs->fs_maxcontig && bap[indx - fs->fs_maxcontig] + 600303bf60bSsdebnath blkstofrags(fs, fs->fs_maxcontig) != nextblk) { 6017c478bd9Sstevel@tonic-gate return (nextblk); 602303bf60bSsdebnath } 6037c478bd9Sstevel@tonic-gate if (fs->fs_rotdelay != 0) 6047c478bd9Sstevel@tonic-gate /* 6057c478bd9Sstevel@tonic-gate * Here we convert ms of delay to frags as: 6067c478bd9Sstevel@tonic-gate * (frags) = (ms) * (rev/sec) * (sect/rev) / 6077c478bd9Sstevel@tonic-gate * ((sect/frag) * (ms/sec)) 6087c478bd9Sstevel@tonic-gate * then round up to the next block. 6097c478bd9Sstevel@tonic-gate */ 6107c478bd9Sstevel@tonic-gate nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / 6117c478bd9Sstevel@tonic-gate (NSPF(fs) * 1000), fs->fs_frag); 6127c478bd9Sstevel@tonic-gate return (nextblk); 6137c478bd9Sstevel@tonic-gate } 6147c478bd9Sstevel@tonic-gate 6157c478bd9Sstevel@tonic-gate /* 6167c478bd9Sstevel@tonic-gate * Free a block or fragment. 6177c478bd9Sstevel@tonic-gate * 6187c478bd9Sstevel@tonic-gate * The specified block or fragment is placed back in the 6197c478bd9Sstevel@tonic-gate * free map. If a fragment is deallocated, a possible 6207c478bd9Sstevel@tonic-gate * block reassembly is checked. 6217c478bd9Sstevel@tonic-gate */ 6227c478bd9Sstevel@tonic-gate void 6237c478bd9Sstevel@tonic-gate free(struct inode *ip, daddr_t bno, off_t size, int flags) 6247c478bd9Sstevel@tonic-gate { 6257c478bd9Sstevel@tonic-gate struct fs *fs = ip->i_fs; 6267c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 627121be23bSjkennedy struct ufs_q *delq = &ufsvfsp->vfs_delete; 628121be23bSjkennedy struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info; 6297c478bd9Sstevel@tonic-gate struct cg *cgp; 6307c478bd9Sstevel@tonic-gate struct buf *bp; 6317c478bd9Sstevel@tonic-gate int cg, bmap, bbase; 6327c478bd9Sstevel@tonic-gate int i; 6337c478bd9Sstevel@tonic-gate uchar_t *blksfree; 6347c478bd9Sstevel@tonic-gate int *blktot; 6357c478bd9Sstevel@tonic-gate short *blks; 6367c478bd9Sstevel@tonic-gate daddr_t blkno, cylno, rpos; 6377c478bd9Sstevel@tonic-gate 638303bf60bSsdebnath /* 639303bf60bSsdebnath * fallocate'd files will have negative block address. 640303bf60bSsdebnath * So negate it again to get original block address. 641303bf60bSsdebnath */ 64233c22cb3Smishra if (bno < 0 && (bno % fs->fs_frag == 0) && bno != UFS_HOLE) { 643303bf60bSsdebnath bno = -bno; 644303bf60bSsdebnath } 645303bf60bSsdebnath 6467c478bd9Sstevel@tonic-gate if ((unsigned long)size > fs->fs_bsize || fragoff(fs, size) != 0) { 6477c478bd9Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 648303bf60bSsdebnath "free: bad size, dev = 0x%lx, bsize = %d, size = %d, " 649303bf60bSsdebnath "fs = %s\n", ip->i_dev, fs->fs_bsize, 650303bf60bSsdebnath (int)size, fs->fs_fsmnt); 6517c478bd9Sstevel@tonic-gate return; 6527c478bd9Sstevel@tonic-gate } 6537c478bd9Sstevel@tonic-gate cg = dtog(fs, bno); 6547c478bd9Sstevel@tonic-gate ASSERT(!ufs_badblock(ip, bno)); 6557c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 6567c478bd9Sstevel@tonic-gate (int)fs->fs_cgsize); 6577c478bd9Sstevel@tonic-gate 6587c478bd9Sstevel@tonic-gate cgp = bp->b_un.b_cg; 6597c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) { 6607c478bd9Sstevel@tonic-gate brelse(bp); 6617c478bd9Sstevel@tonic-gate return; 6627c478bd9Sstevel@tonic-gate } 6637c478bd9Sstevel@tonic-gate 6647c478bd9Sstevel@tonic-gate if (!(flags & I_NOCANCEL)) 6657c478bd9Sstevel@tonic-gate TRANS_CANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size, flags); 6667c478bd9Sstevel@tonic-gate if (flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA)) { 6677c478bd9Sstevel@tonic-gate TRANS_MATA_FREE(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size); 6687c478bd9Sstevel@tonic-gate } 6697c478bd9Sstevel@tonic-gate blksfree = cg_blksfree(cgp); 6707c478bd9Sstevel@tonic-gate blktot = cg_blktot(cgp); 6717c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 6727c478bd9Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 6737c478bd9Sstevel@tonic-gate bno = dtogd(fs, bno); 6747c478bd9Sstevel@tonic-gate if (size == fs->fs_bsize) { 6757c478bd9Sstevel@tonic-gate blkno = fragstoblks(fs, bno); 6767c478bd9Sstevel@tonic-gate cylno = cbtocylno(fs, bno); 6777c478bd9Sstevel@tonic-gate rpos = cbtorpos(ufsvfsp, bno); 6787c478bd9Sstevel@tonic-gate blks = cg_blks(ufsvfsp, cgp, cylno); 6797c478bd9Sstevel@tonic-gate if (!isclrblock(fs, blksfree, blkno)) { 6807c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 6817c478bd9Sstevel@tonic-gate brelse(bp); 6827c478bd9Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), "free: freeing free block, " 6837c478bd9Sstevel@tonic-gate "dev:0x%lx, block:%ld, ino:%lu, fs:%s", 6847c478bd9Sstevel@tonic-gate ip->i_dev, bno, ip->i_number, fs->fs_fsmnt); 6857c478bd9Sstevel@tonic-gate return; 6867c478bd9Sstevel@tonic-gate } 6877c478bd9Sstevel@tonic-gate setblock(fs, blksfree, blkno); 6887c478bd9Sstevel@tonic-gate blks[rpos]++; 6897c478bd9Sstevel@tonic-gate blktot[cylno]++; 6907c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nbfree++; /* Log below */ 6917c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nbfree++; 6927c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nbfree++; 693121be23bSjkennedy if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) { 694121be23bSjkennedy mutex_enter(&delq->uq_mutex); 695121be23bSjkennedy delq_info->delq_unreclaimed_blocks -= 696121be23bSjkennedy btodb(fs->fs_bsize); 697121be23bSjkennedy mutex_exit(&delq->uq_mutex); 698121be23bSjkennedy } 6997c478bd9Sstevel@tonic-gate } else { 7007c478bd9Sstevel@tonic-gate bbase = bno - fragnum(fs, bno); 7017c478bd9Sstevel@tonic-gate /* 7027c478bd9Sstevel@tonic-gate * Decrement the counts associated with the old frags 7037c478bd9Sstevel@tonic-gate */ 7047c478bd9Sstevel@tonic-gate bmap = blkmap(fs, blksfree, bbase); 7057c478bd9Sstevel@tonic-gate fragacct(fs, bmap, cgp->cg_frsum, -1); 7067c478bd9Sstevel@tonic-gate /* 7077c478bd9Sstevel@tonic-gate * Deallocate the fragment 7087c478bd9Sstevel@tonic-gate */ 7097c478bd9Sstevel@tonic-gate for (i = 0; i < numfrags(fs, size); i++) { 7107c478bd9Sstevel@tonic-gate if (isset(blksfree, bno + i)) { 7117c478bd9Sstevel@tonic-gate brelse(bp); 7127c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 7137c478bd9Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 7147c478bd9Sstevel@tonic-gate "free: freeing free frag, " 7157c478bd9Sstevel@tonic-gate "dev:0x%lx, blk:%ld, cg:%d, " 7167c478bd9Sstevel@tonic-gate "ino:%lu, fs:%s", 7177c478bd9Sstevel@tonic-gate ip->i_dev, 7187c478bd9Sstevel@tonic-gate bno + i, 7197c478bd9Sstevel@tonic-gate cgp->cg_cgx, 7207c478bd9Sstevel@tonic-gate ip->i_number, 7217c478bd9Sstevel@tonic-gate fs->fs_fsmnt); 7227c478bd9Sstevel@tonic-gate return; 7237c478bd9Sstevel@tonic-gate } 7247c478bd9Sstevel@tonic-gate setbit(blksfree, bno + i); 7257c478bd9Sstevel@tonic-gate } 7267c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nffree += i; 7277c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nffree += i; 7287c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree += i; 729121be23bSjkennedy if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) { 730121be23bSjkennedy mutex_enter(&delq->uq_mutex); 731121be23bSjkennedy delq_info->delq_unreclaimed_blocks -= 732121be23bSjkennedy btodb(i * fs->fs_fsize); 733121be23bSjkennedy mutex_exit(&delq->uq_mutex); 734121be23bSjkennedy } 7357c478bd9Sstevel@tonic-gate /* 7367c478bd9Sstevel@tonic-gate * Add back in counts associated with the new frags 7377c478bd9Sstevel@tonic-gate */ 7387c478bd9Sstevel@tonic-gate bmap = blkmap(fs, blksfree, bbase); 7397c478bd9Sstevel@tonic-gate fragacct(fs, bmap, cgp->cg_frsum, 1); 7407c478bd9Sstevel@tonic-gate /* 7417c478bd9Sstevel@tonic-gate * If a complete block has been reassembled, account for it 7427c478bd9Sstevel@tonic-gate */ 7437c478bd9Sstevel@tonic-gate blkno = fragstoblks(fs, bbase); 7447c478bd9Sstevel@tonic-gate if (isblock(fs, blksfree, blkno)) { 7457c478bd9Sstevel@tonic-gate cylno = cbtocylno(fs, bbase); 7467c478bd9Sstevel@tonic-gate rpos = cbtorpos(ufsvfsp, bbase); 7477c478bd9Sstevel@tonic-gate blks = cg_blks(ufsvfsp, cgp, cylno); 7487c478bd9Sstevel@tonic-gate blks[rpos]++; 7497c478bd9Sstevel@tonic-gate blktot[cylno]++; 7507c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nffree -= fs->fs_frag; 7517c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nffree -= fs->fs_frag; 7527c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 7537c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nbfree++; 7547c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nbfree++; 7557c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nbfree++; 7567c478bd9Sstevel@tonic-gate } 7577c478bd9Sstevel@tonic-gate } 7587c478bd9Sstevel@tonic-gate fs->fs_fmod = 1; 7597c478bd9Sstevel@tonic-gate ufs_notclean(ufsvfsp); 7607c478bd9Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 7617c478bd9Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 7627c478bd9Sstevel@tonic-gate bdrwrite(bp); 7637c478bd9Sstevel@tonic-gate } 7647c478bd9Sstevel@tonic-gate 7657c478bd9Sstevel@tonic-gate /* 7667c478bd9Sstevel@tonic-gate * Free an inode. 7677c478bd9Sstevel@tonic-gate * 7687c478bd9Sstevel@tonic-gate * The specified inode is placed back in the free map. 7697c478bd9Sstevel@tonic-gate */ 7707c478bd9Sstevel@tonic-gate void 7717c478bd9Sstevel@tonic-gate ufs_ifree(struct inode *ip, ino_t ino, mode_t mode) 7727c478bd9Sstevel@tonic-gate { 7737c478bd9Sstevel@tonic-gate struct fs *fs = ip->i_fs; 7747c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 7757c478bd9Sstevel@tonic-gate struct cg *cgp; 7767c478bd9Sstevel@tonic-gate struct buf *bp; 7777c478bd9Sstevel@tonic-gate unsigned int inot; 7787c478bd9Sstevel@tonic-gate int cg; 7797c478bd9Sstevel@tonic-gate char *iused; 7807c478bd9Sstevel@tonic-gate 7817c478bd9Sstevel@tonic-gate if (ip->i_number == ino && ip->i_mode != 0) { 7827c478bd9Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 7837c478bd9Sstevel@tonic-gate "ufs_ifree: illegal mode: (imode) %o, (omode) %o, ino %d, " 7847c478bd9Sstevel@tonic-gate "fs = %s\n", 7857c478bd9Sstevel@tonic-gate ip->i_mode, mode, (int)ip->i_number, fs->fs_fsmnt); 7867c478bd9Sstevel@tonic-gate return; 7877c478bd9Sstevel@tonic-gate } 7887c478bd9Sstevel@tonic-gate if (ino >= fs->fs_ipg * fs->fs_ncg) { 7897c478bd9Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 7907c478bd9Sstevel@tonic-gate "ifree: range, dev = 0x%x, ino = %d, fs = %s\n", 7917c478bd9Sstevel@tonic-gate (int)ip->i_dev, (int)ino, fs->fs_fsmnt); 7927c478bd9Sstevel@tonic-gate return; 7937c478bd9Sstevel@tonic-gate } 7947c478bd9Sstevel@tonic-gate cg = (int)itog(fs, ino); 7957c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 7967c478bd9Sstevel@tonic-gate (int)fs->fs_cgsize); 7977c478bd9Sstevel@tonic-gate 7987c478bd9Sstevel@tonic-gate cgp = bp->b_un.b_cg; 7997c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) { 8007c478bd9Sstevel@tonic-gate brelse(bp); 8017c478bd9Sstevel@tonic-gate return; 8027c478bd9Sstevel@tonic-gate } 8037c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 8047c478bd9Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 8057c478bd9Sstevel@tonic-gate iused = cg_inosused(cgp); 8067c478bd9Sstevel@tonic-gate inot = (unsigned int)(ino % (ulong_t)fs->fs_ipg); 8077c478bd9Sstevel@tonic-gate if (isclr(iused, inot)) { 8087c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 8097c478bd9Sstevel@tonic-gate brelse(bp); 8107c478bd9Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), "ufs_ifree: freeing free inode, " 8117c478bd9Sstevel@tonic-gate "mode: (imode) %o, (omode) %o, ino:%d, " 8127c478bd9Sstevel@tonic-gate "fs:%s", 8137c478bd9Sstevel@tonic-gate ip->i_mode, mode, (int)ino, fs->fs_fsmnt); 8147c478bd9Sstevel@tonic-gate return; 8157c478bd9Sstevel@tonic-gate } 8167c478bd9Sstevel@tonic-gate clrbit(iused, inot); 8177c478bd9Sstevel@tonic-gate 8187c478bd9Sstevel@tonic-gate if (inot < (ulong_t)cgp->cg_irotor) 8197c478bd9Sstevel@tonic-gate cgp->cg_irotor = inot; 8207c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nifree++; 8217c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nifree++; 8227c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nifree++; 8237c478bd9Sstevel@tonic-gate if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) { 8247c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_ndir--; 8257c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_ndir--; 8267c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_ndir--; 8277c478bd9Sstevel@tonic-gate } 8287c478bd9Sstevel@tonic-gate fs->fs_fmod = 1; 8297c478bd9Sstevel@tonic-gate ufs_notclean(ufsvfsp); 8307c478bd9Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 8317c478bd9Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 8327c478bd9Sstevel@tonic-gate bdrwrite(bp); 8337c478bd9Sstevel@tonic-gate } 8347c478bd9Sstevel@tonic-gate 8357c478bd9Sstevel@tonic-gate /* 8367c478bd9Sstevel@tonic-gate * Implement the cylinder overflow algorithm. 8377c478bd9Sstevel@tonic-gate * 8387c478bd9Sstevel@tonic-gate * The policy implemented by this algorithm is: 8397c478bd9Sstevel@tonic-gate * 1) allocate the block in its requested cylinder group. 8407c478bd9Sstevel@tonic-gate * 2) quadratically rehash on the cylinder group number. 8417c478bd9Sstevel@tonic-gate * 3) brute force search for a free block. 8427c478bd9Sstevel@tonic-gate * The size parameter means size for data blocks, mode for inodes. 8437c478bd9Sstevel@tonic-gate */ 8447c478bd9Sstevel@tonic-gate static ino_t 8457c478bd9Sstevel@tonic-gate hashalloc(struct inode *ip, int cg, long pref, int size, ulong_t (*allocator)()) 8467c478bd9Sstevel@tonic-gate { 8477c478bd9Sstevel@tonic-gate struct fs *fs; 8487c478bd9Sstevel@tonic-gate int i; 8497c478bd9Sstevel@tonic-gate long result; 8507c478bd9Sstevel@tonic-gate int icg = cg; 8517c478bd9Sstevel@tonic-gate 8527c478bd9Sstevel@tonic-gate fs = ip->i_fs; 8537c478bd9Sstevel@tonic-gate /* 8547c478bd9Sstevel@tonic-gate * 1: preferred cylinder group 8557c478bd9Sstevel@tonic-gate */ 8567c478bd9Sstevel@tonic-gate result = (*allocator)(ip, cg, pref, size); 8577c478bd9Sstevel@tonic-gate if (result) 8587c478bd9Sstevel@tonic-gate return (result); 8597c478bd9Sstevel@tonic-gate /* 8607c478bd9Sstevel@tonic-gate * 2: quadratic rehash 8617c478bd9Sstevel@tonic-gate */ 8627c478bd9Sstevel@tonic-gate for (i = 1; i < fs->fs_ncg; i *= 2) { 8637c478bd9Sstevel@tonic-gate cg += i; 8647c478bd9Sstevel@tonic-gate if (cg >= fs->fs_ncg) 8657c478bd9Sstevel@tonic-gate cg -= fs->fs_ncg; 8667c478bd9Sstevel@tonic-gate result = (*allocator)(ip, cg, 0, size); 8677c478bd9Sstevel@tonic-gate if (result) 8687c478bd9Sstevel@tonic-gate return (result); 8697c478bd9Sstevel@tonic-gate } 8707c478bd9Sstevel@tonic-gate /* 8717c478bd9Sstevel@tonic-gate * 3: brute force search 8727c478bd9Sstevel@tonic-gate * Note that we start at i == 2, since 0 was checked initially, 8737c478bd9Sstevel@tonic-gate * and 1 is always checked in the quadratic rehash. 8747c478bd9Sstevel@tonic-gate */ 8757c478bd9Sstevel@tonic-gate cg = (icg + 2) % fs->fs_ncg; 8767c478bd9Sstevel@tonic-gate for (i = 2; i < fs->fs_ncg; i++) { 8777c478bd9Sstevel@tonic-gate result = (*allocator)(ip, cg, 0, size); 8787c478bd9Sstevel@tonic-gate if (result) 8797c478bd9Sstevel@tonic-gate return (result); 8807c478bd9Sstevel@tonic-gate cg++; 8817c478bd9Sstevel@tonic-gate if (cg == fs->fs_ncg) 8827c478bd9Sstevel@tonic-gate cg = 0; 8837c478bd9Sstevel@tonic-gate } 8847c478bd9Sstevel@tonic-gate return (NULL); 8857c478bd9Sstevel@tonic-gate } 8867c478bd9Sstevel@tonic-gate 8877c478bd9Sstevel@tonic-gate /* 8887c478bd9Sstevel@tonic-gate * Determine whether a fragment can be extended. 8897c478bd9Sstevel@tonic-gate * 8907c478bd9Sstevel@tonic-gate * Check to see if the necessary fragments are available, and 8917c478bd9Sstevel@tonic-gate * if they are, allocate them. 8927c478bd9Sstevel@tonic-gate */ 8937c478bd9Sstevel@tonic-gate static daddr_t 8947c478bd9Sstevel@tonic-gate fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize) 8957c478bd9Sstevel@tonic-gate { 8967c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 8977c478bd9Sstevel@tonic-gate struct fs *fs = ip->i_fs; 8987c478bd9Sstevel@tonic-gate struct buf *bp; 8997c478bd9Sstevel@tonic-gate struct cg *cgp; 9007c478bd9Sstevel@tonic-gate uchar_t *blksfree; 9017c478bd9Sstevel@tonic-gate long bno; 9027c478bd9Sstevel@tonic-gate int frags, bbase; 9037c478bd9Sstevel@tonic-gate int i, j; 9047c478bd9Sstevel@tonic-gate 9057c478bd9Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) 9067c478bd9Sstevel@tonic-gate return (NULL); 9077c478bd9Sstevel@tonic-gate frags = numfrags(fs, nsize); 9087c478bd9Sstevel@tonic-gate bbase = (int)fragnum(fs, bprev); 9097c478bd9Sstevel@tonic-gate if (bbase > fragnum(fs, (bprev + frags - 1))) { 9107c478bd9Sstevel@tonic-gate /* cannot extend across a block boundary */ 9117c478bd9Sstevel@tonic-gate return (NULL); 9127c478bd9Sstevel@tonic-gate } 9137c478bd9Sstevel@tonic-gate 9147c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 9157c478bd9Sstevel@tonic-gate (int)fs->fs_cgsize); 9167c478bd9Sstevel@tonic-gate cgp = bp->b_un.b_cg; 9177c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) { 9187c478bd9Sstevel@tonic-gate brelse(bp); 9197c478bd9Sstevel@tonic-gate return (NULL); 9207c478bd9Sstevel@tonic-gate } 9217c478bd9Sstevel@tonic-gate 9227c478bd9Sstevel@tonic-gate blksfree = cg_blksfree(cgp); 9237c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 9247c478bd9Sstevel@tonic-gate bno = dtogd(fs, bprev); 9257c478bd9Sstevel@tonic-gate for (i = numfrags(fs, osize); i < frags; i++) { 9267c478bd9Sstevel@tonic-gate if (isclr(blksfree, bno + i)) { 9277c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 9287c478bd9Sstevel@tonic-gate brelse(bp); 9297c478bd9Sstevel@tonic-gate return (NULL); 9307c478bd9Sstevel@tonic-gate } 9317c478bd9Sstevel@tonic-gate if ((TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bprev + i)), 9327c478bd9Sstevel@tonic-gate fs->fs_fsize))) { 9337c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 9347c478bd9Sstevel@tonic-gate brelse(bp); 9357c478bd9Sstevel@tonic-gate return (NULL); 9367c478bd9Sstevel@tonic-gate } 9377c478bd9Sstevel@tonic-gate } 9387c478bd9Sstevel@tonic-gate 9397c478bd9Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 9407c478bd9Sstevel@tonic-gate /* 9417c478bd9Sstevel@tonic-gate * The current fragment can be extended, 9427c478bd9Sstevel@tonic-gate * deduct the count on fragment being extended into 9437c478bd9Sstevel@tonic-gate * increase the count on the remaining fragment (if any) 9447c478bd9Sstevel@tonic-gate * allocate the extended piece. 9457c478bd9Sstevel@tonic-gate */ 9467c478bd9Sstevel@tonic-gate for (i = frags; i < fs->fs_frag - bbase; i++) 9477c478bd9Sstevel@tonic-gate if (isclr(blksfree, bno + i)) 9487c478bd9Sstevel@tonic-gate break; 9497c478bd9Sstevel@tonic-gate j = i - numfrags(fs, osize); 9507c478bd9Sstevel@tonic-gate cgp->cg_frsum[j]--; 9517c478bd9Sstevel@tonic-gate ASSERT(cgp->cg_frsum[j] >= 0); 9527c478bd9Sstevel@tonic-gate if (i != frags) 9537c478bd9Sstevel@tonic-gate cgp->cg_frsum[i - frags]++; 9547c478bd9Sstevel@tonic-gate for (i = numfrags(fs, osize); i < frags; i++) { 9557c478bd9Sstevel@tonic-gate clrbit(blksfree, bno + i); 9567c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nffree--; 9577c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree--; 9587c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nffree--; 9597c478bd9Sstevel@tonic-gate } 9607c478bd9Sstevel@tonic-gate fs->fs_fmod = 1; 9617c478bd9Sstevel@tonic-gate ufs_notclean(ufsvfsp); 9627c478bd9Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 9637c478bd9Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 9647c478bd9Sstevel@tonic-gate bdrwrite(bp); 9657c478bd9Sstevel@tonic-gate return ((daddr_t)bprev); 9667c478bd9Sstevel@tonic-gate } 9677c478bd9Sstevel@tonic-gate 9687c478bd9Sstevel@tonic-gate /* 9697c478bd9Sstevel@tonic-gate * Determine whether a block can be allocated. 9707c478bd9Sstevel@tonic-gate * 9717c478bd9Sstevel@tonic-gate * Check to see if a block of the apprpriate size 9727c478bd9Sstevel@tonic-gate * is available, and if it is, allocate it. 9737c478bd9Sstevel@tonic-gate */ 9747c478bd9Sstevel@tonic-gate static daddr_t 9757c478bd9Sstevel@tonic-gate alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 9767c478bd9Sstevel@tonic-gate { 9777c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 9787c478bd9Sstevel@tonic-gate struct fs *fs = ip->i_fs; 9797c478bd9Sstevel@tonic-gate struct buf *bp; 9807c478bd9Sstevel@tonic-gate struct cg *cgp; 9817c478bd9Sstevel@tonic-gate uchar_t *blksfree; 9827c478bd9Sstevel@tonic-gate int bno, frags; 9837c478bd9Sstevel@tonic-gate int allocsiz; 9847c478bd9Sstevel@tonic-gate int i; 9857c478bd9Sstevel@tonic-gate 986b3143109Smishra /* 987b3143109Smishra * Searching for space could be time expensive so do some 988b3143109Smishra * up front checking to verify that there is actually space 989b3143109Smishra * available (free blocks or free frags). 990b3143109Smishra */ 991b3143109Smishra if (fs->fs_cs(fs, cg).cs_nbfree == 0) { 992b3143109Smishra if (size == fs->fs_bsize) 9937c478bd9Sstevel@tonic-gate return (0); 994b3143109Smishra 995b3143109Smishra /* 996b3143109Smishra * If there are not enough free frags then return. 997b3143109Smishra */ 998b3143109Smishra if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, size)) 999b3143109Smishra return (0); 1000b3143109Smishra } 1001b3143109Smishra 10027c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 10037c478bd9Sstevel@tonic-gate (int)fs->fs_cgsize); 10047c478bd9Sstevel@tonic-gate 10057c478bd9Sstevel@tonic-gate cgp = bp->b_un.b_cg; 10067c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) || 10077c478bd9Sstevel@tonic-gate (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { 10087c478bd9Sstevel@tonic-gate brelse(bp); 10097c478bd9Sstevel@tonic-gate return (0); 10107c478bd9Sstevel@tonic-gate } 10117c478bd9Sstevel@tonic-gate blksfree = cg_blksfree(cgp); 10127c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 10137c478bd9Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 10147c478bd9Sstevel@tonic-gate if (size == fs->fs_bsize) { 10157c478bd9Sstevel@tonic-gate if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0) 10167c478bd9Sstevel@tonic-gate goto errout; 10177c478bd9Sstevel@tonic-gate fs->fs_fmod = 1; 10187c478bd9Sstevel@tonic-gate ufs_notclean(ufsvfsp); 10197c478bd9Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 10207c478bd9Sstevel@tonic-gate bdrwrite(bp); 10217c478bd9Sstevel@tonic-gate return (bno); 10227c478bd9Sstevel@tonic-gate } 10237c478bd9Sstevel@tonic-gate /* 10240d5abb8cSViswanathan Kannappan * Check fragment bitmap to see if any fragments are already available. 10250d5abb8cSViswanathan Kannappan * mapsearch() may fail because the fragment that fits this request 10260d5abb8cSViswanathan Kannappan * might still be on the cancel list and not available for re-use yet. 10270d5abb8cSViswanathan Kannappan * Look for a bigger sized fragment to allocate first before we have 10280d5abb8cSViswanathan Kannappan * to give up and fragment a whole new block eventually. 10297c478bd9Sstevel@tonic-gate */ 10307c478bd9Sstevel@tonic-gate frags = numfrags(fs, size); 10310d5abb8cSViswanathan Kannappan allocsiz = frags; 10320d5abb8cSViswanathan Kannappan next_size: 10330d5abb8cSViswanathan Kannappan for (; allocsiz < fs->fs_frag; allocsiz++) 10347c478bd9Sstevel@tonic-gate if (cgp->cg_frsum[allocsiz] != 0) 10357c478bd9Sstevel@tonic-gate break; 10367c478bd9Sstevel@tonic-gate 10370d5abb8cSViswanathan Kannappan if (allocsiz != fs->fs_frag) { 10387c478bd9Sstevel@tonic-gate bno = mapsearch(ufsvfsp, cgp, bpref, allocsiz); 10390d5abb8cSViswanathan Kannappan if (bno < 0 && allocsiz < (fs->fs_frag - 1)) { 10400d5abb8cSViswanathan Kannappan allocsiz++; 10410d5abb8cSViswanathan Kannappan goto next_size; 10420d5abb8cSViswanathan Kannappan } 10430d5abb8cSViswanathan Kannappan } 10447c478bd9Sstevel@tonic-gate 10457c478bd9Sstevel@tonic-gate if (allocsiz == fs->fs_frag || bno < 0) { 10467c478bd9Sstevel@tonic-gate /* 10477c478bd9Sstevel@tonic-gate * No fragments were available, so a block 10487c478bd9Sstevel@tonic-gate * will be allocated and hacked up. 10497c478bd9Sstevel@tonic-gate */ 10507c478bd9Sstevel@tonic-gate if (cgp->cg_cs.cs_nbfree == 0) 10517c478bd9Sstevel@tonic-gate goto errout; 10527c478bd9Sstevel@tonic-gate if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0) 10537c478bd9Sstevel@tonic-gate goto errout; 10547c478bd9Sstevel@tonic-gate bpref = dtogd(fs, bno); 10557c478bd9Sstevel@tonic-gate for (i = frags; i < fs->fs_frag; i++) 10567c478bd9Sstevel@tonic-gate setbit(blksfree, bpref + i); 10577c478bd9Sstevel@tonic-gate i = fs->fs_frag - frags; 10587c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nffree += i; 10597c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nffree += i; 10607c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree += i; 10617c478bd9Sstevel@tonic-gate cgp->cg_frsum[i]++; 10627c478bd9Sstevel@tonic-gate fs->fs_fmod = 1; 10637c478bd9Sstevel@tonic-gate ufs_notclean(ufsvfsp); 10647c478bd9Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 10657c478bd9Sstevel@tonic-gate bdrwrite(bp); 10667c478bd9Sstevel@tonic-gate return (bno); 10677c478bd9Sstevel@tonic-gate } 10687c478bd9Sstevel@tonic-gate 10697c478bd9Sstevel@tonic-gate for (i = 0; i < frags; i++) 10707c478bd9Sstevel@tonic-gate clrbit(blksfree, bno + i); 10717c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nffree -= frags; 10727c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nffree -= frags; 10737c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree -= frags; 10747c478bd9Sstevel@tonic-gate cgp->cg_frsum[allocsiz]--; 10757c478bd9Sstevel@tonic-gate ASSERT(cgp->cg_frsum[allocsiz] >= 0); 10767c478bd9Sstevel@tonic-gate if (frags != allocsiz) { 10777c478bd9Sstevel@tonic-gate cgp->cg_frsum[allocsiz - frags]++; 10787c478bd9Sstevel@tonic-gate } 10797c478bd9Sstevel@tonic-gate fs->fs_fmod = 1; 10807c478bd9Sstevel@tonic-gate ufs_notclean(ufsvfsp); 10817c478bd9Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 10827c478bd9Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 10837c478bd9Sstevel@tonic-gate bdrwrite(bp); 10847c478bd9Sstevel@tonic-gate return (cg * fs->fs_fpg + bno); 10857c478bd9Sstevel@tonic-gate errout: 10867c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 10877c478bd9Sstevel@tonic-gate brelse(bp); 10887c478bd9Sstevel@tonic-gate return (0); 10897c478bd9Sstevel@tonic-gate } 10907c478bd9Sstevel@tonic-gate 10917c478bd9Sstevel@tonic-gate /* 10927c478bd9Sstevel@tonic-gate * Allocate a block in a cylinder group. 10937c478bd9Sstevel@tonic-gate * 10947c478bd9Sstevel@tonic-gate * This algorithm implements the following policy: 10957c478bd9Sstevel@tonic-gate * 1) allocate the requested block. 10967c478bd9Sstevel@tonic-gate * 2) allocate a rotationally optimal block in the same cylinder. 10977c478bd9Sstevel@tonic-gate * 3) allocate the next available block on the block rotor for the 10987c478bd9Sstevel@tonic-gate * specified cylinder group. 10997c478bd9Sstevel@tonic-gate * Note that this routine only allocates fs_bsize blocks; these 11007c478bd9Sstevel@tonic-gate * blocks may be fragmented by the routine that allocates them. 11017c478bd9Sstevel@tonic-gate */ 11027c478bd9Sstevel@tonic-gate static daddr_t 11037c478bd9Sstevel@tonic-gate alloccgblk( 11047c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp, 11057c478bd9Sstevel@tonic-gate struct cg *cgp, 11067c478bd9Sstevel@tonic-gate daddr_t bpref, 11077c478bd9Sstevel@tonic-gate struct buf *bp) 11087c478bd9Sstevel@tonic-gate { 11097c478bd9Sstevel@tonic-gate daddr_t bno; 11107c478bd9Sstevel@tonic-gate int cylno, pos, delta, rotbl_size; 11117c478bd9Sstevel@tonic-gate short *cylbp; 11127c478bd9Sstevel@tonic-gate int i; 11137c478bd9Sstevel@tonic-gate struct fs *fs; 11147c478bd9Sstevel@tonic-gate uchar_t *blksfree; 11157c478bd9Sstevel@tonic-gate daddr_t blkno, rpos, frag; 11167c478bd9Sstevel@tonic-gate short *blks; 11177c478bd9Sstevel@tonic-gate int32_t *blktot; 11187c478bd9Sstevel@tonic-gate 11197c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock)); 11207c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 11217c478bd9Sstevel@tonic-gate blksfree = cg_blksfree(cgp); 11227c478bd9Sstevel@tonic-gate if (bpref == 0) { 11237c478bd9Sstevel@tonic-gate bpref = cgp->cg_rotor; 11247c478bd9Sstevel@tonic-gate goto norot; 11257c478bd9Sstevel@tonic-gate } 11267c478bd9Sstevel@tonic-gate bpref = blknum(fs, bpref); 11277c478bd9Sstevel@tonic-gate bpref = dtogd(fs, bpref); 11287c478bd9Sstevel@tonic-gate /* 11297c478bd9Sstevel@tonic-gate * If the requested block is available, use it. 11307c478bd9Sstevel@tonic-gate */ 11317c478bd9Sstevel@tonic-gate if (isblock(fs, blksfree, (daddr_t)fragstoblks(fs, bpref))) { 11327c478bd9Sstevel@tonic-gate bno = bpref; 11337c478bd9Sstevel@tonic-gate goto gotit; 11347c478bd9Sstevel@tonic-gate } 11357c478bd9Sstevel@tonic-gate /* 11367c478bd9Sstevel@tonic-gate * Check for a block available on the same cylinder. 11377c478bd9Sstevel@tonic-gate */ 11387c478bd9Sstevel@tonic-gate cylno = cbtocylno(fs, bpref); 11397c478bd9Sstevel@tonic-gate if (cg_blktot(cgp)[cylno] == 0) 11407c478bd9Sstevel@tonic-gate goto norot; 11417c478bd9Sstevel@tonic-gate if (fs->fs_cpc == 0) { 11427c478bd9Sstevel@tonic-gate /* 11437c478bd9Sstevel@tonic-gate * Block layout info is not available, so just 11447c478bd9Sstevel@tonic-gate * have to take any block in this cylinder. 11457c478bd9Sstevel@tonic-gate */ 11467c478bd9Sstevel@tonic-gate bpref = howmany(fs->fs_spc * cylno, NSPF(fs)); 11477c478bd9Sstevel@tonic-gate goto norot; 11487c478bd9Sstevel@tonic-gate } 11497c478bd9Sstevel@tonic-gate /* 11507c478bd9Sstevel@tonic-gate * Check the summary information to see if a block is 11517c478bd9Sstevel@tonic-gate * available in the requested cylinder starting at the 11527c478bd9Sstevel@tonic-gate * requested rotational position and proceeding around. 11537c478bd9Sstevel@tonic-gate */ 11547c478bd9Sstevel@tonic-gate cylbp = cg_blks(ufsvfsp, cgp, cylno); 11557c478bd9Sstevel@tonic-gate pos = cbtorpos(ufsvfsp, bpref); 11567c478bd9Sstevel@tonic-gate for (i = pos; i < ufsvfsp->vfs_nrpos; i++) 11577c478bd9Sstevel@tonic-gate if (cylbp[i] > 0) 11587c478bd9Sstevel@tonic-gate break; 11597c478bd9Sstevel@tonic-gate if (i == ufsvfsp->vfs_nrpos) 11607c478bd9Sstevel@tonic-gate for (i = 0; i < pos; i++) 11617c478bd9Sstevel@tonic-gate if (cylbp[i] > 0) 11627c478bd9Sstevel@tonic-gate break; 11637c478bd9Sstevel@tonic-gate if (cylbp[i] > 0) { 11647c478bd9Sstevel@tonic-gate /* 11657c478bd9Sstevel@tonic-gate * Found a rotational position, now find the actual 11667c478bd9Sstevel@tonic-gate * block. A "panic" if none is actually there. 11677c478bd9Sstevel@tonic-gate */ 11687c478bd9Sstevel@tonic-gate 11697c478bd9Sstevel@tonic-gate /* 11707c478bd9Sstevel@tonic-gate * Up to this point, "pos" has referred to the rotational 11717c478bd9Sstevel@tonic-gate * position of the desired block. From now on, it holds 11727c478bd9Sstevel@tonic-gate * the offset of the current cylinder within a cylinder 11737c478bd9Sstevel@tonic-gate * cycle. (A cylinder cycle refers to a set of cylinders 11747c478bd9Sstevel@tonic-gate * which are described by a single rotational table; the 11757c478bd9Sstevel@tonic-gate * size of the cycle is fs_cpc.) 11767c478bd9Sstevel@tonic-gate * 11777c478bd9Sstevel@tonic-gate * bno is set to the block number of the first block within 11787c478bd9Sstevel@tonic-gate * the current cylinder cycle. 11797c478bd9Sstevel@tonic-gate */ 11807c478bd9Sstevel@tonic-gate 11817c478bd9Sstevel@tonic-gate pos = cylno % fs->fs_cpc; 11827c478bd9Sstevel@tonic-gate bno = (cylno - pos) * fs->fs_spc / NSPB(fs); 11837c478bd9Sstevel@tonic-gate 11847c478bd9Sstevel@tonic-gate /* 11857c478bd9Sstevel@tonic-gate * The blocks within a cylinder are grouped into equivalence 11867c478bd9Sstevel@tonic-gate * classes according to their "rotational position." There 11877c478bd9Sstevel@tonic-gate * are two tables used to determine these classes. 11887c478bd9Sstevel@tonic-gate * 11897c478bd9Sstevel@tonic-gate * The positional offset table (fs_postbl) has an entry for 11907c478bd9Sstevel@tonic-gate * each rotational position of each cylinder in a cylinder 11917c478bd9Sstevel@tonic-gate * cycle. This entry contains the relative block number 11927c478bd9Sstevel@tonic-gate * (counting from the start of the cylinder cycle) of the 11937c478bd9Sstevel@tonic-gate * first block in the equivalence class for that position 11947c478bd9Sstevel@tonic-gate * and that cylinder. Positions for which no blocks exist 11957c478bd9Sstevel@tonic-gate * are indicated by a -1. 11967c478bd9Sstevel@tonic-gate * 11977c478bd9Sstevel@tonic-gate * The rotational delta table (fs_rotbl) has an entry for 11987c478bd9Sstevel@tonic-gate * each block in a cylinder cycle. This entry contains 11997c478bd9Sstevel@tonic-gate * the offset from that block to the next block in the 12007c478bd9Sstevel@tonic-gate * same equivalence class. The last block in the class 12017c478bd9Sstevel@tonic-gate * is indicated by a zero in the table. 12027c478bd9Sstevel@tonic-gate * 12037c478bd9Sstevel@tonic-gate * The following code, then, walks through all of the blocks 12047c478bd9Sstevel@tonic-gate * in the cylinder (cylno) which we're allocating within 12057c478bd9Sstevel@tonic-gate * which are in the equivalence class for the rotational 12067c478bd9Sstevel@tonic-gate * position (i) which we're allocating within. 12077c478bd9Sstevel@tonic-gate */ 12087c478bd9Sstevel@tonic-gate 12097c478bd9Sstevel@tonic-gate if (fs_postbl(ufsvfsp, pos)[i] == -1) { 12107c478bd9Sstevel@tonic-gate (void) ufs_fault(ufsvfsp->vfs_root, 1211303bf60bSsdebnath "alloccgblk: cyl groups corrupted, pos = %d, " 1212303bf60bSsdebnath "i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); 12137c478bd9Sstevel@tonic-gate return (0); 12147c478bd9Sstevel@tonic-gate } 12157c478bd9Sstevel@tonic-gate 12167c478bd9Sstevel@tonic-gate /* 12177c478bd9Sstevel@tonic-gate * There is one entry in the rotational table for each block 12187c478bd9Sstevel@tonic-gate * in the cylinder cycle. These are whole blocks, not frags. 12197c478bd9Sstevel@tonic-gate */ 12207c478bd9Sstevel@tonic-gate 12217c478bd9Sstevel@tonic-gate rotbl_size = (fs->fs_cpc * fs->fs_spc) >> 12227c478bd9Sstevel@tonic-gate (fs->fs_fragshift + fs->fs_fsbtodb); 12237c478bd9Sstevel@tonic-gate 12247c478bd9Sstevel@tonic-gate /* 12257c478bd9Sstevel@tonic-gate * As we start, "i" is the rotational position within which 12267c478bd9Sstevel@tonic-gate * we're searching. After the next line, it will be a block 12277c478bd9Sstevel@tonic-gate * number (relative to the start of the cylinder cycle) 12287c478bd9Sstevel@tonic-gate * within the equivalence class of that rotational position. 12297c478bd9Sstevel@tonic-gate */ 12307c478bd9Sstevel@tonic-gate 12317c478bd9Sstevel@tonic-gate i = fs_postbl(ufsvfsp, pos)[i]; 12327c478bd9Sstevel@tonic-gate 12337c478bd9Sstevel@tonic-gate for (;;) { 12347c478bd9Sstevel@tonic-gate if (isblock(fs, blksfree, (daddr_t)(bno + i))) { 12357c478bd9Sstevel@tonic-gate bno = blkstofrags(fs, (bno + i)); 12367c478bd9Sstevel@tonic-gate goto gotit; 12377c478bd9Sstevel@tonic-gate } 12387c478bd9Sstevel@tonic-gate delta = fs_rotbl(fs)[i]; 12397c478bd9Sstevel@tonic-gate if (delta <= 0 || /* End of chain, or */ 12407c478bd9Sstevel@tonic-gate delta + i > rotbl_size) /* end of table? */ 12417c478bd9Sstevel@tonic-gate break; /* If so, panic. */ 12427c478bd9Sstevel@tonic-gate i += delta; 12437c478bd9Sstevel@tonic-gate } 12447c478bd9Sstevel@tonic-gate (void) ufs_fault(ufsvfsp->vfs_root, 1245303bf60bSsdebnath "alloccgblk: can't find blk in cyl, pos:%d, i:%d, " 1246303bf60bSsdebnath "fs:%s bno: %x\n", pos, i, fs->fs_fsmnt, (int)bno); 12477c478bd9Sstevel@tonic-gate return (0); 12487c478bd9Sstevel@tonic-gate } 12497c478bd9Sstevel@tonic-gate norot: 12507c478bd9Sstevel@tonic-gate /* 12517c478bd9Sstevel@tonic-gate * No blocks in the requested cylinder, so take 12527c478bd9Sstevel@tonic-gate * next available one in this cylinder group. 12537c478bd9Sstevel@tonic-gate */ 12547c478bd9Sstevel@tonic-gate bno = mapsearch(ufsvfsp, cgp, bpref, (int)fs->fs_frag); 12557c478bd9Sstevel@tonic-gate if (bno < 0) 12567c478bd9Sstevel@tonic-gate return (0); 12577c478bd9Sstevel@tonic-gate cgp->cg_rotor = bno; 12587c478bd9Sstevel@tonic-gate gotit: 12597c478bd9Sstevel@tonic-gate blkno = fragstoblks(fs, bno); 12607c478bd9Sstevel@tonic-gate frag = (cgp->cg_cgx * fs->fs_fpg) + bno; 12617c478bd9Sstevel@tonic-gate if (TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, frag)), fs->fs_bsize)) 12627c478bd9Sstevel@tonic-gate goto norot; 12637c478bd9Sstevel@tonic-gate clrblock(fs, blksfree, (long)blkno); 12647c478bd9Sstevel@tonic-gate /* 12657c478bd9Sstevel@tonic-gate * the other cg/sb/si fields are TRANS'ed by the caller 12667c478bd9Sstevel@tonic-gate */ 12677c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nbfree--; 12687c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nbfree--; 12697c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; 12707c478bd9Sstevel@tonic-gate cylno = cbtocylno(fs, bno); 12717c478bd9Sstevel@tonic-gate blks = cg_blks(ufsvfsp, cgp, cylno); 12727c478bd9Sstevel@tonic-gate rpos = cbtorpos(ufsvfsp, bno); 12737c478bd9Sstevel@tonic-gate blktot = cg_blktot(cgp); 12747c478bd9Sstevel@tonic-gate blks[rpos]--; 12757c478bd9Sstevel@tonic-gate blktot[cylno]--; 12767c478bd9Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 12777c478bd9Sstevel@tonic-gate fs->fs_fmod = 1; 12787c478bd9Sstevel@tonic-gate return (frag); 12797c478bd9Sstevel@tonic-gate } 12807c478bd9Sstevel@tonic-gate 12817c478bd9Sstevel@tonic-gate /* 12827c478bd9Sstevel@tonic-gate * Determine whether an inode can be allocated. 12837c478bd9Sstevel@tonic-gate * 12847c478bd9Sstevel@tonic-gate * Check to see if an inode is available, and if it is, 12857c478bd9Sstevel@tonic-gate * allocate it using the following policy: 12867c478bd9Sstevel@tonic-gate * 1) allocate the requested inode. 12877c478bd9Sstevel@tonic-gate * 2) allocate the next available inode after the requested 12887c478bd9Sstevel@tonic-gate * inode in the specified cylinder group. 12897c478bd9Sstevel@tonic-gate */ 12907c478bd9Sstevel@tonic-gate static ino_t 12917c478bd9Sstevel@tonic-gate ialloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 12927c478bd9Sstevel@tonic-gate { 12937c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 12947c478bd9Sstevel@tonic-gate struct fs *fs = ip->i_fs; 12957c478bd9Sstevel@tonic-gate struct cg *cgp; 12967c478bd9Sstevel@tonic-gate struct buf *bp; 12977c478bd9Sstevel@tonic-gate int start, len, loc, map, i; 12987c478bd9Sstevel@tonic-gate char *iused; 12997c478bd9Sstevel@tonic-gate 13007c478bd9Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nifree == 0) 13017c478bd9Sstevel@tonic-gate return (0); 13027c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 13037c478bd9Sstevel@tonic-gate (int)fs->fs_cgsize); 13047c478bd9Sstevel@tonic-gate 13057c478bd9Sstevel@tonic-gate cgp = bp->b_un.b_cg; 13067c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) || 13077c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nifree == 0) { 13087c478bd9Sstevel@tonic-gate brelse(bp); 13097c478bd9Sstevel@tonic-gate return (0); 13107c478bd9Sstevel@tonic-gate } 13117c478bd9Sstevel@tonic-gate iused = cg_inosused(cgp); 13127c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 13137c478bd9Sstevel@tonic-gate /* 13147c478bd9Sstevel@tonic-gate * While we are waiting for the mutex, someone may have taken 13157c478bd9Sstevel@tonic-gate * the last available inode. Need to recheck. 13167c478bd9Sstevel@tonic-gate */ 13177c478bd9Sstevel@tonic-gate if (cgp->cg_cs.cs_nifree == 0) { 13187c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 13197c478bd9Sstevel@tonic-gate brelse(bp); 13207c478bd9Sstevel@tonic-gate return (0); 13217c478bd9Sstevel@tonic-gate } 13227c478bd9Sstevel@tonic-gate 13237c478bd9Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 13247c478bd9Sstevel@tonic-gate if (ipref) { 13257c478bd9Sstevel@tonic-gate ipref %= fs->fs_ipg; 13267c478bd9Sstevel@tonic-gate if (isclr(iused, ipref)) 13277c478bd9Sstevel@tonic-gate goto gotit; 13287c478bd9Sstevel@tonic-gate } 13297c478bd9Sstevel@tonic-gate start = cgp->cg_irotor / NBBY; 13307c478bd9Sstevel@tonic-gate len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); 13317c478bd9Sstevel@tonic-gate loc = skpc(0xff, (uint_t)len, &iused[start]); 13327c478bd9Sstevel@tonic-gate if (loc == 0) { 13337c478bd9Sstevel@tonic-gate len = start + 1; 13347c478bd9Sstevel@tonic-gate start = 0; 13357c478bd9Sstevel@tonic-gate loc = skpc(0xff, (uint_t)len, &iused[0]); 13367c478bd9Sstevel@tonic-gate if (loc == 0) { 13377c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 13387c478bd9Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 1339303bf60bSsdebnath "ialloccg: map corrupted, cg = %d, irotor = %d, " 1340303bf60bSsdebnath "fs = %s\n", cg, (int)cgp->cg_irotor, fs->fs_fsmnt); 13417c478bd9Sstevel@tonic-gate return (0); 13427c478bd9Sstevel@tonic-gate } 13437c478bd9Sstevel@tonic-gate } 13447c478bd9Sstevel@tonic-gate i = start + len - loc; 13457c478bd9Sstevel@tonic-gate map = iused[i]; 13467c478bd9Sstevel@tonic-gate ipref = i * NBBY; 13477c478bd9Sstevel@tonic-gate for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 13487c478bd9Sstevel@tonic-gate if ((map & i) == 0) { 13497c478bd9Sstevel@tonic-gate cgp->cg_irotor = ipref; 13507c478bd9Sstevel@tonic-gate goto gotit; 13517c478bd9Sstevel@tonic-gate } 13527c478bd9Sstevel@tonic-gate } 13537c478bd9Sstevel@tonic-gate 13547c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 13557c478bd9Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), "ialloccg: block not in mapfs = %s", 13567c478bd9Sstevel@tonic-gate fs->fs_fsmnt); 13577c478bd9Sstevel@tonic-gate return (0); 13587c478bd9Sstevel@tonic-gate gotit: 13597c478bd9Sstevel@tonic-gate setbit(iused, ipref); 13607c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_nifree--; 13617c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_nifree--; 13627c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nifree--; 13637c478bd9Sstevel@tonic-gate if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) { 13647c478bd9Sstevel@tonic-gate cgp->cg_cs.cs_ndir++; 13657c478bd9Sstevel@tonic-gate fs->fs_cstotal.cs_ndir++; 13667c478bd9Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_ndir++; 13677c478bd9Sstevel@tonic-gate } 13687c478bd9Sstevel@tonic-gate fs->fs_fmod = 1; 13697c478bd9Sstevel@tonic-gate ufs_notclean(ufsvfsp); 13707c478bd9Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 13717c478bd9Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 13727c478bd9Sstevel@tonic-gate bdrwrite(bp); 13737c478bd9Sstevel@tonic-gate return (cg * fs->fs_ipg + ipref); 13747c478bd9Sstevel@tonic-gate } 13757c478bd9Sstevel@tonic-gate 13767c478bd9Sstevel@tonic-gate /* 13777c478bd9Sstevel@tonic-gate * Find a block of the specified size in the specified cylinder group. 13787c478bd9Sstevel@tonic-gate * 13797c478bd9Sstevel@tonic-gate * It is a panic if a request is made to find a block if none are 13807c478bd9Sstevel@tonic-gate * available. 13817c478bd9Sstevel@tonic-gate */ 13827c478bd9Sstevel@tonic-gate static daddr_t 13837c478bd9Sstevel@tonic-gate mapsearch(struct ufsvfs *ufsvfsp, struct cg *cgp, daddr_t bpref, 13847c478bd9Sstevel@tonic-gate int allocsiz) 13857c478bd9Sstevel@tonic-gate { 13867c478bd9Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 13877c478bd9Sstevel@tonic-gate daddr_t bno, cfrag; 13887c478bd9Sstevel@tonic-gate int start, len, loc, i, last, first, secondtime; 13897c478bd9Sstevel@tonic-gate int blk, field, subfield, pos; 13907c478bd9Sstevel@tonic-gate int gotit; 13917c478bd9Sstevel@tonic-gate 13927c478bd9Sstevel@tonic-gate /* 13937c478bd9Sstevel@tonic-gate * ufsvfs->vfs_lock is held when calling this. 13947c478bd9Sstevel@tonic-gate */ 13957c478bd9Sstevel@tonic-gate /* 13967c478bd9Sstevel@tonic-gate * Find the fragment by searching through the 13977c478bd9Sstevel@tonic-gate * free block map for an appropriate bit pattern. 13987c478bd9Sstevel@tonic-gate */ 13997c478bd9Sstevel@tonic-gate if (bpref) 14007c478bd9Sstevel@tonic-gate start = dtogd(fs, bpref) / NBBY; 14017c478bd9Sstevel@tonic-gate else 14027c478bd9Sstevel@tonic-gate start = cgp->cg_frotor / NBBY; 14037c478bd9Sstevel@tonic-gate /* 14047c478bd9Sstevel@tonic-gate * the following loop performs two scans -- the first scan 14057c478bd9Sstevel@tonic-gate * searches the bottom half of the array for a match and the 14067c478bd9Sstevel@tonic-gate * second scan searches the top half of the array. The loops 14077c478bd9Sstevel@tonic-gate * have been merged just to make things difficult. 14087c478bd9Sstevel@tonic-gate */ 14097c478bd9Sstevel@tonic-gate first = start; 14107c478bd9Sstevel@tonic-gate last = howmany(fs->fs_fpg, NBBY); 14117c478bd9Sstevel@tonic-gate secondtime = 0; 14127c478bd9Sstevel@tonic-gate cfrag = cgp->cg_cgx * fs->fs_fpg; 14137c478bd9Sstevel@tonic-gate while (first < last) { 14147c478bd9Sstevel@tonic-gate len = last - first; 14157c478bd9Sstevel@tonic-gate /* 14167c478bd9Sstevel@tonic-gate * search the array for a match 14177c478bd9Sstevel@tonic-gate */ 14187c478bd9Sstevel@tonic-gate loc = scanc((unsigned)len, (uchar_t *)&cg_blksfree(cgp)[first], 14197c478bd9Sstevel@tonic-gate (uchar_t *)fragtbl[fs->fs_frag], 14207c478bd9Sstevel@tonic-gate (int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 14217c478bd9Sstevel@tonic-gate /* 14227c478bd9Sstevel@tonic-gate * match found 14237c478bd9Sstevel@tonic-gate */ 14247c478bd9Sstevel@tonic-gate if (loc) { 14257c478bd9Sstevel@tonic-gate bno = (last - loc) * NBBY; 14267c478bd9Sstevel@tonic-gate 14277c478bd9Sstevel@tonic-gate /* 14287c478bd9Sstevel@tonic-gate * Found the byte in the map, sift 14297c478bd9Sstevel@tonic-gate * through the bits to find the selected frag 14307c478bd9Sstevel@tonic-gate */ 14317c478bd9Sstevel@tonic-gate cgp->cg_frotor = bno; 14327c478bd9Sstevel@tonic-gate gotit = 0; 14337c478bd9Sstevel@tonic-gate for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 14347c478bd9Sstevel@tonic-gate blk = blkmap(fs, cg_blksfree(cgp), bno); 14357c478bd9Sstevel@tonic-gate blk <<= 1; 14367c478bd9Sstevel@tonic-gate field = around[allocsiz]; 14377c478bd9Sstevel@tonic-gate subfield = inside[allocsiz]; 14387c478bd9Sstevel@tonic-gate for (pos = 0; 14397c478bd9Sstevel@tonic-gate pos <= fs->fs_frag - allocsiz; 14407c478bd9Sstevel@tonic-gate pos++) { 14417c478bd9Sstevel@tonic-gate if ((blk & field) == subfield) { 14427c478bd9Sstevel@tonic-gate gotit++; 14437c478bd9Sstevel@tonic-gate break; 14447c478bd9Sstevel@tonic-gate } 14457c478bd9Sstevel@tonic-gate field <<= 1; 14467c478bd9Sstevel@tonic-gate subfield <<= 1; 14477c478bd9Sstevel@tonic-gate } 14487c478bd9Sstevel@tonic-gate if (gotit) 14497c478bd9Sstevel@tonic-gate break; 14507c478bd9Sstevel@tonic-gate } 14517c478bd9Sstevel@tonic-gate bno += pos; 14527c478bd9Sstevel@tonic-gate 14537c478bd9Sstevel@tonic-gate /* 14547c478bd9Sstevel@tonic-gate * success if block is *not* being converted from 14557c478bd9Sstevel@tonic-gate * metadata into userdata (harpy). If so, ignore. 14567c478bd9Sstevel@tonic-gate */ 14577c478bd9Sstevel@tonic-gate if (!TRANS_ISCANCEL(ufsvfsp, 14587c478bd9Sstevel@tonic-gate ldbtob(fsbtodb(fs, (cfrag+bno))), 14597c478bd9Sstevel@tonic-gate allocsiz * fs->fs_fsize)) 14607c478bd9Sstevel@tonic-gate return (bno); 1461303bf60bSsdebnath 14627c478bd9Sstevel@tonic-gate /* 14637c478bd9Sstevel@tonic-gate * keep looking -- this block is being converted 14647c478bd9Sstevel@tonic-gate */ 14657c478bd9Sstevel@tonic-gate first = (last - loc) + 1; 14667c478bd9Sstevel@tonic-gate loc = 0; 14677c478bd9Sstevel@tonic-gate if (first < last) 14687c478bd9Sstevel@tonic-gate continue; 14697c478bd9Sstevel@tonic-gate } 14707c478bd9Sstevel@tonic-gate /* 14717c478bd9Sstevel@tonic-gate * no usable matches in bottom half -- now search the top half 14727c478bd9Sstevel@tonic-gate */ 14737c478bd9Sstevel@tonic-gate if (secondtime) 14747c478bd9Sstevel@tonic-gate /* 14757c478bd9Sstevel@tonic-gate * no usable matches in top half -- all done 14767c478bd9Sstevel@tonic-gate */ 14777c478bd9Sstevel@tonic-gate break; 14787c478bd9Sstevel@tonic-gate secondtime = 1; 14797c478bd9Sstevel@tonic-gate last = start + 1; 14807c478bd9Sstevel@tonic-gate first = 0; 14817c478bd9Sstevel@tonic-gate } 14827c478bd9Sstevel@tonic-gate /* 14837c478bd9Sstevel@tonic-gate * no usable matches 14847c478bd9Sstevel@tonic-gate */ 14857c478bd9Sstevel@tonic-gate return ((daddr_t)-1); 14867c478bd9Sstevel@tonic-gate } 14877c478bd9Sstevel@tonic-gate 14887c478bd9Sstevel@tonic-gate #define UFSNADDR (NDADDR + NIADDR) /* NADDR applies to (obsolete) S5FS */ 14897c478bd9Sstevel@tonic-gate #define IB(i) (NDADDR + (i)) /* index of i'th indirect block ptr */ 14907c478bd9Sstevel@tonic-gate #define SINGLE 0 /* single indirect block ptr */ 14917c478bd9Sstevel@tonic-gate #define DOUBLE 1 /* double indirect block ptr */ 14927c478bd9Sstevel@tonic-gate #define TRIPLE 2 /* triple indirect block ptr */ 14937c478bd9Sstevel@tonic-gate 14947c478bd9Sstevel@tonic-gate /* 1495303bf60bSsdebnath * Acquire a write lock, and keep trying till we get it 1496303bf60bSsdebnath */ 1497303bf60bSsdebnath static int 1498303bf60bSsdebnath allocsp_wlockfs(struct vnode *vp, struct lockfs *lf) 1499303bf60bSsdebnath { 1500303bf60bSsdebnath int err = 0; 1501303bf60bSsdebnath 1502303bf60bSsdebnath lockagain: 1503303bf60bSsdebnath do { 1504303bf60bSsdebnath err = ufs_fiolfss(vp, lf); 1505303bf60bSsdebnath if (err) 1506303bf60bSsdebnath return (err); 1507303bf60bSsdebnath } while (!LOCKFS_IS_ULOCK(lf)); 1508303bf60bSsdebnath 1509303bf60bSsdebnath lf->lf_lock = LOCKFS_WLOCK; 1510303bf60bSsdebnath lf->lf_flags = 0; 1511303bf60bSsdebnath lf->lf_comment = NULL; 1512303bf60bSsdebnath err = ufs__fiolfs(vp, lf, 1, 0); 1513303bf60bSsdebnath 1514303bf60bSsdebnath if (err == EBUSY || err == EINVAL) 1515303bf60bSsdebnath goto lockagain; 1516303bf60bSsdebnath 1517303bf60bSsdebnath return (err); 1518303bf60bSsdebnath } 1519303bf60bSsdebnath 1520303bf60bSsdebnath /* 1521303bf60bSsdebnath * Release the write lock 1522303bf60bSsdebnath */ 1523303bf60bSsdebnath static int 1524303bf60bSsdebnath allocsp_unlockfs(struct vnode *vp, struct lockfs *lf) 1525303bf60bSsdebnath { 1526303bf60bSsdebnath int err = 0; 1527303bf60bSsdebnath 1528303bf60bSsdebnath lf->lf_lock = LOCKFS_ULOCK; 1529303bf60bSsdebnath lf->lf_flags = 0; 1530303bf60bSsdebnath err = ufs__fiolfs(vp, lf, 1, 0); 1531303bf60bSsdebnath return (err); 1532303bf60bSsdebnath } 1533303bf60bSsdebnath 1534303bf60bSsdebnath struct allocsp_undo { 1535303bf60bSsdebnath daddr_t offset; 1536303bf60bSsdebnath daddr_t blk; 1537303bf60bSsdebnath struct allocsp_undo *next; 1538303bf60bSsdebnath }; 1539303bf60bSsdebnath 1540303bf60bSsdebnath /* 1541303bf60bSsdebnath * ufs_allocsp() can be used to pre-allocate blocks for a file on a given 15424f21de4dSjr26306 * file system. For direct blocks, the blocks are allocated from the offset 15434f21de4dSjr26306 * requested to the block boundary, then any full blocks are allocated, 15444f21de4dSjr26306 * and finally any remainder. 15454f21de4dSjr26306 * For indirect blocks the blocks are not initialized and are 15464f21de4dSjr26306 * only marked as allocated. These addresses are then stored as negative 15474f21de4dSjr26306 * block numbers in the inode to imply special handling. UFS has been modified 15484f21de4dSjr26306 * where necessary to understand this new notion. 15494f21de4dSjr26306 * Successfully fallocated files will have IFALLOCATE cflag set in the inode. 1550303bf60bSsdebnath */ 1551303bf60bSsdebnath int 1552303bf60bSsdebnath ufs_allocsp(struct vnode *vp, struct flock64 *lp, cred_t *cr) 1553303bf60bSsdebnath { 1554303bf60bSsdebnath struct lockfs lf; 1555303bf60bSsdebnath int berr, err, resv, issync; 15564f21de4dSjr26306 off_t istart, len; /* istart, special for idb */ 1557303bf60bSsdebnath struct inode *ip; 1558303bf60bSsdebnath struct fs *fs; 1559303bf60bSsdebnath struct ufsvfs *ufsvfsp; 15604f21de4dSjr26306 u_offset_t resid, i, uoff; 1561303bf60bSsdebnath daddr32_t db_undo[NDADDR]; /* old direct blocks */ 1562303bf60bSsdebnath struct allocsp_undo *ib_undo = NULL; /* ib undo */ 1563303bf60bSsdebnath struct allocsp_undo *undo = NULL; 1564303bf60bSsdebnath u_offset_t osz; /* old file size */ 1565303bf60bSsdebnath int chunkblks = 0; /* # of blocks in 1 allocation */ 1566303bf60bSsdebnath int cnt = 0; 1567303bf60bSsdebnath daddr_t allocblk; 1568303bf60bSsdebnath daddr_t totblks = 0; 1569303bf60bSsdebnath struct ulockfs *ulp; 15704f21de4dSjr26306 size_t done_len; 15714f21de4dSjr26306 int nbytes, offsetn; 15724f21de4dSjr26306 1573303bf60bSsdebnath 1574303bf60bSsdebnath ASSERT(vp->v_type == VREG); 1575303bf60bSsdebnath 1576303bf60bSsdebnath ip = VTOI(vp); 1577303bf60bSsdebnath fs = ip->i_fs; 1578303bf60bSsdebnath if ((ufsvfsp = ip->i_ufsvfs) == NULL) { 1579303bf60bSsdebnath err = EIO; 1580303bf60bSsdebnath goto out_allocsp; 1581303bf60bSsdebnath } 1582303bf60bSsdebnath 15834f21de4dSjr26306 istart = blkroundup(fs, (lp->l_start)); 1584303bf60bSsdebnath len = blkroundup(fs, (lp->l_len)); 1585303bf60bSsdebnath chunkblks = blkroundup(fs, ufsvfsp->vfs_iotransz) / fs->fs_bsize; 1586303bf60bSsdebnath ulp = &ufsvfsp->vfs_ulockfs; 1587303bf60bSsdebnath 1588303bf60bSsdebnath if (lp->l_start < 0 || lp->l_len <= 0) 1589303bf60bSsdebnath return (EINVAL); 1590303bf60bSsdebnath 1591303bf60bSsdebnath /* Quickly check to make sure we have space before we proceed */ 1592303bf60bSsdebnath if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree) { 1593303bf60bSsdebnath if (TRANS_ISTRANS(ufsvfsp)) { 1594303bf60bSsdebnath ufs_delete_drain_wait(ufsvfsp, 1); 1595303bf60bSsdebnath if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree) 1596303bf60bSsdebnath return (ENOSPC); 1597303bf60bSsdebnath } else 1598303bf60bSsdebnath return (ENOSPC); 1599303bf60bSsdebnath } 1600303bf60bSsdebnath 1601303bf60bSsdebnath /* 1602303bf60bSsdebnath * We will keep i_rwlock locked as WRITER through out the function 1603303bf60bSsdebnath * since we don't want anyone else reading or writing to the inode 1604303bf60bSsdebnath * while we are in the middle of fallocating the file. 1605303bf60bSsdebnath */ 1606303bf60bSsdebnath rw_enter(&ip->i_rwlock, RW_WRITER); 1607303bf60bSsdebnath 1608303bf60bSsdebnath /* Back up the direct block list, used for undo later if necessary */ 1609303bf60bSsdebnath rw_enter(&ip->i_contents, RW_READER); 1610303bf60bSsdebnath for (i = 0; i < NDADDR; i++) 1611303bf60bSsdebnath db_undo[i] = ip->i_db[i]; 1612303bf60bSsdebnath osz = ip->i_size; 1613303bf60bSsdebnath rw_exit(&ip->i_contents); 1614303bf60bSsdebnath 16154f21de4dSjr26306 /* Write lock the file system */ 16164f21de4dSjr26306 if (err = allocsp_wlockfs(vp, &lf)) 16174f21de4dSjr26306 goto exit; 16184f21de4dSjr26306 16194f21de4dSjr26306 /* 16204f21de4dSjr26306 * Allocate any direct blocks now. 16214f21de4dSjr26306 * Blocks are allocated from the offset requested to the block 16224f21de4dSjr26306 * boundary, then any full blocks are allocated, and finally any 16234f21de4dSjr26306 * remainder. 16244f21de4dSjr26306 */ 16254f21de4dSjr26306 if (lblkno(fs, lp->l_start) < NDADDR) { 1626303bf60bSsdebnath ufs_trans_trunc_resv(ip, ip->i_size + (NDADDR * fs->fs_bsize), 1627303bf60bSsdebnath &resv, &resid); 1628303bf60bSsdebnath TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv); 1629303bf60bSsdebnath 1630303bf60bSsdebnath rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1631303bf60bSsdebnath rw_enter(&ip->i_contents, RW_WRITER); 1632303bf60bSsdebnath 16334f21de4dSjr26306 done_len = 0; 16344f21de4dSjr26306 while ((done_len < lp->l_len) && 16354f21de4dSjr26306 (lblkno(fs, lp->l_start + done_len) < NDADDR)) { 16364f21de4dSjr26306 uoff = (offset_t)(lp->l_start + done_len); 16374f21de4dSjr26306 offsetn = (int)blkoff(fs, uoff); 16384f21de4dSjr26306 nbytes = (int)MIN(fs->fs_bsize - offsetn, 16394f21de4dSjr26306 lp->l_len - done_len); 16404f21de4dSjr26306 16414f21de4dSjr26306 berr = bmap_write(ip, uoff, offsetn + nbytes, 16424f21de4dSjr26306 BI_FALLOCATE, &allocblk, cr); 1643303bf60bSsdebnath /* Yikes error, quit */ 1644303bf60bSsdebnath if (berr) { 1645303bf60bSsdebnath TRANS_INODE(ufsvfsp, ip); 1646303bf60bSsdebnath rw_exit(&ip->i_contents); 1647303bf60bSsdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1648303bf60bSsdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, 1649303bf60bSsdebnath TOP_ALLOCSP, resv); 16504f21de4dSjr26306 err = allocsp_unlockfs(vp, &lf); 1651303bf60bSsdebnath goto exit; 1652303bf60bSsdebnath } 1653303bf60bSsdebnath 1654303bf60bSsdebnath if (allocblk) { 1655303bf60bSsdebnath totblks++; 16564f21de4dSjr26306 if ((uoff + nbytes) > ip->i_size) 16574f21de4dSjr26306 ip->i_size = (uoff + nbytes); 1658303bf60bSsdebnath } 16594f21de4dSjr26306 done_len += nbytes; 1660303bf60bSsdebnath } 1661303bf60bSsdebnath 1662303bf60bSsdebnath TRANS_INODE(ufsvfsp, ip); 1663303bf60bSsdebnath rw_exit(&ip->i_contents); 1664303bf60bSsdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1665303bf60bSsdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv); 1666303bf60bSsdebnath 16674f21de4dSjr26306 /* start offset for indirect allocation */ 16684f21de4dSjr26306 istart = (uoff + nbytes); 1669303bf60bSsdebnath } 1670303bf60bSsdebnath 1671303bf60bSsdebnath /* Break the transactions into vfs_iotransz units */ 1672303bf60bSsdebnath ufs_trans_trunc_resv(ip, ip->i_size + 1673303bf60bSsdebnath blkroundup(fs, ufsvfsp->vfs_iotransz), &resv, &resid); 1674303bf60bSsdebnath TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv); 1675303bf60bSsdebnath 1676303bf60bSsdebnath rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1677303bf60bSsdebnath rw_enter(&ip->i_contents, RW_WRITER); 1678303bf60bSsdebnath 1679303bf60bSsdebnath /* Now go about fallocating necessary indirect blocks */ 16804f21de4dSjr26306 for (i = istart; i < (lp->l_start + lp->l_len); i += fs->fs_bsize) { 1681303bf60bSsdebnath berr = bmap_write(ip, i, fs->fs_bsize, BI_FALLOCATE, 1682303bf60bSsdebnath &allocblk, cr); 1683303bf60bSsdebnath if (berr) { 1684303bf60bSsdebnath TRANS_INODE(ufsvfsp, ip); 1685303bf60bSsdebnath rw_exit(&ip->i_contents); 1686303bf60bSsdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1687303bf60bSsdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, 1688303bf60bSsdebnath TOP_ALLOCSP, resv); 1689303bf60bSsdebnath err = allocsp_unlockfs(vp, &lf); 1690303bf60bSsdebnath goto exit; 1691303bf60bSsdebnath } 1692303bf60bSsdebnath 1693303bf60bSsdebnath /* Update the blk counter only if new block was added */ 1694303bf60bSsdebnath if (allocblk) { 1695303bf60bSsdebnath /* Save undo information */ 1696303bf60bSsdebnath undo = kmem_alloc(sizeof (struct allocsp_undo), 1697303bf60bSsdebnath KM_SLEEP); 1698303bf60bSsdebnath undo->offset = i; 1699303bf60bSsdebnath undo->blk = allocblk; 1700303bf60bSsdebnath undo->next = ib_undo; 1701303bf60bSsdebnath ib_undo = undo; 1702303bf60bSsdebnath totblks++; 170333c22cb3Smishra 170433c22cb3Smishra if (i >= ip->i_size) 1705303bf60bSsdebnath ip->i_size += fs->fs_bsize; 1706303bf60bSsdebnath } 1707303bf60bSsdebnath cnt++; 1708303bf60bSsdebnath 1709303bf60bSsdebnath /* Being a good UFS citizen, let others get a share */ 1710303bf60bSsdebnath if (cnt == chunkblks) { 1711303bf60bSsdebnath /* 1712303bf60bSsdebnath * If there are waiters or the fs is hard locked, 1713303bf60bSsdebnath * error locked, or read-only error locked, 1714303bf60bSsdebnath * quit with EIO 1715303bf60bSsdebnath */ 1716303bf60bSsdebnath if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) || 1717303bf60bSsdebnath ULOCKFS_IS_ROELOCK(ulp)) { 1718303bf60bSsdebnath ip->i_cflags |= IFALLOCATE; 1719303bf60bSsdebnath TRANS_INODE(ufsvfsp, ip); 1720303bf60bSsdebnath rw_exit(&ip->i_contents); 1721303bf60bSsdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1722303bf60bSsdebnath 1723303bf60bSsdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, 1724303bf60bSsdebnath TOP_ALLOCSP, resv); 1725303bf60bSsdebnath rw_exit(&ip->i_rwlock); 17264f21de4dSjr26306 (void) allocsp_unlockfs(vp, &lf); 1727303bf60bSsdebnath return (EIO); 1728303bf60bSsdebnath } 1729303bf60bSsdebnath 1730303bf60bSsdebnath TRANS_INODE(ufsvfsp, ip); 1731303bf60bSsdebnath rw_exit(&ip->i_contents); 1732303bf60bSsdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1733303bf60bSsdebnath 1734303bf60bSsdebnath /* End the current transaction */ 1735303bf60bSsdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, 1736303bf60bSsdebnath TOP_ALLOCSP, resv); 1737303bf60bSsdebnath 1738303bf60bSsdebnath if (CV_HAS_WAITERS(&ulp->ul_cv)) { 1739303bf60bSsdebnath /* Release the write lock */ 1740303bf60bSsdebnath if (err = allocsp_unlockfs(vp, &lf)) 1741303bf60bSsdebnath goto exit; 1742303bf60bSsdebnath 1743303bf60bSsdebnath /* Wake up others waiting to do operations */ 1744303bf60bSsdebnath mutex_enter(&ulp->ul_lock); 1745303bf60bSsdebnath cv_broadcast(&ulp->ul_cv); 1746303bf60bSsdebnath mutex_exit(&ulp->ul_lock); 1747303bf60bSsdebnath 1748303bf60bSsdebnath /* Grab the write lock again */ 1749303bf60bSsdebnath if (err = allocsp_wlockfs(vp, &lf)) 1750303bf60bSsdebnath goto exit; 1751303bf60bSsdebnath } /* end of CV_HAS_WAITERS(&ulp->ul_cv) */ 1752303bf60bSsdebnath 1753303bf60bSsdebnath /* Reserve more space in log for this file */ 1754303bf60bSsdebnath ufs_trans_trunc_resv(ip, 1755303bf60bSsdebnath ip->i_size + blkroundup(fs, ufsvfsp->vfs_iotransz), 1756303bf60bSsdebnath &resv, &resid); 1757303bf60bSsdebnath TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv); 1758303bf60bSsdebnath 1759303bf60bSsdebnath rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1760303bf60bSsdebnath rw_enter(&ip->i_contents, RW_WRITER); 1761303bf60bSsdebnath 1762303bf60bSsdebnath cnt = 0; /* reset cnt b/c of new transaction */ 1763303bf60bSsdebnath } 1764303bf60bSsdebnath } 1765303bf60bSsdebnath 1766303bf60bSsdebnath if (!err && !berr) 1767303bf60bSsdebnath ip->i_cflags |= IFALLOCATE; 1768303bf60bSsdebnath 17694f21de4dSjr26306 /* If the file has grown then correct the file size */ 17704f21de4dSjr26306 if (osz < (lp->l_start + lp->l_len)) 17714f21de4dSjr26306 ip->i_size = (lp->l_start + lp->l_len); 17724f21de4dSjr26306 1773303bf60bSsdebnath /* Release locks, end log transaction and unlock fs */ 1774303bf60bSsdebnath TRANS_INODE(ufsvfsp, ip); 1775303bf60bSsdebnath rw_exit(&ip->i_contents); 1776303bf60bSsdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1777303bf60bSsdebnath 1778303bf60bSsdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv); 1779303bf60bSsdebnath err = allocsp_unlockfs(vp, &lf); 1780303bf60bSsdebnath 1781303bf60bSsdebnath /* 1782303bf60bSsdebnath * @ exit label, we should no longer be holding the fs write lock, and 1783303bf60bSsdebnath * all logging transactions should have been ended. We still hold 1784303bf60bSsdebnath * ip->i_rwlock. 1785303bf60bSsdebnath */ 1786303bf60bSsdebnath exit: 1787303bf60bSsdebnath /* 1788303bf60bSsdebnath * File has grown larger than 2GB. Set flag 1789303bf60bSsdebnath * in superblock to indicate this, if it 1790303bf60bSsdebnath * is not already set. 1791303bf60bSsdebnath */ 1792303bf60bSsdebnath if ((ip->i_size > MAXOFF32_T) && 1793303bf60bSsdebnath !(fs->fs_flags & FSLARGEFILES)) { 1794303bf60bSsdebnath ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES); 1795303bf60bSsdebnath mutex_enter(&ufsvfsp->vfs_lock); 1796303bf60bSsdebnath fs->fs_flags |= FSLARGEFILES; 1797303bf60bSsdebnath ufs_sbwrite(ufsvfsp); 1798303bf60bSsdebnath mutex_exit(&ufsvfsp->vfs_lock); 1799303bf60bSsdebnath } 1800303bf60bSsdebnath 1801303bf60bSsdebnath /* 1802303bf60bSsdebnath * Since we couldn't allocate completely, we will undo the allocations. 1803303bf60bSsdebnath */ 1804303bf60bSsdebnath if (berr) { 1805303bf60bSsdebnath ufs_trans_trunc_resv(ip, totblks * fs->fs_bsize, &resv, &resid); 1806303bf60bSsdebnath TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv); 1807303bf60bSsdebnath 1808303bf60bSsdebnath rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1809303bf60bSsdebnath rw_enter(&ip->i_contents, RW_WRITER); 1810303bf60bSsdebnath 1811303bf60bSsdebnath /* Direct blocks */ 1812303bf60bSsdebnath for (i = 0; i < NDADDR; i++) { 1813303bf60bSsdebnath /* 1814303bf60bSsdebnath * Only free the block if they are not same, and 1815303bf60bSsdebnath * the old one isn't zero (the fragment was 1816303bf60bSsdebnath * re-allocated). 1817303bf60bSsdebnath */ 1818303bf60bSsdebnath if (db_undo[i] != ip->i_db[i] && db_undo[i] == 0) { 1819303bf60bSsdebnath free(ip, ip->i_db[i], fs->fs_bsize, 0); 1820303bf60bSsdebnath ip->i_db[i] = 0; 1821303bf60bSsdebnath } 1822303bf60bSsdebnath } 1823303bf60bSsdebnath 1824303bf60bSsdebnath /* Undo the indirect blocks */ 1825303bf60bSsdebnath while (ib_undo != NULL) { 1826303bf60bSsdebnath undo = ib_undo; 1827303bf60bSsdebnath err = bmap_set_bn(vp, undo->offset, 0); 1828303bf60bSsdebnath if (err) 1829303bf60bSsdebnath cmn_err(CE_PANIC, "ufs_allocsp(): failed to " 1830303bf60bSsdebnath "undo allocation of block %ld", 1831303bf60bSsdebnath undo->offset); 1832303bf60bSsdebnath free(ip, undo->blk, fs->fs_bsize, I_IBLK); 1833303bf60bSsdebnath ib_undo = undo->next; 1834303bf60bSsdebnath kmem_free(undo, sizeof (struct allocsp_undo)); 1835303bf60bSsdebnath } 1836303bf60bSsdebnath 1837303bf60bSsdebnath ip->i_size = osz; 1838303bf60bSsdebnath TRANS_INODE(ufsvfsp, ip); 1839303bf60bSsdebnath 1840303bf60bSsdebnath rw_exit(&ip->i_contents); 1841303bf60bSsdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1842303bf60bSsdebnath 1843303bf60bSsdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv); 1844303bf60bSsdebnath 1845303bf60bSsdebnath rw_exit(&ip->i_rwlock); 1846303bf60bSsdebnath return (berr); 1847303bf60bSsdebnath } 1848303bf60bSsdebnath 1849303bf60bSsdebnath /* 1850303bf60bSsdebnath * Don't forget to free the undo chain :) 1851303bf60bSsdebnath */ 1852303bf60bSsdebnath while (ib_undo != NULL) { 1853303bf60bSsdebnath undo = ib_undo; 1854303bf60bSsdebnath ib_undo = undo->next; 1855303bf60bSsdebnath kmem_free(undo, sizeof (struct allocsp_undo)); 1856303bf60bSsdebnath } 1857303bf60bSsdebnath 1858303bf60bSsdebnath rw_exit(&ip->i_rwlock); 1859303bf60bSsdebnath 1860303bf60bSsdebnath out_allocsp: 1861303bf60bSsdebnath return (err); 1862303bf60bSsdebnath } 1863303bf60bSsdebnath 1864303bf60bSsdebnath /* 18657c478bd9Sstevel@tonic-gate * Free storage space associated with the specified inode. The portion 18667c478bd9Sstevel@tonic-gate * to be freed is specified by lp->l_start and lp->l_len (already 18677c478bd9Sstevel@tonic-gate * normalized to a "whence" of 0). 18687c478bd9Sstevel@tonic-gate * 18697c478bd9Sstevel@tonic-gate * This is an experimental facility whose continued existence is not 18707c478bd9Sstevel@tonic-gate * guaranteed. Currently, we only support the special case 18717c478bd9Sstevel@tonic-gate * of l_len == 0, meaning free to end of file. 18727c478bd9Sstevel@tonic-gate * 18737c478bd9Sstevel@tonic-gate * Blocks are freed in reverse order. This FILO algorithm will tend to 18747c478bd9Sstevel@tonic-gate * maintain a contiguous free list much longer than FIFO. 18757c478bd9Sstevel@tonic-gate * See also ufs_itrunc() in ufs_inode.c. 18767c478bd9Sstevel@tonic-gate * 18777c478bd9Sstevel@tonic-gate * Bug: unused bytes in the last retained block are not cleared. 18787c478bd9Sstevel@tonic-gate * This may result in a "hole" in the file that does not read as zeroes. 18797c478bd9Sstevel@tonic-gate */ 18807c478bd9Sstevel@tonic-gate /* ARGSUSED */ 18817c478bd9Sstevel@tonic-gate int 18827c478bd9Sstevel@tonic-gate ufs_freesp(struct vnode *vp, struct flock64 *lp, int flag, cred_t *cr) 18837c478bd9Sstevel@tonic-gate { 18847c478bd9Sstevel@tonic-gate int i; 18857c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 18867c478bd9Sstevel@tonic-gate int error; 18877c478bd9Sstevel@tonic-gate 18887c478bd9Sstevel@tonic-gate ASSERT(vp->v_type == VREG); 18897c478bd9Sstevel@tonic-gate ASSERT(lp->l_start >= 0); /* checked by convoff */ 18907c478bd9Sstevel@tonic-gate 18917c478bd9Sstevel@tonic-gate if (lp->l_len != 0) 18927c478bd9Sstevel@tonic-gate return (EINVAL); 18937c478bd9Sstevel@tonic-gate 18947c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 18957c478bd9Sstevel@tonic-gate if (ip->i_size == (u_offset_t)lp->l_start) { 18967c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 18977c478bd9Sstevel@tonic-gate return (0); 18987c478bd9Sstevel@tonic-gate } 18997c478bd9Sstevel@tonic-gate 19007c478bd9Sstevel@tonic-gate /* 19017c478bd9Sstevel@tonic-gate * Check if there is any active mandatory lock on the 19027c478bd9Sstevel@tonic-gate * range that will be truncated/expanded. 19037c478bd9Sstevel@tonic-gate */ 19047c478bd9Sstevel@tonic-gate if (MANDLOCK(vp, ip->i_mode)) { 19057c478bd9Sstevel@tonic-gate offset_t save_start; 19067c478bd9Sstevel@tonic-gate 19077c478bd9Sstevel@tonic-gate save_start = lp->l_start; 19087c478bd9Sstevel@tonic-gate 19097c478bd9Sstevel@tonic-gate if (ip->i_size < lp->l_start) { 19107c478bd9Sstevel@tonic-gate /* 19117c478bd9Sstevel@tonic-gate * "Truncate up" case: need to make sure there 19127c478bd9Sstevel@tonic-gate * is no lock beyond current end-of-file. To 19137c478bd9Sstevel@tonic-gate * do so, we need to set l_start to the size 19147c478bd9Sstevel@tonic-gate * of the file temporarily. 19157c478bd9Sstevel@tonic-gate */ 19167c478bd9Sstevel@tonic-gate lp->l_start = ip->i_size; 19177c478bd9Sstevel@tonic-gate } 19187c478bd9Sstevel@tonic-gate lp->l_type = F_WRLCK; 19197c478bd9Sstevel@tonic-gate lp->l_sysid = 0; 19207c478bd9Sstevel@tonic-gate lp->l_pid = ttoproc(curthread)->p_pid; 19217c478bd9Sstevel@tonic-gate i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK; 19227c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 19237c478bd9Sstevel@tonic-gate if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 || 19247c478bd9Sstevel@tonic-gate lp->l_type != F_UNLCK) { 19257c478bd9Sstevel@tonic-gate return (i ? i : EAGAIN); 19267c478bd9Sstevel@tonic-gate } 19277c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 19287c478bd9Sstevel@tonic-gate 19297c478bd9Sstevel@tonic-gate lp->l_start = save_start; 19307c478bd9Sstevel@tonic-gate } 19317c478bd9Sstevel@tonic-gate 19327c478bd9Sstevel@tonic-gate /* 19337c478bd9Sstevel@tonic-gate * Make sure a write isn't in progress (allocating blocks) 19347c478bd9Sstevel@tonic-gate * by acquiring i_rwlock (we promised ufs_bmap we wouldn't 19357c478bd9Sstevel@tonic-gate * truncate while it was allocating blocks). 19367c478bd9Sstevel@tonic-gate * Grab the locks in the right order. 19377c478bd9Sstevel@tonic-gate */ 19387c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 19397c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 19407c478bd9Sstevel@tonic-gate error = TRANS_ITRUNC(ip, (u_offset_t)lp->l_start, 0, cr); 19417c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 19427c478bd9Sstevel@tonic-gate return (error); 19437c478bd9Sstevel@tonic-gate } 19447c478bd9Sstevel@tonic-gate 19457c478bd9Sstevel@tonic-gate /* 19467c478bd9Sstevel@tonic-gate * Find a cg with as close to nb contiguous bytes as possible 19477c478bd9Sstevel@tonic-gate * THIS MAY TAKE MANY DISK READS! 19487c478bd9Sstevel@tonic-gate * 19497c478bd9Sstevel@tonic-gate * Implemented in an attempt to allocate contiguous blocks for 19507c478bd9Sstevel@tonic-gate * writing the ufs log file to, minimizing future disk head seeking 19517c478bd9Sstevel@tonic-gate */ 19527c478bd9Sstevel@tonic-gate daddr_t 1953e7da395aSOwen Roberts contigpref(ufsvfs_t *ufsvfsp, size_t nb, size_t minb) 19547c478bd9Sstevel@tonic-gate { 19557c478bd9Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 19567c478bd9Sstevel@tonic-gate daddr_t nblk = lblkno(fs, blkroundup(fs, nb)); 1957e7da395aSOwen Roberts daddr_t minblk = lblkno(fs, blkroundup(fs, minb)); 19587c478bd9Sstevel@tonic-gate daddr_t savebno, curbno, cgbno; 1959e7da395aSOwen Roberts int cg, cgblks, savecg, savenblk, curnblk, startcg; 19607c478bd9Sstevel@tonic-gate uchar_t *blksfree; 19617c478bd9Sstevel@tonic-gate buf_t *bp; 19627c478bd9Sstevel@tonic-gate struct cg *cgp; 19637c478bd9Sstevel@tonic-gate 19647c478bd9Sstevel@tonic-gate savenblk = 0; 19657c478bd9Sstevel@tonic-gate savecg = 0; 19667c478bd9Sstevel@tonic-gate savebno = 0; 19677c478bd9Sstevel@tonic-gate 1968e7da395aSOwen Roberts if ((startcg = findlogstartcg(fs, nblk, minblk)) == -1) 1969e7da395aSOwen Roberts cg = 0; /* Nothing suitable found */ 1970e7da395aSOwen Roberts else 1971e7da395aSOwen Roberts cg = startcg; 19727c478bd9Sstevel@tonic-gate 1973e7da395aSOwen Roberts for (; cg < fs->fs_ncg; ++cg) { 19747c478bd9Sstevel@tonic-gate /* 19757c478bd9Sstevel@tonic-gate * find the largest contiguous range in this cg 19767c478bd9Sstevel@tonic-gate */ 19777c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, 19787c478bd9Sstevel@tonic-gate (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 19797c478bd9Sstevel@tonic-gate (int)fs->fs_cgsize); 19807c478bd9Sstevel@tonic-gate cgp = bp->b_un.b_cg; 19817c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) { 19827c478bd9Sstevel@tonic-gate brelse(bp); 19837c478bd9Sstevel@tonic-gate continue; 19847c478bd9Sstevel@tonic-gate } 19857c478bd9Sstevel@tonic-gate blksfree = cg_blksfree(cgp); /* free array */ 19867c478bd9Sstevel@tonic-gate cgblks = fragstoblks(fs, fs->fs_fpg); /* blks in free array */ 19877c478bd9Sstevel@tonic-gate cgbno = 0; 19887c478bd9Sstevel@tonic-gate while (cgbno < cgblks && savenblk < nblk) { 19897c478bd9Sstevel@tonic-gate /* find a free block */ 1990e7da395aSOwen Roberts for (; cgbno < cgblks; ++cgbno) { 1991e7da395aSOwen Roberts if (isblock(fs, blksfree, cgbno)) { 1992b323a615SJim Rice if (startcg != -1) { 1993b323a615SJim Rice brelse(bp); 1994b323a615SJim Rice savecg = startcg; 1995b323a615SJim Rice savebno = cgbno; 1996e7da395aSOwen Roberts goto done; 1997b323a615SJim Rice } else 19987c478bd9Sstevel@tonic-gate break; 1999e7da395aSOwen Roberts } 2000e7da395aSOwen Roberts } 20017c478bd9Sstevel@tonic-gate curbno = cgbno; 20027c478bd9Sstevel@tonic-gate /* count the number of free blocks */ 20037c478bd9Sstevel@tonic-gate for (curnblk = 0; cgbno < cgblks; ++cgbno) { 20047c478bd9Sstevel@tonic-gate if (!isblock(fs, blksfree, cgbno)) 20057c478bd9Sstevel@tonic-gate break; 20067c478bd9Sstevel@tonic-gate if (++curnblk >= nblk) 20077c478bd9Sstevel@tonic-gate break; 20087c478bd9Sstevel@tonic-gate } 20097c478bd9Sstevel@tonic-gate if (curnblk > savenblk) { 20107c478bd9Sstevel@tonic-gate savecg = cg; 20117c478bd9Sstevel@tonic-gate savenblk = curnblk; 20127c478bd9Sstevel@tonic-gate savebno = curbno; 20137c478bd9Sstevel@tonic-gate } 20147c478bd9Sstevel@tonic-gate } 20157c478bd9Sstevel@tonic-gate brelse(bp); 20167c478bd9Sstevel@tonic-gate if (savenblk >= nblk) 20177c478bd9Sstevel@tonic-gate break; 20187c478bd9Sstevel@tonic-gate } 20197c478bd9Sstevel@tonic-gate 2020e7da395aSOwen Roberts done: 2021e7da395aSOwen Roberts 20227c478bd9Sstevel@tonic-gate /* convert block offset in cg to frag offset in cg */ 20237c478bd9Sstevel@tonic-gate savebno = blkstofrags(fs, savebno); 20247c478bd9Sstevel@tonic-gate 20257c478bd9Sstevel@tonic-gate /* convert frag offset in cg to frag offset in fs */ 20267c478bd9Sstevel@tonic-gate savebno += (savecg * fs->fs_fpg); 20277c478bd9Sstevel@tonic-gate 20287c478bd9Sstevel@tonic-gate return (savebno); 20297c478bd9Sstevel@tonic-gate } 2030e7da395aSOwen Roberts 2031e7da395aSOwen Roberts /* 2032e7da395aSOwen Roberts * The object of this routine is to find a start point for the UFS log. 2033e7da395aSOwen Roberts * Ideally the space should be allocated from the smallest possible number 2034e7da395aSOwen Roberts * of contiguous cylinder groups. This is found by using a sliding window 2035e7da395aSOwen Roberts * technique. The smallest window of contiguous cylinder groups, which is 2036e7da395aSOwen Roberts * still able to accommodate the target, is found by moving the window 2037e7da395aSOwen Roberts * through the cylinder groups in a single pass. The end of the window is 2038e7da395aSOwen Roberts * advanced until the space is accommodated, then the start is advanced until 2039e7da395aSOwen Roberts * it no longer fits, the end is then advanced again and so on until the 2040e7da395aSOwen Roberts * final cylinder group is reached. The first suitable instance is recorded 2041e7da395aSOwen Roberts * and its starting cg number is returned. 2042e7da395aSOwen Roberts * 2043e7da395aSOwen Roberts * If we are not able to find a minimum amount of space, represented by 2044e7da395aSOwen Roberts * minblk, or to do so uses more than the available extents, then return -1. 2045e7da395aSOwen Roberts */ 2046e7da395aSOwen Roberts 2047e7da395aSOwen Roberts int 2048e7da395aSOwen Roberts findlogstartcg(struct fs *fs, daddr_t requested, daddr_t minblk) 2049e7da395aSOwen Roberts { 2050e7da395aSOwen Roberts int ncgs; /* number of cylinder groups */ 2051e7da395aSOwen Roberts daddr_t target; /* amount of space sought */ 2052e7da395aSOwen Roberts int cwidth, ctotal; /* current window width and total */ 2053e7da395aSOwen Roberts int bwidth, btotal; /* best window width and total so far */ 2054e7da395aSOwen Roberts int s; /* index of the first element in the current window */ 2055e7da395aSOwen Roberts int e; /* index of the first element + the width */ 2056e7da395aSOwen Roberts /* (i.e. 1 + index of last element) */ 2057e7da395aSOwen Roberts int bs; /* index of the first element in the best window so far */ 2058e7da395aSOwen Roberts int header, max_extents; 2059e7da395aSOwen Roberts 2060e7da395aSOwen Roberts target = requested; 2061e7da395aSOwen Roberts ncgs = fs->fs_ncg; 2062e7da395aSOwen Roberts 2063e7da395aSOwen Roberts header = sizeof (extent_block_t) - sizeof (extent_t); 2064e7da395aSOwen Roberts max_extents = ((fs->fs_bsize)-header) / sizeof (extent_t); 2065e7da395aSOwen Roberts cwidth = ctotal = 0; 2066e7da395aSOwen Roberts btotal = -1; 2067e7da395aSOwen Roberts bwidth = ncgs; 2068e7da395aSOwen Roberts s = e = 0; 2069e7da395aSOwen Roberts while (e < ncgs) { 2070e7da395aSOwen Roberts /* Advance the end of the window until it accommodates the target. */ 2071e7da395aSOwen Roberts while (ctotal < target && e < ncgs) { 2072e7da395aSOwen Roberts ctotal += fs->fs_cs(fs, e).cs_nbfree; 2073e7da395aSOwen Roberts e++; 2074e7da395aSOwen Roberts } 2075e7da395aSOwen Roberts 2076e7da395aSOwen Roberts /* 2077e7da395aSOwen Roberts * Advance the start of the window until it no longer 2078e7da395aSOwen Roberts * accommodates the target. 2079e7da395aSOwen Roberts */ 2080e7da395aSOwen Roberts while (ctotal >= target && s < e) { 2081e7da395aSOwen Roberts /* See if this is the smallest window so far. */ 2082e7da395aSOwen Roberts cwidth = e - s; 2083e7da395aSOwen Roberts if (cwidth <= bwidth) { 2084e7da395aSOwen Roberts if (cwidth == bwidth && ctotal <= btotal) 2085e7da395aSOwen Roberts goto more; 2086e7da395aSOwen Roberts bwidth = cwidth; 2087e7da395aSOwen Roberts btotal = ctotal; 2088e7da395aSOwen Roberts bs = s; 2089e7da395aSOwen Roberts } 2090e7da395aSOwen Roberts more: 2091e7da395aSOwen Roberts ctotal -= fs->fs_cs(fs, s).cs_nbfree; 2092e7da395aSOwen Roberts s++; 2093e7da395aSOwen Roberts } 2094e7da395aSOwen Roberts } 2095e7da395aSOwen Roberts 2096e7da395aSOwen Roberts /* 2097e7da395aSOwen Roberts * If we cannot allocate the minimum required or we use too many 2098e7da395aSOwen Roberts * extents to do so, return -1. 2099e7da395aSOwen Roberts */ 2100e7da395aSOwen Roberts if (btotal < minblk || bwidth > max_extents) 2101e7da395aSOwen Roberts bs = -1; 2102e7da395aSOwen Roberts 2103e7da395aSOwen Roberts return (bs); 2104e7da395aSOwen Roberts } 2105