/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
 * $Id: ffs_alloc.c,v 1.23 1996/01/05 18:31:45 wollman Exp $
 */

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <vm/vm.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

extern u_long nextgennumber;

typedef daddr_t allocfcn_t __P((struct inode *ip, int cg, daddr_t bpref,
				int size));

static daddr_t	ffs_alloccg __P((struct inode *, int, daddr_t, int));
static daddr_t	ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t));
#ifdef notyet
static daddr_t	ffs_clusteralloc __P((struct inode *, int, daddr_t, int));
#endif
static ino_t	ffs_dirpref __P((struct fs *));
static daddr_t	ffs_fragextend __P((struct inode *, int, long, int, int));
static void	ffs_fserr __P((struct fs *, u_int, char *));
static u_long	ffs_hashalloc
		    __P((struct inode *, int, long, int, allocfcn_t *));
static ino_t	ffs_nodealloccg __P((struct inode *, int, daddr_t, int));
static daddr_t	ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int));

static void	ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int));

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified.
 * If a preference is given, the following hierarchy is used
 * to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given, the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
int
ffs_alloc(ip, lbn, bpref, size, cred, bnp)
	register struct inode *ip;
	daddr_t lbn, bpref;
	int size;
	struct ucred *cred;
	daddr_t *bnp;
{
	register struct fs *fs;
	daddr_t bno;
	int cg;
#ifdef QUOTA
	int error;
#endif

	*bnp = 0;
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%lx, bsize = %ld, size = %d, fs = %s\n",
		    (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* DIAGNOSTIC */
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	error = chkdq(ip, (long)btodb(size), cred, 0);
	if (error)
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, ffs_alloccg);
	if (bno > 0) {
		ip->i_blocks += btodb(size);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(size), cred, FORCE);
#endif
nospace:
	ffs_fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block is given, and a preference
 * and new size is also specified.  The allocator attempts to extend
 * the original block.  Failing that, the regular block allocator is
 * invoked to get an appropriate block.
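 *
 * For example, growing the last block of a file from two fragments to
 * three: if the fragment adjoining the old pair within the same block
 * is still free, ffs_fragextend() claims it and the data never moves;
 * otherwise a new region is allocated, the buffer is redirected to it,
 * and the old fragments are freed.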
 */
int
ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
	register struct inode *ip;
	daddr_t lbprev;
	daddr_t bpref;
	int osize, nsize;
	struct ucred *cred;
	struct buf **bpp;
{
	register struct fs *fs;
	struct buf *bp;
	int cg, request, error;
	daddr_t bprev, bno;

	*bpp = 0;
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		    "dev = 0x%lx, bsize = %ld, osize = %d, "
		    "nsize = %d, fs = %s\n",
		    (u_long)ip->i_dev, fs->fs_bsize, osize,
		    nsize, fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* DIAGNOSTIC */
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if ((bprev = ip->i_db[lbprev]) == 0) {
		printf("dev = 0x%lx, bsize = %ld, bprev = %ld, fs = %s\n",
		    (u_long)ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	/*
	 * Allocate the extra space in the buffer.
	 */
	error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (bp->b_blkno == bp->b_lblkno) {
		if (lbprev >= NDADDR)
			panic("ffs_realloccg: lbprev out of range");
		bp->b_blkno = fsbtodb(fs, bprev);
	}

#ifdef QUOTA
	error = chkdq(ip, (long)btodb(nsize - osize), cred, 0);
	if (error) {
		brelse(bp);
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize);
	if (bno) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("bad blockno");
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment.  Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow.  If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree <= 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment.  To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying.  If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
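		 * For example, with fs_minfree at 8%, the test below
		 * flips back to space optimization once fragmented free
		 * space (cs_nffree) reaches 6% of fs_dsize, i.e.
		 * minfree - 2 percent.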
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%lx, optim = %ld, fs = %s\n",
		    (u_long)ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, ffs_alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		ffs_blkfree(ip, bprev, (long)osize);
		if (nsize < request)
			ffs_blkfree(ip, bno + numfrags(fs, nsize),
			    (long)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
#endif
	brelse(bp);
nospace:
	/*
	 * no space available
	 */
	ffs_fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous is given.  The allocator attempts
 * to find a range of sequential blocks starting as close as possible to
 * an fs_rotdelay offset from the end of the allocation for the logical
 * block immediately preceding the current range.  If successful, the
 * physical block numbers in the buffer pointers and in the inode are
 * changed to reflect the new allocation.  If unsuccessful, the allocation
 * is left unchanged.  The success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user.  Rather
 * the previous block allocation will be used.
 */
static int doasyncfree = 1;
SYSCTL_INT(_debug, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
int
ffs_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
#if !defined (not_yes)
	return (ENOSPC);
#else
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	daddr_t start_lbn, end_lbn, soff, newblk, blkno;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, pref, ssize;
	struct timeval tv;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-cluster");
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
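	 * (dtog() maps a file system block address to its cylinder group,
	 * so the comparison below simply asks whether the first and last
	 * buffers of the range already live in different groups.)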
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	pref = ffs_blkpref(ip, start_lbn, soff, sbap);
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (daddr_t *)ebp->b_data;
	}
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
	    len, ffs_clusteralloc)) == 0)
		goto fail;
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize)
			bap = ebap;
#ifdef DIAGNOSTIC
		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
			panic("ffs_reallocblks: alloc mismatch");
#endif
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk.  In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written.  The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree) {
			tv = time;
			VOP_UPDATE(vp, &tv, &tv, 1);
		}
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
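	 * Only the disk addresses recorded in the buffers change here;
	 * the data itself migrates when the buffers are next written at
	 * their new locations.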
	 */
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
		    fs->fs_bsize);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
	}
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_db[0])
		brelse(sbp);
	return (ENOSPC);
#endif
}

/*
 * Allocate an inode in the file system.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given, the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
int
ffs_valloc(ap)
	struct vop_valloc_args /* {
		struct vnode *a_pvp;
		int a_mode;
		struct ucred *a_cred;
		struct vnode **a_vpp;
	} */ *ap;
{
	register struct vnode *pvp = ap->a_pvp;
	register struct inode *pip;
	register struct fs *fs;
	register struct inode *ip;
	mode_t mode = ap->a_mode;
	ino_t ino, ipref;
	int cg, error;

	*ap->a_vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(fs);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode,
	    (allocfcn_t *)ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
	if (error) {
		VOP_VFREE(pvp, ino, mode);
		return (error);
	}
	ip = VTOI(*ap->a_vpp);
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %ld, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%ld had %ld blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	ip->i_flags = 0;
	/*
	 * Set up a new generation number for this inode.
	 */
	if (++nextgennumber < (u_long)time.tv_sec)
		nextgennumber = time.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder to place a directory.
 *
 * The policy implemented by this algorithm is to select from
 * among those cylinder groups with above the average number of
 * free inodes, the one with the smallest number of directories.
 */
static ino_t
ffs_dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections.
 * The first section is composed of the direct blocks.  Each additional
 * section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file.  If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups.  When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made.  The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found.  If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
 */
daddr_t
ffs_blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = ino_to_cg(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * One or more previous blocks have been laid out.  If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig ||
	    bap[indx - fs->fs_maxcontig] +
	    blkstofrags(fs, fs->fs_maxcontig) != nextblk)
		return (nextblk);
	/*
	 * Here we convert ms of delay to frags as:
	 *	(frags) = (ms) * (rev/sec) * (sect/rev) /
	 *	    ((sect/frag) * (ms/sec))
	 * then round up to the next block.
	 */
	nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
	    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
/*VARARGS5*/
static u_long
ffs_hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	allocfcn_t *allocator;
{
	register struct fs *fs;
	long result;	/* XXX why not same type as we return? */
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
static daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (0);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (0);
		}
	/*
	 * The current fragment can be extended:
	 * deduct the count on the fragment being extended into,
	 * increase the count on the remaining fragment (if any),
	 * and allocate the extended piece.
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
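 *
 * A full-block request is satisfied directly from the free block map.
 * A fragment request first consults the cg_frsum[] summary for an
 * existing run of free fragments of at least the needed size; only if
 * none exists is a whole block split up, with the leftover fragments
 * returned to the free counts.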
 */
static daddr_t
ffs_alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	int error, bno, frags, allocsiz;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (0);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = ffs_alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * Check to see if any fragments are already available.
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary.
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * No fragments were available, so a block will be
		 * allocated and hacked up.
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (0);
		}
		bno = ffs_alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (0);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
static daddr_t
ffs_alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno, blkno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * If the requested block is available, use it.
	 */
	if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
		/*
		 * Block layout information is not available.
		 * Leaving bpref unchanged means we take the
		 * next available free block following the one
		 * we just allocated.
		 * Hopefully this will at least hit a track cache
		 * on drives of unknown geometry (e.g. SCSI).
		 */
		goto norot;
	}
	/*
	 * Check for a block available on the same cylinder.
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	/*
	 * Check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * Found a rotational position; now find the actual
		 * block.  It is a panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("ffs_alloccgblk: cyl groups corrupted");
		}
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("ffs_alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * No blocks in the requested cylinder, so take the next
	 * available one in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = bno;
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod = 1;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

#ifdef notyet
/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group.  Instead we just
 * take the first one that we find following bpref.
 */
static daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int len;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int i, run, bno, bit, map;
	u_char *mapp;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree < len)
		return (0);
	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
	    NOCRED, &bp))
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail;
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (cg_clustersum(cgp)[i] > 0)
			break;
	if (i > fs->fs_contigsumsize)
		goto fail;
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need, as we prefer to get one close to the previous
	 * block allocation.
	 * We do not search before the current preference point, as we
	 * do not want to allocate a block that falls before the
	 * previous one (we would then have to wait for another pass
	 * of the elevator algorithm before it could be read).  We
	 * prefer to fail and be recalled to try an allocation in the
	 * next cylinder group.
	 */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	else
		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
	map = *mapp++;
	bit = 1 << (bpref % NBBY);
	for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) {
		if ((map & bit) == 0) {
			run = 0;
		} else {
			run++;
			if (run == len)
				break;
		}
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	if (i == cgp->cg_nclusterblks)
		goto fail;
	/*
	 * Allocate the cluster that we have found.
	 */
	bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1);
	len = blkstofrags(fs, len);
	for (i = 0; i < len; i += fs->fs_frag)
		if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i)
			panic("ffs_clusteralloc: lost block");
	bdwrite(bp);
	return (bno);

fail:
	brelse(bp);
	return (0);
}
#endif

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
static ino_t
ffs_nodealloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (0);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %ld, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cg_inosused(cgp), ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
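	/*
	 * ipref is at this point an offset within the cylinder group,
	 * so scale it back up to a file-system-wide inode number.
	 */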
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map.  If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
void
ffs_blkfree(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	long size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	daddr_t blkno;
	int i, error, cg, blk, frags, bbase;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%lx, bsize = %ld, size = %ld, fs = %s\n",
		    (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if ((u_int)bno >= fs->fs_size) {
		printf("bad block %ld, ino %ld\n", bno, ip->i_number);
		ffs_fserr(fs, ip->i_uid, "bad block");
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		blkno = fragstoblks(fs, bno);
		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
			printf("dev = 0x%lx, block = %ld, fs = %s\n",
			    (u_long)ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		ffs_setblock(fs, cg_blksfree(cgp), blkno);
		ffs_clusteracct(fs, cgp, blkno, 1);
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * Decrement the counts associated with the old frags.
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * Deallocate the fragment.
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%lx, block = %ld, fs = %s\n",
				    (u_long)ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * Add back in counts associated with the new frags.
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * If a complete block has been reassembled, account for it.
		 */
		blkno = fragstoblks(fs, bbase);
		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(fs, cgp, blkno, 1);
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
}

/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
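 * The in-use bit for the inode is cleared, the free inode counts are
 * credited back, and, when a directory is being freed, the directory
 * counts are decremented as well.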
 */
int
ffs_vfree(ap)
	struct vop_vfree_args /* {
		struct vnode *a_pvp;
		ino_t a_ino;
		int a_mode;
	} */ *ap;
{
	register struct fs *fs;
	register struct cg *cgp;
	register struct inode *pip;
	ino_t ino = ap->a_ino;
	struct buf *bp;
	int error, cg;

	pip = VTOI(ap->a_pvp);
	fs = pip->i_fs;
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ifree: range: dev = 0x%x, ino = %d, fs = %s",
		    pip->i_dev, ino, fs->fs_fsmnt);
	cg = ino_to_cg(fs, ino);
	error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%lx, ino = %ld, fs = %s\n",
		    (u_long)pip->i_dev, ino, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ifree: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((ap->a_mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
	return (0);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
static daddr_t
ffs_mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * Find the fragment by searching through the free block
	 * map for an appropriate bit pattern.
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
	    (u_char *)fragtbl[fs->fs_frag],
	    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
		    (u_char *)fragtbl[fs->fs_frag],
		    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("ffs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * Found the byte in the map; sift through the bits
	 * to find the selected frag.
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
	panic("ffs_alloccg: block not in map");
	return (-1);
}

/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
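 *
 * For example, freeing a block with a free cluster of two blocks just
 * ahead of it (forw == 2) and one free block behind it (back == 1)
 * merges them into a single four-block cluster: sump[4] is credited
 * while the old sump[2] and sump[1] entries are debited.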
 */
static void
ffs_clusteracct(fs, cgp, blkno, cnt)
	struct fs *fs;
	struct cg *cgp;
	daddr_t blkno;
	int cnt;
{
	long *sump;
	u_char *freemapp, *mapp;
	int i, start, end, forw, back, map, bit;

	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp);
	sump = cg_clustersum(cgp);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= cgp->cg_nclusterblks)
		end = cgp->cg_nclusterblks;
	mapp = &freemapp[start / NBBY];
	map = *mapp++;
	bit = 1 << (start % NBBY);
	for (i = start; i < end; i++) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	mapp = &freemapp[start / NBBY];
	map = *mapp--;
	bit = 1 << (start % NBBY);
	for (i = start; i > end; i--) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != 0) {
			bit >>= 1;
		} else {
			map = *mapp--;
			bit = 1 << (NBBY - 1);
		}
	}
	back = start - i;
	/*
	 * Account for old cluster and the possibly new forward and
	 * back clusters.
	 */
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	sump[i] += cnt;
	if (back > 0)
		sump[back] -= cnt;
	if (forw > 0)
		sump[forw] -= cnt;
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
static void
ffs_fserr(fs, uid, cp)
	struct fs *fs;
	u_int uid;
	char *cp;
{

	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
}