/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
 * $Id: ffs_alloc.c,v 1.12 1995/03/26 23:29:09 davidg Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <vm/vm.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>	/* YF - needed for ufs_getlbns() */

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

extern u_long nextgennumber;

static daddr_t	ffs_alloccg __P((struct inode *, int, daddr_t, int));
static daddr_t	ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t));
static daddr_t	ffs_clusteralloc __P((struct inode *, int, daddr_t, int));
static ino_t	ffs_dirpref __P((struct fs *));
static daddr_t	ffs_fragextend __P((struct inode *, int, long, int, int));
static void	ffs_fserr __P((struct fs *, u_int, char *));
static u_long	ffs_hashalloc
		    __P((struct inode *, int, long, int, u_long (*)()));
static ino_t	ffs_nodealloccg __P((struct inode *, int, daddr_t, int));
static daddr_t	ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int));

void	ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int));

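/*
 * Note on the prototypes above: __P() is the <sys/cdefs.h> portability
 * macro of this era; on an ANSI compiler __P((args)) expands to (args),
 * giving real prototypes, while a pre-ANSI (K&R) compiler sees an empty
 * parameter list, so the same declarations compile either way.
 */
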
/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
int
ffs_alloc(ip, lbn, bpref, size, cred, bnp)
	register struct inode *ip;
	daddr_t lbn, bpref;
	int size;
	struct ucred *cred;
	daddr_t *bnp;
{
	register struct fs *fs;
	daddr_t bno;
	int cg;
#ifdef QUOTA
	int error;
#endif

	*bnp = 0;
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%lx, bsize = %ld, size = %d, fs = %s\n",
		    (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* DIAGNOSTIC */
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	error = chkdq(ip, (long)btodb(size), cred, 0);
	if (error)
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
	    (u_long (*)())ffs_alloccg);
	if (bno > 0) {
		ip->i_blocks += btodb(size);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(size), cred, FORCE);
#endif
nospace:
	ffs_fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

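/*
 * Illustrative arithmetic (hypothetical numbers): the freespace() test
 * above is what enforces the minfree reserve.  With the traditional
 * default of fs_minfree = 8 on a file system whose data area holds
 * 100000 fragments, allocations by ordinary users start failing with
 * ENOSPC once free space falls to roughly 8000 fragments (8%); only
 * uid 0 is allowed to allocate out of the reserve.
 */
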
/*
 * Reallocate a fragment to a bigger size
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
int
ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
	register struct inode *ip;
	daddr_t lbprev;
	daddr_t bpref;
	int osize, nsize;
	struct ucred *cred;
	struct buf **bpp;
{
	register struct fs *fs;
	struct buf *bp;
	int cg, request, error;
	daddr_t bprev, bno;

	*bpp = 0;
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		    "dev = 0x%lx, bsize = %ld, osize = %d, "
		    "nsize = %d, fs = %s\n",
		    (u_long)ip->i_dev, fs->fs_bsize, osize,
		    nsize, fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* DIAGNOSTIC */
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if ((bprev = ip->i_db[lbprev]) == 0) {
		printf("dev = 0x%lx, bsize = %ld, bprev = %ld, fs = %s\n",
		    (u_long) ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	/*
	 * Allocate the extra space in the buffer.
	 */
	error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (bp->b_blkno == bp->b_lblkno) {
		if (lbprev >= NDADDR)
			panic("ffs_realloccg: lbprev out of range");
		bp->b_blkno = fsbtodb(fs, bprev);
	}

#ifdef QUOTA
	error = chkdq(ip, (long)btodb(nsize - osize), cred, 0);
	if (error) {
		brelse(bp);
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize);
	if (bno) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("bad blockno");
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree <= 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%lx, optim = %ld, fs = %s\n",
		    (u_long)ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
	    (u_long (*)())ffs_alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		ffs_blkfree(ip, bprev, (long)osize);
		if (nsize < request)
			ffs_blkfree(ip, bno + numfrags(fs, nsize),
			    (long)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
#endif
	brelse(bp);
nospace:
	/*
	 * no space available
	 */
	ffs_fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

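/*
 * Illustrative arithmetic for the policy switch above: the two
 * thresholds form a hysteresis band.  With the traditional default
 * fs_minfree = 8, SPACE flips to TIME when fragmented free space falls
 * to fs_dsize * 8 / 200, i.e. 4% of the data area (half the reserve),
 * while TIME flips back to SPACE only once fragmented free space grows
 * past fs_dsize * (8 - 2) / 100, i.e. 6%.  The 2% gap keeps the file
 * system from flapping between the two policies.
 */
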
/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous is given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible to
 * an fs_rotdelay offset from the end of the allocation for the logical
 * block immediately preceding the current range. If successful, the
 * physical block numbers in the buffer pointers and in the inode are
 * changed to reflect the new allocation. If unsuccessful, the allocation
 * is left unchanged. The success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
 */
#include <sys/sysctl.h>
int doasyncfree = 1;
#ifdef DEBUG
struct ctldebug debug14 = { "doasyncfree", &doasyncfree };
#endif
int
ffs_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	daddr_t start_lbn, end_lbn, soff, newblk, blkno;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, pref, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-cluster");
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	pref = ffs_blkpref(ip, start_lbn, soff, sbap);
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (daddr_t *)ebp->b_data;
	}
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
	    len, (u_long (*)())ffs_clusteralloc)) == 0)
		goto fail;
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize)
			bap = ebap;
#ifdef DIAGNOSTIC
		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
			panic("ffs_reallocblks: alloc mismatch");
#endif
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			VOP_UPDATE(vp, &time, &time, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
		    fs->fs_bsize);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
	}
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_db[0])
		brelse(sbp);
	return (ENOSPC);
}

/*
 * Allocate an inode in the file system.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
int
ffs_valloc(ap)
	struct vop_valloc_args /* {
		struct vnode *a_pvp;
		int a_mode;
		struct ucred *a_cred;
		struct vnode **a_vpp;
	} */ *ap;
{
	register struct vnode *pvp = ap->a_pvp;
	register struct inode *pip;
	register struct fs *fs;
	register struct inode *ip;
	mode_t mode = ap->a_mode;
	ino_t ino, ipref;
	int cg, error;

	*ap->a_vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(fs);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
	if (error) {
		VOP_VFREE(pvp, ino, mode);
		return (error);
	}
	ip = VTOI(*ap->a_vpp);
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %ld, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%ld had %ld blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	ip->i_flags = 0;
	/*
	 * Set up a new generation number for this inode.
	 */
	if (++nextgennumber < (u_long)time.tv_sec)
		nextgennumber = time.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder to place a directory.
 *
 * The policy implemented by this algorithm is to select from
 * among those cylinder groups with above the average number of
 * free inodes, the one with the smallest number of directories.
 */
static ino_t
ffs_dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

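/*
 * Illustrative example (made-up counts): with fs_ncg = 4 and per-group
 * free inode counts of 50, 200, 120 and 200, avgifree = 142.  Only
 * groups 1 and 3 pass the >= avgifree test; whichever of them holds
 * fewer directories wins, and the first inode number of that group
 * (fs_ipg * mincg) is returned as the preference for the new directory.
 */
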
/*
 * Select the desired position for the next block in a file. The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds. This is to allow time for the processor to
 * schedule another I/O transfer.
 */
daddr_t
ffs_blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = ino_to_cg(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
	    blkstofrags(fs, fs->fs_maxcontig) != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}

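/*
 * Worked example of the conversion above (hypothetical geometry): with
 * fs_rotdelay = 4 ms, fs_rps = 60, fs_nsect = 32, NSPF(fs) = 2 and
 * fs_frag = 8, the expression is 4 * 60 * 32 / (2 * 1000) = 3 fragments
 * (integer division), which roundup() raises to the next multiple of
 * fs_frag, i.e. 8.  The next block is thus requested one full block
 * beyond strict contiguity, giving the processor time to schedule the
 * next transfer before the head arrives.
 */
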
/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
/*VARARGS5*/
static u_long
ffs_hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	u_long (*allocator)();
{
	register struct fs *fs;
	long result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (NULL);
}

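/*
 * Worked example (hypothetical sizes): with fs_ncg = 16 and icg = 5,
 * step 1 probes group 5 with the preference intact.  The quadratic
 * rehash then adds 1, 2, 4 and 8 cumulatively, probing groups 6, 8, 12
 * and 4 (20 wraps around to 4).  If all of those fail, the brute force
 * pass starts at (5 + 2) % 16 = 7 and walks the remaining groups in
 * order, wrapping at fs_ncg, so every group is tried before giving up.
 */
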
/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
static daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * the current fragment can be extended
	 * deduct the count on the fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static daddr_t
ffs_alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	int error, bno, frags, allocsiz;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = ffs_alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = ffs_alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

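/*
 * Worked example (hypothetical request): with fs_frag = 8, a request
 * for 3 fragments when every cg_frsum entry from 3 through 7 is zero
 * takes the full-block path above: a whole block is claimed through
 * ffs_alloccgblk, its fragments 3..7 are marked free again, the free
 * fragment counts rise by 5, and cg_frsum[5] records the 5-fragment
 * run left behind for later small allocations.
 */
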
/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
static daddr_t
ffs_alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno, blkno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
		/*
		 * Block layout information is not available.
		 * Leaving bpref unchanged means we take the
		 * next available free block following the one
		 * we just allocated.  Hopefully this will at
		 * least hit a track cache on drives of unknown
		 * geometry (e.g. SCSI).
		 */
		goto norot;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position, now find the actual
		 * block.  It is a panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("ffs_alloccgblk: cyl groups corrupted");
		}
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("ffs_alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod = 1;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group. Instead we just
 * take the first one that we find following bpref.
 */
static daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int len;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int i, run, bno, bit, map;
	u_char *mapp;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree < len)
		return (NULL);
	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
	    NOCRED, &bp))
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail;
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (cg_clustersum(cgp)[i] > 0)
			break;
	if (i > fs->fs_contigsumsize)
		goto fail;
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need as we prefer to get one close to the previous
	 * block allocation.  We do not search before the current
	 * preference point as we do not want to allocate a block
	 * that is allocated before the previous one (as we will
	 * then have to wait for another pass of the elevator
	 * algorithm before it will be read). We prefer to fail and
	 * be recalled to try an allocation in the next cylinder group.
	 */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	else
		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
	map = *mapp++;
	bit = 1 << (bpref % NBBY);
	for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) {
		if ((map & bit) == 0) {
			run = 0;
		} else {
			run++;
			if (run == len)
				break;
		}
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	if (i == cgp->cg_nclusterblks)
		goto fail;
	/*
	 * Allocate the cluster that we have found.
	 */
	bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1);
	len = blkstofrags(fs, len);
	for (i = 0; i < len; i += fs->fs_frag)
		if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i)
			panic("ffs_clusteralloc: lost block");
	brelse(bp);
	return (bno);

fail:
	brelse(bp);
	return (0);
}

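/*
 * Worked example (hypothetical map): asking for len = 3 blocks against
 * cluster-map bits 1 0 1 1 1 ... (one bit per block, 1 = free) starting
 * at the preference point: run reaches 1, resets on the busy block,
 * then counts 1, 2, 3 and the loop breaks with i at the last block of
 * the run.  The cluster start is recovered as i - run + 1, converted to
 * a fragment address with blkstofrags, and claimed block by block.
 */
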
/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
static ino_t
ffs_nodealloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %ld, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cg_inosused(cgp), ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
void
ffs_blkfree(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	long size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	daddr_t blkno;
	int i, error, cg, blk, frags, bbase;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%lx, bsize = %ld, size = %ld, fs = %s\n",
		    (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if ((u_int)bno >= fs->fs_size) {
		printf("bad block %ld, ino %ld\n", bno, ip->i_number);
		ffs_fserr(fs, ip->i_uid, "bad block");
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		blkno = fragstoblks(fs, bno);
		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
			printf("dev = 0x%lx, block = %ld, fs = %s\n",
			    (u_long) ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		ffs_setblock(fs, cg_blksfree(cgp), blkno);
		ffs_clusteracct(fs, cgp, blkno, 1);
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%lx, block = %ld, fs = %s\n",
				    (u_long) ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		blkno = fragstoblks(fs, bbase);
		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(fs, cgp, blkno, 1);
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
}

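/*
 * Worked example (hypothetical block): with fs_frag = 8, freeing the
 * last 2 fragments of a block whose other 6 are already free first
 * raises the free fragment counts by 2, but ffs_isblock then sees a
 * fully free block, so the 8 fragments are converted back into one
 * whole block: cs_nffree is reduced by fs_frag and cs_nbfree bumped,
 * with ffs_fragacct having already retired the old cg_frsum runs.
 */
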
/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
 */
int
ffs_vfree(ap)
	struct vop_vfree_args /* {
		struct vnode *a_pvp;
		ino_t a_ino;
		int a_mode;
	} */ *ap;
{
	register struct fs *fs;
	register struct cg *cgp;
	register struct inode *pip;
	ino_t ino = ap->a_ino;
	struct buf *bp;
	int error, cg;

	pip = VTOI(ap->a_pvp);
	fs = pip->i_fs;
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ifree: range: dev = 0x%x, ino = %d, fs = %s",
		    pip->i_dev, ino, fs->fs_fsmnt);
	cg = ino_to_cg(fs, ino);
	error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%lx, ino = %ld, fs = %s\n",
		    (u_long)pip->i_dev, ino, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ifree: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((ap->a_mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
	return (0);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
static daddr_t
ffs_mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
	    (u_char *)fragtbl[fs->fs_frag],
	    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
		    (u_char *)fragtbl[fs->fs_frag],
		    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("ffs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
	panic("ffs_alloccg: block not in map");
	return (-1);
}

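/*
 * Note on the search above (in outline; the tables live in
 * ffs_tables.c): ffs_mapsearch makes two scanc() passes, first from the
 * fragment rotor to the end of the map and then wrapping back to the
 * start, mirroring the inode rotor scan in ffs_nodealloccg.  The
 * fragtbl[] table summarizes, for each possible free-map byte, which
 * free run lengths that byte contains, letting scanc reject whole bytes
 * at a time; the around[]/inside[] masks then locate, bit by bit, a
 * free run of exactly allocsiz fragments bounded by allocated fragments.
 */
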
/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 */
void
ffs_clusteracct(fs, cgp, blkno, cnt)
	struct fs *fs;
	struct cg *cgp;
	daddr_t blkno;
	int cnt;
{
	long *sump;
	u_char *freemapp, *mapp;
	int i, start, end, forw, back, map, bit;

	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp);
	sump = cg_clustersum(cgp);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= cgp->cg_nclusterblks)
		end = cgp->cg_nclusterblks;
	mapp = &freemapp[start / NBBY];
	map = *mapp++;
	bit = 1 << (start % NBBY);
	for (i = start; i < end; i++) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	mapp = &freemapp[start / NBBY];
	map = *mapp--;
	bit = 1 << (start % NBBY);
	for (i = start; i > end; i--) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != 0) {
			bit >>= 1;
		} else {
			map = *mapp--;
			bit = 1 << (NBBY - 1);
		}
	}
	back = start - i;
	/*
	 * Account for old cluster and the possibly new forward and
	 * back clusters.
	 */
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	sump[i] += cnt;
	if (back > 0)
		sump[back] -= cnt;
	if (forw > 0)
		sump[forw] -= cnt;
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
static void
ffs_fserr(fs, uid, cp)
	struct fs *fs;
	u_int uid;
	char *cp;
{

	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
}