1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 34 * $Id: ffs_alloc.c,v 1.52 1998/09/05 14:13:12 phk Exp $ 35 */ 36 37 #include "opt_quota.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/buf.h> 42 #include <sys/proc.h> 43 #include <sys/vnode.h> 44 #include <sys/mount.h> 45 #ifdef notyet 46 #include <sys/sysctl.h> 47 #endif 48 #include <sys/syslog.h> 49 50 #include <ufs/ufs/quota.h> 51 #include <ufs/ufs/inode.h> 52 #include <ufs/ufs/ufsmount.h> 53 54 #include <ufs/ffs/fs.h> 55 #include <ufs/ffs/ffs_extern.h> 56 57 typedef ufs_daddr_t allocfcn_t __P((struct inode *ip, int cg, ufs_daddr_t bpref, 58 int size)); 59 60 static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int)); 61 static ufs_daddr_t 62 ffs_alloccgblk __P((struct inode *, struct buf *, ufs_daddr_t)); 63 #ifdef DIAGNOSTIC 64 static int ffs_checkblk __P((struct inode *, ufs_daddr_t, long)); 65 #endif 66 static void ffs_clusteracct __P((struct fs *, struct cg *, ufs_daddr_t, 67 int)); 68 #ifdef notyet 69 static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t, 70 int)); 71 #endif 72 static ino_t ffs_dirpref __P((struct fs *)); 73 static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int)); 74 static void ffs_fserr __P((struct fs *, u_int, char *)); 75 static u_long ffs_hashalloc 76 __P((struct inode *, int, long, int, allocfcn_t *)); 77 static ino_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int)); 78 static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t, 79 int)); 80 81 /* 82 * Allocate a block in the file system. 83 * 84 * The size of the requested block is given, which must be some 85 * multiple of fs_fsize and <= fs_bsize. 86 * A preference may be optionally specified. If a preference is given 87 * the following hierarchy is used to allocate a block: 88 * 1) allocate the requested block. 89 * 2) allocate a rotationally optimal block in the same cylinder. 90 * 3) allocate a block in the same cylinder group. 91 * 4) quadradically rehash into other cylinder groups, until an 92 * available block is located. 93 * If no block preference is given the following heirarchy is used 94 * to allocate a block: 95 * 1) allocate a block in the cylinder group that contains the 96 * inode for the file. 97 * 2) quadradically rehash into other cylinder groups, until an 98 * available block is located. 99 */ 100 int 101 ffs_alloc(ip, lbn, bpref, size, cred, bnp) 102 register struct inode *ip; 103 ufs_daddr_t lbn, bpref; 104 int size; 105 struct ucred *cred; 106 ufs_daddr_t *bnp; 107 { 108 register struct fs *fs; 109 ufs_daddr_t bno; 110 int cg; 111 #ifdef QUOTA 112 int error; 113 #endif 114 115 *bnp = 0; 116 fs = ip->i_fs; 117 #ifdef DIAGNOSTIC 118 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 119 printf("dev = 0x%lx, bsize = %ld, size = %d, fs = %s\n", 120 (u_long)ip->i_dev, (long)fs->fs_bsize, size, fs->fs_fsmnt); 121 panic("ffs_alloc: bad size"); 122 } 123 if (cred == NOCRED) 124 panic("ffs_alloc: missing credential"); 125 #endif /* DIAGNOSTIC */ 126 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 127 goto nospace; 128 if (cred->cr_uid != 0 && 129 freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) 130 goto nospace; 131 #ifdef QUOTA 132 error = chkdq(ip, (long)btodb(size), cred, 0); 133 if (error) 134 return (error); 135 #endif 136 if (bpref >= fs->fs_size) 137 bpref = 0; 138 if (bpref == 0) 139 cg = ino_to_cg(fs, ip->i_number); 140 else 141 cg = dtog(fs, bpref); 142 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, 143 ffs_alloccg); 144 if (bno > 0) { 145 ip->i_blocks += btodb(size); 146 ip->i_flag |= IN_CHANGE | IN_UPDATE; 147 *bnp = bno; 148 return (0); 149 } 150 #ifdef QUOTA 151 /* 152 * Restore user's disk quota because allocation failed. 153 */ 154 (void) chkdq(ip, (long)-btodb(size), cred, FORCE); 155 #endif 156 nospace: 157 ffs_fserr(fs, cred->cr_uid, "file system full"); 158 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 159 return (ENOSPC); 160 } 161 162 /* 163 * Reallocate a fragment to a bigger size 164 * 165 * The number and size of the old block is given, and a preference 166 * and new size is also specified. The allocator attempts to extend 167 * the original block. Failing that, the regular block allocator is 168 * invoked to get an appropriate block. 169 */ 170 int 171 ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) 172 register struct inode *ip; 173 ufs_daddr_t lbprev; 174 ufs_daddr_t bpref; 175 int osize, nsize; 176 struct ucred *cred; 177 struct buf **bpp; 178 { 179 register struct fs *fs; 180 struct buf *bp; 181 int cg, request, error; 182 ufs_daddr_t bprev, bno; 183 184 *bpp = 0; 185 fs = ip->i_fs; 186 #ifdef DIAGNOSTIC 187 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 188 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 189 printf( 190 "dev = 0x%lx, bsize = %ld, osize = %d, nsize = %d, fs = %s\n", 191 (u_long)ip->i_dev, (long)fs->fs_bsize, osize, 192 nsize, fs->fs_fsmnt); 193 panic("ffs_realloccg: bad size"); 194 } 195 if (cred == NOCRED) 196 panic("ffs_realloccg: missing credential"); 197 #endif /* DIAGNOSTIC */ 198 if (cred->cr_uid != 0 && 199 freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) 200 goto nospace; 201 if ((bprev = ip->i_db[lbprev]) == 0) { 202 printf("dev = 0x%lx, bsize = %ld, bprev = %ld, fs = %s\n", 203 (u_long)ip->i_dev, (long)fs->fs_bsize, (long)bprev, 204 fs->fs_fsmnt); 205 panic("ffs_realloccg: bad bprev"); 206 } 207 /* 208 * Allocate the extra space in the buffer. 209 */ 210 error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp); 211 if (error) { 212 brelse(bp); 213 return (error); 214 } 215 216 if( bp->b_blkno == bp->b_lblkno) { 217 if( lbprev >= NDADDR) 218 panic("ffs_realloccg: lbprev out of range"); 219 bp->b_blkno = fsbtodb(fs, bprev); 220 } 221 222 #ifdef QUOTA 223 error = chkdq(ip, (long)btodb(nsize - osize), cred, 0); 224 if (error) { 225 brelse(bp); 226 return (error); 227 } 228 #endif 229 /* 230 * Check for extension in the existing location. 231 */ 232 cg = dtog(fs, bprev); 233 bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize); 234 if (bno) { 235 if (bp->b_blkno != fsbtodb(fs, bno)) 236 panic("ffs_realloccg: bad blockno"); 237 ip->i_blocks += btodb(nsize - osize); 238 ip->i_flag |= IN_CHANGE | IN_UPDATE; 239 allocbuf(bp, nsize); 240 bp->b_flags |= B_DONE; 241 bzero((char *)bp->b_data + osize, (u_int)nsize - osize); 242 *bpp = bp; 243 return (0); 244 } 245 /* 246 * Allocate a new disk location. 247 */ 248 if (bpref >= fs->fs_size) 249 bpref = 0; 250 switch ((int)fs->fs_optim) { 251 case FS_OPTSPACE: 252 /* 253 * Allocate an exact sized fragment. Although this makes 254 * best use of space, we will waste time relocating it if 255 * the file continues to grow. If the fragmentation is 256 * less than half of the minimum free reserve, we choose 257 * to begin optimizing for time. 258 */ 259 request = nsize; 260 if (fs->fs_minfree <= 5 || 261 fs->fs_cstotal.cs_nffree > 262 fs->fs_dsize * fs->fs_minfree / (2 * 100)) 263 break; 264 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", 265 fs->fs_fsmnt); 266 fs->fs_optim = FS_OPTTIME; 267 break; 268 case FS_OPTTIME: 269 /* 270 * At this point we have discovered a file that is trying to 271 * grow a small fragment to a larger fragment. To save time, 272 * we allocate a full sized block, then free the unused portion. 273 * If the file continues to grow, the `ffs_fragextend' call 274 * above will be able to grow it in place without further 275 * copying. If aberrant programs cause disk fragmentation to 276 * grow within 2% of the free reserve, we choose to begin 277 * optimizing for space. 278 */ 279 request = fs->fs_bsize; 280 if (fs->fs_cstotal.cs_nffree < 281 fs->fs_dsize * (fs->fs_minfree - 2) / 100) 282 break; 283 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", 284 fs->fs_fsmnt); 285 fs->fs_optim = FS_OPTSPACE; 286 break; 287 default: 288 printf("dev = 0x%lx, optim = %ld, fs = %s\n", 289 (u_long)ip->i_dev, (long)fs->fs_optim, fs->fs_fsmnt); 290 panic("ffs_realloccg: bad optim"); 291 /* NOTREACHED */ 292 } 293 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, 294 ffs_alloccg); 295 if (bno > 0) { 296 bp->b_blkno = fsbtodb(fs, bno); 297 if (!DOINGSOFTDEP(ITOV(ip))) 298 ffs_blkfree(ip, bprev, (long)osize); 299 if (nsize < request) 300 ffs_blkfree(ip, bno + numfrags(fs, nsize), 301 (long)(request - nsize)); 302 ip->i_blocks += btodb(nsize - osize); 303 ip->i_flag |= IN_CHANGE | IN_UPDATE; 304 allocbuf(bp, nsize); 305 bp->b_flags |= B_DONE; 306 bzero((char *)bp->b_data + osize, (u_int)nsize - osize); 307 *bpp = bp; 308 return (0); 309 } 310 #ifdef QUOTA 311 /* 312 * Restore user's disk quota because allocation failed. 313 */ 314 (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); 315 #endif 316 brelse(bp); 317 nospace: 318 /* 319 * no space available 320 */ 321 ffs_fserr(fs, cred->cr_uid, "file system full"); 322 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 323 return (ENOSPC); 324 } 325 326 #ifdef notyet 327 SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); 328 329 /* 330 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 331 * 332 * The vnode and an array of buffer pointers for a range of sequential 333 * logical blocks to be made contiguous is given. The allocator attempts 334 * to find a range of sequential blocks starting as close as possible to 335 * an fs_rotdelay offset from the end of the allocation for the logical 336 * block immediately preceeding the current range. If successful, the 337 * physical block numbers in the buffer pointers and in the inode are 338 * changed to reflect the new allocation. If unsuccessful, the allocation 339 * is left unchanged. The success in doing the reallocation is returned. 340 * Note that the error return is not reflected back to the user. Rather 341 * the previous block allocation will be used. 342 */ 343 static int doasyncfree = 1; 344 SYSCTL_INT(_vfs_ffs, FFS_ASYNCFREE, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); 345 346 static int doreallocblks = 1; 347 SYSCTL_INT(_vfs_ffs, FFS_REALLOCBLKS, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 348 349 static int prtrealloc = 0; 350 #endif 351 352 int 353 ffs_reallocblks(ap) 354 struct vop_reallocblks_args /* { 355 struct vnode *a_vp; 356 struct cluster_save *a_buflist; 357 } */ *ap; 358 { 359 #if !defined (not_yes) 360 return (ENOSPC); 361 #else 362 struct fs *fs; 363 struct inode *ip; 364 struct vnode *vp; 365 struct buf *sbp, *ebp; 366 ufs_daddr_t *bap, *sbap, *ebap = 0; 367 struct cluster_save *buflist; 368 ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno; 369 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 370 int i, len, start_lvl, end_lvl, pref, ssize; 371 struct timeval tv; 372 373 if (doreallocblks == 0) 374 return (ENOSPC); 375 vp = ap->a_vp; 376 ip = VTOI(vp); 377 fs = ip->i_fs; 378 if (fs->fs_contigsumsize <= 0) 379 return (ENOSPC); 380 buflist = ap->a_buflist; 381 len = buflist->bs_nchildren; 382 start_lbn = buflist->bs_children[0]->b_lblkno; 383 end_lbn = start_lbn + len - 1; 384 #ifdef DIAGNOSTIC 385 for (i = 0; i < len; i++) 386 if (!ffs_checkblk(ip, 387 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 388 panic("ffs_reallocblks: unallocated block 1"); 389 for (i = 1; i < len; i++) 390 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 391 panic("ffs_reallocblks: non-logical cluster"); 392 blkno = buflist->bs_children[0]->b_blkno; 393 ssize = fsbtodb(fs, fs->fs_frag); 394 for (i = 1; i < len - 1; i++) 395 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) 396 panic("ffs_reallocblks: non-physical cluster %d", i); 397 #endif 398 /* 399 * If the latest allocation is in a new cylinder group, assume that 400 * the filesystem has decided to move and do not force it back to 401 * the previous cylinder group. 402 */ 403 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 404 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 405 return (ENOSPC); 406 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 407 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 408 return (ENOSPC); 409 /* 410 * Get the starting offset and block map for the first block. 411 */ 412 if (start_lvl == 0) { 413 sbap = &ip->i_db[0]; 414 soff = start_lbn; 415 } else { 416 idp = &start_ap[start_lvl - 1]; 417 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { 418 brelse(sbp); 419 return (ENOSPC); 420 } 421 sbap = (ufs_daddr_t *)sbp->b_data; 422 soff = idp->in_off; 423 } 424 /* 425 * Find the preferred location for the cluster. 426 */ 427 pref = ffs_blkpref(ip, start_lbn, soff, sbap); 428 /* 429 * If the block range spans two block maps, get the second map. 430 */ 431 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 432 ssize = len; 433 } else { 434 #ifdef DIAGNOSTIC 435 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 436 panic("ffs_reallocblk: start == end"); 437 #endif 438 ssize = len - (idp->in_off + 1); 439 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) 440 goto fail; 441 ebap = (ufs_daddr_t *)ebp->b_data; 442 } 443 /* 444 * Search the block map looking for an allocation of the desired size. 445 */ 446 if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, 447 len, ffs_clusteralloc)) == 0) 448 goto fail; 449 /* 450 * We have found a new contiguous block. 451 * 452 * First we have to replace the old block pointers with the new 453 * block pointers in the inode and indirect blocks associated 454 * with the file. 455 */ 456 #ifdef DEBUG 457 if (prtrealloc) 458 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number, 459 start_lbn, end_lbn); 460 #endif 461 blkno = newblk; 462 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { 463 if (i == ssize) { 464 bap = ebap; 465 soff = -i; 466 } 467 #ifdef DIAGNOSTIC 468 if (!ffs_checkblk(ip, 469 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 470 panic("ffs_reallocblks: unallocated block 2"); 471 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) 472 panic("ffs_reallocblks: alloc mismatch"); 473 #endif 474 #ifdef DEBUG 475 if (prtrealloc) 476 printf(" %d,", *bap); 477 #endif 478 if (DOINGSOFTDEP(vp)) { 479 if (sbap == &ip->i_db[0] && i < ssize) 480 softdep_setup_allocdirect(ip, start_lbn + i, 481 blkno, *bap, fs->fs_bsize, fs->fs_bsize, 482 buflist->bs_children[i]); 483 else 484 softdep_setup_allocindir_page(ip, start_lbn + i, 485 i < ssize ? sbp : ebp, soff + i, blkno, 486 *bap, buflist->bs_children[i]); 487 } 488 *bap++ = blkno; 489 } 490 /* 491 * Next we must write out the modified inode and indirect blocks. 492 * For strict correctness, the writes should be synchronous since 493 * the old block values may have been written to disk. In practise 494 * they are almost never written, but if we are concerned about 495 * strict correctness, the `doasyncfree' flag should be set to zero. 496 * 497 * The test on `doasyncfree' should be changed to test a flag 498 * that shows whether the associated buffers and inodes have 499 * been written. The flag should be set when the cluster is 500 * started and cleared whenever the buffer or inode is flushed. 501 * We can then check below to see if it is set, and do the 502 * synchronous write only when it has been cleared. 503 */ 504 if (sbap != &ip->i_db[0]) { 505 if (doasyncfree) 506 bdwrite(sbp); 507 else 508 bwrite(sbp); 509 } else { 510 ip->i_flag |= IN_CHANGE | IN_UPDATE; 511 if (!doasyncfree) { 512 gettime(&tv); 513 UFS_UPDATE(vp, &tv, &tv, 1); 514 } 515 } 516 if (ssize < len) 517 if (doasyncfree) 518 bdwrite(ebp); 519 else 520 bwrite(ebp); 521 /* 522 * Last, free the old blocks and assign the new blocks to the buffers. 523 */ 524 #ifdef DEBUG 525 if (prtrealloc) 526 printf("\n\tnew:"); 527 #endif 528 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { 529 if (!DOINGSOFTDEP(vp)) 530 ffs_blkfree(ip, 531 dbtofsb(fs, buflist->bs_children[i]->b_blkno), 532 fs->fs_bsize); 533 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 534 #ifdef DEBUG 535 if (!ffs_checkblk(ip, 536 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 537 panic("ffs_reallocblks: unallocated block 3"); 538 if (prtrealloc) 539 printf(" %d,", blkno); 540 #endif 541 } 542 #ifdef DEBUG 543 if (prtrealloc) { 544 prtrealloc--; 545 printf("\n"); 546 } 547 #endif 548 return (0); 549 550 fail: 551 if (ssize < len) 552 brelse(ebp); 553 if (sbap != &ip->i_db[0]) 554 brelse(sbp); 555 return (ENOSPC); 556 #endif 557 } 558 559 /* 560 * Allocate an inode in the file system. 561 * 562 * If allocating a directory, use ffs_dirpref to select the inode. 563 * If allocating in a directory, the following hierarchy is followed: 564 * 1) allocate the preferred inode. 565 * 2) allocate an inode in the same cylinder group. 566 * 3) quadradically rehash into other cylinder groups, until an 567 * available inode is located. 568 * If no inode preference is given the following heirarchy is used 569 * to allocate an inode: 570 * 1) allocate an inode in cylinder group 0. 571 * 2) quadradically rehash into other cylinder groups, until an 572 * available inode is located. 573 */ 574 int 575 ffs_valloc(pvp, mode, cred, vpp) 576 struct vnode *pvp; 577 int mode; 578 struct ucred *cred; 579 struct vnode **vpp; 580 { 581 register struct inode *pip; 582 register struct fs *fs; 583 register struct inode *ip; 584 ino_t ino, ipref; 585 int cg, error; 586 587 *vpp = NULL; 588 pip = VTOI(pvp); 589 fs = pip->i_fs; 590 if (fs->fs_cstotal.cs_nifree == 0) 591 goto noinodes; 592 593 if ((mode & IFMT) == IFDIR) 594 ipref = ffs_dirpref(fs); 595 else 596 ipref = pip->i_number; 597 if (ipref >= fs->fs_ncg * fs->fs_ipg) 598 ipref = 0; 599 cg = ino_to_cg(fs, ipref); 600 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, 601 (allocfcn_t *)ffs_nodealloccg); 602 if (ino == 0) 603 goto noinodes; 604 error = VFS_VGET(pvp->v_mount, ino, vpp); 605 if (error) { 606 UFS_VFREE(pvp, ino, mode); 607 return (error); 608 } 609 ip = VTOI(*vpp); 610 if (ip->i_mode) { 611 printf("mode = 0%o, inum = %lu, fs = %s\n", 612 ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt); 613 panic("ffs_valloc: dup alloc"); 614 } 615 if (ip->i_blocks) { /* XXX */ 616 printf("free inode %s/%lu had %ld blocks\n", 617 fs->fs_fsmnt, (u_long)ino, (long)ip->i_blocks); 618 ip->i_blocks = 0; 619 } 620 ip->i_flags = 0; 621 /* 622 * Set up a new generation number for this inode. 623 */ 624 if (ip->i_gen == 0 || ++ip->i_gen == 0) 625 ip->i_gen = random() / 2 + 1; 626 return (0); 627 noinodes: 628 ffs_fserr(fs, cred->cr_uid, "out of inodes"); 629 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); 630 return (ENOSPC); 631 } 632 633 /* 634 * Find a cylinder to place a directory. 635 * 636 * The policy implemented by this algorithm is to select from 637 * among those cylinder groups with above the average number of 638 * free inodes, the one with the smallest number of directories. 639 */ 640 static ino_t 641 ffs_dirpref(fs) 642 register struct fs *fs; 643 { 644 int cg, minndir, mincg, avgifree; 645 646 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; 647 minndir = fs->fs_ipg; 648 mincg = 0; 649 for (cg = 0; cg < fs->fs_ncg; cg++) 650 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 651 fs->fs_cs(fs, cg).cs_nifree >= avgifree) { 652 mincg = cg; 653 minndir = fs->fs_cs(fs, cg).cs_ndir; 654 } 655 return ((ino_t)(fs->fs_ipg * mincg)); 656 } 657 658 /* 659 * Select the desired position for the next block in a file. The file is 660 * logically divided into sections. The first section is composed of the 661 * direct blocks. Each additional section contains fs_maxbpg blocks. 662 * 663 * If no blocks have been allocated in the first section, the policy is to 664 * request a block in the same cylinder group as the inode that describes 665 * the file. If no blocks have been allocated in any other section, the 666 * policy is to place the section in a cylinder group with a greater than 667 * average number of free blocks. An appropriate cylinder group is found 668 * by using a rotor that sweeps the cylinder groups. When a new group of 669 * blocks is needed, the sweep begins in the cylinder group following the 670 * cylinder group from which the previous allocation was made. The sweep 671 * continues until a cylinder group with greater than the average number 672 * of free blocks is found. If the allocation is for the first block in an 673 * indirect block, the information on the previous allocation is unavailable; 674 * here a best guess is made based upon the logical block number being 675 * allocated. 676 * 677 * If a section is already partially allocated, the policy is to 678 * contiguously allocate fs_maxcontig blocks. The end of one of these 679 * contiguous blocks and the beginning of the next is physically separated 680 * so that the disk head will be in transit between them for at least 681 * fs_rotdelay milliseconds. This is to allow time for the processor to 682 * schedule another I/O transfer. 683 */ 684 ufs_daddr_t 685 ffs_blkpref(ip, lbn, indx, bap) 686 struct inode *ip; 687 ufs_daddr_t lbn; 688 int indx; 689 ufs_daddr_t *bap; 690 { 691 register struct fs *fs; 692 register int cg; 693 int avgbfree, startcg; 694 ufs_daddr_t nextblk; 695 696 fs = ip->i_fs; 697 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 698 if (lbn < NDADDR) { 699 cg = ino_to_cg(fs, ip->i_number); 700 return (fs->fs_fpg * cg + fs->fs_frag); 701 } 702 /* 703 * Find a cylinder with greater than average number of 704 * unused data blocks. 705 */ 706 if (indx == 0 || bap[indx - 1] == 0) 707 startcg = 708 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 709 else 710 startcg = dtog(fs, bap[indx - 1]) + 1; 711 startcg %= fs->fs_ncg; 712 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 713 for (cg = startcg; cg < fs->fs_ncg; cg++) 714 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 715 fs->fs_cgrotor = cg; 716 return (fs->fs_fpg * cg + fs->fs_frag); 717 } 718 for (cg = 0; cg <= startcg; cg++) 719 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 720 fs->fs_cgrotor = cg; 721 return (fs->fs_fpg * cg + fs->fs_frag); 722 } 723 return (0); 724 } 725 /* 726 * One or more previous blocks have been laid out. If less 727 * than fs_maxcontig previous blocks are contiguous, the 728 * next block is requested contiguously, otherwise it is 729 * requested rotationally delayed by fs_rotdelay milliseconds. 730 */ 731 nextblk = bap[indx - 1] + fs->fs_frag; 732 if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig || 733 bap[indx - fs->fs_maxcontig] + 734 blkstofrags(fs, fs->fs_maxcontig) != nextblk) 735 return (nextblk); 736 /* 737 * Here we convert ms of delay to frags as: 738 * (frags) = (ms) * (rev/sec) * (sect/rev) / 739 * ((sect/frag) * (ms/sec)) 740 * then round up to the next block. 741 */ 742 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / 743 (NSPF(fs) * 1000), fs->fs_frag); 744 return (nextblk); 745 } 746 747 /* 748 * Implement the cylinder overflow algorithm. 749 * 750 * The policy implemented by this algorithm is: 751 * 1) allocate the block in its requested cylinder group. 752 * 2) quadradically rehash on the cylinder group number. 753 * 3) brute force search for a free block. 754 */ 755 /*VARARGS5*/ 756 static u_long 757 ffs_hashalloc(ip, cg, pref, size, allocator) 758 struct inode *ip; 759 int cg; 760 long pref; 761 int size; /* size for data blocks, mode for inodes */ 762 allocfcn_t *allocator; 763 { 764 register struct fs *fs; 765 long result; /* XXX why not same type as we return? */ 766 int i, icg = cg; 767 768 fs = ip->i_fs; 769 /* 770 * 1: preferred cylinder group 771 */ 772 result = (*allocator)(ip, cg, pref, size); 773 if (result) 774 return (result); 775 /* 776 * 2: quadratic rehash 777 */ 778 for (i = 1; i < fs->fs_ncg; i *= 2) { 779 cg += i; 780 if (cg >= fs->fs_ncg) 781 cg -= fs->fs_ncg; 782 result = (*allocator)(ip, cg, 0, size); 783 if (result) 784 return (result); 785 } 786 /* 787 * 3: brute force search 788 * Note that we start at i == 2, since 0 was checked initially, 789 * and 1 is always checked in the quadratic rehash. 790 */ 791 cg = (icg + 2) % fs->fs_ncg; 792 for (i = 2; i < fs->fs_ncg; i++) { 793 result = (*allocator)(ip, cg, 0, size); 794 if (result) 795 return (result); 796 cg++; 797 if (cg == fs->fs_ncg) 798 cg = 0; 799 } 800 return (0); 801 } 802 803 /* 804 * Determine whether a fragment can be extended. 805 * 806 * Check to see if the necessary fragments are available, and 807 * if they are, allocate them. 808 */ 809 static ufs_daddr_t 810 ffs_fragextend(ip, cg, bprev, osize, nsize) 811 struct inode *ip; 812 int cg; 813 long bprev; 814 int osize, nsize; 815 { 816 register struct fs *fs; 817 register struct cg *cgp; 818 struct buf *bp; 819 long bno; 820 int frags, bbase; 821 int i, error; 822 823 fs = ip->i_fs; 824 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) 825 return (0); 826 frags = numfrags(fs, nsize); 827 bbase = fragnum(fs, bprev); 828 if (bbase > fragnum(fs, (bprev + frags - 1))) { 829 /* cannot extend across a block boundary */ 830 return (0); 831 } 832 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 833 (int)fs->fs_cgsize, NOCRED, &bp); 834 if (error) { 835 brelse(bp); 836 return (0); 837 } 838 cgp = (struct cg *)bp->b_data; 839 if (!cg_chkmagic(cgp)) { 840 brelse(bp); 841 return (0); 842 } 843 cgp->cg_time = time_second; 844 bno = dtogd(fs, bprev); 845 for (i = numfrags(fs, osize); i < frags; i++) 846 if (isclr(cg_blksfree(cgp), bno + i)) { 847 brelse(bp); 848 return (0); 849 } 850 /* 851 * the current fragment can be extended 852 * deduct the count on fragment being extended into 853 * increase the count on the remaining fragment (if any) 854 * allocate the extended piece 855 */ 856 for (i = frags; i < fs->fs_frag - bbase; i++) 857 if (isclr(cg_blksfree(cgp), bno + i)) 858 break; 859 cgp->cg_frsum[i - numfrags(fs, osize)]--; 860 if (i != frags) 861 cgp->cg_frsum[i - frags]++; 862 for (i = numfrags(fs, osize); i < frags; i++) { 863 clrbit(cg_blksfree(cgp), bno + i); 864 cgp->cg_cs.cs_nffree--; 865 fs->fs_cstotal.cs_nffree--; 866 fs->fs_cs(fs, cg).cs_nffree--; 867 } 868 fs->fs_fmod = 1; 869 if (DOINGSOFTDEP(ITOV(ip))) 870 softdep_setup_blkmapdep(bp, fs, bprev); 871 bdwrite(bp); 872 return (bprev); 873 } 874 875 /* 876 * Determine whether a block can be allocated. 877 * 878 * Check to see if a block of the appropriate size is available, 879 * and if it is, allocate it. 880 */ 881 static ufs_daddr_t 882 ffs_alloccg(ip, cg, bpref, size) 883 struct inode *ip; 884 int cg; 885 ufs_daddr_t bpref; 886 int size; 887 { 888 register struct fs *fs; 889 register struct cg *cgp; 890 struct buf *bp; 891 register int i; 892 ufs_daddr_t bno, blkno; 893 int allocsiz, error, frags; 894 895 fs = ip->i_fs; 896 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) 897 return (0); 898 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 899 (int)fs->fs_cgsize, NOCRED, &bp); 900 if (error) { 901 brelse(bp); 902 return (0); 903 } 904 cgp = (struct cg *)bp->b_data; 905 if (!cg_chkmagic(cgp) || 906 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { 907 brelse(bp); 908 return (0); 909 } 910 cgp->cg_time = time_second; 911 if (size == fs->fs_bsize) { 912 bno = ffs_alloccgblk(ip, bp, bpref); 913 bdwrite(bp); 914 return (bno); 915 } 916 /* 917 * check to see if any fragments are already available 918 * allocsiz is the size which will be allocated, hacking 919 * it down to a smaller size if necessary 920 */ 921 frags = numfrags(fs, size); 922 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) 923 if (cgp->cg_frsum[allocsiz] != 0) 924 break; 925 if (allocsiz == fs->fs_frag) { 926 /* 927 * no fragments were available, so a block will be 928 * allocated, and hacked up 929 */ 930 if (cgp->cg_cs.cs_nbfree == 0) { 931 brelse(bp); 932 return (0); 933 } 934 bno = ffs_alloccgblk(ip, bp, bpref); 935 bpref = dtogd(fs, bno); 936 for (i = frags; i < fs->fs_frag; i++) 937 setbit(cg_blksfree(cgp), bpref + i); 938 i = fs->fs_frag - frags; 939 cgp->cg_cs.cs_nffree += i; 940 fs->fs_cstotal.cs_nffree += i; 941 fs->fs_cs(fs, cg).cs_nffree += i; 942 fs->fs_fmod = 1; 943 cgp->cg_frsum[i]++; 944 bdwrite(bp); 945 return (bno); 946 } 947 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); 948 if (bno < 0) { 949 brelse(bp); 950 return (0); 951 } 952 for (i = 0; i < frags; i++) 953 clrbit(cg_blksfree(cgp), bno + i); 954 cgp->cg_cs.cs_nffree -= frags; 955 fs->fs_cstotal.cs_nffree -= frags; 956 fs->fs_cs(fs, cg).cs_nffree -= frags; 957 fs->fs_fmod = 1; 958 cgp->cg_frsum[allocsiz]--; 959 if (frags != allocsiz) 960 cgp->cg_frsum[allocsiz - frags]++; 961 blkno = cg * fs->fs_fpg + bno; 962 if (DOINGSOFTDEP(ITOV(ip))) 963 softdep_setup_blkmapdep(bp, fs, blkno); 964 bdwrite(bp); 965 return ((u_long)blkno); 966 } 967 968 /* 969 * Allocate a block in a cylinder group. 970 * 971 * This algorithm implements the following policy: 972 * 1) allocate the requested block. 973 * 2) allocate a rotationally optimal block in the same cylinder. 974 * 3) allocate the next available block on the block rotor for the 975 * specified cylinder group. 976 * Note that this routine only allocates fs_bsize blocks; these 977 * blocks may be fragmented by the routine that allocates them. 978 */ 979 static ufs_daddr_t 980 ffs_alloccgblk(ip, bp, bpref) 981 struct inode *ip; 982 struct buf *bp; 983 ufs_daddr_t bpref; 984 { 985 struct fs *fs; 986 struct cg *cgp; 987 ufs_daddr_t bno, blkno; 988 int cylno, pos, delta; 989 short *cylbp; 990 register int i; 991 992 fs = ip->i_fs; 993 cgp = (struct cg *)bp->b_data; 994 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { 995 bpref = cgp->cg_rotor; 996 goto norot; 997 } 998 bpref = blknum(fs, bpref); 999 bpref = dtogd(fs, bpref); 1000 /* 1001 * if the requested block is available, use it 1002 */ 1003 if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) { 1004 bno = bpref; 1005 goto gotit; 1006 } 1007 if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) { 1008 /* 1009 * Block layout information is not available. 1010 * Leaving bpref unchanged means we take the 1011 * next available free block following the one 1012 * we just allocated. Hopefully this will at 1013 * least hit a track cache on drives of unknown 1014 * geometry (e.g. SCSI). 1015 */ 1016 goto norot; 1017 } 1018 /* 1019 * check for a block available on the same cylinder 1020 */ 1021 cylno = cbtocylno(fs, bpref); 1022 if (cg_blktot(cgp)[cylno] == 0) 1023 goto norot; 1024 /* 1025 * check the summary information to see if a block is 1026 * available in the requested cylinder starting at the 1027 * requested rotational position and proceeding around. 1028 */ 1029 cylbp = cg_blks(fs, cgp, cylno); 1030 pos = cbtorpos(fs, bpref); 1031 for (i = pos; i < fs->fs_nrpos; i++) 1032 if (cylbp[i] > 0) 1033 break; 1034 if (i == fs->fs_nrpos) 1035 for (i = 0; i < pos; i++) 1036 if (cylbp[i] > 0) 1037 break; 1038 if (cylbp[i] > 0) { 1039 /* 1040 * found a rotational position, now find the actual 1041 * block. A panic if none is actually there. 1042 */ 1043 pos = cylno % fs->fs_cpc; 1044 bno = (cylno - pos) * fs->fs_spc / NSPB(fs); 1045 if (fs_postbl(fs, pos)[i] == -1) { 1046 printf("pos = %d, i = %d, fs = %s\n", 1047 pos, i, fs->fs_fsmnt); 1048 panic("ffs_alloccgblk: cyl groups corrupted"); 1049 } 1050 for (i = fs_postbl(fs, pos)[i];; ) { 1051 if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) { 1052 bno = blkstofrags(fs, (bno + i)); 1053 goto gotit; 1054 } 1055 delta = fs_rotbl(fs)[i]; 1056 if (delta <= 0 || 1057 delta + i > fragstoblks(fs, fs->fs_fpg)) 1058 break; 1059 i += delta; 1060 } 1061 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); 1062 panic("ffs_alloccgblk: can't find blk in cyl"); 1063 } 1064 norot: 1065 /* 1066 * no blocks in the requested cylinder, so take next 1067 * available one in this cylinder group. 1068 */ 1069 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); 1070 if (bno < 0) 1071 return (0); 1072 cgp->cg_rotor = bno; 1073 gotit: 1074 blkno = fragstoblks(fs, bno); 1075 ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno); 1076 ffs_clusteracct(fs, cgp, blkno, -1); 1077 cgp->cg_cs.cs_nbfree--; 1078 fs->fs_cstotal.cs_nbfree--; 1079 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; 1080 cylno = cbtocylno(fs, bno); 1081 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; 1082 cg_blktot(cgp)[cylno]--; 1083 fs->fs_fmod = 1; 1084 blkno = cgp->cg_cgx * fs->fs_fpg + bno; 1085 if (DOINGSOFTDEP(ITOV(ip))) 1086 softdep_setup_blkmapdep(bp, fs, blkno); 1087 return (blkno); 1088 } 1089 1090 #ifdef notyet 1091 /* 1092 * Determine whether a cluster can be allocated. 1093 * 1094 * We do not currently check for optimal rotational layout if there 1095 * are multiple choices in the same cylinder group. Instead we just 1096 * take the first one that we find following bpref. 1097 */ 1098 static ufs_daddr_t 1099 ffs_clusteralloc(ip, cg, bpref, len) 1100 struct inode *ip; 1101 int cg; 1102 ufs_daddr_t bpref; 1103 int len; 1104 { 1105 register struct fs *fs; 1106 register struct cg *cgp; 1107 struct buf *bp; 1108 int i, got, run, bno, bit, map; 1109 u_char *mapp; 1110 int32_t *lp; 1111 1112 fs = ip->i_fs; 1113 if (fs->fs_maxcluster[cg] < len) 1114 return (NULL); 1115 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, 1116 NOCRED, &bp)) 1117 goto fail; 1118 cgp = (struct cg *)bp->b_data; 1119 if (!cg_chkmagic(cgp)) 1120 goto fail; 1121 /* 1122 * Check to see if a cluster of the needed size (or bigger) is 1123 * available in this cylinder group. 1124 */ 1125 lp = &cg_clustersum(cgp)[len]; 1126 for (i = len; i <= fs->fs_contigsumsize; i++) 1127 if (*lp++ > 0) 1128 break; 1129 if (i > fs->fs_contigsumsize) { 1130 /* 1131 * This is the first time looking for a cluster in this 1132 * cylinder group. Update the cluster summary information 1133 * to reflect the true maximum sized cluster so that 1134 * future cluster allocation requests can avoid reading 1135 * the cylinder group map only to find no clusters. 1136 */ 1137 lp = &cg_clustersum(cgp)[len - 1]; 1138 for (i = len - 1; i > 0; i--) 1139 if (*lp-- > 0) 1140 break; 1141 fs->fs_maxcluster[cg] = i; 1142 goto fail; 1143 } 1144 /* 1145 * Search the cluster map to find a big enough cluster. 1146 * We take the first one that we find, even if it is larger 1147 * than we need as we prefer to get one close to the previous 1148 * block allocation. We do not search before the current 1149 * preference point as we do not want to allocate a block 1150 * that is allocated before the previous one (as we will 1151 * then have to wait for another pass of the elevator 1152 * algorithm before it will be read). We prefer to fail and 1153 * be recalled to try an allocation in the next cylinder group. 1154 */ 1155 if (dtog(fs, bpref) != cg) 1156 bpref = 0; 1157 else 1158 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); 1159 mapp = &cg_clustersfree(cgp)[bpref / NBBY]; 1160 map = *mapp++; 1161 bit = 1 << (bpref % NBBY); 1162 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { 1163 if ((map & bit) == 0) { 1164 run = 0; 1165 } else { 1166 run++; 1167 if (run == len) 1168 break; 1169 } 1170 if ((got & (NBBY - 1)) != (NBBY - 1)) { 1171 bit <<= 1; 1172 } else { 1173 map = *mapp++; 1174 bit = 1; 1175 } 1176 } 1177 if (got >= cgp->cg_nclusterblks) 1178 goto fail; 1179 /* 1180 * Allocate the cluster that we have found. 1181 */ 1182 for (i = 1; i <= len; i++) 1183 if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i)) 1184 panic("ffs_clusteralloc: map mismatch"); 1185 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); 1186 if (dtog(fs, bno) != cg) 1187 panic("ffs_clusteralloc: allocated out of group"); 1188 len = blkstofrags(fs, len); 1189 for (i = 0; i < len; i += fs->fs_frag) 1190 if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) 1191 panic("ffs_clusteralloc: lost block"); 1192 bdwrite(bp); 1193 return (bno); 1194 1195 fail: 1196 brelse(bp); 1197 return (0); 1198 } 1199 #endif 1200 1201 /* 1202 * Determine whether an inode can be allocated. 1203 * 1204 * Check to see if an inode is available, and if it is, 1205 * allocate it using the following policy: 1206 * 1) allocate the requested inode. 1207 * 2) allocate the next available inode after the requested 1208 * inode in the specified cylinder group. 1209 */ 1210 static ino_t 1211 ffs_nodealloccg(ip, cg, ipref, mode) 1212 struct inode *ip; 1213 int cg; 1214 ufs_daddr_t ipref; 1215 int mode; 1216 { 1217 register struct fs *fs; 1218 register struct cg *cgp; 1219 struct buf *bp; 1220 int error, start, len, loc, map, i; 1221 1222 fs = ip->i_fs; 1223 if (fs->fs_cs(fs, cg).cs_nifree == 0) 1224 return (0); 1225 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1226 (int)fs->fs_cgsize, NOCRED, &bp); 1227 if (error) { 1228 brelse(bp); 1229 return (0); 1230 } 1231 cgp = (struct cg *)bp->b_data; 1232 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { 1233 brelse(bp); 1234 return (0); 1235 } 1236 cgp->cg_time = time_second; 1237 if (ipref) { 1238 ipref %= fs->fs_ipg; 1239 if (isclr(cg_inosused(cgp), ipref)) 1240 goto gotit; 1241 } 1242 start = cgp->cg_irotor / NBBY; 1243 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); 1244 loc = skpc(0xff, len, &cg_inosused(cgp)[start]); 1245 if (loc == 0) { 1246 len = start + 1; 1247 start = 0; 1248 loc = skpc(0xff, len, &cg_inosused(cgp)[0]); 1249 if (loc == 0) { 1250 printf("cg = %d, irotor = %ld, fs = %s\n", 1251 cg, (long)cgp->cg_irotor, fs->fs_fsmnt); 1252 panic("ffs_nodealloccg: map corrupted"); 1253 /* NOTREACHED */ 1254 } 1255 } 1256 i = start + len - loc; 1257 map = cg_inosused(cgp)[i]; 1258 ipref = i * NBBY; 1259 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 1260 if ((map & i) == 0) { 1261 cgp->cg_irotor = ipref; 1262 goto gotit; 1263 } 1264 } 1265 printf("fs = %s\n", fs->fs_fsmnt); 1266 panic("ffs_nodealloccg: block not in map"); 1267 /* NOTREACHED */ 1268 gotit: 1269 if (DOINGSOFTDEP(ITOV(ip))) 1270 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); 1271 setbit(cg_inosused(cgp), ipref); 1272 cgp->cg_cs.cs_nifree--; 1273 fs->fs_cstotal.cs_nifree--; 1274 fs->fs_cs(fs, cg).cs_nifree--; 1275 fs->fs_fmod = 1; 1276 if ((mode & IFMT) == IFDIR) { 1277 cgp->cg_cs.cs_ndir++; 1278 fs->fs_cstotal.cs_ndir++; 1279 fs->fs_cs(fs, cg).cs_ndir++; 1280 } 1281 bdwrite(bp); 1282 return (cg * fs->fs_ipg + ipref); 1283 } 1284 1285 /* 1286 * Free a block or fragment. 1287 * 1288 * The specified block or fragment is placed back in the 1289 * free map. If a fragment is deallocated, a possible 1290 * block reassembly is checked. 1291 */ 1292 void 1293 ffs_blkfree(ip, bno, size) 1294 register struct inode *ip; 1295 ufs_daddr_t bno; 1296 long size; 1297 { 1298 register struct fs *fs; 1299 register struct cg *cgp; 1300 struct buf *bp; 1301 ufs_daddr_t blkno; 1302 int i, error, cg, blk, frags, bbase; 1303 1304 fs = ip->i_fs; 1305 VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size); 1306 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || 1307 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 1308 printf("dev=0x%lx, bno = %d, bsize = %d, size = %ld, fs = %s\n", 1309 (u_long)ip->i_dev, bno, fs->fs_bsize, size, fs->fs_fsmnt); 1310 panic("ffs_blkfree: bad size"); 1311 } 1312 cg = dtog(fs, bno); 1313 if ((u_int)bno >= fs->fs_size) { 1314 printf("bad block %ld, ino %lu\n", 1315 (long)bno, (u_long)ip->i_number); 1316 ffs_fserr(fs, ip->i_uid, "bad block"); 1317 return; 1318 } 1319 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1320 (int)fs->fs_cgsize, NOCRED, &bp); 1321 if (error) { 1322 brelse(bp); 1323 return; 1324 } 1325 cgp = (struct cg *)bp->b_data; 1326 if (!cg_chkmagic(cgp)) { 1327 brelse(bp); 1328 return; 1329 } 1330 cgp->cg_time = time_second; 1331 bno = dtogd(fs, bno); 1332 if (size == fs->fs_bsize) { 1333 blkno = fragstoblks(fs, bno); 1334 if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) { 1335 printf("dev = 0x%lx, block = %ld, fs = %s\n", 1336 (u_long)ip->i_dev, (long)bno, fs->fs_fsmnt); 1337 panic("ffs_blkfree: freeing free block"); 1338 } 1339 ffs_setblock(fs, cg_blksfree(cgp), blkno); 1340 ffs_clusteracct(fs, cgp, blkno, 1); 1341 cgp->cg_cs.cs_nbfree++; 1342 fs->fs_cstotal.cs_nbfree++; 1343 fs->fs_cs(fs, cg).cs_nbfree++; 1344 i = cbtocylno(fs, bno); 1345 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; 1346 cg_blktot(cgp)[i]++; 1347 } else { 1348 bbase = bno - fragnum(fs, bno); 1349 /* 1350 * decrement the counts associated with the old frags 1351 */ 1352 blk = blkmap(fs, cg_blksfree(cgp), bbase); 1353 ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 1354 /* 1355 * deallocate the fragment 1356 */ 1357 frags = numfrags(fs, size); 1358 for (i = 0; i < frags; i++) { 1359 if (isset(cg_blksfree(cgp), bno + i)) { 1360 printf("dev = 0x%lx, block = %ld, fs = %s\n", 1361 (u_long)ip->i_dev, (long)(bno + i), 1362 fs->fs_fsmnt); 1363 panic("ffs_blkfree: freeing free frag"); 1364 } 1365 setbit(cg_blksfree(cgp), bno + i); 1366 } 1367 cgp->cg_cs.cs_nffree += i; 1368 fs->fs_cstotal.cs_nffree += i; 1369 fs->fs_cs(fs, cg).cs_nffree += i; 1370 /* 1371 * add back in counts associated with the new frags 1372 */ 1373 blk = blkmap(fs, cg_blksfree(cgp), bbase); 1374 ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1375 /* 1376 * if a complete block has been reassembled, account for it 1377 */ 1378 blkno = fragstoblks(fs, bbase); 1379 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { 1380 cgp->cg_cs.cs_nffree -= fs->fs_frag; 1381 fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1382 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 1383 ffs_clusteracct(fs, cgp, blkno, 1); 1384 cgp->cg_cs.cs_nbfree++; 1385 fs->fs_cstotal.cs_nbfree++; 1386 fs->fs_cs(fs, cg).cs_nbfree++; 1387 i = cbtocylno(fs, bbase); 1388 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; 1389 cg_blktot(cgp)[i]++; 1390 } 1391 } 1392 fs->fs_fmod = 1; 1393 bdwrite(bp); 1394 } 1395 1396 #ifdef DIAGNOSTIC 1397 /* 1398 * Verify allocation of a block or fragment. Returns true if block or 1399 * fragment is allocated, false if it is free. 1400 */ 1401 static int 1402 ffs_checkblk(ip, bno, size) 1403 struct inode *ip; 1404 ufs_daddr_t bno; 1405 long size; 1406 { 1407 struct fs *fs; 1408 struct cg *cgp; 1409 struct buf *bp; 1410 int i, error, frags, free; 1411 1412 fs = ip->i_fs; 1413 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 1414 printf("bsize = %ld, size = %ld, fs = %s\n", 1415 (long)fs->fs_bsize, size, fs->fs_fsmnt); 1416 panic("ffs_checkblk: bad size"); 1417 } 1418 if ((u_int)bno >= fs->fs_size) 1419 panic("ffs_checkblk: bad block %d", bno); 1420 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), 1421 (int)fs->fs_cgsize, NOCRED, &bp); 1422 if (error) 1423 panic("ffs_checkblk: cg bread failed"); 1424 cgp = (struct cg *)bp->b_data; 1425 if (!cg_chkmagic(cgp)) 1426 panic("ffs_checkblk: cg magic mismatch"); 1427 bno = dtogd(fs, bno); 1428 if (size == fs->fs_bsize) { 1429 free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); 1430 } else { 1431 frags = numfrags(fs, size); 1432 for (free = 0, i = 0; i < frags; i++) 1433 if (isset(cg_blksfree(cgp), bno + i)) 1434 free++; 1435 if (free != 0 && free != frags) 1436 panic("ffs_checkblk: partially free fragment"); 1437 } 1438 brelse(bp); 1439 return (!free); 1440 } 1441 #endif /* DIAGNOSTIC */ 1442 1443 /* 1444 * Free an inode. 1445 */ 1446 int 1447 ffs_vfree( pvp, ino, mode) 1448 struct vnode *pvp; 1449 ino_t ino; 1450 int mode; 1451 { 1452 if (DOINGSOFTDEP(pvp)) { 1453 softdep_freefile(pvp, ino, mode); 1454 return (0); 1455 } 1456 return (ffs_freefile(pvp, ino, mode)); 1457 } 1458 1459 /* 1460 * Do the actual free operation. 1461 * The specified inode is placed back in the free map. 1462 */ 1463 int 1464 ffs_freefile( pvp, ino, mode) 1465 struct vnode *pvp; 1466 ino_t ino; 1467 int mode; 1468 { 1469 register struct fs *fs; 1470 register struct cg *cgp; 1471 register struct inode *pip; 1472 struct buf *bp; 1473 int error, cg; 1474 1475 pip = VTOI(pvp); 1476 fs = pip->i_fs; 1477 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 1478 panic("ffs_vfree: range: dev = 0x%x, ino = %d, fs = %s", 1479 pip->i_dev, ino, fs->fs_fsmnt); 1480 cg = ino_to_cg(fs, ino); 1481 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1482 (int)fs->fs_cgsize, NOCRED, &bp); 1483 if (error) { 1484 brelse(bp); 1485 return (error); 1486 } 1487 cgp = (struct cg *)bp->b_data; 1488 if (!cg_chkmagic(cgp)) { 1489 brelse(bp); 1490 return (0); 1491 } 1492 cgp->cg_time = time_second; 1493 ino %= fs->fs_ipg; 1494 if (isclr(cg_inosused(cgp), ino)) { 1495 printf("dev = 0x%lx, ino = %lu, fs = %s\n", 1496 (u_long)pip->i_dev, (u_long)ino, fs->fs_fsmnt); 1497 if (fs->fs_ronly == 0) 1498 panic("ffs_vfree: freeing free inode"); 1499 } 1500 clrbit(cg_inosused(cgp), ino); 1501 if (ino < cgp->cg_irotor) 1502 cgp->cg_irotor = ino; 1503 cgp->cg_cs.cs_nifree++; 1504 fs->fs_cstotal.cs_nifree++; 1505 fs->fs_cs(fs, cg).cs_nifree++; 1506 if ((mode & IFMT) == IFDIR) { 1507 cgp->cg_cs.cs_ndir--; 1508 fs->fs_cstotal.cs_ndir--; 1509 fs->fs_cs(fs, cg).cs_ndir--; 1510 } 1511 fs->fs_fmod = 1; 1512 bdwrite(bp); 1513 return (0); 1514 } 1515 1516 /* 1517 * Find a block of the specified size in the specified cylinder group. 1518 * 1519 * It is a panic if a request is made to find a block if none are 1520 * available. 1521 */ 1522 static ufs_daddr_t 1523 ffs_mapsearch(fs, cgp, bpref, allocsiz) 1524 register struct fs *fs; 1525 register struct cg *cgp; 1526 ufs_daddr_t bpref; 1527 int allocsiz; 1528 { 1529 ufs_daddr_t bno; 1530 int start, len, loc, i; 1531 int blk, field, subfield, pos; 1532 1533 /* 1534 * find the fragment by searching through the free block 1535 * map for an appropriate bit pattern 1536 */ 1537 if (bpref) 1538 start = dtogd(fs, bpref) / NBBY; 1539 else 1540 start = cgp->cg_frotor / NBBY; 1541 len = howmany(fs->fs_fpg, NBBY) - start; 1542 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start], 1543 (u_char *)fragtbl[fs->fs_frag], 1544 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 1545 if (loc == 0) { 1546 len = start + 1; 1547 start = 0; 1548 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0], 1549 (u_char *)fragtbl[fs->fs_frag], 1550 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 1551 if (loc == 0) { 1552 printf("start = %d, len = %d, fs = %s\n", 1553 start, len, fs->fs_fsmnt); 1554 panic("ffs_alloccg: map corrupted"); 1555 /* NOTREACHED */ 1556 } 1557 } 1558 bno = (start + len - loc) * NBBY; 1559 cgp->cg_frotor = bno; 1560 /* 1561 * found the byte in the map 1562 * sift through the bits to find the selected frag 1563 */ 1564 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 1565 blk = blkmap(fs, cg_blksfree(cgp), bno); 1566 blk <<= 1; 1567 field = around[allocsiz]; 1568 subfield = inside[allocsiz]; 1569 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 1570 if ((blk & field) == subfield) 1571 return (bno + pos); 1572 field <<= 1; 1573 subfield <<= 1; 1574 } 1575 } 1576 printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); 1577 panic("ffs_alloccg: block not in map"); 1578 return (-1); 1579 } 1580 1581 /* 1582 * Update the cluster map because of an allocation or free. 1583 * 1584 * Cnt == 1 means free; cnt == -1 means allocating. 1585 */ 1586 static void 1587 ffs_clusteracct(fs, cgp, blkno, cnt) 1588 struct fs *fs; 1589 struct cg *cgp; 1590 ufs_daddr_t blkno; 1591 int cnt; 1592 { 1593 int32_t *sump; 1594 int32_t *lp; 1595 u_char *freemapp, *mapp; 1596 int i, start, end, forw, back, map, bit; 1597 1598 if (fs->fs_contigsumsize <= 0) 1599 return; 1600 freemapp = cg_clustersfree(cgp); 1601 sump = cg_clustersum(cgp); 1602 /* 1603 * Allocate or clear the actual block. 1604 */ 1605 if (cnt > 0) 1606 setbit(freemapp, blkno); 1607 else 1608 clrbit(freemapp, blkno); 1609 /* 1610 * Find the size of the cluster going forward. 1611 */ 1612 start = blkno + 1; 1613 end = start + fs->fs_contigsumsize; 1614 if (end >= cgp->cg_nclusterblks) 1615 end = cgp->cg_nclusterblks; 1616 mapp = &freemapp[start / NBBY]; 1617 map = *mapp++; 1618 bit = 1 << (start % NBBY); 1619 for (i = start; i < end; i++) { 1620 if ((map & bit) == 0) 1621 break; 1622 if ((i & (NBBY - 1)) != (NBBY - 1)) { 1623 bit <<= 1; 1624 } else { 1625 map = *mapp++; 1626 bit = 1; 1627 } 1628 } 1629 forw = i - start; 1630 /* 1631 * Find the size of the cluster going backward. 1632 */ 1633 start = blkno - 1; 1634 end = start - fs->fs_contigsumsize; 1635 if (end < 0) 1636 end = -1; 1637 mapp = &freemapp[start / NBBY]; 1638 map = *mapp--; 1639 bit = 1 << (start % NBBY); 1640 for (i = start; i > end; i--) { 1641 if ((map & bit) == 0) 1642 break; 1643 if ((i & (NBBY - 1)) != 0) { 1644 bit >>= 1; 1645 } else { 1646 map = *mapp--; 1647 bit = 1 << (NBBY - 1); 1648 } 1649 } 1650 back = start - i; 1651 /* 1652 * Account for old cluster and the possibly new forward and 1653 * back clusters. 1654 */ 1655 i = back + forw + 1; 1656 if (i > fs->fs_contigsumsize) 1657 i = fs->fs_contigsumsize; 1658 sump[i] += cnt; 1659 if (back > 0) 1660 sump[back] -= cnt; 1661 if (forw > 0) 1662 sump[forw] -= cnt; 1663 /* 1664 * Update cluster summary information. 1665 */ 1666 lp = &sump[fs->fs_contigsumsize]; 1667 for (i = fs->fs_contigsumsize; i > 0; i--) 1668 if (*lp-- > 0) 1669 break; 1670 fs->fs_maxcluster[cgp->cg_cgx] = i; 1671 } 1672 1673 /* 1674 * Fserr prints the name of a file system with an error diagnostic. 1675 * 1676 * The form of the error message is: 1677 * fs: error message 1678 */ 1679 static void 1680 ffs_fserr(fs, uid, cp) 1681 struct fs *fs; 1682 u_int uid; 1683 char *cp; 1684 { 1685 struct proc *p = curproc; /* XXX */ 1686 1687 log(LOG_ERR, "pid %d (%s), uid %d on %s: %s\n", p ? p->p_pid : -1, 1688 p ? p->p_comm : "-", uid, fs->fs_fsmnt, cp); 1689 } 1690