/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
 * $FreeBSD$
 */

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

typedef ufs_daddr_t allocfcn_t __P((struct inode *ip, int cg,
    ufs_daddr_t bpref, int size));

static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
static ufs_daddr_t
	ffs_alloccgblk __P((struct inode *, struct buf *, ufs_daddr_t));
#ifdef DIAGNOSTIC
static int	ffs_checkblk __P((struct inode *, ufs_daddr_t, long));
#endif
static void	ffs_clusteracct __P((struct fs *, struct cg *, ufs_daddr_t,
		    int));
static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t,
		    int));
static ino_t	ffs_dirpref __P((struct inode *));
static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
static void	ffs_fserr __P((struct fs *, u_int, char *));
static u_long	ffs_hashalloc
		    __P((struct inode *, int, long, int, allocfcn_t *));
static ino_t	ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t,
		    int));

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
int
ffs_alloc(ip, lbn, bpref, size, cred, bnp)
	register struct inode *ip;
	ufs_daddr_t lbn, bpref;
	int size;
	struct ucred *cred;
	ufs_daddr_t *bnp;
{
	register struct fs *fs;
	ufs_daddr_t bno;
	int cg;
#ifdef QUOTA
	int error;
#endif

	*bnp = 0;
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, size,
		    fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* DIAGNOSTIC */
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (cred->cr_uid != 0 &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
		goto nospace;
#ifdef QUOTA
	error = chkdq(ip, (long)btodb(size), cred, 0);
	if (error)
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
	    ffs_alloccg);
	if (bno > 0) {
		ip->i_blocks += btodb(size);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(size), cred, FORCE);
#endif
nospace:
	ffs_fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
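
/*
 * Illustrative note (not part of the original source): the minfree check
 * above keeps non-root writers out of the administrative reserve. With
 * fs_minfree = 8 and a data area of 1,000,000 fragments,
 * freespace(fs, fs->fs_minfree) is roughly the free fragment count minus
 * the 80,000-fragment reserve, so a non-root request that would dip into
 * that last 8% fails with ENOSPC even though free space remains for root.
 */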

/*
 * Reallocate a fragment to a bigger size
 *
 * The number and size of the old block is given, and a preference
 * and new size is also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
int
ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
	register struct inode *ip;
	ufs_daddr_t lbprev;
	ufs_daddr_t bpref;
	int osize, nsize;
	struct ucred *cred;
	struct buf **bpp;
{
	register struct fs *fs;
	struct buf *bp;
	int cg, request, error;
	ufs_daddr_t bprev, bno;

	*bpp = 0;
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_realloccg: allocation on suspended filesystem");
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		"dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, osize,
		    nsize, fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* DIAGNOSTIC */
	if (cred->cr_uid != 0 &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0)
		goto nospace;
	if ((bprev = ip->i_db[lbprev]) == 0) {
		printf("dev = %s, bsize = %ld, bprev = %ld, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, (long)bprev,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	/*
	 * Allocate the extra space in the buffer.
	 */
	error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (bp->b_blkno == bp->b_lblkno) {
		if (lbprev >= NDADDR)
			panic("ffs_realloccg: lbprev out of range");
		bp->b_blkno = fsbtodb(fs, bprev);
	}

#ifdef QUOTA
	error = chkdq(ip, (long)btodb(nsize - osize), cred, 0);
	if (error) {
		brelse(bp);
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize);
	if (bno) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("ffs_realloccg: bad blockno");
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree <= 5 ||
		    fs->fs_cstotal.cs_nffree >
		    (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = %s, optim = %ld, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
	    ffs_alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		if (!DOINGSOFTDEP(ITOV(ip)))
			ffs_blkfree(ip, bprev, (long)osize);
		if (nsize < request)
			ffs_blkfree(ip, bno + numfrags(fs, nsize),
			    (long)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
#endif
	brelse(bp);
nospace:
	/*
	 * no space available
	 */
	ffs_fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
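
/*
 * Illustrative note (not part of the original source): the two optim
 * thresholds above form a hysteresis band. With fs_minfree = 8 and
 * fs_dsize = 1,000,000 fragments, SPACE flips to TIME once free
 * fragments drop to 1,000,000 * 8 / 200 = 40,000 or fewer (less than
 * half the 8% reserve), while TIME flips back to SPACE only once free
 * fragments climb to 1,000,000 * (8 - 2) / 100 = 60,000 (within 2% of
 * the reserve). The gap between 4% and 6% keeps the optimization mode
 * from oscillating on every fragment allocation.
 */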

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous is given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible to
 * an fs_rotdelay offset from the end of the allocation for the logical
 * block immediately preceding the current range. If successful, the
 * physical block numbers in the buffer pointers and in the inode are
 * changed to reflect the new allocation. If unsuccessful, the allocation
 * is left unchanged. The success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
 */

SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");

static int doasyncfree = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");

static int doreallocblks = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");

#ifdef DEBUG
static volatile int prtrealloc = 0;
#endif

int
ffs_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, pref, ssize;

	if (doreallocblks == 0)
		return (ENOSPC);
	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	pref = ffs_blkpref(ip, start_lbn, soff, sbap);
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs_daddr_t *)ebp->b_data;
	}
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
	    len, ffs_clusteralloc)) == 0)
		goto fail;
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
		    start_lbn, end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", *bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			UFS_UPDATE(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ip,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_db[0])
		brelse(sbp);
	return (ENOSPC);
}

/*
 * Allocate an inode in the file system.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
int
ffs_valloc(pvp, mode, cred, vpp)
	struct vnode *pvp;
	int mode;
	struct ucred *cred;
	struct vnode **vpp;
{
	register struct inode *pip;
	register struct fs *fs;
	register struct inode *ip;
	ino_t ino, ipref;
	int cg, error;

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track how many directories have been created in a row
	 * in the same cylinder group without intervening files.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode,
	    (allocfcn_t *)ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	error = VFS_VGET(pvp->v_mount, ino, vpp);
	if (error) {
		UFS_VFREE(pvp, ino, mode);
		return (error);
	}
	ip = VTOI(*vpp);
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %lu, fs = %s\n",
		    ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}
	if (ip->i_blocks && (fs->fs_flags & FS_UNCLEAN) == 0) {  /* XXX */
		printf("free inode %s/%lu had %ld blocks\n",
		    fs->fs_fsmnt, (u_long)ino, (long)ip->i_blocks);
		ip->i_blocks = 0;
	}
	ip->i_flags = 0;
	/*
	 * Set up a new generation number for this inode.
	 */
	if (ip->i_gen == 0 || ++ip->i_gen == 0)
		ip->i_gen = random() / 2 + 1;
	return (0);
noinodes:
	ffs_fserr(fs, cred->cr_uid, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static ino_t
ffs_dirpref(pip)
	struct inode *pip;
{
	register struct fs *fs;
	int cg, prefcg, dirsize, cgsize;
	int avgifree, avgbfree, avgndir, curdirsize;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	if (ITOV(pip)->v_flag & VROOT) {
		prefcg = arc4random() % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Compute various limits which are used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - fs->fs_ipg / 4;
	if (minifree < 0)
		minifree = 0;
	minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4;
	if (minbfree < 0)
		minbfree = 0;
	cgsize = fs->fs_fsize * fs->fs_fpg;
	dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
	curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	maxcontigdirs = min(cgsize / dirsize, 255);
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
		    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop when we have deficit in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}
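
/*
 * Illustrative note (not part of the original source): maxcontigdirs
 * above estimates how many back-to-back directories one cylinder group
 * can absorb before their expected file data would overflow it. For
 * example, with a 64 MB cylinder group, fs_avgfilesize = 16384, and
 * fs_avgfpdir = 64, the expected footprint per directory is
 * 16384 * 64 = 1 MB, so maxcontigdirs is about 64 (further capped at
 * 255 and at fs_ipg / fs_avgfpdir). After that many consecutive
 * directory creations in one group, ffs_dirpref moves on to spread
 * the load across other groups.
 */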

/*
 * Select the desired position for the next block in a file. The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds. This is to allow time for the processor to
 * schedule another I/O transfer.
 */
ufs_daddr_t
ffs_blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_daddr_t lbn;
	int indx;
	ufs_daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	ufs_daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig ||
	    bap[indx - fs->fs_maxcontig] +
	    blkstofrags(fs, fs->fs_maxcontig) != nextblk)
		return (nextblk);
	/*
	 * Here we convert ms of delay to frags as:
	 *	(frags) = (ms) * (rev/sec) * (sect/rev) /
	 *		  ((sect/frag) * (ms/sec))
	 * then round up to the next block.
	 */
	nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
	    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}
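
/*
 * Illustrative note (not part of the original source): for a disk
 * described as fs_rps = 60 rev/sec and fs_nsect = 32 sectors/track,
 * with fs_rotdelay = 4 ms and one sector per fragment (NSPF == 1),
 * the conversion above yields 4 * 60 * 32 / (1 * 1000) = 7 fragments
 * after integer truncation, which roundup() then raises to the next
 * multiple of fs_frag (8, with 8 fragments per block). The skipped
 * gap gives the head time to pass while the next transfer is
 * being scheduled.
 */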

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
/*VARARGS5*/
static u_long
ffs_hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	allocfcn_t *allocator;
{
	register struct fs *fs;
	long result;	/* XXX why not same type as we return? */
	int i, icg = cg;

#ifdef DIAGNOSTIC
	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_hashalloc: allocation on suspended filesystem");
#endif
	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}
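
/*
 * Illustrative note (not part of the original source): starting from
 * preferred group icg, the quadratic rehash above adds 1, 2, 4, 8, ...
 * so it probes groups icg+1, icg+3, icg+7, icg+15, ... (mod fs_ncg).
 * With icg = 5 and fs_ncg = 32 the sequence is 6, 8, 12, 20, 4.
 * Groups the doubling steps skip over are covered by the brute force
 * pass, which begins at icg + 2 because icg itself and icg + 1 have
 * already been tried.
 */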

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
static ufs_daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (0);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_time = time_second;
	bno = dtogd(fs, bprev);
	blksfree = cg_blksfree(cgp);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(blksfree, bno + i)) {
			brelse(bp);
			return (0);
		}
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(blksfree, bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(blksfree, bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod = 1;
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, bprev);
	bdwrite(bp);
	return (bprev);
}
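
/*
 * Illustrative note (not part of the original source): cg_frsum[n]
 * counts the free runs of exactly n fragments in this cylinder group.
 * Suppose fs_frag = 8, a file holds 2 fragments of a block, and the
 * remaining 6 are free, so cg_frsum[6] is up by one. Extending the
 * file to 4 fragments consumes 2 of them: the run of 6 disappears
 * (cg_frsum[6]--) and a run of 4 remains (cg_frsum[4]++), which is
 * exactly the bookkeeping performed above. ffs_alloccg below keeps
 * the same summary current when it carves fragments out of a full
 * block.
 */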

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static ufs_daddr_t
ffs_alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	ufs_daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	ufs_daddr_t bno, blkno;
	int allocsiz, error, frags;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_time = time_second;
	if (size == fs->fs_bsize) {
		bno = ffs_alloccgblk(ip, bp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	blksfree = cg_blksfree(cgp);
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (0);
		}
		bno = ffs_alloccgblk(ip, bp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(blksfree, bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (0);
	}
	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	blkno = cg * fs->fs_fpg + bno;
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, blkno);
	bdwrite(bp);
	return ((u_long)blkno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
static ufs_daddr_t
ffs_alloccgblk(ip, bp, bpref)
	struct inode *ip;
	struct buf *bp;
	ufs_daddr_t bpref;
{
	struct fs *fs;
	struct cg *cgp;
	ufs_daddr_t bno, blkno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp);
	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (ffs_isblock(fs, blksfree, fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
		/*
		 * Block layout information is not available.
		 * Leaving bpref unchanged means we take the
		 * next available free block following the one
		 * we just allocated. Hopefully this will at
		 * least hit a track cache on drives of unknown
		 * geometry (e.g. SCSI).
		 */
		goto norot;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position, now find the actual
		 * block. A panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("ffs_alloccgblk: cyl groups corrupted");
		}
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (ffs_isblock(fs, blksfree, bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("ffs_alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = bno;
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, (long)blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod = 1;
	blkno = cgp->cg_cgx * fs->fs_fpg + bno;
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, blkno);
	return (blkno);
}
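
/*
 * Illustrative note (not part of the original source): the allocation
 * code above mixes two granularities. With fs_frag = 8, fragment 21 in
 * a cylinder group sits in block fragstoblks(fs, 21) = 2, whose first
 * fragment is blkstofrags(fs, 2) = 16; blknum(fs, 21) = 16 rounds a
 * fragment address down to its block boundary, and fragnum(fs, 21) = 5
 * is the offset within the block. The blksfree map is indexed by
 * fragment, while ffs_isblock and the cluster code work on whole-block
 * indices.
 */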

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group. Instead we just
 * take the first one that we find following bpref.
 */
static ufs_daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
	struct inode *ip;
	int cg;
	ufs_daddr_t bpref;
	int len;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int i, got, run, bno, bit, map;
	u_char *mapp;
	int32_t *lp;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if (fs->fs_maxcluster[cg] < len)
		return (0);
	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
	    NOCRED, &bp))
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &cg_clustersum(cgp)[len];
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->fs_contigsumsize) {
		/*
		 * This is the first time looking for a cluster in this
		 * cylinder group. Update the cluster summary information
		 * to reflect the true maximum sized cluster so that
		 * future cluster allocation requests can avoid reading
		 * the cylinder group map only to find no clusters.
		 */
		lp = &cg_clustersum(cgp)[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		fs->fs_maxcluster[cg] = i;
		goto fail;
	}
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need as we prefer to get one close to the previous
	 * block allocation. We do not search before the current
	 * preference point as we do not want to allocate a block
	 * that is allocated before the previous one (as we will
	 * then have to wait for another pass of the elevator
	 * algorithm before it will be read). We prefer to fail and
	 * be recalled to try an allocation in the next cylinder group.
	 */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	else
		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
	map = *mapp++;
	bit = 1 << (bpref % NBBY);
	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
		if ((map & bit) == 0) {
			run = 0;
		} else {
			run++;
			if (run == len)
				break;
		}
		if ((got & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	if (got >= cgp->cg_nclusterblks)
		goto fail;
	/*
	 * Allocate the cluster that we have found.
	 */
	blksfree = cg_blksfree(cgp);
	for (i = 1; i <= len; i++)
		if (!ffs_isblock(fs, blksfree, got - run + i))
			panic("ffs_clusteralloc: map mismatch");
	bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
	if (dtog(fs, bno) != cg)
		panic("ffs_clusteralloc: allocated out of group");
	len = blkstofrags(fs, len);
	for (i = 0; i < len; i += fs->fs_frag)
		if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i)
			panic("ffs_clusteralloc: lost block");
	bdwrite(bp);
	return (bno);

fail:
	brelse(bp);
	return (0);
}
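
/*
 * Illustrative note (not part of the original source): cg_clustersum[n]
 * counts the free clusters of exactly n contiguous blocks in the group,
 * up to fs_contigsumsize. If a group's free space consists of runs of
 * 1, 3, and 3 blocks, then clustersum[1] = 1 and clustersum[3] = 2, and
 * a request for len = 2 succeeds because a count at some index >= 2 is
 * nonzero, even though no run of exactly 2 exists. fs_maxcluster[] is a
 * per-group in-core cache of the longest known run, lowered when a
 * search fails so the cylinder group map need not be reread just to
 * fail again.
 */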

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
static ino_t
ffs_nodealloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	ufs_daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	u_int8_t *inosused;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_time = time_second;
	inosused = cg_inosused(cgp);
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(inosused, ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &inosused[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &inosused[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %ld, fs = %s\n",
			    cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = inosused[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */
gotit:
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
	setbit(inosused, ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}
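
/*
 * Illustrative note (not part of the original source): skpc(0xff, len, p)
 * skips bytes equal to 0xff (all eight inodes in use) and returns how
 * many bytes remain starting at the first byte that differs, or 0 if
 * all len bytes matched. If the scan starts at byte index 4 with
 * len = 10 and skpc returns 3, the first byte with a free inode is at
 * index 4 + 10 - 3 = 11; the bit loop above then locates the first
 * clear bit within that byte and records it in cg_irotor.
 */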

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
void
ffs_blkfree(ip, bno, size)
	register struct inode *ip;
	ufs_daddr_t bno;
	long size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	ufs_daddr_t fragno, cgbno;
	int i, error, cg, blk, frags, bbase;
	u_int8_t *blksfree;
#ifdef DIAGNOSTIC
	struct vnode *vp;
#endif

	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if ((vp = ITOV(ip)) != NULL && vp->v_mount != NULL &&
	    (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED))
		panic("ffs_blkfree: deallocation on suspended filesystem");
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
		printf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n",
		    devtoname(ip->i_dev), (long)bno, (long)fs->fs_bsize, size,
		    fs->fs_fsmnt);
		panic("ffs_blkfree: bad size");
	}
#endif
	if ((ip->i_devvp->v_flag & VCOPYONWRITE) &&
	    ffs_snapblkfree(ip, bno, size))
		return;
	VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size);
	cg = dtog(fs, bno);
	if ((u_int)bno >= fs->fs_size) {
		printf("bad block %ld, ino %lu\n",
		    (long)bno, (u_long)ip->i_number);
		ffs_fserr(fs, ip->i_uid, "bad block");
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_time = time_second;
	cgbno = dtogd(fs, bno);
	blksfree = cg_blksfree(cgp);
	if (size == fs->fs_bsize) {
		fragno = fragstoblks(fs, cgbno);
		if (!ffs_isfreeblock(fs, blksfree, fragno)) {
			printf("dev = %s, block = %ld, fs = %s\n",
			    devtoname(ip->i_dev), (long)bno, fs->fs_fsmnt);
			panic("ffs_blkfree: freeing free block");
		}
		ffs_setblock(fs, blksfree, fragno);
		ffs_clusteracct(fs, cgp, fragno, 1);
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, cgbno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, cgbno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = cgbno - fragnum(fs, cgbno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(blksfree, cgbno + i)) {
				printf("dev = %s, block = %ld, fs = %s\n",
				    devtoname(ip->i_dev), (long)(bno + i),
				    fs->fs_fsmnt);
				panic("ffs_blkfree: freeing free frag");
			}
			setbit(blksfree, cgbno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		fragno = fragstoblks(fs, bbase);
		if (ffs_isblock(fs, blksfree, fragno)) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(fs, cgp, fragno, 1);
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
}

#ifdef DIAGNOSTIC
/*
 * Verify allocation of a block or fragment. Returns true if block or
 * fragment is allocated, false if it is free.
 */
static int
ffs_checkblk(ip, bno, size)
	struct inode *ip;
	ufs_daddr_t bno;
	long size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	int i, error, frags, free;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("bsize = %ld, size = %ld, fs = %s\n",
		    (long)fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_checkblk: bad size");
	}
	if ((u_int)bno >= fs->fs_size)
		panic("ffs_checkblk: bad block %d", bno);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		panic("ffs_checkblk: cg bread failed");
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		panic("ffs_checkblk: cg magic mismatch");
	bp->b_xflags |= BX_BKGRDWRITE;
	blksfree = cg_blksfree(cgp);
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		free = ffs_isblock(fs, blksfree, fragstoblks(fs, bno));
	} else {
		frags = numfrags(fs, size);
		for (free = 0, i = 0; i < frags; i++)
			if (isset(blksfree, bno + i))
				free++;
		if (free != 0 && free != frags)
			panic("ffs_checkblk: partially free fragment");
	}
	brelse(bp);
	return (!free);
}
#endif /* DIAGNOSTIC */

/*
 * Free an inode.
 */
int
ffs_vfree(pvp, ino, mode)
	struct vnode *pvp;
	ino_t ino;
	int mode;
{
	if (DOINGSOFTDEP(pvp)) {
		softdep_freefile(pvp, ino, mode);
		return (0);
	}
	return (ffs_freefile(VTOI(pvp), ino, mode));
}

/*
 * Do the actual free operation.
 * The specified inode is placed back in the free map.
 */
int
ffs_freefile(pip, ino, mode)
	struct inode *pip;
	ino_t ino;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg;
	u_int8_t *inosused;

	fs = pip->i_fs;
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ffs_vfree: range: dev = (%d,%d), ino = %d, fs = %s",
		    major(pip->i_dev), minor(pip->i_dev), ino, fs->fs_fsmnt);
	cg = ino_to_cg(fs, ino);
	error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_time = time_second;
	inosused = cg_inosused(cgp);
	ino %= fs->fs_ipg;
	if (isclr(inosused, ino)) {
		printf("dev = %s, ino = %lu, fs = %s\n", devtoname(pip->i_dev),
		    (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ffs_vfree: freeing free inode");
	}
	clrbit(inosused, ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
	return (0);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block and none are
 * available.
 */
static ufs_daddr_t
ffs_mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	ufs_daddr_t bpref;
	int allocsiz;
{
	ufs_daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;
	u_int8_t *blksfree;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	blksfree = cg_blksfree(cgp);
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((u_int)len, (u_char *)&blksfree[start],
		(u_char *)fragtbl[fs->fs_frag],
		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len, (u_char *)&blksfree[0],
			(u_char *)fragtbl[fs->fs_frag],
			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("ffs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, blksfree, bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
	panic("ffs_alloccg: block not in map");
	return (-1);
}
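
/*
 * Illustrative note (not part of the original source): the two-level
 * search above first uses scanc() with fragtbl[] to find a map byte
 * whose free-run summary admits a run of allocsiz fragments, then
 * slides a window across that block's bits. around[allocsiz] masks the
 * candidate run plus one guard bit on each side, and inside[allocsiz]
 * has the run bits set with the guard bits clear, so a match means
 * allocsiz free fragments bounded by allocated fragments or the block
 * edge, i.e. a free run of exactly the requested size; allocsiz was
 * chosen from cg_frsum, so such a run is known to exist.
 */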

/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 */
static void
ffs_clusteracct(fs, cgp, blkno, cnt)
	struct fs *fs;
	struct cg *cgp;
	ufs_daddr_t blkno;
	int cnt;
{
	int32_t *sump;
	int32_t *lp;
	u_char *freemapp, *mapp;
	int i, start, end, forw, back, map, bit;

	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp);
	sump = cg_clustersum(cgp);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= cgp->cg_nclusterblks)
		end = cgp->cg_nclusterblks;
	mapp = &freemapp[start / NBBY];
	map = *mapp++;
	bit = 1 << (start % NBBY);
	for (i = start; i < end; i++) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	mapp = &freemapp[start / NBBY];
	map = *mapp--;
	bit = 1 << (start % NBBY);
	for (i = start; i > end; i--) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != 0) {
			bit >>= 1;
		} else {
			map = *mapp--;
			bit = 1 << (NBBY - 1);
		}
	}
	back = start - i;
	/*
	 * Account for old cluster and the possibly new forward and
	 * back clusters.
	 */
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	sump[i] += cnt;
	if (back > 0)
		sump[back] -= cnt;
	if (forw > 0)
		sump[forw] -= cnt;
	/*
	 * Update cluster summary information.
	 */
	lp = &sump[fs->fs_contigsumsize];
	for (i = fs->fs_contigsumsize; i > 0; i--)
		if (*lp-- > 0)
			break;
	fs->fs_maxcluster[cgp->cg_cgx] = i;
}
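
/*
 * Illustrative note (not part of the original source): suppose block b
 * is freed (cnt == 1) with 2 free blocks immediately before it and 3
 * immediately after. The free gives back = 2 and forw = 3, so one
 * cluster of 2 + 3 + 1 = 6 now exists where separate clusters of 2 and
 * 3 used to: sump[6] += 1, sump[2] -= 1, sump[3] -= 1. An allocation
 * (cnt == -1) applies the same arithmetic with the opposite sign,
 * splitting one cluster into two instead of joining two into one.
 */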

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
static void
ffs_fserr(fs, uid, cp)
	struct fs *fs;
	u_int uid;
	char *cp;
{
	struct proc *p = curproc;	/* XXX */

	log(LOG_ERR, "pid %d (%s), uid %d on %s: %s\n", p ? p->p_pid : -1,
	    p ? p->p_comm : "-", uid, fs->fs_fsmnt, cp);
}

/*
 * This function provides the capability for the fsck program to
 * update an active filesystem. Six operations are provided:
 *
 * adjrefcnt(inode, amt) - adjusts the reference count on the
 *	specified inode by the specified amount. Under normal
 *	operation the count should always go down. Decrementing
 *	the count to zero will cause the inode to be freed.
 * adjblkcnt(inode, amt) - adjusts the number of blocks used by
 *	the specified inode by the specified amount.
 * freedirs(inode, count) - directory inodes [inode..inode + count - 1]
 *	are marked as free. Inodes should never have to be marked
 *	as in use.
 * freefiles(inode, count) - file inodes [inode..inode + count - 1]
 *	are marked as free. Inodes should never have to be marked
 *	as in use.
 * freeblks(blockno, size) - blocks [blockno..blockno + size - 1]
 *	are marked as free. Blocks should never have to be marked
 *	as in use.
 * setflags(flags, set/clear) - the fs_flags field has the specified
 *	flags set (second parameter +1) or cleared (second parameter -1).
 */

static int sysctl_ffs_fsck __P((SYSCTL_HANDLER_ARGS));

SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT,
	0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count");

SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust Inode Used Blocks Count");

SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR,
	sysctl_ffs_fsck, "Free Range of Directory Inodes");

SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR,
	sysctl_ffs_fsck, "Free Range of File Inodes");

SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR,
	sysctl_ffs_fsck, "Free Range of Blocks");

SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR,
	sysctl_ffs_fsck, "Change Filesystem Flags");

#ifdef DEBUG
static int fsckcmds = 0;
SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
#endif /* DEBUG */
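
/*
 * Illustrative sketch (not part of the original source) of how a
 * userland tool such as fsck might drive this interface; the field
 * names follow the struct fsck_cmd usage in the handler below, while
 * the exact header and MIB string are assumptions:
 *
 *	struct fsck_cmd cmd;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.version = FFS_CMD_VERSION;
 *	cmd.handle = fd;		// open descriptor on the mount
 *	cmd.value = ino;		// inode to adjust
 *	cmd.size = -1;			// decrement link count by one
 *	if (sysctlbyname("vfs.ffs.adjrefcnt", NULL, NULL,
 *	    &cmd, sizeof(cmd)) < 0)
 *		err(1, "adjrefcnt");
 *
 * The handler validates cmd.version, resolves cmd.handle to a ufs
 * mount, and rejects read-only filesystems before dispatching on the
 * sysctl oid number.
 */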
"set" : "clear"); 1916 #endif /* DEBUG */ 1917 if (cmd.size > 0) 1918 fs->fs_flags |= (long)cmd.value; 1919 else 1920 fs->fs_flags &= ~(long)cmd.value; 1921 break; 1922 1923 case FFS_ADJ_REFCNT: 1924 #ifdef DEBUG 1925 if (fsckcmds) { 1926 printf("%s: adjust inode %d count by %ld\n", 1927 mp->mnt_stat.f_mntonname, (ino_t)cmd.value, 1928 cmd.size); 1929 } 1930 #endif /* DEBUG */ 1931 if ((error = VFS_VGET(mp, (ino_t)cmd.value, &vp)) != 0) 1932 break; 1933 ip = VTOI(vp); 1934 ip->i_nlink += cmd.size; 1935 ip->i_effnlink += cmd.size; 1936 ip->i_flag |= IN_CHANGE; 1937 if (DOINGSOFTDEP(vp)) 1938 softdep_change_linkcnt(ip); 1939 vput(vp); 1940 break; 1941 1942 case FFS_ADJ_BLKCNT: 1943 #ifdef DEBUG 1944 if (fsckcmds) { 1945 printf("%s: adjust inode %d block count by %ld\n", 1946 mp->mnt_stat.f_mntonname, (ino_t)cmd.value, 1947 cmd.size); 1948 } 1949 #endif /* DEBUG */ 1950 if ((error = VFS_VGET(mp, (ino_t)cmd.value, &vp)) != 0) 1951 break; 1952 ip = VTOI(vp); 1953 ip->i_blocks += cmd.size; 1954 ip->i_flag |= IN_CHANGE; 1955 vput(vp); 1956 break; 1957 1958 case FFS_DIR_FREE: 1959 filetype = IFDIR; 1960 /* fall through */ 1961 1962 case FFS_FILE_FREE: 1963 #ifdef DEBUG 1964 if (fsckcmds) { 1965 if (cmd.size == 1) 1966 printf("%s: free %s inode %d\n", 1967 mp->mnt_stat.f_mntonname, 1968 filetype == IFDIR ? "directory" : "file", 1969 (ino_t)cmd.value); 1970 else 1971 printf("%s: free %s inodes %d-%d\n", 1972 mp->mnt_stat.f_mntonname, 1973 filetype == IFDIR ? "directory" : "file", 1974 (ino_t)cmd.value, 1975 (ino_t)cmd.value + cmd.size - 1); 1976 } 1977 #endif /* DEBUG */ 1978 tip.i_devvp = ump->um_devvp; 1979 tip.i_dev = ump->um_dev; 1980 tip.i_fs = fs; 1981 while (cmd.size > 0) { 1982 if ((error = ffs_freefile(&tip, cmd.value, filetype))) 1983 break; 1984 cmd.size -= 1; 1985 cmd.value += 1; 1986 } 1987 break; 1988 1989 case FFS_BLK_FREE: 1990 #ifdef DEBUG 1991 if (fsckcmds) { 1992 if (cmd.size == 1) 1993 printf("%s: free block %d\n", 1994 mp->mnt_stat.f_mntonname, 1995 (ufs_daddr_t)cmd.value); 1996 else 1997 printf("%s: free blocks %d-%ld\n", 1998 mp->mnt_stat.f_mntonname, 1999 (ufs_daddr_t)cmd.value, 2000 (ufs_daddr_t)cmd.value + cmd.size - 1); 2001 } 2002 #endif /* DEBUG */ 2003 tip.i_number = ROOTINO; 2004 tip.i_devvp = ump->um_devvp; 2005 tip.i_dev = ump->um_dev; 2006 tip.i_fs = fs; 2007 tip.i_size = cmd.size * fs->fs_fsize; 2008 tip.i_uid = 0; 2009 tip.i_vnode = NULL; 2010 blkno = (ufs_daddr_t)cmd.value; 2011 blkcnt = cmd.size; 2012 blksize = fs->fs_frag - (blkno % fs->fs_frag); 2013 while (blkcnt > 0) { 2014 if (blksize > blkcnt) 2015 blksize = blkcnt; 2016 ffs_blkfree(&tip, blkno, blksize * fs->fs_fsize); 2017 blkno += blksize; 2018 blkcnt -= blksize; 2019 blksize = fs->fs_frag; 2020 } 2021 break; 2022 2023 default: 2024 #ifdef DEBUG 2025 if (fsckcmds) { 2026 printf("Invalid request %d from fsck\n", 2027 oidp->oid_number); 2028 } 2029 #endif /* DEBUG */ 2030 error = EINVAL; 2031 break; 2032 2033 } 2034 vn_finished_write(mp); 2035 return (error); 2036 } 2037