/*-
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref,
				  int size);

static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs2_daddr_t
	      ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t);
#ifdef INVARIANTS
static int	ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int);
static void	ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *,
		    ufs1_daddr_t, int);
static ino_t	ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int);
static void	ffs_fserr(struct fs *, ino_t, char *);
static ufs2_daddr_t	ffs_hashalloc
		(struct inode *, int, ufs2_daddr_t, int, allocfcn_t *);
static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
static int	ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
static int	ffs_reallocblks_ufs2(struct vop_reallocblks_args *);

/*
 * Allocate a block in the filesystem.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
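 *
 * In both cases the quadratic rehash is implemented by ffs_hashalloc()
 * below.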
 */
int
ffs_alloc(ip, lbn, bpref, size, flags, cred, bnp)
	struct inode *ip;
	ufs2_daddr_t lbn, bpref;
	int size, flags;
	struct ucred *cred;
	ufs2_daddr_t *bnp;
{
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t bno;
	int cg, reclaimed;
	static struct timeval lastfail;
	static int curfail;
	int64_t delta;
#ifdef QUOTA
	int error;
#endif

	*bnp = 0;
	fs = ip->i_fs;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, size,
		    fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* INVARIANTS */
	reclaimed = 0;
retry:
#ifdef QUOTA
	UFS_UNLOCK(ump);
	error = chkdq(ip, btodb(size), cred, 0);
	if (error)
		return (error);
	UFS_LOCK(ump);
#endif
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
		goto nospace;
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
	if (bno > 0) {
		delta = btodb(size);
		if (ip->i_flag & IN_SPACECOUNTED) {
			UFS_LOCK(ump);
			fs->fs_pendingblocks += delta;
			UFS_UNLOCK(ump);
		}
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
		if (flags & IO_EXT)
			ip->i_flag |= IN_CHANGE;
		else
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
nospace:
#ifdef QUOTA
	UFS_UNLOCK(ump);
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(size), cred, FORCE);
	UFS_LOCK(ump);
#endif
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, ITOV(ip));
		goto retry;
	}
	UFS_UNLOCK(ump);
	if (ppsratecheck(&lastfail, &curfail, 1)) {
		ffs_fserr(fs, ip->i_number, "filesystem full");
		uprintf("\n%s: write failed, filesystem is full\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size is also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
int
ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp)
	struct inode *ip;
	ufs2_daddr_t lbprev;
	ufs2_daddr_t bprev;
	ufs2_daddr_t bpref;
	int osize, nsize, flags;
	struct ucred *cred;
	struct buf **bpp;
{
	struct vnode *vp;
	struct fs *fs;
	struct buf *bp;
	struct ufsmount *ump;
	int cg, request, error, reclaimed;
	ufs2_daddr_t bno;
	static struct timeval lastfail;
	static int curfail;
	int64_t delta;

	*bpp = 0;
	vp = ITOV(ip);
	fs = ip->i_fs;
	bp = NULL;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
	if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_realloccg: allocation on suspended filesystem");
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		"dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, osize,
		    nsize, fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* INVARIANTS */
	reclaimed = 0;
retry:
	if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) {
		goto nospace;
	}
	if (bprev == 0) {
		printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	UFS_UNLOCK(ump);
	/*
	 * Allocate the extra space in the buffer.
	 */
	error = bread(vp, lbprev, osize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (bp->b_blkno == bp->b_lblkno) {
		if (lbprev >= NDADDR)
			panic("ffs_realloccg: lbprev out of range");
		bp->b_blkno = fsbtodb(fs, bprev);
	}

#ifdef QUOTA
	error = chkdq(ip, btodb(nsize - osize), cred, 0);
	if (error) {
		brelse(bp);
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	UFS_LOCK(ump);
	bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
	if (bno) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("ffs_realloccg: bad blockno");
		delta = btodb(nsize - osize);
		if (ip->i_flag & IN_SPACECOUNTED) {
			UFS_LOCK(ump);
			fs->fs_pendingblocks += delta;
			UFS_UNLOCK(ump);
		}
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
		if (flags & IO_EXT)
			ip->i_flag |= IN_CHANGE;
		else
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO)
			bzero((char *)bp->b_data + osize, nsize - osize);
		else
			vfs_bio_clrbuf(bp);
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
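		 *
		 * For example, with the common fs_minfree default of 8%,
		 * the test below switches to time optimization once free
		 * fragments exceed 4% of fs_dsize (minfree / 2 percent).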
		 */
		request = nsize;
		if (fs->fs_minfree <= 5 ||
		    fs->fs_cstotal.cs_nffree >
		    (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = %s, optim = %ld, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize,
			    ip->i_number);
		if (nsize < request)
			ffs_blkfree(ump, fs, ip->i_devvp,
			    bno + numfrags(fs, nsize),
			    (long)(request - nsize), ip->i_number);
		delta = btodb(nsize - osize);
		if (ip->i_flag & IN_SPACECOUNTED) {
			UFS_LOCK(ump);
			fs->fs_pendingblocks += delta;
			UFS_UNLOCK(ump);
		}
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
		if (flags & IO_EXT)
			ip->i_flag |= IN_CHANGE;
		else
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO)
			bzero((char *)bp->b_data + osize, nsize - osize);
		else
			vfs_bio_clrbuf(bp);
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	UFS_UNLOCK(ump);
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
	UFS_LOCK(ump);
#endif
nospace:
	/*
	 * no space available
	 */
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, vp);
		UFS_UNLOCK(ump);
		if (bp)
			brelse(bp);
		UFS_LOCK(ump);
		goto retry;
	}
	UFS_UNLOCK(ump);
	if (bp)
		brelse(bp);
	if (ppsratecheck(&lastfail, &curfail, 1)) {
		ffs_fserr(fs, ip->i_number, "filesystem full");
		uprintf("\n%s: write failed, filesystem is full\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous are given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible
 * from the end of the allocation for the logical block immediately
 * preceding the current range. If successful, the physical block numbers
 * in the buffer pointers and in the inode are changed to reflect the new
 * allocation. If unsuccessful, the allocation is left unchanged. The
 * success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
 */

SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");

static int doasyncfree = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");

static int doreallocblks = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");

#ifdef DEBUG
static volatile int prtrealloc = 0;
#endif

int
ffs_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{

	if (doreallocblks == 0)
		return (ENOSPC);
	if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1)
		return (ffs_reallocblks_ufs1(ap));
	return (ffs_reallocblks_ufs2(ap));
}

static int
ffs_reallocblks_ufs1(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs1_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	struct ufsmount *ump;
	ufs_lbn_t start_lbn, end_lbn;
	ufs1_daddr_t soff, newblk, blkno;
	ufs2_daddr_t pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef INVARIANTS
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din1->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs1_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef INVARIANTS
		if (start_lvl > 0 &&
		    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs1_daddr_t *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	UFS_LOCK(ump);
	pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0) {
		UFS_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef INVARIANTS
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", *bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din1->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din1->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ffs_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din1->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

static int
ffs_reallocblks_ufs2(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs2_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	struct ufsmount *ump;
	ufs_lbn_t start_lbn, end_lbn;
	ufs2_daddr_t soff, newblk, blkno, pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef INVARIANTS
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din2->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs2_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef INVARIANTS
		if (start_lvl > 0 &&
		    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs2_daddr_t *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	UFS_LOCK(ump);
	pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0) {
		UFS_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef INVARIANTS
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)*bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din2->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din2->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ffs_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din2->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

/*
 * Allocate an inode in the filesystem.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
int
ffs_valloc(pvp, mode, cred, vpp)
	struct vnode *pvp;
	int mode;
	struct ucred *cred;
	struct vnode **vpp;
{
	struct inode *pip;
	struct fs *fs;
	struct inode *ip;
	struct timespec ts;
	struct ufsmount *ump;
	ino_t ino, ipref;
	int cg, error, error1;
	static struct timeval lastfail;
	static int curfail;

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	ump = pip->i_ump;

	UFS_LOCK(ump);
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track the number of dirs created one after another
	 * in the same cg without intervening file creations.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode,
	    (allocfcn_t *)ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	error = ffs_vget(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
	if (error) {
		error1 = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp,
		    FFSV_FORCEINSMQ);
		ffs_vfree(pvp, ino, mode);
		if (error1 == 0) {
			ip = VTOI(*vpp);
			if (ip->i_mode)
				goto dup_alloc;
			ip->i_flag |= IN_MODIFIED;
			vput(*vpp);
		}
		return (error);
	}
	ip = VTOI(*vpp);
	if (ip->i_mode) {
dup_alloc:
		printf("mode = 0%o, inum = %lu, fs = %s\n",
		    ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}
	if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) {  /* XXX */
		printf("free inode %s/%lu had %ld blocks\n",
		    fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks));
		DIP_SET(ip, i_blocks, 0);
	}
	ip->i_flags = 0;
	DIP_SET(ip, i_flags, 0);
	/*
	 * Set up a new generation number for this inode.
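	 *
	 * The generation number is exported in NFS file handles to catch
	 * stale references to a reclaimed inode, so it is kept nonzero:
	 * arc4random() / 2 + 1 always lies in [1, 2^31].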
	 */
	if (ip->i_gen == 0 || ++ip->i_gen == 0)
		ip->i_gen = arc4random() / 2 + 1;
	DIP_SET(ip, i_gen, ip->i_gen);
	if (fs->fs_magic == FS_UFS2_MAGIC) {
		vfs_timestamp(&ts);
		ip->i_din2->di_birthtime = ts.tv_sec;
		ip->i_din2->di_birthnsec = ts.tv_nsec;
	}
	ip->i_flag = 0;
	vnode_destroy_vobject(*vpp);
	(*vpp)->v_type = VNON;
	if (fs->fs_magic == FS_UFS2_MAGIC)
		(*vpp)->v_op = &ffs_vnodeops2;
	else
		(*vpp)->v_op = &ffs_vnodeops1;
	return (0);
noinodes:
	UFS_UNLOCK(ump);
	if (ppsratecheck(&lastfail, &curfail, 1)) {
		ffs_fserr(fs, pip->i_number, "out of inodes");
		uprintf("\n%s: create/symlink failed, no inodes free\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static ino_t
ffs_dirpref(pip)
	struct inode *pip;
{
	struct fs *fs;
	int cg, prefcg, dirsize, cgsize;
	int avgifree, avgbfree, avgndir, curdirsize;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	mtx_assert(UFS_MTX(pip->i_ump), MA_OWNED);
	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref");
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = arc4random() % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Count various limits which are used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - avgifree / 4;
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - avgbfree / 4;
	if (minbfree < 1)
		minbfree = 1;
	cgsize = fs->fs_fsize * fs->fs_fpg;
	dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
	curdirsize = avgndir ?
	    (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	if (dirsize <= 0)
		maxcontigdirs = 0;		/* dirsize overflowed */
	else
		maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255);
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
		    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit the number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop when we have a deficit in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is laid out
 * contiguously if possible.
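 *
 * For example, with 16K blocks and an illustrative fs_maxbpg of 2048,
 * a new section, and with it a fresh cylinder group search, begins
 * every 32MB of file data.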
 */
ufs2_daddr_t
ffs_blkpref_ufs1(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs1_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Same as above, but for UFS2
 */
ufs2_daddr_t
ffs_blkpref_ufs2(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs2_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 *
 * Must be called with the UFS lock held. Will release the lock on success
 * and return with it held on failure.
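 *
 * For example, with fs_ncg = 8 and a preferred group of 3, the probe
 * order is 3, then 4, 6, 2 (quadratic rehash), then 5, 6, 7, 0, 1, 2
 * (brute force sweep).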
 */
/*VARARGS5*/
static ufs2_daddr_t
ffs_hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	ufs2_daddr_t pref;
	int size;	/* size for data blocks, mode for inodes */
	allocfcn_t *allocator;
{
	struct fs *fs;
	ufs2_daddr_t result;
	int i, icg = cg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
#ifdef INVARIANTS
	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_hashalloc: allocation on suspended filesystem");
#endif
	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
static ufs2_daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bprev;
	int osize, nsize;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	int nffree;
	long bno;
	int frags, bbase;
	int i, error;
	u_int8_t *blksfree;

	ump = ip->i_ump;
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (0);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	bno = dtogd(fs, bprev);
	blksfree = cg_blksfree(cgp);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(blksfree, bno + i))
			goto fail;
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(blksfree, bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize), nffree = 0; i < frags; i++) {
		clrbit(blksfree, bno + i);
		cgp->cg_cs.cs_nffree--;
		nffree++;
	}
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nffree -= nffree;
	fs->fs_cs(fs, cg).cs_nffree -= nffree;
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev);
	bdwrite(bp);
	return (bprev);

fail:
	brelse(bp);
	UFS_LOCK(ump);
	return (0);

}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static ufs2_daddr_t
ffs_alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	int i, allocsiz, error, frags;
	u_int8_t *blksfree;

	ump = ip->i_ump;
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	if (size == fs->fs_bsize) {
		UFS_LOCK(ump);
		blkno = ffs_alloccgblk(ip, bp, bpref);
		ACTIVECLEAR(fs, cg);
		UFS_UNLOCK(ump);
		bdwrite(bp);
		return (blkno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	blksfree = cg_blksfree(cgp);
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0)
			goto fail;
		UFS_LOCK(ump);
		blkno = ffs_alloccgblk(ip, bp, bpref);
		bno = dtogd(fs, blkno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(blksfree, bno + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
		ACTIVECLEAR(fs, cg);
		UFS_UNLOCK(ump);
		bdwrite(bp);
		return (blkno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0)
		goto fail;
	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	blkno = cgbase(fs, cg) + bno;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
	bdwrite(bp);
	return (blkno);

fail:
	brelse(bp);
	UFS_LOCK(ump);
	return (0);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
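 * When a request is smaller than fs_bsize, ffs_alloccg() above carves
 * the full block returned here into fragments, marking the unused tail
 * free in the fragment map.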
 */
static ufs2_daddr_t
ffs_alloccgblk(ip, bp, bpref)
	struct inode *ip;
	struct buf *bp;
	ufs2_daddr_t bpref;
{
	struct fs *fs;
	struct cg *cgp;
	struct ufsmount *ump;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp);
	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
	} else {
		bpref = blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
			goto gotit;
	}
	/*
	 * Take the next available block in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = bno;
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, (long)blkno);
	ffs_clusteracct(ump, fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	fs->fs_fmod = 1;
	blkno = cgbase(fs, cgp->cg_cgx) + bno;
	/* XXX Fixme. */
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
	UFS_LOCK(ump);
	return (blkno);
}

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group. Instead we just
 * take the first one that we find following bpref.
 */
static ufs2_daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int len;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	int i, run, bit, map, got;
	ufs2_daddr_t bno;
	u_char *mapp;
	int32_t *lp;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_maxcluster[cg] < len)
		return (0);
	UFS_UNLOCK(ump);
	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
	    NOCRED, &bp))
		goto fail_lock;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail_lock;
	bp->b_xflags |= BX_BKGRDWRITE;
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &cg_clustersum(cgp)[len];
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->fs_contigsumsize) {
		/*
		 * This is the first time looking for a cluster in this
		 * cylinder group. Update the cluster summary information
		 * to reflect the true maximum sized cluster so that
		 * future cluster allocation requests can avoid reading
		 * the cylinder group map only to find no clusters.
		 */
		lp = &cg_clustersum(cgp)[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		UFS_LOCK(ump);
		fs->fs_maxcluster[cg] = i;
		goto fail;
	}
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need as we prefer to get one close to the previous
	 * block allocation. We do not search before the current
We do not search before the current 1641 * preference point as we do not want to allocate a block 1642 * that is allocated before the previous one (as we will 1643 * then have to wait for another pass of the elevator 1644 * algorithm before it will be read). We prefer to fail and 1645 * be recalled to try an allocation in the next cylinder group. 1646 */ 1647 if (dtog(fs, bpref) != cg) 1648 bpref = 0; 1649 else 1650 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); 1651 mapp = &cg_clustersfree(cgp)[bpref / NBBY]; 1652 map = *mapp++; 1653 bit = 1 << (bpref % NBBY); 1654 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { 1655 if ((map & bit) == 0) { 1656 run = 0; 1657 } else { 1658 run++; 1659 if (run == len) 1660 break; 1661 } 1662 if ((got & (NBBY - 1)) != (NBBY - 1)) { 1663 bit <<= 1; 1664 } else { 1665 map = *mapp++; 1666 bit = 1; 1667 } 1668 } 1669 if (got >= cgp->cg_nclusterblks) 1670 goto fail_lock; 1671 /* 1672 * Allocate the cluster that we have found. 1673 */ 1674 blksfree = cg_blksfree(cgp); 1675 for (i = 1; i <= len; i++) 1676 if (!ffs_isblock(fs, blksfree, got - run + i)) 1677 panic("ffs_clusteralloc: map mismatch"); 1678 bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1); 1679 if (dtog(fs, bno) != cg) 1680 panic("ffs_clusteralloc: allocated out of group"); 1681 len = blkstofrags(fs, len); 1682 UFS_LOCK(ump); 1683 for (i = 0; i < len; i += fs->fs_frag) 1684 if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) 1685 panic("ffs_clusteralloc: lost block"); 1686 ACTIVECLEAR(fs, cg); 1687 UFS_UNLOCK(ump); 1688 bdwrite(bp); 1689 return (bno); 1690 1691 fail_lock: 1692 UFS_LOCK(ump); 1693 fail: 1694 brelse(bp); 1695 return (0); 1696 } 1697 1698 /* 1699 * Determine whether an inode can be allocated. 1700 * 1701 * Check to see if an inode is available, and if it is, 1702 * allocate it using the following policy: 1703 * 1) allocate the requested inode. 1704 * 2) allocate the next available inode after the requested 1705 * inode in the specified cylinder group. 
1706 */ 1707 static ufs2_daddr_t 1708 ffs_nodealloccg(ip, cg, ipref, mode) 1709 struct inode *ip; 1710 int cg; 1711 ufs2_daddr_t ipref; 1712 int mode; 1713 { 1714 struct fs *fs; 1715 struct cg *cgp; 1716 struct buf *bp, *ibp; 1717 struct ufsmount *ump; 1718 u_int8_t *inosused; 1719 struct ufs2_dinode *dp2; 1720 int error, start, len, loc, map, i; 1721 1722 fs = ip->i_fs; 1723 ump = ip->i_ump; 1724 if (fs->fs_cs(fs, cg).cs_nifree == 0) 1725 return (0); 1726 UFS_UNLOCK(ump); 1727 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1728 (int)fs->fs_cgsize, NOCRED, &bp); 1729 if (error) { 1730 brelse(bp); 1731 UFS_LOCK(ump); 1732 return (0); 1733 } 1734 cgp = (struct cg *)bp->b_data; 1735 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { 1736 brelse(bp); 1737 UFS_LOCK(ump); 1738 return (0); 1739 } 1740 bp->b_xflags |= BX_BKGRDWRITE; 1741 cgp->cg_old_time = cgp->cg_time = time_second; 1742 inosused = cg_inosused(cgp); 1743 if (ipref) { 1744 ipref %= fs->fs_ipg; 1745 if (isclr(inosused, ipref)) 1746 goto gotit; 1747 } 1748 start = cgp->cg_irotor / NBBY; 1749 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); 1750 loc = skpc(0xff, len, &inosused[start]); 1751 if (loc == 0) { 1752 len = start + 1; 1753 start = 0; 1754 loc = skpc(0xff, len, &inosused[0]); 1755 if (loc == 0) { 1756 printf("cg = %d, irotor = %ld, fs = %s\n", 1757 cg, (long)cgp->cg_irotor, fs->fs_fsmnt); 1758 panic("ffs_nodealloccg: map corrupted"); 1759 /* NOTREACHED */ 1760 } 1761 } 1762 i = start + len - loc; 1763 map = inosused[i]; 1764 ipref = i * NBBY; 1765 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 1766 if ((map & i) == 0) { 1767 cgp->cg_irotor = ipref; 1768 goto gotit; 1769 } 1770 } 1771 printf("fs = %s\n", fs->fs_fsmnt); 1772 panic("ffs_nodealloccg: block not in map"); 1773 /* NOTREACHED */ 1774 gotit: 1775 /* 1776 * Check to see if we need to initialize more inodes. 1777 */ 1778 ibp = NULL; 1779 if (fs->fs_magic == FS_UFS2_MAGIC && 1780 ipref + INOPB(fs) > cgp->cg_initediblk && 1781 cgp->cg_initediblk < cgp->cg_niblk) { 1782 ibp = getblk(ip->i_devvp, fsbtodb(fs, 1783 ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)), 1784 (int)fs->fs_bsize, 0, 0, 0); 1785 bzero(ibp->b_data, (int)fs->fs_bsize); 1786 dp2 = (struct ufs2_dinode *)(ibp->b_data); 1787 for (i = 0; i < INOPB(fs); i++) { 1788 dp2->di_gen = arc4random() / 2 + 1; 1789 dp2++; 1790 } 1791 cgp->cg_initediblk += INOPB(fs); 1792 } 1793 UFS_LOCK(ump); 1794 ACTIVECLEAR(fs, cg); 1795 setbit(inosused, ipref); 1796 cgp->cg_cs.cs_nifree--; 1797 fs->fs_cstotal.cs_nifree--; 1798 fs->fs_cs(fs, cg).cs_nifree--; 1799 fs->fs_fmod = 1; 1800 if ((mode & IFMT) == IFDIR) { 1801 cgp->cg_cs.cs_ndir++; 1802 fs->fs_cstotal.cs_ndir++; 1803 fs->fs_cs(fs, cg).cs_ndir++; 1804 } 1805 UFS_UNLOCK(ump); 1806 if (DOINGSOFTDEP(ITOV(ip))) 1807 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); 1808 bdwrite(bp); 1809 if (ibp != NULL) 1810 bawrite(ibp); 1811 return (cg * fs->fs_ipg + ipref); 1812 } 1813 1814 /* 1815 * check if a block is free 1816 */ 1817 static int 1818 ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) 1819 { 1820 1821 switch ((int)fs->fs_frag) { 1822 case 8: 1823 return (cp[h] == 0); 1824 case 4: 1825 return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); 1826 case 2: 1827 return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); 1828 case 1: 1829 return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); 1830 default: 1831 panic("ffs_isfreeblock"); 1832 } 1833 return (0); 1834 } 1835 1836 /* 1837 * Free a block or fragment. 
1838 * 1839 * The specified block or fragment is placed back in the 1840 * free map. If a fragment is deallocated, a possible 1841 * block reassembly is checked. 1842 */ 1843 void 1844 ffs_blkfree(ump, fs, devvp, bno, size, inum) 1845 struct ufsmount *ump; 1846 struct fs *fs; 1847 struct vnode *devvp; 1848 ufs2_daddr_t bno; 1849 long size; 1850 ino_t inum; 1851 { 1852 struct cg *cgp; 1853 struct buf *bp; 1854 ufs1_daddr_t fragno, cgbno; 1855 ufs2_daddr_t cgblkno; 1856 int i, cg, blk, frags, bbase; 1857 u_int8_t *blksfree; 1858 struct cdev *dev; 1859 1860 cg = dtog(fs, bno); 1861 if (devvp->v_type == VREG) { 1862 /* devvp is a snapshot */ 1863 dev = VTOI(devvp)->i_devvp->v_rdev; 1864 cgblkno = fragstoblks(fs, cgtod(fs, cg)); 1865 } else { 1866 /* devvp is a normal disk device */ 1867 dev = devvp->v_rdev; 1868 cgblkno = fsbtodb(fs, cgtod(fs, cg)); 1869 ASSERT_VOP_LOCKED(devvp, "ffs_blkfree"); 1870 if ((devvp->v_vflag & VV_COPYONWRITE) && 1871 ffs_snapblkfree(fs, devvp, bno, size, inum)) 1872 return; 1873 } 1874 #ifdef INVARIANTS 1875 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || 1876 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 1877 printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n", 1878 devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, 1879 size, fs->fs_fsmnt); 1880 panic("ffs_blkfree: bad size"); 1881 } 1882 #endif 1883 if ((u_int)bno >= fs->fs_size) { 1884 printf("bad block %jd, ino %lu\n", (intmax_t)bno, 1885 (u_long)inum); 1886 ffs_fserr(fs, inum, "bad block"); 1887 return; 1888 } 1889 if (bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp)) { 1890 brelse(bp); 1891 return; 1892 } 1893 cgp = (struct cg *)bp->b_data; 1894 if (!cg_chkmagic(cgp)) { 1895 brelse(bp); 1896 return; 1897 } 1898 bp->b_xflags |= BX_BKGRDWRITE; 1899 cgp->cg_old_time = cgp->cg_time = time_second; 1900 cgbno = dtogd(fs, bno); 1901 blksfree = cg_blksfree(cgp); 1902 UFS_LOCK(ump); 1903 if (size == fs->fs_bsize) { 1904 fragno = fragstoblks(fs, cgbno); 1905 if (!ffs_isfreeblock(fs, blksfree, fragno)) { 1906 if (devvp->v_type == VREG) { 1907 UFS_UNLOCK(ump); 1908 /* devvp is a snapshot */ 1909 brelse(bp); 1910 return; 1911 } 1912 printf("dev = %s, block = %jd, fs = %s\n", 1913 devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); 1914 panic("ffs_blkfree: freeing free block"); 1915 } 1916 ffs_setblock(fs, blksfree, fragno); 1917 ffs_clusteracct(ump, fs, cgp, fragno, 1); 1918 cgp->cg_cs.cs_nbfree++; 1919 fs->fs_cstotal.cs_nbfree++; 1920 fs->fs_cs(fs, cg).cs_nbfree++; 1921 } else { 1922 bbase = cgbno - fragnum(fs, cgbno); 1923 /* 1924 * decrement the counts associated with the old frags 1925 */ 1926 blk = blkmap(fs, blksfree, bbase); 1927 ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 1928 /* 1929 * deallocate the fragment 1930 */ 1931 frags = numfrags(fs, size); 1932 for (i = 0; i < frags; i++) { 1933 if (isset(blksfree, cgbno + i)) { 1934 printf("dev = %s, block = %jd, fs = %s\n", 1935 devtoname(dev), (intmax_t)(bno + i), 1936 fs->fs_fsmnt); 1937 panic("ffs_blkfree: freeing free frag"); 1938 } 1939 setbit(blksfree, cgbno + i); 1940 } 1941 cgp->cg_cs.cs_nffree += i; 1942 fs->fs_cstotal.cs_nffree += i; 1943 fs->fs_cs(fs, cg).cs_nffree += i; 1944 /* 1945 * add back in counts associated with the new frags 1946 */ 1947 blk = blkmap(fs, blksfree, bbase); 1948 ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1949 /* 1950 * if a complete block has been reassembled, account for it 1951 */ 1952 fragno = fragstoblks(fs, bbase); 1953 if (ffs_isblock(fs, blksfree, fragno)) { 1954 cgp->cg_cs.cs_nffree -= fs->fs_frag; 
1955 fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1956 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 1957 ffs_clusteracct(ump, fs, cgp, fragno, 1); 1958 cgp->cg_cs.cs_nbfree++; 1959 fs->fs_cstotal.cs_nbfree++; 1960 fs->fs_cs(fs, cg).cs_nbfree++; 1961 } 1962 } 1963 fs->fs_fmod = 1; 1964 ACTIVECLEAR(fs, cg); 1965 UFS_UNLOCK(ump); 1966 bdwrite(bp); 1967 } 1968 1969 #ifdef INVARIANTS 1970 /* 1971 * Verify allocation of a block or fragment. Returns true if block or 1972 * fragment is allocated, false if it is free. 1973 */ 1974 static int 1975 ffs_checkblk(ip, bno, size) 1976 struct inode *ip; 1977 ufs2_daddr_t bno; 1978 long size; 1979 { 1980 struct fs *fs; 1981 struct cg *cgp; 1982 struct buf *bp; 1983 ufs1_daddr_t cgbno; 1984 int i, error, frags, free; 1985 u_int8_t *blksfree; 1986 1987 fs = ip->i_fs; 1988 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 1989 printf("bsize = %ld, size = %ld, fs = %s\n", 1990 (long)fs->fs_bsize, size, fs->fs_fsmnt); 1991 panic("ffs_checkblk: bad size"); 1992 } 1993 if ((u_int)bno >= fs->fs_size) 1994 panic("ffs_checkblk: bad block %jd", (intmax_t)bno); 1995 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), 1996 (int)fs->fs_cgsize, NOCRED, &bp); 1997 if (error) 1998 panic("ffs_checkblk: cg bread failed"); 1999 cgp = (struct cg *)bp->b_data; 2000 if (!cg_chkmagic(cgp)) 2001 panic("ffs_checkblk: cg magic mismatch"); 2002 bp->b_xflags |= BX_BKGRDWRITE; 2003 blksfree = cg_blksfree(cgp); 2004 cgbno = dtogd(fs, bno); 2005 if (size == fs->fs_bsize) { 2006 free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno)); 2007 } else { 2008 frags = numfrags(fs, size); 2009 for (free = 0, i = 0; i < frags; i++) 2010 if (isset(blksfree, cgbno + i)) 2011 free++; 2012 if (free != 0 && free != frags) 2013 panic("ffs_checkblk: partially free fragment"); 2014 } 2015 brelse(bp); 2016 return (!free); 2017 } 2018 #endif /* INVARIANTS */ 2019 2020 /* 2021 * Free an inode. 2022 */ 2023 int 2024 ffs_vfree(pvp, ino, mode) 2025 struct vnode *pvp; 2026 ino_t ino; 2027 int mode; 2028 { 2029 struct inode *ip; 2030 2031 if (DOINGSOFTDEP(pvp)) { 2032 softdep_freefile(pvp, ino, mode); 2033 return (0); 2034 } 2035 ip = VTOI(pvp); 2036 return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode)); 2037 } 2038 2039 /* 2040 * Do the actual free operation. 2041 * The specified inode is placed back in the free map. 
2042 */ 2043 int 2044 ffs_freefile(ump, fs, devvp, ino, mode) 2045 struct ufsmount *ump; 2046 struct fs *fs; 2047 struct vnode *devvp; 2048 ino_t ino; 2049 int mode; 2050 { 2051 struct cg *cgp; 2052 struct buf *bp; 2053 ufs2_daddr_t cgbno; 2054 int error, cg; 2055 u_int8_t *inosused; 2056 struct cdev *dev; 2057 2058 cg = ino_to_cg(fs, ino); 2059 if (devvp->v_type == VREG) { 2060 /* devvp is a snapshot */ 2061 dev = VTOI(devvp)->i_devvp->v_rdev; 2062 cgbno = fragstoblks(fs, cgtod(fs, cg)); 2063 } else { 2064 /* devvp is a normal disk device */ 2065 dev = devvp->v_rdev; 2066 cgbno = fsbtodb(fs, cgtod(fs, cg)); 2067 } 2068 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 2069 panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s", 2070 devtoname(dev), (u_long)ino, fs->fs_fsmnt); 2071 if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { 2072 brelse(bp); 2073 return (error); 2074 } 2075 cgp = (struct cg *)bp->b_data; 2076 if (!cg_chkmagic(cgp)) { 2077 brelse(bp); 2078 return (0); 2079 } 2080 bp->b_xflags |= BX_BKGRDWRITE; 2081 cgp->cg_old_time = cgp->cg_time = time_second; 2082 inosused = cg_inosused(cgp); 2083 ino %= fs->fs_ipg; 2084 if (isclr(inosused, ino)) { 2085 printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev), 2086 (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt); 2087 if (fs->fs_ronly == 0) 2088 panic("ffs_freefile: freeing free inode"); 2089 } 2090 clrbit(inosused, ino); 2091 if (ino < cgp->cg_irotor) 2092 cgp->cg_irotor = ino; 2093 cgp->cg_cs.cs_nifree++; 2094 UFS_LOCK(ump); 2095 fs->fs_cstotal.cs_nifree++; 2096 fs->fs_cs(fs, cg).cs_nifree++; 2097 if ((mode & IFMT) == IFDIR) { 2098 cgp->cg_cs.cs_ndir--; 2099 fs->fs_cstotal.cs_ndir--; 2100 fs->fs_cs(fs, cg).cs_ndir--; 2101 } 2102 fs->fs_fmod = 1; 2103 ACTIVECLEAR(fs, cg); 2104 UFS_UNLOCK(ump); 2105 bdwrite(bp); 2106 return (0); 2107 } 2108 2109 /* 2110 * Check to see if a file is free. 2111 */ 2112 int 2113 ffs_checkfreefile(fs, devvp, ino) 2114 struct fs *fs; 2115 struct vnode *devvp; 2116 ino_t ino; 2117 { 2118 struct cg *cgp; 2119 struct buf *bp; 2120 ufs2_daddr_t cgbno; 2121 int ret, cg; 2122 u_int8_t *inosused; 2123 2124 cg = ino_to_cg(fs, ino); 2125 if (devvp->v_type == VREG) { 2126 /* devvp is a snapshot */ 2127 cgbno = fragstoblks(fs, cgtod(fs, cg)); 2128 } else { 2129 /* devvp is a normal disk device */ 2130 cgbno = fsbtodb(fs, cgtod(fs, cg)); 2131 } 2132 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 2133 return (1); 2134 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) { 2135 brelse(bp); 2136 return (1); 2137 } 2138 cgp = (struct cg *)bp->b_data; 2139 if (!cg_chkmagic(cgp)) { 2140 brelse(bp); 2141 return (1); 2142 } 2143 inosused = cg_inosused(cgp); 2144 ino %= fs->fs_ipg; 2145 ret = isclr(inosused, ino); 2146 brelse(bp); 2147 return (ret); 2148 } 2149 2150 /* 2151 * Find a block of the specified size in the specified cylinder group. 2152 * 2153 * It is a panic if a request is made to find a block when none are 2154 * available.
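 *
 * The search runs in two steps: scanc() first locates a map byte
 * whose fragtbl descriptor shows a free run of at least allocsiz
 * fragments, then the loop over the around[] and inside[] tables
 * pinpoints the starting fragment within that byte.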
2155 */ 2156 static ufs1_daddr_t 2157 ffs_mapsearch(fs, cgp, bpref, allocsiz) 2158 struct fs *fs; 2159 struct cg *cgp; 2160 ufs2_daddr_t bpref; 2161 int allocsiz; 2162 { 2163 ufs1_daddr_t bno; 2164 int start, len, loc, i; 2165 int blk, field, subfield, pos; 2166 u_int8_t *blksfree; 2167 2168 /* 2169 * find the fragment by searching through the free block 2170 * map for an appropriate bit pattern 2171 */ 2172 if (bpref) 2173 start = dtogd(fs, bpref) / NBBY; 2174 else 2175 start = cgp->cg_frotor / NBBY; 2176 blksfree = cg_blksfree(cgp); 2177 len = howmany(fs->fs_fpg, NBBY) - start; 2178 loc = scanc((u_int)len, (u_char *)&blksfree[start], 2179 fragtbl[fs->fs_frag], 2180 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 2181 if (loc == 0) { 2182 len = start + 1; 2183 start = 0; 2184 loc = scanc((u_int)len, (u_char *)&blksfree[0], 2185 fragtbl[fs->fs_frag], 2186 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 2187 if (loc == 0) { 2188 printf("start = %d, len = %d, fs = %s\n", 2189 start, len, fs->fs_fsmnt); 2190 panic("ffs_alloccg: map corrupted"); 2191 /* NOTREACHED */ 2192 } 2193 } 2194 bno = (start + len - loc) * NBBY; 2195 cgp->cg_frotor = bno; 2196 /* 2197 * found the byte in the map 2198 * sift through the bits to find the selected frag 2199 */ 2200 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 2201 blk = blkmap(fs, blksfree, bno); 2202 blk <<= 1; 2203 field = around[allocsiz]; 2204 subfield = inside[allocsiz]; 2205 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 2206 if ((blk & field) == subfield) 2207 return (bno + pos); 2208 field <<= 1; 2209 subfield <<= 1; 2210 } 2211 } 2212 printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); 2213 panic("ffs_alloccg: block not in map"); 2214 return (-1); 2215 } 2216 2217 /* 2218 * Update the cluster map because of an allocation or free. 2219 * 2220 * Cnt == 1 means free; cnt == -1 means allocating. 2221 */ 2222 void 2223 ffs_clusteracct(ump, fs, cgp, blkno, cnt) 2224 struct ufsmount *ump; 2225 struct fs *fs; 2226 struct cg *cgp; 2227 ufs1_daddr_t blkno; 2228 int cnt; 2229 { 2230 int32_t *sump; 2231 int32_t *lp; 2232 u_char *freemapp, *mapp; 2233 int i, start, end, forw, back, map, bit; 2234 2235 mtx_assert(UFS_MTX(ump), MA_OWNED); 2236 2237 if (fs->fs_contigsumsize <= 0) 2238 return; 2239 freemapp = cg_clustersfree(cgp); 2240 sump = cg_clustersum(cgp); 2241 /* 2242 * Allocate or clear the actual block. 2243 */ 2244 if (cnt > 0) 2245 setbit(freemapp, blkno); 2246 else 2247 clrbit(freemapp, blkno); 2248 /* 2249 * Find the size of the cluster going forward. 2250 */ 2251 start = blkno + 1; 2252 end = start + fs->fs_contigsumsize; 2253 if (end >= cgp->cg_nclusterblks) 2254 end = cgp->cg_nclusterblks; 2255 mapp = &freemapp[start / NBBY]; 2256 map = *mapp++; 2257 bit = 1 << (start % NBBY); 2258 for (i = start; i < end; i++) { 2259 if ((map & bit) == 0) 2260 break; 2261 if ((i & (NBBY - 1)) != (NBBY - 1)) { 2262 bit <<= 1; 2263 } else { 2264 map = *mapp++; 2265 bit = 1; 2266 } 2267 } 2268 forw = i - start; 2269 /* 2270 * Find the size of the cluster going backward. 
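 * This mirrors the forward scan above, stepping bits and map bytes
 * downward instead of upward.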
2271 */ 2272 start = blkno - 1; 2273 end = start - fs->fs_contigsumsize; 2274 if (end < 0) 2275 end = -1; 2276 mapp = &freemapp[start / NBBY]; 2277 map = *mapp--; 2278 bit = 1 << (start % NBBY); 2279 for (i = start; i > end; i--) { 2280 if ((map & bit) == 0) 2281 break; 2282 if ((i & (NBBY - 1)) != 0) { 2283 bit >>= 1; 2284 } else { 2285 map = *mapp--; 2286 bit = 1 << (NBBY - 1); 2287 } 2288 } 2289 back = start - i; 2290 /* 2291 * Account for the old cluster and the possibly new forward and 2292 * back clusters. 2293 */ 2294 i = back + forw + 1; 2295 if (i > fs->fs_contigsumsize) 2296 i = fs->fs_contigsumsize; 2297 sump[i] += cnt; 2298 if (back > 0) 2299 sump[back] -= cnt; 2300 if (forw > 0) 2301 sump[forw] -= cnt; 2302 /* 2303 * Update cluster summary information. 2304 */ 2305 lp = &sump[fs->fs_contigsumsize]; 2306 for (i = fs->fs_contigsumsize; i > 0; i--) 2307 if (*lp-- > 0) 2308 break; 2309 fs->fs_maxcluster[cgp->cg_cgx] = i; 2310 } 2311 2312 /* 2313 * Fserr prints the name of a filesystem with an error diagnostic. 2314 * 2315 * The form of the error message is: 2316 * fs: error message 2317 */ 2318 static void 2319 ffs_fserr(fs, inum, cp) 2320 struct fs *fs; 2321 ino_t inum; 2322 char *cp; 2323 { 2324 struct thread *td = curthread; /* XXX */ 2325 struct proc *p = td->td_proc; 2326 2327 log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n", 2328 p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp); 2329 } 2330 2331 /* 2332 * This function provides the capability for the fsck program to 2333 * update an active filesystem. Eleven operations are provided: 2334 * 2335 * adjrefcnt(inode, amt) - adjusts the reference count on the 2336 * specified inode by the specified amount. Under normal 2337 * operation the count should always go down. Decrementing 2338 * the count to zero will cause the inode to be freed. 2339 * adjblkcnt(inode, amt) - adjusts the number of blocks used by the 2340 * specified inode by the specified amount. 2341 * adjndir, adjnbfree, adjnifree, adjnffree, adjnumclusters(amt) - 2342 * adjust the superblock summary. 2343 * freedirs(inode, count) - directory inodes [inode..inode + count - 1] 2344 * are marked as free. Inodes should never have to be marked 2345 * as in use. 2346 * freefiles(inode, count) - file inodes [inode..inode + count - 1] 2347 * are marked as free. Inodes should never have to be marked 2348 * as in use. 2349 * freeblks(blockno, size) - blocks [blockno..blockno + size - 1] 2350 * are marked as free. Blocks should never have to be marked 2351 * as in use. 2352 * setflags(flags, set/clear) - the fs_flags field has the specified 2353 * flags set (second parameter +1) or cleared (second parameter -1).
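 *
 * A hypothetical userland sketch (fd is a descriptor for an open
 * file on the target filesystem; the field names match struct
 * fsck_cmd as used below):
 *
 *	struct fsck_cmd cmd = { 0 };
 *
 *	cmd.version = FFS_CMD_VERSION;
 *	cmd.handle = fd;
 *	cmd.value = ino;	/* inode number to adjust */
 *	cmd.size = 1;		/* signed delta */
 *	sysctlbyname("vfs.ffs.adjrefcnt", NULL, NULL, &cmd, sizeof(cmd));
 *
 * would raise the link count of inode ino by one.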
2354 */ 2355 2356 static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS); 2357 2358 SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT, 2359 0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count"); 2360 2361 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR, 2362 sysctl_ffs_fsck, "Adjust Inode Used Blocks Count"); 2363 2364 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR, 2365 sysctl_ffs_fsck, "Adjust number of directories"); 2366 2367 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NBFREE, adjnbfree, CTLFLAG_WR, 2368 sysctl_ffs_fsck, "Adjust number of free blocks"); 2369 2370 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NIFREE, adjnifree, CTLFLAG_WR, 2371 sysctl_ffs_fsck, "Adjust number of free inodes"); 2372 2373 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NFFREE, adjnffree, CTLFLAG_WR, 2374 sysctl_ffs_fsck, "Adjust number of free frags"); 2375 2376 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NUMCLUSTERS, adjnumclusters, CTLFLAG_WR, 2377 sysctl_ffs_fsck, "Adjust number of free clusters"); 2378 2379 static SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR, 2380 sysctl_ffs_fsck, "Free Range of Directory Inodes"); 2381 2382 static SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR, 2383 sysctl_ffs_fsck, "Free Range of File Inodes"); 2384 2385 static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR, 2386 sysctl_ffs_fsck, "Free Range of Blocks"); 2387 2388 static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR, 2389 sysctl_ffs_fsck, "Change Filesystem Flags"); 2390 2391 #ifdef DEBUG 2392 static int fsckcmds = 0; 2393 SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, ""); 2394 #endif /* DEBUG */ 2395 2396 static int 2397 sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) 2398 { 2399 struct fsck_cmd cmd; 2400 struct ufsmount *ump; 2401 struct vnode *vp; 2402 struct inode *ip; 2403 struct mount *mp; 2404 struct fs *fs; 2405 ufs2_daddr_t blkno; 2406 long blkcnt, blksize; 2407 struct file *fp; 2408 int filetype, error; 2409 2410 if (req->newlen > sizeof cmd) 2411 return (EBADRPC); 2412 if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0) 2413 return (error); 2414 if (cmd.version != FFS_CMD_VERSION) 2415 return (ERPCMISMATCH); 2416 if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0) 2417 return (error); 2418 vn_start_write(fp->f_data, &mp, V_WAIT); 2419 if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) { 2420 vn_finished_write(mp); 2421 fdrop(fp, curthread); 2422 return (EINVAL); 2423 } 2424 if (mp->mnt_flag & MNT_RDONLY) { 2425 vn_finished_write(mp); 2426 fdrop(fp, curthread); 2427 return (EROFS); 2428 } 2429 ump = VFSTOUFS(mp); 2430 fs = ump->um_fs; 2431 filetype = IFREG; 2432 2433 switch (oidp->oid_number) { 2434 2435 case FFS_SET_FLAGS: 2436 #ifdef DEBUG 2437 if (fsckcmds) 2438 printf("%s: %s flags\n", mp->mnt_stat.f_mntonname, 2439 cmd.size > 0 ? 
"set" : "clear"); 2440 #endif /* DEBUG */ 2441 if (cmd.size > 0) 2442 fs->fs_flags |= (long)cmd.value; 2443 else 2444 fs->fs_flags &= ~(long)cmd.value; 2445 break; 2446 2447 case FFS_ADJ_REFCNT: 2448 #ifdef DEBUG 2449 if (fsckcmds) { 2450 printf("%s: adjust inode %jd count by %jd\n", 2451 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2452 (intmax_t)cmd.size); 2453 } 2454 #endif /* DEBUG */ 2455 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2456 break; 2457 ip = VTOI(vp); 2458 ip->i_nlink += cmd.size; 2459 DIP_SET(ip, i_nlink, ip->i_nlink); 2460 ip->i_effnlink += cmd.size; 2461 ip->i_flag |= IN_CHANGE; 2462 if (DOINGSOFTDEP(vp)) 2463 softdep_change_linkcnt(ip); 2464 vput(vp); 2465 break; 2466 2467 case FFS_ADJ_BLKCNT: 2468 #ifdef DEBUG 2469 if (fsckcmds) { 2470 printf("%s: adjust inode %jd block count by %jd\n", 2471 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2472 (intmax_t)cmd.size); 2473 } 2474 #endif /* DEBUG */ 2475 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2476 break; 2477 ip = VTOI(vp); 2478 if (ip->i_flag & IN_SPACECOUNTED) { 2479 UFS_LOCK(ump); 2480 fs->fs_pendingblocks += cmd.size; 2481 UFS_UNLOCK(ump); 2482 } 2483 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size); 2484 ip->i_flag |= IN_CHANGE; 2485 vput(vp); 2486 break; 2487 2488 case FFS_DIR_FREE: 2489 filetype = IFDIR; 2490 /* fall through */ 2491 2492 case FFS_FILE_FREE: 2493 #ifdef DEBUG 2494 if (fsckcmds) { 2495 if (cmd.size == 1) 2496 printf("%s: free %s inode %d\n", 2497 mp->mnt_stat.f_mntonname, 2498 filetype == IFDIR ? "directory" : "file", 2499 (ino_t)cmd.value); 2500 else 2501 printf("%s: free %s inodes %d-%d\n", 2502 mp->mnt_stat.f_mntonname, 2503 filetype == IFDIR ? "directory" : "file", 2504 (ino_t)cmd.value, 2505 (ino_t)(cmd.value + cmd.size - 1)); 2506 } 2507 #endif /* DEBUG */ 2508 while (cmd.size > 0) { 2509 if ((error = ffs_freefile(ump, fs, ump->um_devvp, 2510 cmd.value, filetype))) 2511 break; 2512 cmd.size -= 1; 2513 cmd.value += 1; 2514 } 2515 break; 2516 2517 case FFS_BLK_FREE: 2518 #ifdef DEBUG 2519 if (fsckcmds) { 2520 if (cmd.size == 1) 2521 printf("%s: free block %jd\n", 2522 mp->mnt_stat.f_mntonname, 2523 (intmax_t)cmd.value); 2524 else 2525 printf("%s: free blocks %jd-%jd\n", 2526 mp->mnt_stat.f_mntonname, 2527 (intmax_t)cmd.value, 2528 (intmax_t)cmd.value + cmd.size - 1); 2529 } 2530 #endif /* DEBUG */ 2531 blkno = cmd.value; 2532 blkcnt = cmd.size; 2533 blksize = fs->fs_frag - (blkno % fs->fs_frag); 2534 while (blkcnt > 0) { 2535 if (blksize > blkcnt) 2536 blksize = blkcnt; 2537 ffs_blkfree(ump, fs, ump->um_devvp, blkno, 2538 blksize * fs->fs_fsize, ROOTINO); 2539 blkno += blksize; 2540 blkcnt -= blksize; 2541 blksize = fs->fs_frag; 2542 } 2543 break; 2544 2545 /* 2546 * Adjust superblock summaries. fsck(8) is expected to 2547 * submit deltas when necessary. 
2548 */ 2549 case FFS_ADJ_NDIR: 2550 #ifdef DEBUG 2551 if (fsckcmds) { 2552 printf("%s: adjust number of directories by %jd\n", 2553 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2554 } 2555 #endif /* DEBUG */ 2556 fs->fs_cstotal.cs_ndir += cmd.value; 2557 break; 2558 case FFS_ADJ_NBFREE: 2559 #ifdef DEBUG 2560 if (fsckcmds) { 2561 printf("%s: adjust number of free blocks by %+jd\n", 2562 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2563 } 2564 #endif /* DEBUG */ 2565 fs->fs_cstotal.cs_nbfree += cmd.value; 2566 break; 2567 case FFS_ADJ_NIFREE: 2568 #ifdef DEBUG 2569 if (fsckcmds) { 2570 printf("%s: adjust number of free inodes by %+jd\n", 2571 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2572 } 2573 #endif /* DEBUG */ 2574 fs->fs_cstotal.cs_nifree += cmd.value; 2575 break; 2576 case FFS_ADJ_NFFREE: 2577 #ifdef DEBUG 2578 if (fsckcmds) { 2579 printf("%s: adjust number of free frags by %+jd\n", 2580 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2581 } 2582 #endif /* DEBUG */ 2583 fs->fs_cstotal.cs_nffree += cmd.value; 2584 break; 2585 case FFS_ADJ_NUMCLUSTERS: 2586 #ifdef DEBUG 2587 if (fsckcmds) { 2588 printf("%s: adjust number of free clusters by %+jd\n", 2589 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2590 } 2591 #endif /* DEBUG */ 2592 fs->fs_cstotal.cs_numclusters += cmd.value; 2593 break; 2594 2595 default: 2596 #ifdef DEBUG 2597 if (fsckcmds) { 2598 printf("Invalid request %d from fsck\n", 2599 oidp->oid_number); 2600 } 2601 #endif /* DEBUG */ 2602 error = EINVAL; 2603 break; 2604 2605 } 2606 fdrop(fp, curthread); 2607 vn_finished_write(mp); 2608 return (error); 2609 } 2610
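
/*
 * A worked example of the FFS_BLK_FREE chunking above, assuming
 * fs_frag == 8: freeing blkno == 13 with size == 20 fragments is
 * split into frags 13-15 (three frags, up to the first block
 * boundary), 16-23 and 24-31 (two full blocks), and finally 32
 * (the one remaining frag).
 */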