/*-
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref,
		    int size);

static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs2_daddr_t
	ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t);
#ifdef INVARIANTS
static int	ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int);
static void	ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *,
		    ufs1_daddr_t, int);
static ino_t	ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int);
static void	ffs_fserr(struct fs *, ino_t, char *);
static ufs2_daddr_t	ffs_hashalloc
		(struct inode *, int, ufs2_daddr_t, int, allocfcn_t *);
static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
static int	ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
static int	ffs_reallocblks_ufs2(struct vop_reallocblks_args *);

/*
 * Allocate a block in the filesystem.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
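 *
 * For example, on a filesystem with 2K fragments and 16K blocks a
 * request may be for any multiple of 2K up to 16K.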
 */
int
ffs_alloc(ip, lbn, bpref, size, flags, cred, bnp)
	struct inode *ip;
	ufs2_daddr_t lbn, bpref;
	int size, flags;
	struct ucred *cred;
	ufs2_daddr_t *bnp;
{
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t bno;
	int cg, reclaimed;
	static struct timeval lastfail;
	static int curfail;
	int64_t delta;
#ifdef QUOTA
	int error;
#endif

	*bnp = 0;
	fs = ip->i_fs;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, size,
		    fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* INVARIANTS */
	reclaimed = 0;
retry:
#ifdef QUOTA
	UFS_UNLOCK(ump);
	error = chkdq(ip, btodb(size), cred, 0);
	if (error)
		return (error);
	UFS_LOCK(ump);
#endif
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
		goto nospace;
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
	if (bno > 0) {
		delta = btodb(size);
		if (ip->i_flag & IN_SPACECOUNTED) {
			UFS_LOCK(ump);
			fs->fs_pendingblocks += delta;
			UFS_UNLOCK(ump);
		}
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
		if (flags & IO_EXT)
			ip->i_flag |= IN_CHANGE;
		else
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
nospace:
#ifdef QUOTA
	UFS_UNLOCK(ump);
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(size), cred, FORCE);
	UFS_LOCK(ump);
#endif
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, ITOV(ip));
		goto retry;
	}
	UFS_UNLOCK(ump);
	if (ppsratecheck(&lastfail, &curfail, 1)) {
		ffs_fserr(fs, ip->i_number, "filesystem full");
		uprintf("\n%s: write failed, filesystem is full\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
int
ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp)
	struct inode *ip;
	ufs2_daddr_t lbprev;
	ufs2_daddr_t bprev;
	ufs2_daddr_t bpref;
	int osize, nsize, flags;
	struct ucred *cred;
	struct buf **bpp;
{
	struct vnode *vp;
	struct fs *fs;
	struct buf *bp;
	struct ufsmount *ump;
	int cg, request, error, reclaimed;
	ufs2_daddr_t bno;
	static struct timeval lastfail;
	static int curfail;
	int64_t delta;

	*bpp = 0;
	vp = ITOV(ip);
	fs = ip->i_fs;
	bp = NULL;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
	if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_realloccg: allocation on suspended filesystem");
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		"dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, osize,
		    nsize, fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* INVARIANTS */
	reclaimed = 0;
retry:
	if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) {
		goto nospace;
	}
	if (bprev == 0) {
		printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	UFS_UNLOCK(ump);
	/*
	 * Allocate the extra space in the buffer.
	 */
	error = bread(vp, lbprev, osize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (bp->b_blkno == bp->b_lblkno) {
		if (lbprev >= NDADDR)
			panic("ffs_realloccg: lbprev out of range");
		bp->b_blkno = fsbtodb(fs, bprev);
	}

#ifdef QUOTA
	error = chkdq(ip, btodb(nsize - osize), cred, 0);
	if (error) {
		brelse(bp);
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	UFS_LOCK(ump);
	bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
	if (bno) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("ffs_realloccg: bad blockno");
		delta = btodb(nsize - osize);
		if (ip->i_flag & IN_SPACECOUNTED) {
			UFS_LOCK(ump);
			fs->fs_pendingblocks += delta;
			UFS_UNLOCK(ump);
		}
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
		if (flags & IO_EXT)
			ip->i_flag |= IN_CHANGE;
		else
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero(bp->b_data + osize, nsize - osize);
		if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
			vfs_bio_set_valid(bp, osize, nsize - osize);
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
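		 * For example, with the default fs_minfree of 8%, the
		 * switch to time optimization happens once free fragments
		 * drop to 4% of fs_dsize or below.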
		 */
		request = nsize;
		if (fs->fs_minfree <= 5 ||
		    fs->fs_cstotal.cs_nffree >
		    (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = %s, optim = %ld, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize,
			    ip->i_number);
		if (nsize < request)
			ffs_blkfree(ump, fs, ip->i_devvp,
			    bno + numfrags(fs, nsize),
			    (long)(request - nsize), ip->i_number);
		delta = btodb(nsize - osize);
		if (ip->i_flag & IN_SPACECOUNTED) {
			UFS_LOCK(ump);
			fs->fs_pendingblocks += delta;
			UFS_UNLOCK(ump);
		}
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
		if (flags & IO_EXT)
			ip->i_flag |= IN_CHANGE;
		else
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero(bp->b_data + osize, nsize - osize);
		if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
			vfs_bio_set_valid(bp, osize, nsize - osize);
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	UFS_UNLOCK(ump);
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
	UFS_LOCK(ump);
#endif
nospace:
	/*
	 * no space available
	 */
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, vp);
		UFS_UNLOCK(ump);
		if (bp)
			brelse(bp);
		UFS_LOCK(ump);
		goto retry;
	}
	UFS_UNLOCK(ump);
	if (bp)
		brelse(bp);
	if (ppsratecheck(&lastfail, &curfail, 1)) {
		ffs_fserr(fs, ip->i_number, "filesystem full");
		uprintf("\n%s: write failed, filesystem is full\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous are given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible
 * to the end of the allocation for the logical block immediately
 * preceding the current range. If successful, the physical block numbers
 * in the buffer pointers and in the inode are changed to reflect the new
 * allocation. If unsuccessful, the allocation is left unchanged. The
 * success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
 */

SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");

static int doasyncfree = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");

static int doreallocblks = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");

#ifdef DEBUG
static volatile int prtrealloc = 0;
#endif

int
ffs_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{

	if (doreallocblks == 0)
		return (ENOSPC);
	if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1)
		return (ffs_reallocblks_ufs1(ap));
	return (ffs_reallocblks_ufs2(ap));
}

static int
ffs_reallocblks_ufs1(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs1_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	struct ufsmount *ump;
	ufs_lbn_t start_lbn, end_lbn;
	ufs1_daddr_t soff, newblk, blkno;
	ufs2_daddr_t pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef INVARIANTS
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din1->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs1_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
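	 * If so, ssize is the number of blocks mapped by the first map;
	 * the remaining len - ssize blocks live in the second one.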
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef INVARIANTS
		if (start_lvl > 0 &&
		    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs1_daddr_t *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	UFS_LOCK(ump);
	pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0) {
		UFS_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef INVARIANTS
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", *bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din1->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din1->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ffs_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
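	 * (With soft updates the old blocks are instead freed by the
	 * dependency code once the new block pointers are safely on disk,
	 * so the explicit ffs_blkfree below is skipped.)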
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din1->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

static int
ffs_reallocblks_ufs2(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs2_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	struct ufsmount *ump;
	ufs_lbn_t start_lbn, end_lbn;
	ufs2_daddr_t soff, newblk, blkno, pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef INVARIANTS
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din2->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs2_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef INVARIANTS
		if (start_lvl > 0 &&
		    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs2_daddr_t *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	UFS_LOCK(ump);
	pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0) {
		UFS_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef INVARIANTS
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)*bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din2->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din2->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ffs_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din2->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

/*
 * Allocate an inode in the filesystem.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
int
ffs_valloc(pvp, mode, cred, vpp)
	struct vnode *pvp;
	int mode;
	struct ucred *cred;
	struct vnode **vpp;
{
	struct inode *pip;
	struct fs *fs;
	struct inode *ip;
	struct timespec ts;
	struct ufsmount *ump;
	ino_t ino, ipref;
	int cg, error, error1;
	static struct timeval lastfail;
	static int curfail;

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	ump = pip->i_ump;

	UFS_LOCK(ump);
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track the number of dirs created one after another
	 * in the same cg without intervening creation of files.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode,
	    (allocfcn_t *)ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	error = ffs_vget(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
	if (error) {
		error1 = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp,
		    FFSV_FORCEINSMQ);
		ffs_vfree(pvp, ino, mode);
		if (error1 == 0) {
			ip = VTOI(*vpp);
			if (ip->i_mode)
				goto dup_alloc;
			ip->i_flag |= IN_MODIFIED;
			vput(*vpp);
		}
		return (error);
	}
	ip = VTOI(*vpp);
	if (ip->i_mode) {
dup_alloc:
		printf("mode = 0%o, inum = %lu, fs = %s\n",
		    ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}
	if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) {  /* XXX */
		printf("free inode %s/%lu had %ld blocks\n",
		    fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks));
		DIP_SET(ip, i_blocks, 0);
	}
	ip->i_flags = 0;
	DIP_SET(ip, i_flags, 0);
	/*
	 * Set up a new generation number for this inode.
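	 * The generation number lets external references (e.g. NFS file
	 * handles) detect reuse of an inode; the arc4random() fallback
	 * keeps it nonzero.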
	 */
	if (ip->i_gen == 0 || ++ip->i_gen == 0)
		ip->i_gen = arc4random() / 2 + 1;
	DIP_SET(ip, i_gen, ip->i_gen);
	if (fs->fs_magic == FS_UFS2_MAGIC) {
		vfs_timestamp(&ts);
		ip->i_din2->di_birthtime = ts.tv_sec;
		ip->i_din2->di_birthnsec = ts.tv_nsec;
	}
	ip->i_flag = 0;
	vnode_destroy_vobject(*vpp);
	(*vpp)->v_type = VNON;
	if (fs->fs_magic == FS_UFS2_MAGIC)
		(*vpp)->v_op = &ffs_vnodeops2;
	else
		(*vpp)->v_op = &ffs_vnodeops1;
	return (0);
noinodes:
	UFS_UNLOCK(ump);
	if (ppsratecheck(&lastfail, &curfail, 1)) {
		ffs_fserr(fs, pip->i_number, "out of inodes");
		uprintf("\n%s: create/symlink failed, no inodes free\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for the inodes and data
 * of its files. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static ino_t
ffs_dirpref(pip)
	struct inode *pip;
{
	struct fs *fs;
	int cg, prefcg, dirsize, cgsize;
	int avgifree, avgbfree, avgndir, curdirsize;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	mtx_assert(UFS_MTX(pip->i_ump), MA_OWNED);
	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref");
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = arc4random() % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Count various limits which are used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - avgifree / 4;
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - avgbfree / 4;
	if (minbfree < 1)
		minbfree = 1;
	cgsize = fs->fs_fsize * fs->fs_fpg;
	dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
	curdirsize = avgndir ?
	    (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	if (dirsize <= 0)
		maxcontigdirs = 0;		/* dirsize overflowed */
	else
		maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255);
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
		    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop when we have a deficit in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}

/*
 * Select the desired position for the next block in a file. The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is laid out
 * contiguously if possible.
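 *
 * (Spreading each fs_maxbpg section across cylinder groups keeps a
 * single large file from filling one group and crowding out blocks
 * needed by the other files and directories in that group.)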
 */
ufs2_daddr_t
ffs_blkpref_ufs1(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs1_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Same as above, but for UFS2.
 */
ufs2_daddr_t
ffs_blkpref_ufs2(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs2_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 *
 * Must be called with the UFS lock held. Will release the lock on success
 * and return with it held on failure.
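 *
 * For example, starting from cylinder group 5 on a filesystem with
 * 16 groups, the rehash step probes groups 6, 8, 12, and 4 (offsets
 * 1, 3, 7, 15), after which the brute force pass walks the remaining
 * groups in order starting at 7.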
 */
/*VARARGS5*/
static ufs2_daddr_t
ffs_hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	ufs2_daddr_t pref;
	int size;	/* size for data blocks, mode for inodes */
	allocfcn_t *allocator;
{
	struct fs *fs;
	ufs2_daddr_t result;
	int i, icg = cg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
#ifdef INVARIANTS
	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_hashalloc: allocation on suspended filesystem");
#endif
	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
static ufs2_daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bprev;
	int osize, nsize;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	int nffree;
	long bno;
	int frags, bbase;
	int i, error;
	u_int8_t *blksfree;

	ump = ip->i_ump;
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (0);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	bno = dtogd(fs, bprev);
	blksfree = cg_blksfree(cgp);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(blksfree, bno + i))
			goto fail;
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(blksfree, bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize), nffree = 0; i < frags; i++) {
		clrbit(blksfree, bno + i);
		cgp->cg_cs.cs_nffree--;
		nffree++;
	}
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nffree -= nffree;
	fs->fs_cs(fs, cg).cs_nffree -= nffree;
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev);
	bdwrite(bp);
	return (bprev);

fail:
	brelse(bp);
	UFS_LOCK(ump);
	return (0);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static ufs2_daddr_t
ffs_alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	int i, allocsiz, error, frags;
	u_int8_t *blksfree;

	ump = ip->i_ump;
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	if (size == fs->fs_bsize) {
		UFS_LOCK(ump);
		blkno = ffs_alloccgblk(ip, bp, bpref);
		ACTIVECLEAR(fs, cg);
		UFS_UNLOCK(ump);
		bdwrite(bp);
		return (blkno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	blksfree = cg_blksfree(cgp);
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0)
			goto fail;
		UFS_LOCK(ump);
		blkno = ffs_alloccgblk(ip, bp, bpref);
		bno = dtogd(fs, blkno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(blksfree, bno + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
		ACTIVECLEAR(fs, cg);
		UFS_UNLOCK(ump);
		bdwrite(bp);
		return (blkno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0)
		goto fail;
	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	blkno = cgbase(fs, cg) + bno;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
	bdwrite(bp);
	return (blkno);

fail:
	brelse(bp);
	UFS_LOCK(ump);
	return (0);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
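 *
 * The cg_rotor remembers where the last block in this group was
 * allocated, so requests without a usable preference continue from
 * that point instead of rescanning the map from the beginning.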
 */
static ufs2_daddr_t
ffs_alloccgblk(ip, bp, bpref)
	struct inode *ip;
	struct buf *bp;
	ufs2_daddr_t bpref;
{
	struct fs *fs;
	struct cg *cgp;
	struct ufsmount *ump;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp);
	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
	} else {
		bpref = blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
			goto gotit;
	}
	/*
	 * Take the next available block in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = bno;
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, (long)blkno);
	ffs_clusteracct(ump, fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	fs->fs_fmod = 1;
	blkno = cgbase(fs, cgp->cg_cgx) + bno;
	/* XXX Fixme. */
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
	UFS_LOCK(ump);
	return (blkno);
}

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group. Instead we just
 * take the first one that we find following bpref.
 */
static ufs2_daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int len;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	int i, run, bit, map, got;
	ufs2_daddr_t bno;
	u_char *mapp;
	int32_t *lp;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_maxcluster[cg] < len)
		return (0);
	UFS_UNLOCK(ump);
	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
	    NOCRED, &bp))
		goto fail_lock;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail_lock;
	bp->b_xflags |= BX_BKGRDWRITE;
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &cg_clustersum(cgp)[len];
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->fs_contigsumsize) {
		/*
		 * This is the first time looking for a cluster in this
		 * cylinder group. Update the cluster summary information
		 * to reflect the true maximum sized cluster so that
		 * future cluster allocation requests can avoid reading
		 * the cylinder group map only to find no clusters.
		 */
		lp = &cg_clustersum(cgp)[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		UFS_LOCK(ump);
		fs->fs_maxcluster[cg] = i;
		goto fail;
	}
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need as we prefer to get one close to the previous
	 * block allocation. We do not search before the current
	 * preference point as we do not want to allocate a block
	 * that is allocated before the previous one (as we will
	 * then have to wait for another pass of the elevator
	 * algorithm before it will be read). We prefer to fail and
	 * be recalled to try an allocation in the next cylinder group.
	 */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	else
		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
	map = *mapp++;
	bit = 1 << (bpref % NBBY);
	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
		if ((map & bit) == 0) {
			run = 0;
		} else {
			run++;
			if (run == len)
				break;
		}
		if ((got & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	if (got >= cgp->cg_nclusterblks)
		goto fail_lock;
	/*
	 * Allocate the cluster that we have found.
	 */
	blksfree = cg_blksfree(cgp);
	for (i = 1; i <= len; i++)
		if (!ffs_isblock(fs, blksfree, got - run + i))
			panic("ffs_clusteralloc: map mismatch");
	bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1);
	if (dtog(fs, bno) != cg)
		panic("ffs_clusteralloc: allocated out of group");
	len = blkstofrags(fs, len);
	UFS_LOCK(ump);
	for (i = 0; i < len; i += fs->fs_frag)
		if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
			panic("ffs_clusteralloc: lost block");
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	bdwrite(bp);
	return (bno);

fail_lock:
	UFS_LOCK(ump);
fail:
	brelse(bp);
	return (0);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
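 *
 * The search scans the cg_inosused bitmap a byte at a time with
 * skpc(), starting at the per-group rotor (cg_irotor) and wrapping
 * around once, so fully allocated bytes are skipped without testing
 * individual bits.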
 */
static ufs2_daddr_t
ffs_nodealloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	ufs2_daddr_t ipref;
	int mode;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp, *ibp;
	struct ufsmount *ump;
	u_int8_t *inosused;
	struct ufs2_dinode *dp2;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		UFS_LOCK(ump);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		UFS_LOCK(ump);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	inosused = cg_inosused(cgp);
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(inosused, ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &inosused[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &inosused[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %ld, fs = %s\n",
			    cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = inosused[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */
gotit:
	/*
	 * Check to see if we need to initialize more inodes.
	 */
	ibp = NULL;
	if (fs->fs_magic == FS_UFS2_MAGIC &&
	    ipref + INOPB(fs) > cgp->cg_initediblk &&
	    cgp->cg_initediblk < cgp->cg_niblk) {
		ibp = getblk(ip->i_devvp, fsbtodb(fs,
		    ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
		    (int)fs->fs_bsize, 0, 0, 0);
		bzero(ibp->b_data, (int)fs->fs_bsize);
		dp2 = (struct ufs2_dinode *)(ibp->b_data);
		for (i = 0; i < INOPB(fs); i++) {
			dp2->di_gen = arc4random() / 2 + 1;
			dp2++;
		}
		cgp->cg_initediblk += INOPB(fs);
	}
	UFS_LOCK(ump);
	ACTIVECLEAR(fs, cg);
	setbit(inosused, ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
	bdwrite(bp);
	if (ibp != NULL)
		bawrite(ibp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * check if a block is free
 */
static int
ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
{

	switch ((int)fs->fs_frag) {
	case 8:
		return (cp[h] == 0);
	case 4:
		return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
	case 2:
		return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
	case 1:
		return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
	default:
		panic("ffs_isfreeblock");
	}
	return (0);
}

/*
 * Free a block or fragment.
1836 * 1837 * The specified block or fragment is placed back in the 1838 * free map. If a fragment is deallocated, a possible 1839 * block reassembly is checked. 1840 */ 1841 void 1842 ffs_blkfree(ump, fs, devvp, bno, size, inum) 1843 struct ufsmount *ump; 1844 struct fs *fs; 1845 struct vnode *devvp; 1846 ufs2_daddr_t bno; 1847 long size; 1848 ino_t inum; 1849 { 1850 struct cg *cgp; 1851 struct buf *bp; 1852 ufs1_daddr_t fragno, cgbno; 1853 ufs2_daddr_t cgblkno; 1854 int i, cg, blk, frags, bbase; 1855 u_int8_t *blksfree; 1856 struct cdev *dev; 1857 1858 cg = dtog(fs, bno); 1859 if (devvp->v_type == VREG) { 1860 /* devvp is a snapshot */ 1861 dev = VTOI(devvp)->i_devvp->v_rdev; 1862 cgblkno = fragstoblks(fs, cgtod(fs, cg)); 1863 } else { 1864 /* devvp is a normal disk device */ 1865 dev = devvp->v_rdev; 1866 cgblkno = fsbtodb(fs, cgtod(fs, cg)); 1867 ASSERT_VOP_LOCKED(devvp, "ffs_blkfree"); 1868 if ((devvp->v_vflag & VV_COPYONWRITE) && 1869 ffs_snapblkfree(fs, devvp, bno, size, inum)) 1870 return; 1871 } 1872 #ifdef INVARIANTS 1873 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || 1874 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 1875 printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n", 1876 devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, 1877 size, fs->fs_fsmnt); 1878 panic("ffs_blkfree: bad size"); 1879 } 1880 #endif 1881 if ((u_int)bno >= fs->fs_size) { 1882 printf("bad block %jd, ino %lu\n", (intmax_t)bno, 1883 (u_long)inum); 1884 ffs_fserr(fs, inum, "bad block"); 1885 return; 1886 } 1887 if (bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp)) { 1888 brelse(bp); 1889 return; 1890 } 1891 cgp = (struct cg *)bp->b_data; 1892 if (!cg_chkmagic(cgp)) { 1893 brelse(bp); 1894 return; 1895 } 1896 bp->b_xflags |= BX_BKGRDWRITE; 1897 cgp->cg_old_time = cgp->cg_time = time_second; 1898 cgbno = dtogd(fs, bno); 1899 blksfree = cg_blksfree(cgp); 1900 UFS_LOCK(ump); 1901 if (size == fs->fs_bsize) { 1902 fragno = fragstoblks(fs, cgbno); 1903 if (!ffs_isfreeblock(fs, blksfree, fragno)) { 1904 if (devvp->v_type == VREG) { 1905 UFS_UNLOCK(ump); 1906 /* devvp is a snapshot */ 1907 brelse(bp); 1908 return; 1909 } 1910 printf("dev = %s, block = %jd, fs = %s\n", 1911 devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); 1912 panic("ffs_blkfree: freeing free block"); 1913 } 1914 ffs_setblock(fs, blksfree, fragno); 1915 ffs_clusteracct(ump, fs, cgp, fragno, 1); 1916 cgp->cg_cs.cs_nbfree++; 1917 fs->fs_cstotal.cs_nbfree++; 1918 fs->fs_cs(fs, cg).cs_nbfree++; 1919 } else { 1920 bbase = cgbno - fragnum(fs, cgbno); 1921 /* 1922 * decrement the counts associated with the old frags 1923 */ 1924 blk = blkmap(fs, blksfree, bbase); 1925 ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 1926 /* 1927 * deallocate the fragment 1928 */ 1929 frags = numfrags(fs, size); 1930 for (i = 0; i < frags; i++) { 1931 if (isset(blksfree, cgbno + i)) { 1932 printf("dev = %s, block = %jd, fs = %s\n", 1933 devtoname(dev), (intmax_t)(bno + i), 1934 fs->fs_fsmnt); 1935 panic("ffs_blkfree: freeing free frag"); 1936 } 1937 setbit(blksfree, cgbno + i); 1938 } 1939 cgp->cg_cs.cs_nffree += i; 1940 fs->fs_cstotal.cs_nffree += i; 1941 fs->fs_cs(fs, cg).cs_nffree += i; 1942 /* 1943 * add back in counts associated with the new frags 1944 */ 1945 blk = blkmap(fs, blksfree, bbase); 1946 ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1947 /* 1948 * if a complete block has been reassembled, account for it 1949 */ 1950 fragno = fragstoblks(fs, bbase); 1951 if (ffs_isblock(fs, blksfree, fragno)) { 1952 cgp->cg_cs.cs_nffree -= fs->fs_frag; 
1953 fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1954 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 1955 ffs_clusteracct(ump, fs, cgp, fragno, 1); 1956 cgp->cg_cs.cs_nbfree++; 1957 fs->fs_cstotal.cs_nbfree++; 1958 fs->fs_cs(fs, cg).cs_nbfree++; 1959 } 1960 } 1961 fs->fs_fmod = 1; 1962 ACTIVECLEAR(fs, cg); 1963 UFS_UNLOCK(ump); 1964 bdwrite(bp); 1965 } 1966 1967 #ifdef INVARIANTS 1968 /* 1969 * Verify allocation of a block or fragment. Returns true if block or 1970 * fragment is allocated, false if it is free. 1971 */ 1972 static int 1973 ffs_checkblk(ip, bno, size) 1974 struct inode *ip; 1975 ufs2_daddr_t bno; 1976 long size; 1977 { 1978 struct fs *fs; 1979 struct cg *cgp; 1980 struct buf *bp; 1981 ufs1_daddr_t cgbno; 1982 int i, error, frags, free; 1983 u_int8_t *blksfree; 1984 1985 fs = ip->i_fs; 1986 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 1987 printf("bsize = %ld, size = %ld, fs = %s\n", 1988 (long)fs->fs_bsize, size, fs->fs_fsmnt); 1989 panic("ffs_checkblk: bad size"); 1990 } 1991 if ((u_int)bno >= fs->fs_size) 1992 panic("ffs_checkblk: bad block %jd", (intmax_t)bno); 1993 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), 1994 (int)fs->fs_cgsize, NOCRED, &bp); 1995 if (error) 1996 panic("ffs_checkblk: cg bread failed"); 1997 cgp = (struct cg *)bp->b_data; 1998 if (!cg_chkmagic(cgp)) 1999 panic("ffs_checkblk: cg magic mismatch"); 2000 bp->b_xflags |= BX_BKGRDWRITE; 2001 blksfree = cg_blksfree(cgp); 2002 cgbno = dtogd(fs, bno); 2003 if (size == fs->fs_bsize) { 2004 free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno)); 2005 } else { 2006 frags = numfrags(fs, size); 2007 for (free = 0, i = 0; i < frags; i++) 2008 if (isset(blksfree, cgbno + i)) 2009 free++; 2010 if (free != 0 && free != frags) 2011 panic("ffs_checkblk: partially free fragment"); 2012 } 2013 brelse(bp); 2014 return (!free); 2015 } 2016 #endif /* INVARIANTS */ 2017 2018 /* 2019 * Free an inode. 2020 */ 2021 int 2022 ffs_vfree(pvp, ino, mode) 2023 struct vnode *pvp; 2024 ino_t ino; 2025 int mode; 2026 { 2027 struct inode *ip; 2028 2029 if (DOINGSOFTDEP(pvp)) { 2030 softdep_freefile(pvp, ino, mode); 2031 return (0); 2032 } 2033 ip = VTOI(pvp); 2034 return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode)); 2035 } 2036 2037 /* 2038 * Do the actual free operation. 2039 * The specified inode is placed back in the free map. 
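 *
 * For example, the non-softdep path of ffs_vfree() above reduces
 * to the single call
 *
 *	error = ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp,
 *	    ino, mode);
 *
 * with the mode used only to keep the directory counts straight.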
*/ 2041 int 2042 ffs_freefile(ump, fs, devvp, ino, mode) 2043 struct ufsmount *ump; 2044 struct fs *fs; 2045 struct vnode *devvp; 2046 ino_t ino; 2047 int mode; 2048 { 2049 struct cg *cgp; 2050 struct buf *bp; 2051 ufs2_daddr_t cgbno; 2052 int error, cg; 2053 u_int8_t *inosused; 2054 struct cdev *dev; 2055 2056 cg = ino_to_cg(fs, ino); 2057 if (devvp->v_type == VREG) { 2058 /* devvp is a snapshot */ 2059 dev = VTOI(devvp)->i_devvp->v_rdev; 2060 cgbno = fragstoblks(fs, cgtod(fs, cg)); 2061 } else { 2062 /* devvp is a normal disk device */ 2063 dev = devvp->v_rdev; 2064 cgbno = fsbtodb(fs, cgtod(fs, cg)); 2065 } 2066 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 2067 panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s", 2068 devtoname(dev), (u_long)ino, fs->fs_fsmnt); 2069 if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { 2070 brelse(bp); 2071 return (error); 2072 } 2073 cgp = (struct cg *)bp->b_data; 2074 if (!cg_chkmagic(cgp)) { 2075 brelse(bp); 2076 return (0); 2077 } 2078 bp->b_xflags |= BX_BKGRDWRITE; 2079 cgp->cg_old_time = cgp->cg_time = time_second; 2080 inosused = cg_inosused(cgp); 2081 ino %= fs->fs_ipg; 2082 if (isclr(inosused, ino)) { 2083 printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev), 2084 (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt); 2085 if (fs->fs_ronly == 0) 2086 panic("ffs_freefile: freeing free inode"); 2087 } 2088 clrbit(inosused, ino); 2089 if (ino < cgp->cg_irotor) 2090 cgp->cg_irotor = ino; 2091 cgp->cg_cs.cs_nifree++; 2092 UFS_LOCK(ump); 2093 fs->fs_cstotal.cs_nifree++; 2094 fs->fs_cs(fs, cg).cs_nifree++; 2095 if ((mode & IFMT) == IFDIR) { 2096 cgp->cg_cs.cs_ndir--; 2097 fs->fs_cstotal.cs_ndir--; 2098 fs->fs_cs(fs, cg).cs_ndir--; 2099 } 2100 fs->fs_fmod = 1; 2101 ACTIVECLEAR(fs, cg); 2102 UFS_UNLOCK(ump); 2103 bdwrite(bp); 2104 return (0); 2105 } 2106 2107 /* 2108 * Check to see if a file is free. 2109 */ 2110 int 2111 ffs_checkfreefile(fs, devvp, ino) 2112 struct fs *fs; 2113 struct vnode *devvp; 2114 ino_t ino; 2115 { 2116 struct cg *cgp; 2117 struct buf *bp; 2118 ufs2_daddr_t cgbno; 2119 int ret, cg; 2120 u_int8_t *inosused; 2121 2122 cg = ino_to_cg(fs, ino); 2123 if (devvp->v_type == VREG) { 2124 /* devvp is a snapshot */ 2125 cgbno = fragstoblks(fs, cgtod(fs, cg)); 2126 } else { 2127 /* devvp is a normal disk device */ 2128 cgbno = fsbtodb(fs, cgtod(fs, cg)); 2129 } 2130 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 2131 return (1); 2132 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) { 2133 brelse(bp); 2134 return (1); 2135 } 2136 cgp = (struct cg *)bp->b_data; 2137 if (!cg_chkmagic(cgp)) { 2138 brelse(bp); 2139 return (1); 2140 } 2141 inosused = cg_inosused(cgp); 2142 ino %= fs->fs_ipg; 2143 ret = isclr(inosused, ino); 2144 brelse(bp); 2145 return (ret); 2146 } 2147 2148 /* 2149 * Find a block of the specified size in the specified cylinder group. 2150 * 2151 * It is a panic if a request is made to find a block when none are 2152 * available.
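 *
 * The byte-level search relies on scanc(); as an illustrative
 * sketch (assuming the usual libkern semantics),
 * scanc(size, cp, table, mask) returns the number of bytes
 * remaining, counting from the first byte whose table entry
 * intersects mask, or 0 if none does:
 *
 *	int
 *	scanc_sketch(u_int size, const u_char *cp,
 *	    const u_char table[], int mask)
 *	{
 *		const u_char *end = &cp[size];
 *
 *		while (cp < end && (table[*cp] & mask) == 0)
 *			cp++;
 *		return (end - cp);
 *	}
 *
 * Here fragtbl[fs->fs_frag] maps each possible free-map byte to a
 * mask of the free-fragment run sizes occurring within it, so whole
 * bytes with no suitable run are rejected at once.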
2153 */ 2154 static ufs1_daddr_t 2155 ffs_mapsearch(fs, cgp, bpref, allocsiz) 2156 struct fs *fs; 2157 struct cg *cgp; 2158 ufs2_daddr_t bpref; 2159 int allocsiz; 2160 { 2161 ufs1_daddr_t bno; 2162 int start, len, loc, i; 2163 int blk, field, subfield, pos; 2164 u_int8_t *blksfree; 2165 2166 /* 2167 * find the fragment by searching through the free block 2168 * map for an appropriate bit pattern 2169 */ 2170 if (bpref) 2171 start = dtogd(fs, bpref) / NBBY; 2172 else 2173 start = cgp->cg_frotor / NBBY; 2174 blksfree = cg_blksfree(cgp); 2175 len = howmany(fs->fs_fpg, NBBY) - start; 2176 loc = scanc((u_int)len, (u_char *)&blksfree[start], 2177 fragtbl[fs->fs_frag], 2178 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 2179 if (loc == 0) { 2180 len = start + 1; 2181 start = 0; 2182 loc = scanc((u_int)len, (u_char *)&blksfree[0], 2183 fragtbl[fs->fs_frag], 2184 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 2185 if (loc == 0) { 2186 printf("start = %d, len = %d, fs = %s\n", 2187 start, len, fs->fs_fsmnt); 2188 panic("ffs_alloccg: map corrupted"); 2189 /* NOTREACHED */ 2190 } 2191 } 2192 bno = (start + len - loc) * NBBY; 2193 cgp->cg_frotor = bno; 2194 /* 2195 * found the byte in the map 2196 * sift through the bits to find the selected frag 2197 */ 2198 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 2199 blk = blkmap(fs, blksfree, bno); 2200 blk <<= 1; 2201 field = around[allocsiz]; 2202 subfield = inside[allocsiz]; 2203 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 2204 if ((blk & field) == subfield) 2205 return (bno + pos); 2206 field <<= 1; 2207 subfield <<= 1; 2208 } 2209 } 2210 printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); 2211 panic("ffs_alloccg: block not in map"); 2212 return (-1); 2213 } 2214 2215 /* 2216 * Update the cluster map because of an allocation or free. 2217 * 2218 * Cnt == 1 means free; cnt == -1 means allocating. 2219 */ 2220 void 2221 ffs_clusteracct(ump, fs, cgp, blkno, cnt) 2222 struct ufsmount *ump; 2223 struct fs *fs; 2224 struct cg *cgp; 2225 ufs1_daddr_t blkno; 2226 int cnt; 2227 { 2228 int32_t *sump; 2229 int32_t *lp; 2230 u_char *freemapp, *mapp; 2231 int i, start, end, forw, back, map, bit; 2232 2233 mtx_assert(UFS_MTX(ump), MA_OWNED); 2234 2235 if (fs->fs_contigsumsize <= 0) 2236 return; 2237 freemapp = cg_clustersfree(cgp); 2238 sump = cg_clustersum(cgp); 2239 /* 2240 * Allocate or clear the actual block. 2241 */ 2242 if (cnt > 0) 2243 setbit(freemapp, blkno); 2244 else 2245 clrbit(freemapp, blkno); 2246 /* 2247 * Find the size of the cluster going forward. 2248 */ 2249 start = blkno + 1; 2250 end = start + fs->fs_contigsumsize; 2251 if (end >= cgp->cg_nclusterblks) 2252 end = cgp->cg_nclusterblks; 2253 mapp = &freemapp[start / NBBY]; 2254 map = *mapp++; 2255 bit = 1 << (start % NBBY); 2256 for (i = start; i < end; i++) { 2257 if ((map & bit) == 0) 2258 break; 2259 if ((i & (NBBY - 1)) != (NBBY - 1)) { 2260 bit <<= 1; 2261 } else { 2262 map = *mapp++; 2263 bit = 1; 2264 } 2265 } 2266 forw = i - start; 2267 /* 2268 * Find the size of the cluster going backward. 
*/ 2270 start = blkno - 1; 2271 end = start - fs->fs_contigsumsize; 2272 if (end < 0) 2273 end = -1; 2274 mapp = &freemapp[start / NBBY]; 2275 map = *mapp--; 2276 bit = 1 << (start % NBBY); 2277 for (i = start; i > end; i--) { 2278 if ((map & bit) == 0) 2279 break; 2280 if ((i & (NBBY - 1)) != 0) { 2281 bit >>= 1; 2282 } else { 2283 map = *mapp--; 2284 bit = 1 << (NBBY - 1); 2285 } 2286 } 2287 back = start - i; 2288 /* 2289 * Account for old cluster and the possibly new forward and 2290 * back clusters. 2291 */ 2292 i = back + forw + 1; 2293 if (i > fs->fs_contigsumsize) 2294 i = fs->fs_contigsumsize; 2295 sump[i] += cnt; 2296 if (back > 0) 2297 sump[back] -= cnt; 2298 if (forw > 0) 2299 sump[forw] -= cnt; 2300 /* 2301 * Update cluster summary information. 2302 */ 2303 lp = &sump[fs->fs_contigsumsize]; 2304 for (i = fs->fs_contigsumsize; i > 0; i--) 2305 if (*lp-- > 0) 2306 break; 2307 fs->fs_maxcluster[cgp->cg_cgx] = i; 2308 } 2309 2310 /* 2311 * Fserr prints the name of a filesystem with an error diagnostic. 2312 * 2313 * The form of the error message is: 2314 * fs: error message 2315 */ 2316 static void 2317 ffs_fserr(fs, inum, cp) 2318 struct fs *fs; 2319 ino_t inum; 2320 char *cp; 2321 { 2322 struct thread *td = curthread; /* XXX */ 2323 struct proc *p = td->td_proc; 2324 2325 log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n", 2326 p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp); 2327 } 2328 2329 /* 2330 * This function provides the capability for the fsck program to 2331 * update an active filesystem. Eleven operations are provided: 2332 * 2333 * adjrefcnt(inode, amt) - adjusts the reference count on the 2334 * specified inode by the specified amount. Under normal 2335 * operation the count should always go down. Decrementing 2336 * the count to zero will cause the inode to be freed. 2337 * adjblkcnt(inode, amt) - adjust the number of blocks used 2338 * by the specified amount. 2339 * adjndir, adjnbfree, adjnifree, adjnffree, adjnumclusters(amt) - 2340 * adjust the superblock summary. 2341 * freedirs(inode, count) - directory inodes [inode..inode + count - 1] 2342 * are marked as free. Inodes should never have to be marked 2343 * as in use. 2344 * freefiles(inode, count) - file inodes [inode..inode + count - 1] 2345 * are marked as free. Inodes should never have to be marked 2346 * as in use. 2347 * freeblks(blockno, size) - blocks [blockno..blockno + size - 1] 2348 * are marked as free. Blocks should never have to be marked 2349 * as in use. 2350 * setflags(flags, set/clear) - the fs_flags field has the specified 2351 * flags set (second parameter +1) or cleared (second parameter -1).
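 *
 * As an illustrative sketch of the calling convention (fsck_ffs(8)
 * is the real consumer; struct fsck_cmd and FFS_CMD_VERSION come
 * from <ufs/ffs/fs.h>), an operation is submitted by writing the
 * command structure to the matching sysctl, e.g. from userland:
 *
 *	struct fsck_cmd cmd;
 *
 *	bzero(&cmd, sizeof(cmd));
 *	cmd.version = FFS_CMD_VERSION;
 *	cmd.handle = fd;	(open descriptor on the filesystem)
 *	cmd.value = ino;	(inode to adjust)
 *	cmd.size = -1;		(signed adjustment amount)
 *	if (sysctlbyname("vfs.ffs.adjrefcnt", NULL, NULL,
 *	    &cmd, sizeof(cmd)) == -1)
 *		err(1, "adjrefcnt");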
2352 */ 2353 2354 static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS); 2355 2356 SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT, 2357 0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count"); 2358 2359 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR, 2360 sysctl_ffs_fsck, "Adjust Inode Used Blocks Count"); 2361 2362 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR, 2363 sysctl_ffs_fsck, "Adjust number of directories"); 2364 2365 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NBFREE, adjnbfree, CTLFLAG_WR, 2366 sysctl_ffs_fsck, "Adjust number of free blocks"); 2367 2368 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NIFREE, adjnifree, CTLFLAG_WR, 2369 sysctl_ffs_fsck, "Adjust number of free inodes"); 2370 2371 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NFFREE, adjnffree, CTLFLAG_WR, 2372 sysctl_ffs_fsck, "Adjust number of free frags"); 2373 2374 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NUMCLUSTERS, adjnumclusters, CTLFLAG_WR, 2375 sysctl_ffs_fsck, "Adjust number of free clusters"); 2376 2377 static SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR, 2378 sysctl_ffs_fsck, "Free Range of Directory Inodes"); 2379 2380 static SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR, 2381 sysctl_ffs_fsck, "Free Range of File Inodes"); 2382 2383 static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR, 2384 sysctl_ffs_fsck, "Free Range of Blocks"); 2385 2386 static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR, 2387 sysctl_ffs_fsck, "Change Filesystem Flags"); 2388 2389 #ifdef DEBUG 2390 static int fsckcmds = 0; 2391 SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, ""); 2392 #endif /* DEBUG */ 2393 2394 static int 2395 sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) 2396 { 2397 struct fsck_cmd cmd; 2398 struct ufsmount *ump; 2399 struct vnode *vp; 2400 struct inode *ip; 2401 struct mount *mp; 2402 struct fs *fs; 2403 ufs2_daddr_t blkno; 2404 long blkcnt, blksize; 2405 struct file *fp; 2406 int filetype, error; 2407 2408 if (req->newlen > sizeof cmd) 2409 return (EBADRPC); 2410 if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0) 2411 return (error); 2412 if (cmd.version != FFS_CMD_VERSION) 2413 return (ERPCMISMATCH); 2414 if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0) 2415 return (error); 2416 vn_start_write(fp->f_data, &mp, V_WAIT); 2417 if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) { 2418 vn_finished_write(mp); 2419 fdrop(fp, curthread); 2420 return (EINVAL); 2421 } 2422 if (mp->mnt_flag & MNT_RDONLY) { 2423 vn_finished_write(mp); 2424 fdrop(fp, curthread); 2425 return (EROFS); 2426 } 2427 ump = VFSTOUFS(mp); 2428 fs = ump->um_fs; 2429 filetype = IFREG; 2430 2431 switch (oidp->oid_number) { 2432 2433 case FFS_SET_FLAGS: 2434 #ifdef DEBUG 2435 if (fsckcmds) 2436 printf("%s: %s flags\n", mp->mnt_stat.f_mntonname, 2437 cmd.size > 0 ? 
"set" : "clear"); 2438 #endif /* DEBUG */ 2439 if (cmd.size > 0) 2440 fs->fs_flags |= (long)cmd.value; 2441 else 2442 fs->fs_flags &= ~(long)cmd.value; 2443 break; 2444 2445 case FFS_ADJ_REFCNT: 2446 #ifdef DEBUG 2447 if (fsckcmds) { 2448 printf("%s: adjust inode %jd count by %jd\n", 2449 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2450 (intmax_t)cmd.size); 2451 } 2452 #endif /* DEBUG */ 2453 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2454 break; 2455 ip = VTOI(vp); 2456 ip->i_nlink += cmd.size; 2457 DIP_SET(ip, i_nlink, ip->i_nlink); 2458 ip->i_effnlink += cmd.size; 2459 ip->i_flag |= IN_CHANGE; 2460 if (DOINGSOFTDEP(vp)) 2461 softdep_change_linkcnt(ip); 2462 vput(vp); 2463 break; 2464 2465 case FFS_ADJ_BLKCNT: 2466 #ifdef DEBUG 2467 if (fsckcmds) { 2468 printf("%s: adjust inode %jd block count by %jd\n", 2469 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2470 (intmax_t)cmd.size); 2471 } 2472 #endif /* DEBUG */ 2473 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2474 break; 2475 ip = VTOI(vp); 2476 if (ip->i_flag & IN_SPACECOUNTED) { 2477 UFS_LOCK(ump); 2478 fs->fs_pendingblocks += cmd.size; 2479 UFS_UNLOCK(ump); 2480 } 2481 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size); 2482 ip->i_flag |= IN_CHANGE; 2483 vput(vp); 2484 break; 2485 2486 case FFS_DIR_FREE: 2487 filetype = IFDIR; 2488 /* fall through */ 2489 2490 case FFS_FILE_FREE: 2491 #ifdef DEBUG 2492 if (fsckcmds) { 2493 if (cmd.size == 1) 2494 printf("%s: free %s inode %d\n", 2495 mp->mnt_stat.f_mntonname, 2496 filetype == IFDIR ? "directory" : "file", 2497 (ino_t)cmd.value); 2498 else 2499 printf("%s: free %s inodes %d-%d\n", 2500 mp->mnt_stat.f_mntonname, 2501 filetype == IFDIR ? "directory" : "file", 2502 (ino_t)cmd.value, 2503 (ino_t)(cmd.value + cmd.size - 1)); 2504 } 2505 #endif /* DEBUG */ 2506 while (cmd.size > 0) { 2507 if ((error = ffs_freefile(ump, fs, ump->um_devvp, 2508 cmd.value, filetype))) 2509 break; 2510 cmd.size -= 1; 2511 cmd.value += 1; 2512 } 2513 break; 2514 2515 case FFS_BLK_FREE: 2516 #ifdef DEBUG 2517 if (fsckcmds) { 2518 if (cmd.size == 1) 2519 printf("%s: free block %jd\n", 2520 mp->mnt_stat.f_mntonname, 2521 (intmax_t)cmd.value); 2522 else 2523 printf("%s: free blocks %jd-%jd\n", 2524 mp->mnt_stat.f_mntonname, 2525 (intmax_t)cmd.value, 2526 (intmax_t)cmd.value + cmd.size - 1); 2527 } 2528 #endif /* DEBUG */ 2529 blkno = cmd.value; 2530 blkcnt = cmd.size; 2531 blksize = fs->fs_frag - (blkno % fs->fs_frag); 2532 while (blkcnt > 0) { 2533 if (blksize > blkcnt) 2534 blksize = blkcnt; 2535 ffs_blkfree(ump, fs, ump->um_devvp, blkno, 2536 blksize * fs->fs_fsize, ROOTINO); 2537 blkno += blksize; 2538 blkcnt -= blksize; 2539 blksize = fs->fs_frag; 2540 } 2541 break; 2542 2543 /* 2544 * Adjust superblock summaries. fsck(8) is expected to 2545 * submit deltas when necessary. 
2546 */ 2547 case FFS_ADJ_NDIR: 2548 #ifdef DEBUG 2549 if (fsckcmds) { 2550 printf("%s: adjust number of directories by %jd\n", 2551 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2552 } 2553 #endif /* DEBUG */ 2554 fs->fs_cstotal.cs_ndir += cmd.value; 2555 break; 2556 case FFS_ADJ_NBFREE: 2557 #ifdef DEBUG 2558 if (fsckcmds) { 2559 printf("%s: adjust number of free blocks by %+jd\n", 2560 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2561 } 2562 #endif /* DEBUG */ 2563 fs->fs_cstotal.cs_nbfree += cmd.value; 2564 break; 2565 case FFS_ADJ_NIFREE: 2566 #ifdef DEBUG 2567 if (fsckcmds) { 2568 printf("%s: adjust number of free inodes by %+jd\n", 2569 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2570 } 2571 #endif /* DEBUG */ 2572 fs->fs_cstotal.cs_nifree += cmd.value; 2573 break; 2574 case FFS_ADJ_NFFREE: 2575 #ifdef DEBUG 2576 if (fsckcmds) { 2577 printf("%s: adjust number of free frags by %+jd\n", 2578 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2579 } 2580 #endif /* DEBUG */ 2581 fs->fs_cstotal.cs_nffree += cmd.value; 2582 break; 2583 case FFS_ADJ_NUMCLUSTERS: 2584 #ifdef DEBUG 2585 if (fsckcmds) { 2586 printf("%s: adjust number of free clusters by %+jd\n", 2587 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2588 } 2589 #endif /* DEBUG */ 2590 fs->fs_cstotal.cs_numclusters += cmd.value; 2591 break; 2592 2593 default: 2594 #ifdef DEBUG 2595 if (fsckcmds) { 2596 printf("Invalid request %d from fsck\n", 2597 oidp->oid_number); 2598 } 2599 #endif /* DEBUG */ 2600 error = EINVAL; 2601 break; 2602 2603 } 2604 fdrop(fp, curthread); 2605 vn_finished_write(mp); 2606 return (error); 2607 } 2608
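
/*
 * Worked example for the FFS_BLK_FREE loop above: with fs_frag == 8
 * and a request starting at blkno == 10, the first chunk is clipped
 * to the enclosing block boundary (blksize = 8 - (10 % 8) = 6
 * fragments, covering fragments 10..15); each later chunk is a full
 * fs_frag-sized block, with the final chunk trimmed to whatever
 * count remains.
 */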