/*-
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref,
				  int size);

static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs2_daddr_t
	      ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t);
#ifdef DIAGNOSTIC
static int	ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int);
static void	ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *,
		    ufs1_daddr_t, int);
static ino_t	ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int);
static void	ffs_fserr(struct fs *, ino_t, char *);
static ufs2_daddr_t	ffs_hashalloc
		(struct inode *, int, ufs2_daddr_t, int, allocfcn_t *);
static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
static int	ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
static int	ffs_reallocblks_ufs2(struct vop_reallocblks_args *);

/*
 * Allocate a block in the filesystem.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
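/*
 * Illustrative sketch (not part of this file) expanding on the contract
 * above: a caller is expected to round its request up to a fragment
 * multiple first, so that the size checks in ffs_alloc() hold.  The
 * names nbytes, lbn, and cred below are placeholders.
 *
 *	ufs2_daddr_t bno;
 *	int size, error;
 *
 *	size = fragroundup(fs, nbytes);	// multiple of fs_fsize, <= fs_bsize
 *	error = ffs_alloc(ip, lbn, 0, size, cred, &bno);
 *	if (error == 0)
 *		// bno is the first fragment of the new allocation
 */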
int
ffs_alloc(ip, lbn, bpref, size, cred, bnp)
	struct inode *ip;
	ufs2_daddr_t lbn, bpref;
	int size;
	struct ucred *cred;
	ufs2_daddr_t *bnp;
{
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t bno;
	int cg, reclaimed;
	static struct timeval lastfail;
	static int curfail;
#ifdef QUOTA
	int error;
#endif

	*bnp = 0;
	fs = ip->i_fs;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, size,
		    fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* DIAGNOSTIC */
	reclaimed = 0;
retry:
#ifdef QUOTA
	UFS_UNLOCK(ump);
	error = chkdq(ip, btodb(size), cred, 0);
	if (error)
		return (error);
	UFS_LOCK(ump);
#endif
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (suser_cred(cred, SUSER_ALLOWJAIL) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
		goto nospace;
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
	if (bno > 0) {
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#ifdef QUOTA
	UFS_UNLOCK(ump);
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(size), cred, FORCE);
	UFS_LOCK(ump);
#endif
nospace:
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, ITOV(ip));
		goto retry;
	}
	UFS_UNLOCK(ump);
	if (ppsratecheck(&lastfail, &curfail, 1)) {
		ffs_fserr(fs, ip->i_number, "filesystem full");
		uprintf("\n%s: write failed, filesystem is full\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size
 *
 * The number and size of the old block is given, and a preference
 * and new size is also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
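/*
 * Worked example of the extension arithmetic (a sketch, assuming
 * fs_bsize = 8192 and fs_fsize = 1024, i.e. fs_frag = 8): growing a
 * final partial block from osize = 2048 to nsize = 3072 needs
 * numfrags(fs, nsize - osize) = 1 additional fragment.  The
 * ffs_fragextend() fast path succeeds only if that fragment directly
 * after bprev is still free and the result does not cross a block
 * boundary; otherwise a fresh run of `request' fragments is taken from
 * the regular allocator and the old fragments at bprev are freed.
 */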
int
ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, cred, bpp)
	struct inode *ip;
	ufs2_daddr_t lbprev;
	ufs2_daddr_t bprev;
	ufs2_daddr_t bpref;
	int osize, nsize;
	struct ucred *cred;
	struct buf **bpp;
{
	struct vnode *vp;
	struct fs *fs;
	struct buf *bp;
	struct ufsmount *ump;
	int cg, request, error, reclaimed;
	ufs2_daddr_t bno;
	static struct timeval lastfail;
	static int curfail;

	*bpp = 0;
	vp = ITOV(ip);
	fs = ip->i_fs;
	bp = NULL;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef DIAGNOSTIC
	if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_realloccg: allocation on suspended filesystem");
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		"dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, osize,
		    nsize, fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* DIAGNOSTIC */
	reclaimed = 0;
retry:
	if (suser_cred(cred, SUSER_ALLOWJAIL) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) {
		goto nospace;
	}
	if (bprev == 0) {
		printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	UFS_UNLOCK(ump);
	/*
	 * Allocate the extra space in the buffer.
	 */
	error = bread(vp, lbprev, osize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (bp->b_blkno == bp->b_lblkno) {
		if (lbprev >= NDADDR)
			panic("ffs_realloccg: lbprev out of range");
		bp->b_blkno = fsbtodb(fs, bprev);
	}

#ifdef QUOTA
	error = chkdq(ip, btodb(nsize - osize), cred, 0);
	if (error) {
		brelse(bp);
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	UFS_LOCK(ump);
	bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
	if (bno) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("ffs_realloccg: bad blockno");
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(nsize - osize));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO)
			bzero((char *)bp->b_data + osize, nsize - osize);
		else
			vfs_bio_clrbuf(bp);
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree <= 5 ||
		    fs->fs_cstotal.cs_nffree >
		    (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment.
		 * To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = %s, optim = %ld, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize,
			    ip->i_number);
		if (nsize < request)
			ffs_blkfree(ump, fs, ip->i_devvp,
			    bno + numfrags(fs, nsize),
			    (long)(request - nsize), ip->i_number);
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(nsize - osize));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO)
			bzero((char *)bp->b_data + osize, nsize - osize);
		else
			vfs_bio_clrbuf(bp);
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	UFS_UNLOCK(ump);
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
	UFS_LOCK(ump);
#endif
nospace:
	/*
	 * no space available
	 */
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, vp);
		UFS_UNLOCK(ump);
		if (bp)
			brelse(bp);
		UFS_LOCK(ump);
		goto retry;
	}
	UFS_UNLOCK(ump);
	if (bp)
		brelse(bp);
	if (ppsratecheck(&lastfail, &curfail, 1)) {
		ffs_fserr(fs, ip->i_number, "filesystem full");
		uprintf("\n%s: write failed, filesystem is full\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous are given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible
 * to the end of the allocation for the logical block immediately
 * preceding the current range. If successful, the physical block numbers
 * in the buffer pointers and in the inode are changed to reflect the new
 * allocation. If unsuccessful, the allocation is left unchanged. The
 * success in doing the reallocation is returned. Note that the error
 * return is not reflected back to the user. Rather the previous block
 * allocation will be used.
 */
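/*
 * Illustrative user-space sketch (not part of this file): the knobs
 * declared below surface as vfs.ffs.doasyncfree and
 * vfs.ffs.doreallocblks and can be inspected with sysctlbyname(3):
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *
 *	int val;
 *	size_t len = sizeof(val);
 *	if (sysctlbyname("vfs.ffs.doreallocblks", &val, &len, NULL, 0) == 0)
 *		printf("block reallocation %s\n", val ? "on" : "off");
 */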

SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");

static int doasyncfree = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");

static int doreallocblks = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");

#ifdef DEBUG
static volatile int prtrealloc = 0;
#endif

int
ffs_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{

	if (doreallocblks == 0)
		return (ENOSPC);
	if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1)
		return (ffs_reallocblks_ufs1(ap));
	return (ffs_reallocblks_ufs2(ap));
}

static int
ffs_reallocblks_ufs1(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs1_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	struct ufsmount *ump;
	ufs_lbn_t start_lbn, end_lbn;
	ufs1_daddr_t soff, newblk, blkno;
	ufs2_daddr_t pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din1->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs1_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs1_daddr_t *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	UFS_LOCK(ump);
	pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0) {
		UFS_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", *bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din1->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din1->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ffs_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din1->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

static int
ffs_reallocblks_ufs2(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs2_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	struct ufsmount *ump;
	ufs_lbn_t start_lbn, end_lbn;
	ufs2_daddr_t soff, newblk, blkno, pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din2->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs2_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs2_daddr_t *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	UFS_LOCK(ump);
	pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0) {
		UFS_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)*bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din2->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din2->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ffs_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din2->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

/*
 * Allocate an inode in the filesystem.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
int
ffs_valloc(pvp, mode, cred, vpp)
	struct vnode *pvp;
	int mode;
	struct ucred *cred;
	struct vnode **vpp;
{
	struct inode *pip;
	struct fs *fs;
	struct inode *ip;
	struct timespec ts;
	struct ufsmount *ump;
	ino_t ino, ipref;
	int cg, error;
	static struct timeval lastfail;
	static int curfail;

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	ump = pip->i_ump;

	UFS_LOCK(ump);
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track the number of dirs created one after another
	 * in the same cg without intervening creation of files.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode,
					(allocfcn_t *)ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	error = ffs_vget(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
	if (error) {
		ffs_vfree(pvp, ino, mode);
		return (error);
	}
	ip = VTOI(*vpp);
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %lu, fs = %s\n",
		    ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}
	if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) {  /* XXX */
		printf("free inode %s/%lu had %ld blocks\n",
		    fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks));
		DIP_SET(ip, i_blocks, 0);
	}
	ip->i_flags = 0;
	DIP_SET(ip, i_flags, 0);
	/*
	 * Set up a new generation number for this inode.
	 */
	if (ip->i_gen == 0 || ++ip->i_gen == 0)
		ip->i_gen = arc4random() / 2 + 1;
	DIP_SET(ip, i_gen, ip->i_gen);
	if (fs->fs_magic == FS_UFS2_MAGIC) {
		vfs_timestamp(&ts);
		ip->i_din2->di_birthtime = ts.tv_sec;
		ip->i_din2->di_birthnsec = ts.tv_nsec;
	}
	ip->i_flag = 0;
	vnode_destroy_vobject(*vpp);
	(*vpp)->v_type = VNON;
	if (fs->fs_magic == FS_UFS2_MAGIC)
		(*vpp)->v_op = &ffs_vnodeops2;
	else
		(*vpp)->v_op = &ffs_vnodeops1;
	return (0);
noinodes:
	UFS_UNLOCK(ump);
	if (ppsratecheck(&lastfail, &curfail, 1)) {
		ffs_fserr(fs, pip->i_number, "out of inodes");
		uprintf("\n%s: create/symlink failed, no inodes free\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static ino_t
ffs_dirpref(pip)
	struct inode *pip;
{
	struct fs *fs;
	int cg, prefcg, dirsize, cgsize;
	int avgifree, avgbfree, avgndir, curdirsize;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	mtx_assert(UFS_MTX(pip->i_ump), MA_OWNED);
	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref");
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = arc4random() % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Count various limits which are used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - avgifree / 4;
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - avgbfree / 4;
	if (minbfree < 1)
		minbfree = 1;
	cgsize = fs->fs_fsize * fs->fs_fpg;
	dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
	curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255);
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
		    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop when we have deficit in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is laid out
 * contiguously if possible.
 */
ufs2_daddr_t
ffs_blkpref_ufs1(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs1_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Same as above, but for UFS2
 */
ufs2_daddr_t
ffs_blkpref_ufs2(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs2_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 *
 * Must be called with the UFS lock held. Will release the lock on success
 * and return with it held on failure.
 */
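/*
 * Illustrative sketch (not part of this file) of the probe order
 * implemented below: with fs_ncg = 16 and starting group icg = 5, the
 * quadratic rehash visits groups 6, 8, 12, and 4 (offsets 1, 3, 7, and
 * 15 mod 16 from icg), and the brute force pass then starts at group
 * (icg + 2) % ncg = 7 and steps through the rest.  A user-space model:
 *
 *	int cg = icg;
 *	for (int i = 1; i < ncg; i *= 2) {
 *		cg += i;
 *		if (cg >= ncg)
 *			cg -= ncg;
 *		printf("quadratic probe: cg %d\n", cg);
 *	}
 *	cg = (icg + 2) % ncg;
 *	for (int i = 2; i < ncg; i++) {
 *		printf("brute force probe: cg %d\n", cg);
 *		cg = (cg + 1) % ncg;
 *	}
 */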
/*VARARGS5*/
static ufs2_daddr_t
ffs_hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	ufs2_daddr_t pref;
	int size;	/* size for data blocks, mode for inodes */
	allocfcn_t *allocator;
{
	struct fs *fs;
	ufs2_daddr_t result;
	int i, icg = cg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
#ifdef DIAGNOSTIC
	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_hashalloc: allocation on suspended filesystem");
#endif
	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
static ufs2_daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bprev;
	int osize, nsize;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	int nffree;
	long bno;
	int frags, bbase;
	int i, error;
	u_int8_t *blksfree;

	ump = ip->i_ump;
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (0);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	bno = dtogd(fs, bprev);
	blksfree = cg_blksfree(cgp);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(blksfree, bno + i))
			goto fail;
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(blksfree, bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize), nffree = 0; i < frags; i++) {
		clrbit(blksfree, bno + i);
		cgp->cg_cs.cs_nffree--;
		nffree++;
	}
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nffree -= nffree;
	fs->fs_cs(fs, cg).cs_nffree -= nffree;
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev);
	bdwrite(bp);
	return (bprev);

fail:
	brelse(bp);
	UFS_LOCK(ump);
	return (0);

}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static ufs2_daddr_t
ffs_alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	int i, allocsiz, error, frags;
	u_int8_t *blksfree;

	ump = ip->i_ump;
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	if (size == fs->fs_bsize) {
		UFS_LOCK(ump);
		blkno = ffs_alloccgblk(ip, bp, bpref);
		ACTIVECLEAR(fs, cg);
		UFS_UNLOCK(ump);
		bdwrite(bp);
		return (blkno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	blksfree = cg_blksfree(cgp);
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0)
			goto fail;
		UFS_LOCK(ump);
		blkno = ffs_alloccgblk(ip, bp, bpref);
		bno = dtogd(fs, blkno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(blksfree, bno + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
		ACTIVECLEAR(fs, cg);
		UFS_UNLOCK(ump);
		bdwrite(bp);
		return (blkno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0)
		goto fail;
	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	blkno = cgbase(fs, cg) + bno;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
	bdwrite(bp);
	return (blkno);

fail:
	brelse(bp);
	UFS_LOCK(ump);
	return (0);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
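/*
 * Worked example of the cg_frsum bookkeeping in ffs_alloccg() above
 * (a sketch, assuming fs_frag = 8): a request for 3 fragments when no
 * suitable run exists carves up a whole block, so the trailing
 * 8 - 3 = 5 fragments are returned to the map and cg_frsum[5] is
 * incremented.  A later 3-fragment request can then be served from
 * that 5-fragment run, which decrements cg_frsum[5] and increments
 * cg_frsum[2] for the 2-fragment remainder.
 */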
static ufs2_daddr_t
ffs_alloccgblk(ip, bp, bpref)
	struct inode *ip;
	struct buf *bp;
	ufs2_daddr_t bpref;
{
	struct fs *fs;
	struct cg *cgp;
	struct ufsmount *ump;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp);
	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
	} else {
		bpref = blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
			goto gotit;
	}
	/*
	 * Take the next available block in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = bno;
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, (long)blkno);
	ffs_clusteracct(ump, fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	fs->fs_fmod = 1;
	blkno = cgbase(fs, cgp->cg_cgx) + bno;
	/* XXX Fixme. */
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
	UFS_LOCK(ump);
	return (blkno);
}

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group. Instead we just
 * take the first one that we find following bpref.
 */
static ufs2_daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int len;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	int i, run, bit, map, got;
	ufs2_daddr_t bno;
	u_char *mapp;
	int32_t *lp;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_maxcluster[cg] < len)
		return (0);
	UFS_UNLOCK(ump);
	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
	    NOCRED, &bp))
		goto fail_lock;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail_lock;
	bp->b_xflags |= BX_BKGRDWRITE;
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &cg_clustersum(cgp)[len];
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->fs_contigsumsize) {
		/*
		 * This is the first time looking for a cluster in this
		 * cylinder group. Update the cluster summary information
		 * to reflect the true maximum sized cluster so that
		 * future cluster allocation requests can avoid reading
		 * the cylinder group map only to find no clusters.
		 */
		lp = &cg_clustersum(cgp)[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		UFS_LOCK(ump);
		fs->fs_maxcluster[cg] = i;
		goto fail;
	}
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need as we prefer to get one close to the previous
	 * block allocation.
	 * We do not search before the current
	 * preference point as we do not want to allocate a block
	 * that is allocated before the previous one (as we will
	 * then have to wait for another pass of the elevator
	 * algorithm before it will be read). We prefer to fail and
	 * be recalled to try an allocation in the next cylinder group.
	 */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	else
		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
	map = *mapp++;
	bit = 1 << (bpref % NBBY);
	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
		if ((map & bit) == 0) {
			run = 0;
		} else {
			run++;
			if (run == len)
				break;
		}
		if ((got & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	if (got >= cgp->cg_nclusterblks)
		goto fail_lock;
	/*
	 * Allocate the cluster that we have found.
	 */
	blksfree = cg_blksfree(cgp);
	for (i = 1; i <= len; i++)
		if (!ffs_isblock(fs, blksfree, got - run + i))
			panic("ffs_clusteralloc: map mismatch");
	bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1);
	if (dtog(fs, bno) != cg)
		panic("ffs_clusteralloc: allocated out of group");
	len = blkstofrags(fs, len);
	UFS_LOCK(ump);
	for (i = 0; i < len; i += fs->fs_frag)
		if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
			panic("ffs_clusteralloc: lost block");
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	bdwrite(bp);
	return (bno);

fail_lock:
	UFS_LOCK(ump);
fail:
	brelse(bp);
	return (0);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
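/*
 * Illustrative sketch (not part of this file): the allocation loop
 * below scans the inosused bitmap a byte at a time, skipping 0xff
 * (fully allocated) bytes, then picks the first clear bit within the
 * first partially free byte.  A plain C model of the bit scan it
 * performs, assuming ipg inodes per group:
 *
 *	for (int ino = 0; ino < ipg; ino++)
 *		if ((inosused[ino / 8] & (1 << (ino % 8))) == 0)
 *			return (ino);	// first free inode in this cg
 */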
static ufs2_daddr_t
ffs_nodealloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	ufs2_daddr_t ipref;
	int mode;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp, *ibp;
	struct ufsmount *ump;
	u_int8_t *inosused;
	struct ufs2_dinode *dp2;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		UFS_LOCK(ump);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		UFS_LOCK(ump);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	inosused = cg_inosused(cgp);
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(inosused, ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &inosused[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &inosused[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %ld, fs = %s\n",
			    cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = inosused[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */
gotit:
	/*
	 * Check to see if we need to initialize more inodes.
	 */
	ibp = NULL;
	if (fs->fs_magic == FS_UFS2_MAGIC &&
	    ipref + INOPB(fs) > cgp->cg_initediblk &&
	    cgp->cg_initediblk < cgp->cg_niblk) {
		ibp = getblk(ip->i_devvp, fsbtodb(fs,
		    ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
		    (int)fs->fs_bsize, 0, 0, 0);
		bzero(ibp->b_data, (int)fs->fs_bsize);
		dp2 = (struct ufs2_dinode *)(ibp->b_data);
		for (i = 0; i < INOPB(fs); i++) {
			dp2->di_gen = arc4random() / 2 + 1;
			dp2++;
		}
		cgp->cg_initediblk += INOPB(fs);
	}
	UFS_LOCK(ump);
	ACTIVECLEAR(fs, cg);
	setbit(inosused, ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
	bdwrite(bp);
	if (ibp != NULL)
		bawrite(ibp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * check if a block is free
 */
static int
ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
{

	switch ((int)fs->fs_frag) {
	case 8:
		return (cp[h] == 0);
	case 4:
		return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
	case 2:
		return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
	case 1:
		return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
	default:
		panic("ffs_isfreeblock");
	}
	return (0);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
void
ffs_blkfree(ump, fs, devvp, bno, size, inum)
	struct ufsmount *ump;
	struct fs *fs;
	struct vnode *devvp;
	ufs2_daddr_t bno;
	long size;
	ino_t inum;
{
	struct cg *cgp;
	struct buf *bp;
	ufs1_daddr_t fragno, cgbno;
	ufs2_daddr_t cgblkno;
	int i, cg, blk, frags, bbase;
	u_int8_t *blksfree;
	struct cdev *dev;

	cg = dtog(fs, bno);
	if (devvp->v_type != VCHR) {
		/* devvp is a snapshot */
		dev = VTOI(devvp)->i_devvp->v_rdev;
		cgblkno = fragstoblks(fs, cgtod(fs, cg));
	} else {
		/* devvp is a normal disk device */
		dev = devvp->v_rdev;
		cgblkno = fsbtodb(fs, cgtod(fs, cg));
		ASSERT_VOP_LOCKED(devvp, "ffs_blkfree");
		if ((devvp->v_vflag & VV_COPYONWRITE) &&
		    ffs_snapblkfree(fs, devvp, bno, size, inum))
			return;
	}
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
		printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n",
		    devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize,
		    size, fs->fs_fsmnt);
		panic("ffs_blkfree: bad size");
	}
#endif
	if ((u_int)bno >= fs->fs_size) {
		printf("bad block %jd, ino %lu\n", (intmax_t)bno,
		    (u_long)inum);
		ffs_fserr(fs, inum, "bad block");
		return;
	}
	if (bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp)) {
		brelse(bp);
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	cgbno = dtogd(fs, bno);
	blksfree = cg_blksfree(cgp);
	UFS_LOCK(ump);
	if (size == fs->fs_bsize) {
		fragno = fragstoblks(fs, cgbno);
		if (!ffs_isfreeblock(fs, blksfree, fragno)) {
			if (devvp->v_type != VCHR) {
				UFS_UNLOCK(ump);
				/* devvp is a snapshot */
				brelse(bp);
				return;
			}
			printf("dev = %s, block = %jd, fs = %s\n",
			    devtoname(dev), (intmax_t)bno, fs->fs_fsmnt);
			panic("ffs_blkfree: freeing free block");
		}
		ffs_setblock(fs, blksfree, fragno);
		ffs_clusteracct(ump, fs, cgp, fragno, 1);
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
	} else {
		bbase = cgbno - fragnum(fs, cgbno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(blksfree, cgbno + i)) {
				printf("dev = %s, block = %jd, fs = %s\n",
				    devtoname(dev), (intmax_t)(bno + i),
				    fs->fs_fsmnt);
				panic("ffs_blkfree: freeing free frag");
			}
			setbit(blksfree, cgbno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		fragno = fragstoblks(fs, bbase);
		if (ffs_isblock(fs, blksfree, fragno)) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(ump, fs, cgp, fragno, 1);
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
		}
	}
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	bdwrite(bp);
}

#ifdef DIAGNOSTIC
/*
 * Verify allocation of a block or fragment. Returns true if block or
 * fragment is allocated, false if it is free.
 */
static int
ffs_checkblk(ip, bno, size)
	struct inode *ip;
	ufs2_daddr_t bno;
	long size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	ufs1_daddr_t cgbno;
	int i, error, frags, free;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("bsize = %ld, size = %ld, fs = %s\n",
		    (long)fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_checkblk: bad size");
	}
	if ((u_int)bno >= fs->fs_size)
		panic("ffs_checkblk: bad block %jd", (intmax_t)bno);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		panic("ffs_checkblk: cg bread failed");
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		panic("ffs_checkblk: cg magic mismatch");
	bp->b_xflags |= BX_BKGRDWRITE;
	blksfree = cg_blksfree(cgp);
	cgbno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno));
	} else {
		frags = numfrags(fs, size);
		for (free = 0, i = 0; i < frags; i++)
			if (isset(blksfree, cgbno + i))
				free++;
		if (free != 0 && free != frags)
			panic("ffs_checkblk: partially free fragment");
	}
	brelse(bp);
	return (!free);
}
#endif /* DIAGNOSTIC */

/*
 * Free an inode.
 */
int
ffs_vfree(pvp, ino, mode)
	struct vnode *pvp;
	ino_t ino;
	int mode;
{
	struct inode *ip;

	if (DOINGSOFTDEP(pvp)) {
		softdep_freefile(pvp, ino, mode);
		return (0);
	}
	ip = VTOI(pvp);
	return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode));
}

/*
 * Do the actual free operation.
 * The specified inode is placed back in the free map.
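 * As in ffs_blkfree(), the devvp argument may be either the filesystem
 * device itself or a snapshot file vnode; the cylinder group is read
 * through whichever vnode is supplied.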
 */
int
ffs_freefile(ump, fs, devvp, ino, mode)
	struct ufsmount *ump;
	struct fs *fs;
	struct vnode *devvp;
	ino_t ino;
	int mode;
{
	struct cg *cgp;
	struct buf *bp;
	ufs2_daddr_t cgbno;
	int error, cg;
	u_int8_t *inosused;
	struct cdev *dev;

	cg = ino_to_cg(fs, ino);
	if (devvp->v_type != VCHR) {
		/* devvp is a snapshot */
		dev = VTOI(devvp)->i_devvp->v_rdev;
		cgbno = fragstoblks(fs, cgtod(fs, cg));
	} else {
		/* devvp is a normal disk device */
		dev = devvp->v_rdev;
		cgbno = fsbtodb(fs, cgtod(fs, cg));
	}
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s",
		    devtoname(dev), (u_long)ino, fs->fs_fsmnt);
	if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) {
		brelse(bp);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	inosused = cg_inosused(cgp);
	ino %= fs->fs_ipg;
	if (isclr(inosused, ino)) {
		printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev),
		    (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ffs_freefile: freeing free inode");
	}
	clrbit(inosused, ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	bdwrite(bp);
	return (0);
}

/*
 * Check to see if a file is free.
 */
int
ffs_checkfreefile(fs, devvp, ino)
	struct fs *fs;
	struct vnode *devvp;
	ino_t ino;
{
	struct cg *cgp;
	struct buf *bp;
	ufs2_daddr_t cgbno;
	int ret, cg;
	u_int8_t *inosused;

	cg = ino_to_cg(fs, ino);
	if (devvp->v_type != VCHR) {
		/* devvp is a snapshot */
		cgbno = fragstoblks(fs, cgtod(fs, cg));
	} else {
		/* devvp is a normal disk device */
		cgbno = fsbtodb(fs, cgtod(fs, cg));
	}
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		return (1);
	if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) {
		brelse(bp);
		return (1);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (1);
	}
	inosused = cg_inosused(cgp);
	ino %= fs->fs_ipg;
	ret = isclr(inosused, ino);
	brelse(bp);
	return (ret);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
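 *
 * The search runs in two steps: scanc() scans the free-fragment map a
 * byte at a time, using fragtbl[] to recognize any byte whose bit
 * pattern contains a run of at least allocsiz free fragments, and the
 * inner loop below then pinpoints the run within that byte using the
 * around[] and inside[] masks.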
 */
static ufs1_daddr_t
ffs_mapsearch(fs, cgp, bpref, allocsiz)
	struct fs *fs;
	struct cg *cgp;
	ufs2_daddr_t bpref;
	int allocsiz;
{
	ufs1_daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;
	u_int8_t *blksfree;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	blksfree = cg_blksfree(cgp);
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((u_int)len, (u_char *)&blksfree[start],
	    (u_char *)fragtbl[fs->fs_frag],
	    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len, (u_char *)&blksfree[0],
		    (u_char *)fragtbl[fs->fs_frag],
		    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("ffs_mapsearch: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, blksfree, bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
	panic("ffs_mapsearch: block not in map");
	return (-1);
}

/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 */
void
ffs_clusteracct(ump, fs, cgp, blkno, cnt)
	struct ufsmount *ump;
	struct fs *fs;
	struct cg *cgp;
	ufs1_daddr_t blkno;
	int cnt;
{
	int32_t *sump;
	int32_t *lp;
	u_char *freemapp, *mapp;
	int i, start, end, forw, back, map, bit;

	mtx_assert(UFS_MTX(ump), MA_OWNED);

	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp);
	sump = cg_clustersum(cgp);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= cgp->cg_nclusterblks)
		end = cgp->cg_nclusterblks;
	mapp = &freemapp[start / NBBY];
	map = *mapp++;
	bit = 1 << (start % NBBY);
	for (i = start; i < end; i++) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
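	 * Together, the forward and backward scans give the length of
	 * the free run that this block either joins (cnt == 1) or
	 * splits (cnt == -1); the cluster summary counts are then
	 * adjusted below from the resulting forw and back lengths.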
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	mapp = &freemapp[start / NBBY];
	map = *mapp--;
	bit = 1 << (start % NBBY);
	for (i = start; i > end; i--) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != 0) {
			bit >>= 1;
		} else {
			map = *mapp--;
			bit = 1 << (NBBY - 1);
		}
	}
	back = start - i;
	/*
	 * Account for old cluster and the possibly new forward and
	 * back clusters.
	 */
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	sump[i] += cnt;
	if (back > 0)
		sump[back] -= cnt;
	if (forw > 0)
		sump[forw] -= cnt;
	/*
	 * Update cluster summary information.
	 */
	lp = &sump[fs->fs_contigsumsize];
	for (i = fs->fs_contigsumsize; i > 0; i--)
		if (*lp-- > 0)
			break;
	fs->fs_maxcluster[cgp->cg_cgx] = i;
}

/*
 * Fserr prints the name of a filesystem with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
static void
ffs_fserr(fs, inum, cp)
	struct fs *fs;
	ino_t inum;
	char *cp;
{
	struct thread *td = curthread;	/* XXX */
	struct proc *p = td->td_proc;

	log(LOG_ERR, "pid %d (%s), uid %d inumber %lu on %s: %s\n",
	    p->p_pid, p->p_comm, td->td_ucred->cr_uid, (u_long)inum,
	    fs->fs_fsmnt, cp);
}

/*
 * This function provides the capability for the fsck program to
 * update an active filesystem. Eleven operations are provided:
 *
 * adjrefcnt(inode, amt) - adjusts the reference count on the
 *	specified inode by the specified amount. Under normal
 *	operation the count should always go down. Decrementing
 *	the count to zero will cause the inode to be freed.
 * adjblkcnt(inode, amt) - adjust the number of blocks used by the
 *	specified inode by the specified amount.
 * adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) -
 *	adjust the superblock summary.
 * freedirs(inode, count) - directory inodes [inode..inode + count - 1]
 *	are marked as free. Inodes should never have to be marked
 *	as in use.
 * freefiles(inode, count) - file inodes [inode..inode + count - 1]
 *	are marked as free. Inodes should never have to be marked
 *	as in use.
 * freeblks(blockno, size) - blocks [blockno..blockno + size - 1]
 *	are marked as free. Blocks should never have to be marked
 *	as in use.
 * setflags(flags, set/clear) - the fs_flags field has the specified
 *	flags set (second parameter +1) or cleared (second parameter -1).
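 *
 * Each request is submitted by writing a struct fsck_cmd (declared in
 * <ufs/ffs/fs.h>) to the corresponding vfs.ffs sysctl: the version
 * field must be FFS_CMD_VERSION, handle is a file descriptor opened
 * on the target filesystem, and value/size carry the two operation
 * arguments described above.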
 */

static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);

SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT,
	0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust Inode Used Blocks Count");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of directories");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NBFREE, adjnbfree, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of free blocks");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NIFREE, adjnifree, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of free inodes");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NFFREE, adjnffree, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of free frags");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NUMCLUSTERS, adjnumclusters, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of free clusters");

static SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR,
	sysctl_ffs_fsck, "Free Range of Directory Inodes");

static SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR,
	sysctl_ffs_fsck, "Free Range of File Inodes");

static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR,
	sysctl_ffs_fsck, "Free Range of Blocks");

static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR,
	sysctl_ffs_fsck, "Change Filesystem Flags");

#ifdef DEBUG
static int fsckcmds = 0;
SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
#endif /* DEBUG */

static int
sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
{
	struct fsck_cmd cmd;
	struct ufsmount *ump;
	struct vnode *vp;
	struct inode *ip;
	struct mount *mp;
	struct fs *fs;
	ufs2_daddr_t blkno;
	long blkcnt, blksize;
	struct file *fp;
	int filetype, error;

	if (req->newlen > sizeof cmd)
		return (EBADRPC);
	if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0)
		return (error);
	if (cmd.version != FFS_CMD_VERSION)
		return (ERPCMISMATCH);
	if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0)
		return (error);
	vn_start_write(fp->f_data, &mp, V_WAIT);
	if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
		vn_finished_write(mp);
		fdrop(fp, curthread);
		return (EINVAL);
	}
	if (mp->mnt_flag & MNT_RDONLY) {
		vn_finished_write(mp);
		fdrop(fp, curthread);
		return (EROFS);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	filetype = IFREG;

	switch (oidp->oid_number) {

	case FFS_SET_FLAGS:
#ifdef DEBUG
		if (fsckcmds)
			printf("%s: %s flags\n", mp->mnt_stat.f_mntonname,
			    cmd.size > 0 ? "set" : "clear");
#endif /* DEBUG */
		if (cmd.size > 0)
			fs->fs_flags |= (long)cmd.value;
		else
			fs->fs_flags &= ~(long)cmd.value;
		break;

	case FFS_ADJ_REFCNT:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust inode %jd count by %jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
			    (intmax_t)cmd.size);
		}
#endif /* DEBUG */
		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
			break;
		ip = VTOI(vp);
		ip->i_nlink += cmd.size;
		DIP_SET(ip, i_nlink, ip->i_nlink);
		ip->i_effnlink += cmd.size;
		ip->i_flag |= IN_CHANGE;
		if (DOINGSOFTDEP(vp))
			softdep_change_linkcnt(ip);
		vput(vp);
		break;

	case FFS_ADJ_BLKCNT:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust inode %jd block count by %jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
			    (intmax_t)cmd.size);
		}
#endif /* DEBUG */
		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
			break;
		ip = VTOI(vp);
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size);
		ip->i_flag |= IN_CHANGE;
		vput(vp);
		break;

	case FFS_DIR_FREE:
		filetype = IFDIR;
		/* fall through */

	case FFS_FILE_FREE:
#ifdef DEBUG
		if (fsckcmds) {
			if (cmd.size == 1)
				printf("%s: free %s inode %lu\n",
				    mp->mnt_stat.f_mntonname,
				    filetype == IFDIR ? "directory" : "file",
				    (u_long)cmd.value);
			else
				printf("%s: free %s inodes %lu-%lu\n",
				    mp->mnt_stat.f_mntonname,
				    filetype == IFDIR ? "directory" : "file",
				    (u_long)cmd.value,
				    (u_long)(cmd.value + cmd.size - 1));
		}
#endif /* DEBUG */
		while (cmd.size > 0) {
			if ((error = ffs_freefile(ump, fs, ump->um_devvp,
			    cmd.value, filetype)))
				break;
			cmd.size -= 1;
			cmd.value += 1;
		}
		break;

	case FFS_BLK_FREE:
#ifdef DEBUG
		if (fsckcmds) {
			if (cmd.size == 1)
				printf("%s: free block %jd\n",
				    mp->mnt_stat.f_mntonname,
				    (intmax_t)cmd.value);
			else
				printf("%s: free blocks %jd-%jd\n",
				    mp->mnt_stat.f_mntonname,
				    (intmax_t)cmd.value,
				    (intmax_t)cmd.value + cmd.size - 1);
		}
#endif /* DEBUG */
		blkno = cmd.value;
		blkcnt = cmd.size;
		blksize = fs->fs_frag - (blkno % fs->fs_frag);
		while (blkcnt > 0) {
			if (blksize > blkcnt)
				blksize = blkcnt;
			ffs_blkfree(ump, fs, ump->um_devvp, blkno,
			    blksize * fs->fs_fsize, ROOTINO);
			blkno += blksize;
			blkcnt -= blksize;
			blksize = fs->fs_frag;
		}
		break;

	/*
	 * Adjust superblock summaries. fsck(8) is expected to
	 * submit deltas when necessary.
	 */
	case FFS_ADJ_NDIR:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of directories by %jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_ndir += cmd.value;
		break;
	case FFS_ADJ_NBFREE:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free blocks by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_nbfree += cmd.value;
		break;
	case FFS_ADJ_NIFREE:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free inodes by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_nifree += cmd.value;
		break;
	case FFS_ADJ_NFFREE:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free frags by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_nffree += cmd.value;
		break;
	case FFS_ADJ_NUMCLUSTERS:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free clusters by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_numclusters += cmd.value;
		break;

	default:
#ifdef DEBUG
		if (fsckcmds) {
			printf("Invalid request %d from fsck\n",
			    oidp->oid_number);
		}
#endif /* DEBUG */
		error = EINVAL;
		break;

	}
	fdrop(fp, curthread);
	vn_finished_write(mp);
	return (error);
}
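
/*
 * Illustrative userland sketch, not part of this file's build: one way
 * a tool in the mold of fsck_ffs(8) might submit an adjrefcnt request
 * through the sysctls above. The function name and path argument are
 * hypothetical; struct fsck_cmd and FFS_CMD_VERSION come from
 * <ufs/ffs/fs.h>, the sysctl name "vfs.ffs.adjrefcnt" corresponds to
 * the FFS_ADJ_REFCNT node declared earlier, and the handle may be any
 * descriptor opened on the target filesystem, as required by the
 * getvnode() check in sysctl_ffs_fsck().
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <ufs/ffs/fs.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void
 *	adjrefcnt(const char *path, ino_t ino, int64_t amt)
 *	{
 *		struct fsck_cmd cmd;
 *
 *		memset(&cmd, 0, sizeof(cmd));
 *		cmd.version = FFS_CMD_VERSION;
 *		if ((cmd.handle = open(path, O_RDONLY)) < 0)
 *			err(1, "%s", path);
 *		cmd.value = ino;	-- inode whose count is adjusted
 *		cmd.size = amt;		-- signed delta applied to i_nlink
 *		if (sysctlbyname("vfs.ffs.adjrefcnt", NULL, NULL,
 *		    &cmd, sizeof(cmd)) < 0)
 *			err(1, "adjrefcnt");
 *		close(cmd.handle);
 *	}
 */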