1 /*- 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * Copyright (c) 1982, 1986, 1989, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 4. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_quota.h" 66 67 #include <sys/param.h> 68 #include <sys/systm.h> 69 #include <sys/bio.h> 70 #include <sys/buf.h> 71 #include <sys/conf.h> 72 #include <sys/file.h> 73 #include <sys/filedesc.h> 74 #include <sys/priv.h> 75 #include <sys/proc.h> 76 #include <sys/vnode.h> 77 #include <sys/mount.h> 78 #include <sys/kernel.h> 79 #include <sys/sysctl.h> 80 #include <sys/syslog.h> 81 82 #include <ufs/ufs/extattr.h> 83 #include <ufs/ufs/quota.h> 84 #include <ufs/ufs/inode.h> 85 #include <ufs/ufs/ufs_extern.h> 86 #include <ufs/ufs/ufsmount.h> 87 88 #include <ufs/ffs/fs.h> 89 #include <ufs/ffs/ffs_extern.h> 90 91 typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref, 92 int size); 93 94 static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int); 95 static ufs2_daddr_t 96 ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t); 97 #ifdef DIAGNOSTIC 98 static int ffs_checkblk(struct inode *, ufs2_daddr_t, long); 99 #endif 100 static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int); 101 static void ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *, 102 ufs1_daddr_t, int); 103 static ino_t ffs_dirpref(struct inode *); 104 static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int); 105 static void ffs_fserr(struct fs *, ino_t, char *); 106 static ufs2_daddr_t ffs_hashalloc 107 (struct inode *, int, ufs2_daddr_t, int, allocfcn_t *); 108 static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int); 109 static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int); 110 static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *); 111 static int ffs_reallocblks_ufs2(struct vop_reallocblks_args *); 112 113 /* 114 * Allocate a block in the filesystem. 115 * 116 * The size of the requested block is given, which must be some 117 * multiple of fs_fsize and <= fs_bsize. 118 * A preference may be optionally specified. If a preference is given 119 * the following hierarchy is used to allocate a block: 120 * 1) allocate the requested block. 121 * 2) allocate a rotationally optimal block in the same cylinder. 122 * 3) allocate a block in the same cylinder group. 123 * 4) quadradically rehash into other cylinder groups, until an 124 * available block is located. 125 * If no block preference is given the following heirarchy is used 126 * to allocate a block: 127 * 1) allocate a block in the cylinder group that contains the 128 * inode for the file. 129 * 2) quadradically rehash into other cylinder groups, until an 130 * available block is located. 131 */ 132 int 133 ffs_alloc(ip, lbn, bpref, size, cred, bnp) 134 struct inode *ip; 135 ufs2_daddr_t lbn, bpref; 136 int size; 137 struct ucred *cred; 138 ufs2_daddr_t *bnp; 139 { 140 struct fs *fs; 141 struct ufsmount *ump; 142 ufs2_daddr_t bno; 143 int cg, reclaimed; 144 static struct timeval lastfail; 145 static int curfail; 146 #ifdef QUOTA 147 int error; 148 #endif 149 150 *bnp = 0; 151 fs = ip->i_fs; 152 ump = ip->i_ump; 153 mtx_assert(UFS_MTX(ump), MA_OWNED); 154 #ifdef DIAGNOSTIC 155 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 156 printf("dev = %s, bsize = %ld, size = %d, fs = %s\n", 157 devtoname(ip->i_dev), (long)fs->fs_bsize, size, 158 fs->fs_fsmnt); 159 panic("ffs_alloc: bad size"); 160 } 161 if (cred == NOCRED) 162 panic("ffs_alloc: missing credential"); 163 #endif /* DIAGNOSTIC */ 164 reclaimed = 0; 165 retry: 166 #ifdef QUOTA 167 UFS_UNLOCK(ump); 168 error = chkdq(ip, btodb(size), cred, 0); 169 if (error) 170 return (error); 171 UFS_LOCK(ump); 172 #endif 173 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 174 goto nospace; 175 if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, SUSER_ALLOWJAIL) && 176 freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) 177 goto nospace; 178 if (bpref >= fs->fs_size) 179 bpref = 0; 180 if (bpref == 0) 181 cg = ino_to_cg(fs, ip->i_number); 182 else 183 cg = dtog(fs, bpref); 184 bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg); 185 if (bno > 0) { 186 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size)); 187 ip->i_flag |= IN_CHANGE | IN_UPDATE; 188 *bnp = bno; 189 return (0); 190 } 191 #ifdef QUOTA 192 UFS_UNLOCK(ump); 193 /* 194 * Restore user's disk quota because allocation failed. 195 */ 196 (void) chkdq(ip, -btodb(size), cred, FORCE); 197 UFS_LOCK(ump); 198 #endif 199 nospace: 200 if (fs->fs_pendingblocks > 0 && reclaimed == 0) { 201 reclaimed = 1; 202 softdep_request_cleanup(fs, ITOV(ip)); 203 goto retry; 204 } 205 UFS_UNLOCK(ump); 206 if (ppsratecheck(&lastfail, &curfail, 1)) { 207 ffs_fserr(fs, ip->i_number, "filesystem full"); 208 uprintf("\n%s: write failed, filesystem is full\n", 209 fs->fs_fsmnt); 210 } 211 return (ENOSPC); 212 } 213 214 /* 215 * Reallocate a fragment to a bigger size 216 * 217 * The number and size of the old block is given, and a preference 218 * and new size is also specified. The allocator attempts to extend 219 * the original block. Failing that, the regular block allocator is 220 * invoked to get an appropriate block. 221 */ 222 int 223 ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, cred, bpp) 224 struct inode *ip; 225 ufs2_daddr_t lbprev; 226 ufs2_daddr_t bprev; 227 ufs2_daddr_t bpref; 228 int osize, nsize; 229 struct ucred *cred; 230 struct buf **bpp; 231 { 232 struct vnode *vp; 233 struct fs *fs; 234 struct buf *bp; 235 struct ufsmount *ump; 236 int cg, request, error, reclaimed; 237 ufs2_daddr_t bno; 238 static struct timeval lastfail; 239 static int curfail; 240 241 *bpp = 0; 242 vp = ITOV(ip); 243 fs = ip->i_fs; 244 bp = NULL; 245 ump = ip->i_ump; 246 mtx_assert(UFS_MTX(ump), MA_OWNED); 247 #ifdef DIAGNOSTIC 248 if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) 249 panic("ffs_realloccg: allocation on suspended filesystem"); 250 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 251 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 252 printf( 253 "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n", 254 devtoname(ip->i_dev), (long)fs->fs_bsize, osize, 255 nsize, fs->fs_fsmnt); 256 panic("ffs_realloccg: bad size"); 257 } 258 if (cred == NOCRED) 259 panic("ffs_realloccg: missing credential"); 260 #endif /* DIAGNOSTIC */ 261 reclaimed = 0; 262 retry: 263 if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, SUSER_ALLOWJAIL) && 264 freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) { 265 goto nospace; 266 } 267 if (bprev == 0) { 268 printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n", 269 devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev, 270 fs->fs_fsmnt); 271 panic("ffs_realloccg: bad bprev"); 272 } 273 UFS_UNLOCK(ump); 274 /* 275 * Allocate the extra space in the buffer. 276 */ 277 error = bread(vp, lbprev, osize, NOCRED, &bp); 278 if (error) { 279 brelse(bp); 280 return (error); 281 } 282 283 if (bp->b_blkno == bp->b_lblkno) { 284 if (lbprev >= NDADDR) 285 panic("ffs_realloccg: lbprev out of range"); 286 bp->b_blkno = fsbtodb(fs, bprev); 287 } 288 289 #ifdef QUOTA 290 error = chkdq(ip, btodb(nsize - osize), cred, 0); 291 if (error) { 292 brelse(bp); 293 return (error); 294 } 295 #endif 296 /* 297 * Check for extension in the existing location. 298 */ 299 cg = dtog(fs, bprev); 300 UFS_LOCK(ump); 301 bno = ffs_fragextend(ip, cg, bprev, osize, nsize); 302 if (bno) { 303 if (bp->b_blkno != fsbtodb(fs, bno)) 304 panic("ffs_realloccg: bad blockno"); 305 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(nsize - osize)); 306 ip->i_flag |= IN_CHANGE | IN_UPDATE; 307 allocbuf(bp, nsize); 308 bp->b_flags |= B_DONE; 309 if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO) 310 bzero((char *)bp->b_data + osize, nsize - osize); 311 else 312 vfs_bio_clrbuf(bp); 313 *bpp = bp; 314 return (0); 315 } 316 /* 317 * Allocate a new disk location. 318 */ 319 if (bpref >= fs->fs_size) 320 bpref = 0; 321 switch ((int)fs->fs_optim) { 322 case FS_OPTSPACE: 323 /* 324 * Allocate an exact sized fragment. Although this makes 325 * best use of space, we will waste time relocating it if 326 * the file continues to grow. If the fragmentation is 327 * less than half of the minimum free reserve, we choose 328 * to begin optimizing for time. 329 */ 330 request = nsize; 331 if (fs->fs_minfree <= 5 || 332 fs->fs_cstotal.cs_nffree > 333 (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100)) 334 break; 335 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", 336 fs->fs_fsmnt); 337 fs->fs_optim = FS_OPTTIME; 338 break; 339 case FS_OPTTIME: 340 /* 341 * At this point we have discovered a file that is trying to 342 * grow a small fragment to a larger fragment. To save time, 343 * we allocate a full sized block, then free the unused portion. 344 * If the file continues to grow, the `ffs_fragextend' call 345 * above will be able to grow it in place without further 346 * copying. If aberrant programs cause disk fragmentation to 347 * grow within 2% of the free reserve, we choose to begin 348 * optimizing for space. 349 */ 350 request = fs->fs_bsize; 351 if (fs->fs_cstotal.cs_nffree < 352 (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100) 353 break; 354 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", 355 fs->fs_fsmnt); 356 fs->fs_optim = FS_OPTSPACE; 357 break; 358 default: 359 printf("dev = %s, optim = %ld, fs = %s\n", 360 devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt); 361 panic("ffs_realloccg: bad optim"); 362 /* NOTREACHED */ 363 } 364 bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg); 365 if (bno > 0) { 366 bp->b_blkno = fsbtodb(fs, bno); 367 if (!DOINGSOFTDEP(vp)) 368 ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize, 369 ip->i_number); 370 if (nsize < request) 371 ffs_blkfree(ump, fs, ip->i_devvp, 372 bno + numfrags(fs, nsize), 373 (long)(request - nsize), ip->i_number); 374 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(nsize - osize)); 375 ip->i_flag |= IN_CHANGE | IN_UPDATE; 376 allocbuf(bp, nsize); 377 bp->b_flags |= B_DONE; 378 if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO) 379 bzero((char *)bp->b_data + osize, nsize - osize); 380 else 381 vfs_bio_clrbuf(bp); 382 *bpp = bp; 383 return (0); 384 } 385 #ifdef QUOTA 386 UFS_UNLOCK(ump); 387 /* 388 * Restore user's disk quota because allocation failed. 389 */ 390 (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE); 391 UFS_LOCK(ump); 392 #endif 393 nospace: 394 /* 395 * no space available 396 */ 397 if (fs->fs_pendingblocks > 0 && reclaimed == 0) { 398 reclaimed = 1; 399 softdep_request_cleanup(fs, vp); 400 UFS_UNLOCK(ump); 401 if (bp) 402 brelse(bp); 403 UFS_LOCK(ump); 404 goto retry; 405 } 406 UFS_UNLOCK(ump); 407 if (bp) 408 brelse(bp); 409 if (ppsratecheck(&lastfail, &curfail, 1)) { 410 ffs_fserr(fs, ip->i_number, "filesystem full"); 411 uprintf("\n%s: write failed, filesystem is full\n", 412 fs->fs_fsmnt); 413 } 414 return (ENOSPC); 415 } 416 417 /* 418 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 419 * 420 * The vnode and an array of buffer pointers for a range of sequential 421 * logical blocks to be made contiguous is given. The allocator attempts 422 * to find a range of sequential blocks starting as close as possible 423 * from the end of the allocation for the logical block immediately 424 * preceding the current range. If successful, the physical block numbers 425 * in the buffer pointers and in the inode are changed to reflect the new 426 * allocation. If unsuccessful, the allocation is left unchanged. The 427 * success in doing the reallocation is returned. Note that the error 428 * return is not reflected back to the user. Rather the previous block 429 * allocation will be used. 430 */ 431 432 SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); 433 434 static int doasyncfree = 1; 435 SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); 436 437 static int doreallocblks = 1; 438 SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 439 440 #ifdef DEBUG 441 static volatile int prtrealloc = 0; 442 #endif 443 444 int 445 ffs_reallocblks(ap) 446 struct vop_reallocblks_args /* { 447 struct vnode *a_vp; 448 struct cluster_save *a_buflist; 449 } */ *ap; 450 { 451 452 if (doreallocblks == 0) 453 return (ENOSPC); 454 if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1) 455 return (ffs_reallocblks_ufs1(ap)); 456 return (ffs_reallocblks_ufs2(ap)); 457 } 458 459 static int 460 ffs_reallocblks_ufs1(ap) 461 struct vop_reallocblks_args /* { 462 struct vnode *a_vp; 463 struct cluster_save *a_buflist; 464 } */ *ap; 465 { 466 struct fs *fs; 467 struct inode *ip; 468 struct vnode *vp; 469 struct buf *sbp, *ebp; 470 ufs1_daddr_t *bap, *sbap, *ebap = 0; 471 struct cluster_save *buflist; 472 struct ufsmount *ump; 473 ufs_lbn_t start_lbn, end_lbn; 474 ufs1_daddr_t soff, newblk, blkno; 475 ufs2_daddr_t pref; 476 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 477 int i, len, start_lvl, end_lvl, ssize; 478 479 vp = ap->a_vp; 480 ip = VTOI(vp); 481 fs = ip->i_fs; 482 ump = ip->i_ump; 483 if (fs->fs_contigsumsize <= 0) 484 return (ENOSPC); 485 buflist = ap->a_buflist; 486 len = buflist->bs_nchildren; 487 start_lbn = buflist->bs_children[0]->b_lblkno; 488 end_lbn = start_lbn + len - 1; 489 #ifdef DIAGNOSTIC 490 for (i = 0; i < len; i++) 491 if (!ffs_checkblk(ip, 492 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 493 panic("ffs_reallocblks: unallocated block 1"); 494 for (i = 1; i < len; i++) 495 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 496 panic("ffs_reallocblks: non-logical cluster"); 497 blkno = buflist->bs_children[0]->b_blkno; 498 ssize = fsbtodb(fs, fs->fs_frag); 499 for (i = 1; i < len - 1; i++) 500 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) 501 panic("ffs_reallocblks: non-physical cluster %d", i); 502 #endif 503 /* 504 * If the latest allocation is in a new cylinder group, assume that 505 * the filesystem has decided to move and do not force it back to 506 * the previous cylinder group. 507 */ 508 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 509 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 510 return (ENOSPC); 511 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 512 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 513 return (ENOSPC); 514 /* 515 * Get the starting offset and block map for the first block. 516 */ 517 if (start_lvl == 0) { 518 sbap = &ip->i_din1->di_db[0]; 519 soff = start_lbn; 520 } else { 521 idp = &start_ap[start_lvl - 1]; 522 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { 523 brelse(sbp); 524 return (ENOSPC); 525 } 526 sbap = (ufs1_daddr_t *)sbp->b_data; 527 soff = idp->in_off; 528 } 529 /* 530 * If the block range spans two block maps, get the second map. 531 */ 532 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 533 ssize = len; 534 } else { 535 #ifdef DIAGNOSTIC 536 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 537 panic("ffs_reallocblk: start == end"); 538 #endif 539 ssize = len - (idp->in_off + 1); 540 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) 541 goto fail; 542 ebap = (ufs1_daddr_t *)ebp->b_data; 543 } 544 /* 545 * Find the preferred location for the cluster. 546 */ 547 UFS_LOCK(ump); 548 pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap); 549 /* 550 * Search the block map looking for an allocation of the desired size. 551 */ 552 if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, 553 len, ffs_clusteralloc)) == 0) { 554 UFS_UNLOCK(ump); 555 goto fail; 556 } 557 /* 558 * We have found a new contiguous block. 559 * 560 * First we have to replace the old block pointers with the new 561 * block pointers in the inode and indirect blocks associated 562 * with the file. 563 */ 564 #ifdef DEBUG 565 if (prtrealloc) 566 printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, 567 (intmax_t)start_lbn, (intmax_t)end_lbn); 568 #endif 569 blkno = newblk; 570 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { 571 if (i == ssize) { 572 bap = ebap; 573 soff = -i; 574 } 575 #ifdef DIAGNOSTIC 576 if (!ffs_checkblk(ip, 577 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 578 panic("ffs_reallocblks: unallocated block 2"); 579 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) 580 panic("ffs_reallocblks: alloc mismatch"); 581 #endif 582 #ifdef DEBUG 583 if (prtrealloc) 584 printf(" %d,", *bap); 585 #endif 586 if (DOINGSOFTDEP(vp)) { 587 if (sbap == &ip->i_din1->di_db[0] && i < ssize) 588 softdep_setup_allocdirect(ip, start_lbn + i, 589 blkno, *bap, fs->fs_bsize, fs->fs_bsize, 590 buflist->bs_children[i]); 591 else 592 softdep_setup_allocindir_page(ip, start_lbn + i, 593 i < ssize ? sbp : ebp, soff + i, blkno, 594 *bap, buflist->bs_children[i]); 595 } 596 *bap++ = blkno; 597 } 598 /* 599 * Next we must write out the modified inode and indirect blocks. 600 * For strict correctness, the writes should be synchronous since 601 * the old block values may have been written to disk. In practise 602 * they are almost never written, but if we are concerned about 603 * strict correctness, the `doasyncfree' flag should be set to zero. 604 * 605 * The test on `doasyncfree' should be changed to test a flag 606 * that shows whether the associated buffers and inodes have 607 * been written. The flag should be set when the cluster is 608 * started and cleared whenever the buffer or inode is flushed. 609 * We can then check below to see if it is set, and do the 610 * synchronous write only when it has been cleared. 611 */ 612 if (sbap != &ip->i_din1->di_db[0]) { 613 if (doasyncfree) 614 bdwrite(sbp); 615 else 616 bwrite(sbp); 617 } else { 618 ip->i_flag |= IN_CHANGE | IN_UPDATE; 619 if (!doasyncfree) 620 ffs_update(vp, 1); 621 } 622 if (ssize < len) { 623 if (doasyncfree) 624 bdwrite(ebp); 625 else 626 bwrite(ebp); 627 } 628 /* 629 * Last, free the old blocks and assign the new blocks to the buffers. 630 */ 631 #ifdef DEBUG 632 if (prtrealloc) 633 printf("\n\tnew:"); 634 #endif 635 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { 636 if (!DOINGSOFTDEP(vp)) 637 ffs_blkfree(ump, fs, ip->i_devvp, 638 dbtofsb(fs, buflist->bs_children[i]->b_blkno), 639 fs->fs_bsize, ip->i_number); 640 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 641 #ifdef DIAGNOSTIC 642 if (!ffs_checkblk(ip, 643 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 644 panic("ffs_reallocblks: unallocated block 3"); 645 #endif 646 #ifdef DEBUG 647 if (prtrealloc) 648 printf(" %d,", blkno); 649 #endif 650 } 651 #ifdef DEBUG 652 if (prtrealloc) { 653 prtrealloc--; 654 printf("\n"); 655 } 656 #endif 657 return (0); 658 659 fail: 660 if (ssize < len) 661 brelse(ebp); 662 if (sbap != &ip->i_din1->di_db[0]) 663 brelse(sbp); 664 return (ENOSPC); 665 } 666 667 static int 668 ffs_reallocblks_ufs2(ap) 669 struct vop_reallocblks_args /* { 670 struct vnode *a_vp; 671 struct cluster_save *a_buflist; 672 } */ *ap; 673 { 674 struct fs *fs; 675 struct inode *ip; 676 struct vnode *vp; 677 struct buf *sbp, *ebp; 678 ufs2_daddr_t *bap, *sbap, *ebap = 0; 679 struct cluster_save *buflist; 680 struct ufsmount *ump; 681 ufs_lbn_t start_lbn, end_lbn; 682 ufs2_daddr_t soff, newblk, blkno, pref; 683 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 684 int i, len, start_lvl, end_lvl, ssize; 685 686 vp = ap->a_vp; 687 ip = VTOI(vp); 688 fs = ip->i_fs; 689 ump = ip->i_ump; 690 if (fs->fs_contigsumsize <= 0) 691 return (ENOSPC); 692 buflist = ap->a_buflist; 693 len = buflist->bs_nchildren; 694 start_lbn = buflist->bs_children[0]->b_lblkno; 695 end_lbn = start_lbn + len - 1; 696 #ifdef DIAGNOSTIC 697 for (i = 0; i < len; i++) 698 if (!ffs_checkblk(ip, 699 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 700 panic("ffs_reallocblks: unallocated block 1"); 701 for (i = 1; i < len; i++) 702 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 703 panic("ffs_reallocblks: non-logical cluster"); 704 blkno = buflist->bs_children[0]->b_blkno; 705 ssize = fsbtodb(fs, fs->fs_frag); 706 for (i = 1; i < len - 1; i++) 707 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) 708 panic("ffs_reallocblks: non-physical cluster %d", i); 709 #endif 710 /* 711 * If the latest allocation is in a new cylinder group, assume that 712 * the filesystem has decided to move and do not force it back to 713 * the previous cylinder group. 714 */ 715 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 716 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 717 return (ENOSPC); 718 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 719 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 720 return (ENOSPC); 721 /* 722 * Get the starting offset and block map for the first block. 723 */ 724 if (start_lvl == 0) { 725 sbap = &ip->i_din2->di_db[0]; 726 soff = start_lbn; 727 } else { 728 idp = &start_ap[start_lvl - 1]; 729 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { 730 brelse(sbp); 731 return (ENOSPC); 732 } 733 sbap = (ufs2_daddr_t *)sbp->b_data; 734 soff = idp->in_off; 735 } 736 /* 737 * If the block range spans two block maps, get the second map. 738 */ 739 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 740 ssize = len; 741 } else { 742 #ifdef DIAGNOSTIC 743 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 744 panic("ffs_reallocblk: start == end"); 745 #endif 746 ssize = len - (idp->in_off + 1); 747 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) 748 goto fail; 749 ebap = (ufs2_daddr_t *)ebp->b_data; 750 } 751 /* 752 * Find the preferred location for the cluster. 753 */ 754 UFS_LOCK(ump); 755 pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap); 756 /* 757 * Search the block map looking for an allocation of the desired size. 758 */ 759 if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, 760 len, ffs_clusteralloc)) == 0) { 761 UFS_UNLOCK(ump); 762 goto fail; 763 } 764 /* 765 * We have found a new contiguous block. 766 * 767 * First we have to replace the old block pointers with the new 768 * block pointers in the inode and indirect blocks associated 769 * with the file. 770 */ 771 #ifdef DEBUG 772 if (prtrealloc) 773 printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, 774 (intmax_t)start_lbn, (intmax_t)end_lbn); 775 #endif 776 blkno = newblk; 777 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { 778 if (i == ssize) { 779 bap = ebap; 780 soff = -i; 781 } 782 #ifdef DIAGNOSTIC 783 if (!ffs_checkblk(ip, 784 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 785 panic("ffs_reallocblks: unallocated block 2"); 786 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) 787 panic("ffs_reallocblks: alloc mismatch"); 788 #endif 789 #ifdef DEBUG 790 if (prtrealloc) 791 printf(" %jd,", (intmax_t)*bap); 792 #endif 793 if (DOINGSOFTDEP(vp)) { 794 if (sbap == &ip->i_din2->di_db[0] && i < ssize) 795 softdep_setup_allocdirect(ip, start_lbn + i, 796 blkno, *bap, fs->fs_bsize, fs->fs_bsize, 797 buflist->bs_children[i]); 798 else 799 softdep_setup_allocindir_page(ip, start_lbn + i, 800 i < ssize ? sbp : ebp, soff + i, blkno, 801 *bap, buflist->bs_children[i]); 802 } 803 *bap++ = blkno; 804 } 805 /* 806 * Next we must write out the modified inode and indirect blocks. 807 * For strict correctness, the writes should be synchronous since 808 * the old block values may have been written to disk. In practise 809 * they are almost never written, but if we are concerned about 810 * strict correctness, the `doasyncfree' flag should be set to zero. 811 * 812 * The test on `doasyncfree' should be changed to test a flag 813 * that shows whether the associated buffers and inodes have 814 * been written. The flag should be set when the cluster is 815 * started and cleared whenever the buffer or inode is flushed. 816 * We can then check below to see if it is set, and do the 817 * synchronous write only when it has been cleared. 818 */ 819 if (sbap != &ip->i_din2->di_db[0]) { 820 if (doasyncfree) 821 bdwrite(sbp); 822 else 823 bwrite(sbp); 824 } else { 825 ip->i_flag |= IN_CHANGE | IN_UPDATE; 826 if (!doasyncfree) 827 ffs_update(vp, 1); 828 } 829 if (ssize < len) { 830 if (doasyncfree) 831 bdwrite(ebp); 832 else 833 bwrite(ebp); 834 } 835 /* 836 * Last, free the old blocks and assign the new blocks to the buffers. 837 */ 838 #ifdef DEBUG 839 if (prtrealloc) 840 printf("\n\tnew:"); 841 #endif 842 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { 843 if (!DOINGSOFTDEP(vp)) 844 ffs_blkfree(ump, fs, ip->i_devvp, 845 dbtofsb(fs, buflist->bs_children[i]->b_blkno), 846 fs->fs_bsize, ip->i_number); 847 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 848 #ifdef DIAGNOSTIC 849 if (!ffs_checkblk(ip, 850 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 851 panic("ffs_reallocblks: unallocated block 3"); 852 #endif 853 #ifdef DEBUG 854 if (prtrealloc) 855 printf(" %jd,", (intmax_t)blkno); 856 #endif 857 } 858 #ifdef DEBUG 859 if (prtrealloc) { 860 prtrealloc--; 861 printf("\n"); 862 } 863 #endif 864 return (0); 865 866 fail: 867 if (ssize < len) 868 brelse(ebp); 869 if (sbap != &ip->i_din2->di_db[0]) 870 brelse(sbp); 871 return (ENOSPC); 872 } 873 874 /* 875 * Allocate an inode in the filesystem. 876 * 877 * If allocating a directory, use ffs_dirpref to select the inode. 878 * If allocating in a directory, the following hierarchy is followed: 879 * 1) allocate the preferred inode. 880 * 2) allocate an inode in the same cylinder group. 881 * 3) quadradically rehash into other cylinder groups, until an 882 * available inode is located. 883 * If no inode preference is given the following heirarchy is used 884 * to allocate an inode: 885 * 1) allocate an inode in cylinder group 0. 886 * 2) quadradically rehash into other cylinder groups, until an 887 * available inode is located. 888 */ 889 int 890 ffs_valloc(pvp, mode, cred, vpp) 891 struct vnode *pvp; 892 int mode; 893 struct ucred *cred; 894 struct vnode **vpp; 895 { 896 struct inode *pip; 897 struct fs *fs; 898 struct inode *ip; 899 struct timespec ts; 900 struct ufsmount *ump; 901 ino_t ino, ipref; 902 int cg, error; 903 static struct timeval lastfail; 904 static int curfail; 905 906 *vpp = NULL; 907 pip = VTOI(pvp); 908 fs = pip->i_fs; 909 ump = pip->i_ump; 910 911 UFS_LOCK(ump); 912 if (fs->fs_cstotal.cs_nifree == 0) 913 goto noinodes; 914 915 if ((mode & IFMT) == IFDIR) 916 ipref = ffs_dirpref(pip); 917 else 918 ipref = pip->i_number; 919 if (ipref >= fs->fs_ncg * fs->fs_ipg) 920 ipref = 0; 921 cg = ino_to_cg(fs, ipref); 922 /* 923 * Track number of dirs created one after another 924 * in a same cg without intervening by files. 925 */ 926 if ((mode & IFMT) == IFDIR) { 927 if (fs->fs_contigdirs[cg] < 255) 928 fs->fs_contigdirs[cg]++; 929 } else { 930 if (fs->fs_contigdirs[cg] > 0) 931 fs->fs_contigdirs[cg]--; 932 } 933 ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 934 (allocfcn_t *)ffs_nodealloccg); 935 if (ino == 0) 936 goto noinodes; 937 error = ffs_vget(pvp->v_mount, ino, LK_EXCLUSIVE, vpp); 938 if (error) { 939 ffs_vfree(pvp, ino, mode); 940 return (error); 941 } 942 ip = VTOI(*vpp); 943 if (ip->i_mode) { 944 printf("mode = 0%o, inum = %lu, fs = %s\n", 945 ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt); 946 panic("ffs_valloc: dup alloc"); 947 } 948 if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) { /* XXX */ 949 printf("free inode %s/%lu had %ld blocks\n", 950 fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks)); 951 DIP_SET(ip, i_blocks, 0); 952 } 953 ip->i_flags = 0; 954 DIP_SET(ip, i_flags, 0); 955 /* 956 * Set up a new generation number for this inode. 957 */ 958 if (ip->i_gen == 0 || ++ip->i_gen == 0) 959 ip->i_gen = arc4random() / 2 + 1; 960 DIP_SET(ip, i_gen, ip->i_gen); 961 if (fs->fs_magic == FS_UFS2_MAGIC) { 962 vfs_timestamp(&ts); 963 ip->i_din2->di_birthtime = ts.tv_sec; 964 ip->i_din2->di_birthnsec = ts.tv_nsec; 965 } 966 ip->i_flag = 0; 967 vnode_destroy_vobject(*vpp); 968 (*vpp)->v_type = VNON; 969 if (fs->fs_magic == FS_UFS2_MAGIC) 970 (*vpp)->v_op = &ffs_vnodeops2; 971 else 972 (*vpp)->v_op = &ffs_vnodeops1; 973 return (0); 974 noinodes: 975 UFS_UNLOCK(ump); 976 if (ppsratecheck(&lastfail, &curfail, 1)) { 977 ffs_fserr(fs, pip->i_number, "out of inodes"); 978 uprintf("\n%s: create/symlink failed, no inodes free\n", 979 fs->fs_fsmnt); 980 } 981 return (ENOSPC); 982 } 983 984 /* 985 * Find a cylinder group to place a directory. 986 * 987 * The policy implemented by this algorithm is to allocate a 988 * directory inode in the same cylinder group as its parent 989 * directory, but also to reserve space for its files inodes 990 * and data. Restrict the number of directories which may be 991 * allocated one after another in the same cylinder group 992 * without intervening allocation of files. 993 * 994 * If we allocate a first level directory then force allocation 995 * in another cylinder group. 996 */ 997 static ino_t 998 ffs_dirpref(pip) 999 struct inode *pip; 1000 { 1001 struct fs *fs; 1002 int cg, prefcg, dirsize, cgsize; 1003 int avgifree, avgbfree, avgndir, curdirsize; 1004 int minifree, minbfree, maxndir; 1005 int mincg, minndir; 1006 int maxcontigdirs; 1007 1008 mtx_assert(UFS_MTX(pip->i_ump), MA_OWNED); 1009 fs = pip->i_fs; 1010 1011 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; 1012 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 1013 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; 1014 1015 /* 1016 * Force allocation in another cg if creating a first level dir. 1017 */ 1018 ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref"); 1019 if (ITOV(pip)->v_vflag & VV_ROOT) { 1020 prefcg = arc4random() % fs->fs_ncg; 1021 mincg = prefcg; 1022 minndir = fs->fs_ipg; 1023 for (cg = prefcg; cg < fs->fs_ncg; cg++) 1024 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 1025 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 1026 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1027 mincg = cg; 1028 minndir = fs->fs_cs(fs, cg).cs_ndir; 1029 } 1030 for (cg = 0; cg < prefcg; cg++) 1031 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 1032 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 1033 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1034 mincg = cg; 1035 minndir = fs->fs_cs(fs, cg).cs_ndir; 1036 } 1037 return ((ino_t)(fs->fs_ipg * mincg)); 1038 } 1039 1040 /* 1041 * Count various limits which used for 1042 * optimal allocation of a directory inode. 1043 */ 1044 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); 1045 minifree = avgifree - avgifree / 4; 1046 if (minifree < 1) 1047 minifree = 1; 1048 minbfree = avgbfree - avgbfree / 4; 1049 if (minbfree < 1) 1050 minbfree = 1; 1051 cgsize = fs->fs_fsize * fs->fs_fpg; 1052 dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir; 1053 curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0; 1054 if (dirsize < curdirsize) 1055 dirsize = curdirsize; 1056 maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255); 1057 if (fs->fs_avgfpdir > 0) 1058 maxcontigdirs = min(maxcontigdirs, 1059 fs->fs_ipg / fs->fs_avgfpdir); 1060 if (maxcontigdirs == 0) 1061 maxcontigdirs = 1; 1062 1063 /* 1064 * Limit number of dirs in one cg and reserve space for 1065 * regular files, but only if we have no deficit in 1066 * inodes or space. 1067 */ 1068 prefcg = ino_to_cg(fs, pip->i_number); 1069 for (cg = prefcg; cg < fs->fs_ncg; cg++) 1070 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 1071 fs->fs_cs(fs, cg).cs_nifree >= minifree && 1072 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 1073 if (fs->fs_contigdirs[cg] < maxcontigdirs) 1074 return ((ino_t)(fs->fs_ipg * cg)); 1075 } 1076 for (cg = 0; cg < prefcg; cg++) 1077 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 1078 fs->fs_cs(fs, cg).cs_nifree >= minifree && 1079 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 1080 if (fs->fs_contigdirs[cg] < maxcontigdirs) 1081 return ((ino_t)(fs->fs_ipg * cg)); 1082 } 1083 /* 1084 * This is a backstop when we have deficit in space. 1085 */ 1086 for (cg = prefcg; cg < fs->fs_ncg; cg++) 1087 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 1088 return ((ino_t)(fs->fs_ipg * cg)); 1089 for (cg = 0; cg < prefcg; cg++) 1090 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 1091 break; 1092 return ((ino_t)(fs->fs_ipg * cg)); 1093 } 1094 1095 /* 1096 * Select the desired position for the next block in a file. The file is 1097 * logically divided into sections. The first section is composed of the 1098 * direct blocks. Each additional section contains fs_maxbpg blocks. 1099 * 1100 * If no blocks have been allocated in the first section, the policy is to 1101 * request a block in the same cylinder group as the inode that describes 1102 * the file. If no blocks have been allocated in any other section, the 1103 * policy is to place the section in a cylinder group with a greater than 1104 * average number of free blocks. An appropriate cylinder group is found 1105 * by using a rotor that sweeps the cylinder groups. When a new group of 1106 * blocks is needed, the sweep begins in the cylinder group following the 1107 * cylinder group from which the previous allocation was made. The sweep 1108 * continues until a cylinder group with greater than the average number 1109 * of free blocks is found. If the allocation is for the first block in an 1110 * indirect block, the information on the previous allocation is unavailable; 1111 * here a best guess is made based upon the logical block number being 1112 * allocated. 1113 * 1114 * If a section is already partially allocated, the policy is to 1115 * contiguously allocate fs_maxcontig blocks. The end of one of these 1116 * contiguous blocks and the beginning of the next is laid out 1117 * contiguously if possible. 1118 */ 1119 ufs2_daddr_t 1120 ffs_blkpref_ufs1(ip, lbn, indx, bap) 1121 struct inode *ip; 1122 ufs_lbn_t lbn; 1123 int indx; 1124 ufs1_daddr_t *bap; 1125 { 1126 struct fs *fs; 1127 int cg; 1128 int avgbfree, startcg; 1129 1130 mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED); 1131 fs = ip->i_fs; 1132 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 1133 if (lbn < NDADDR + NINDIR(fs)) { 1134 cg = ino_to_cg(fs, ip->i_number); 1135 return (cgbase(fs, cg) + fs->fs_frag); 1136 } 1137 /* 1138 * Find a cylinder with greater than average number of 1139 * unused data blocks. 1140 */ 1141 if (indx == 0 || bap[indx - 1] == 0) 1142 startcg = 1143 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 1144 else 1145 startcg = dtog(fs, bap[indx - 1]) + 1; 1146 startcg %= fs->fs_ncg; 1147 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 1148 for (cg = startcg; cg < fs->fs_ncg; cg++) 1149 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1150 fs->fs_cgrotor = cg; 1151 return (cgbase(fs, cg) + fs->fs_frag); 1152 } 1153 for (cg = 0; cg <= startcg; cg++) 1154 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1155 fs->fs_cgrotor = cg; 1156 return (cgbase(fs, cg) + fs->fs_frag); 1157 } 1158 return (0); 1159 } 1160 /* 1161 * We just always try to lay things out contiguously. 1162 */ 1163 return (bap[indx - 1] + fs->fs_frag); 1164 } 1165 1166 /* 1167 * Same as above, but for UFS2 1168 */ 1169 ufs2_daddr_t 1170 ffs_blkpref_ufs2(ip, lbn, indx, bap) 1171 struct inode *ip; 1172 ufs_lbn_t lbn; 1173 int indx; 1174 ufs2_daddr_t *bap; 1175 { 1176 struct fs *fs; 1177 int cg; 1178 int avgbfree, startcg; 1179 1180 mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED); 1181 fs = ip->i_fs; 1182 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 1183 if (lbn < NDADDR + NINDIR(fs)) { 1184 cg = ino_to_cg(fs, ip->i_number); 1185 return (cgbase(fs, cg) + fs->fs_frag); 1186 } 1187 /* 1188 * Find a cylinder with greater than average number of 1189 * unused data blocks. 1190 */ 1191 if (indx == 0 || bap[indx - 1] == 0) 1192 startcg = 1193 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 1194 else 1195 startcg = dtog(fs, bap[indx - 1]) + 1; 1196 startcg %= fs->fs_ncg; 1197 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 1198 for (cg = startcg; cg < fs->fs_ncg; cg++) 1199 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1200 fs->fs_cgrotor = cg; 1201 return (cgbase(fs, cg) + fs->fs_frag); 1202 } 1203 for (cg = 0; cg <= startcg; cg++) 1204 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1205 fs->fs_cgrotor = cg; 1206 return (cgbase(fs, cg) + fs->fs_frag); 1207 } 1208 return (0); 1209 } 1210 /* 1211 * We just always try to lay things out contiguously. 1212 */ 1213 return (bap[indx - 1] + fs->fs_frag); 1214 } 1215 1216 /* 1217 * Implement the cylinder overflow algorithm. 1218 * 1219 * The policy implemented by this algorithm is: 1220 * 1) allocate the block in its requested cylinder group. 1221 * 2) quadradically rehash on the cylinder group number. 1222 * 3) brute force search for a free block. 1223 * 1224 * Must be called with the UFS lock held. Will release the lock on success 1225 * and return with it held on failure. 1226 */ 1227 /*VARARGS5*/ 1228 static ufs2_daddr_t 1229 ffs_hashalloc(ip, cg, pref, size, allocator) 1230 struct inode *ip; 1231 int cg; 1232 ufs2_daddr_t pref; 1233 int size; /* size for data blocks, mode for inodes */ 1234 allocfcn_t *allocator; 1235 { 1236 struct fs *fs; 1237 ufs2_daddr_t result; 1238 int i, icg = cg; 1239 1240 mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED); 1241 #ifdef DIAGNOSTIC 1242 if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED) 1243 panic("ffs_hashalloc: allocation on suspended filesystem"); 1244 #endif 1245 fs = ip->i_fs; 1246 /* 1247 * 1: preferred cylinder group 1248 */ 1249 result = (*allocator)(ip, cg, pref, size); 1250 if (result) 1251 return (result); 1252 /* 1253 * 2: quadratic rehash 1254 */ 1255 for (i = 1; i < fs->fs_ncg; i *= 2) { 1256 cg += i; 1257 if (cg >= fs->fs_ncg) 1258 cg -= fs->fs_ncg; 1259 result = (*allocator)(ip, cg, 0, size); 1260 if (result) 1261 return (result); 1262 } 1263 /* 1264 * 3: brute force search 1265 * Note that we start at i == 2, since 0 was checked initially, 1266 * and 1 is always checked in the quadratic rehash. 1267 */ 1268 cg = (icg + 2) % fs->fs_ncg; 1269 for (i = 2; i < fs->fs_ncg; i++) { 1270 result = (*allocator)(ip, cg, 0, size); 1271 if (result) 1272 return (result); 1273 cg++; 1274 if (cg == fs->fs_ncg) 1275 cg = 0; 1276 } 1277 return (0); 1278 } 1279 1280 /* 1281 * Determine whether a fragment can be extended. 1282 * 1283 * Check to see if the necessary fragments are available, and 1284 * if they are, allocate them. 1285 */ 1286 static ufs2_daddr_t 1287 ffs_fragextend(ip, cg, bprev, osize, nsize) 1288 struct inode *ip; 1289 int cg; 1290 ufs2_daddr_t bprev; 1291 int osize, nsize; 1292 { 1293 struct fs *fs; 1294 struct cg *cgp; 1295 struct buf *bp; 1296 struct ufsmount *ump; 1297 int nffree; 1298 long bno; 1299 int frags, bbase; 1300 int i, error; 1301 u_int8_t *blksfree; 1302 1303 ump = ip->i_ump; 1304 fs = ip->i_fs; 1305 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) 1306 return (0); 1307 frags = numfrags(fs, nsize); 1308 bbase = fragnum(fs, bprev); 1309 if (bbase > fragnum(fs, (bprev + frags - 1))) { 1310 /* cannot extend across a block boundary */ 1311 return (0); 1312 } 1313 UFS_UNLOCK(ump); 1314 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1315 (int)fs->fs_cgsize, NOCRED, &bp); 1316 if (error) 1317 goto fail; 1318 cgp = (struct cg *)bp->b_data; 1319 if (!cg_chkmagic(cgp)) 1320 goto fail; 1321 bp->b_xflags |= BX_BKGRDWRITE; 1322 cgp->cg_old_time = cgp->cg_time = time_second; 1323 bno = dtogd(fs, bprev); 1324 blksfree = cg_blksfree(cgp); 1325 for (i = numfrags(fs, osize); i < frags; i++) 1326 if (isclr(blksfree, bno + i)) 1327 goto fail; 1328 /* 1329 * the current fragment can be extended 1330 * deduct the count on fragment being extended into 1331 * increase the count on the remaining fragment (if any) 1332 * allocate the extended piece 1333 */ 1334 for (i = frags; i < fs->fs_frag - bbase; i++) 1335 if (isclr(blksfree, bno + i)) 1336 break; 1337 cgp->cg_frsum[i - numfrags(fs, osize)]--; 1338 if (i != frags) 1339 cgp->cg_frsum[i - frags]++; 1340 for (i = numfrags(fs, osize), nffree = 0; i < frags; i++) { 1341 clrbit(blksfree, bno + i); 1342 cgp->cg_cs.cs_nffree--; 1343 nffree++; 1344 } 1345 UFS_LOCK(ump); 1346 fs->fs_cstotal.cs_nffree -= nffree; 1347 fs->fs_cs(fs, cg).cs_nffree -= nffree; 1348 fs->fs_fmod = 1; 1349 ACTIVECLEAR(fs, cg); 1350 UFS_UNLOCK(ump); 1351 if (DOINGSOFTDEP(ITOV(ip))) 1352 softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev); 1353 bdwrite(bp); 1354 return (bprev); 1355 1356 fail: 1357 brelse(bp); 1358 UFS_LOCK(ump); 1359 return (0); 1360 1361 } 1362 1363 /* 1364 * Determine whether a block can be allocated. 1365 * 1366 * Check to see if a block of the appropriate size is available, 1367 * and if it is, allocate it. 1368 */ 1369 static ufs2_daddr_t 1370 ffs_alloccg(ip, cg, bpref, size) 1371 struct inode *ip; 1372 int cg; 1373 ufs2_daddr_t bpref; 1374 int size; 1375 { 1376 struct fs *fs; 1377 struct cg *cgp; 1378 struct buf *bp; 1379 struct ufsmount *ump; 1380 ufs1_daddr_t bno; 1381 ufs2_daddr_t blkno; 1382 int i, allocsiz, error, frags; 1383 u_int8_t *blksfree; 1384 1385 ump = ip->i_ump; 1386 fs = ip->i_fs; 1387 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) 1388 return (0); 1389 UFS_UNLOCK(ump); 1390 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1391 (int)fs->fs_cgsize, NOCRED, &bp); 1392 if (error) 1393 goto fail; 1394 cgp = (struct cg *)bp->b_data; 1395 if (!cg_chkmagic(cgp) || 1396 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) 1397 goto fail; 1398 bp->b_xflags |= BX_BKGRDWRITE; 1399 cgp->cg_old_time = cgp->cg_time = time_second; 1400 if (size == fs->fs_bsize) { 1401 UFS_LOCK(ump); 1402 blkno = ffs_alloccgblk(ip, bp, bpref); 1403 ACTIVECLEAR(fs, cg); 1404 UFS_UNLOCK(ump); 1405 bdwrite(bp); 1406 return (blkno); 1407 } 1408 /* 1409 * check to see if any fragments are already available 1410 * allocsiz is the size which will be allocated, hacking 1411 * it down to a smaller size if necessary 1412 */ 1413 blksfree = cg_blksfree(cgp); 1414 frags = numfrags(fs, size); 1415 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) 1416 if (cgp->cg_frsum[allocsiz] != 0) 1417 break; 1418 if (allocsiz == fs->fs_frag) { 1419 /* 1420 * no fragments were available, so a block will be 1421 * allocated, and hacked up 1422 */ 1423 if (cgp->cg_cs.cs_nbfree == 0) 1424 goto fail; 1425 UFS_LOCK(ump); 1426 blkno = ffs_alloccgblk(ip, bp, bpref); 1427 bno = dtogd(fs, blkno); 1428 for (i = frags; i < fs->fs_frag; i++) 1429 setbit(blksfree, bno + i); 1430 i = fs->fs_frag - frags; 1431 cgp->cg_cs.cs_nffree += i; 1432 fs->fs_cstotal.cs_nffree += i; 1433 fs->fs_cs(fs, cg).cs_nffree += i; 1434 fs->fs_fmod = 1; 1435 cgp->cg_frsum[i]++; 1436 ACTIVECLEAR(fs, cg); 1437 UFS_UNLOCK(ump); 1438 bdwrite(bp); 1439 return (blkno); 1440 } 1441 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); 1442 if (bno < 0) 1443 goto fail; 1444 for (i = 0; i < frags; i++) 1445 clrbit(blksfree, bno + i); 1446 cgp->cg_cs.cs_nffree -= frags; 1447 cgp->cg_frsum[allocsiz]--; 1448 if (frags != allocsiz) 1449 cgp->cg_frsum[allocsiz - frags]++; 1450 UFS_LOCK(ump); 1451 fs->fs_cstotal.cs_nffree -= frags; 1452 fs->fs_cs(fs, cg).cs_nffree -= frags; 1453 fs->fs_fmod = 1; 1454 blkno = cgbase(fs, cg) + bno; 1455 ACTIVECLEAR(fs, cg); 1456 UFS_UNLOCK(ump); 1457 if (DOINGSOFTDEP(ITOV(ip))) 1458 softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno); 1459 bdwrite(bp); 1460 return (blkno); 1461 1462 fail: 1463 brelse(bp); 1464 UFS_LOCK(ump); 1465 return (0); 1466 } 1467 1468 /* 1469 * Allocate a block in a cylinder group. 1470 * 1471 * This algorithm implements the following policy: 1472 * 1) allocate the requested block. 1473 * 2) allocate a rotationally optimal block in the same cylinder. 1474 * 3) allocate the next available block on the block rotor for the 1475 * specified cylinder group. 1476 * Note that this routine only allocates fs_bsize blocks; these 1477 * blocks may be fragmented by the routine that allocates them. 1478 */ 1479 static ufs2_daddr_t 1480 ffs_alloccgblk(ip, bp, bpref) 1481 struct inode *ip; 1482 struct buf *bp; 1483 ufs2_daddr_t bpref; 1484 { 1485 struct fs *fs; 1486 struct cg *cgp; 1487 struct ufsmount *ump; 1488 ufs1_daddr_t bno; 1489 ufs2_daddr_t blkno; 1490 u_int8_t *blksfree; 1491 1492 fs = ip->i_fs; 1493 ump = ip->i_ump; 1494 mtx_assert(UFS_MTX(ump), MA_OWNED); 1495 cgp = (struct cg *)bp->b_data; 1496 blksfree = cg_blksfree(cgp); 1497 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { 1498 bpref = cgp->cg_rotor; 1499 } else { 1500 bpref = blknum(fs, bpref); 1501 bno = dtogd(fs, bpref); 1502 /* 1503 * if the requested block is available, use it 1504 */ 1505 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) 1506 goto gotit; 1507 } 1508 /* 1509 * Take the next available block in this cylinder group. 1510 */ 1511 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); 1512 if (bno < 0) 1513 return (0); 1514 cgp->cg_rotor = bno; 1515 gotit: 1516 blkno = fragstoblks(fs, bno); 1517 ffs_clrblock(fs, blksfree, (long)blkno); 1518 ffs_clusteracct(ump, fs, cgp, blkno, -1); 1519 cgp->cg_cs.cs_nbfree--; 1520 fs->fs_cstotal.cs_nbfree--; 1521 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; 1522 fs->fs_fmod = 1; 1523 blkno = cgbase(fs, cgp->cg_cgx) + bno; 1524 /* XXX Fixme. */ 1525 UFS_UNLOCK(ump); 1526 if (DOINGSOFTDEP(ITOV(ip))) 1527 softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno); 1528 UFS_LOCK(ump); 1529 return (blkno); 1530 } 1531 1532 /* 1533 * Determine whether a cluster can be allocated. 1534 * 1535 * We do not currently check for optimal rotational layout if there 1536 * are multiple choices in the same cylinder group. Instead we just 1537 * take the first one that we find following bpref. 1538 */ 1539 static ufs2_daddr_t 1540 ffs_clusteralloc(ip, cg, bpref, len) 1541 struct inode *ip; 1542 int cg; 1543 ufs2_daddr_t bpref; 1544 int len; 1545 { 1546 struct fs *fs; 1547 struct cg *cgp; 1548 struct buf *bp; 1549 struct ufsmount *ump; 1550 int i, run, bit, map, got; 1551 ufs2_daddr_t bno; 1552 u_char *mapp; 1553 int32_t *lp; 1554 u_int8_t *blksfree; 1555 1556 fs = ip->i_fs; 1557 ump = ip->i_ump; 1558 if (fs->fs_maxcluster[cg] < len) 1559 return (0); 1560 UFS_UNLOCK(ump); 1561 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, 1562 NOCRED, &bp)) 1563 goto fail_lock; 1564 cgp = (struct cg *)bp->b_data; 1565 if (!cg_chkmagic(cgp)) 1566 goto fail_lock; 1567 bp->b_xflags |= BX_BKGRDWRITE; 1568 /* 1569 * Check to see if a cluster of the needed size (or bigger) is 1570 * available in this cylinder group. 1571 */ 1572 lp = &cg_clustersum(cgp)[len]; 1573 for (i = len; i <= fs->fs_contigsumsize; i++) 1574 if (*lp++ > 0) 1575 break; 1576 if (i > fs->fs_contigsumsize) { 1577 /* 1578 * This is the first time looking for a cluster in this 1579 * cylinder group. Update the cluster summary information 1580 * to reflect the true maximum sized cluster so that 1581 * future cluster allocation requests can avoid reading 1582 * the cylinder group map only to find no clusters. 1583 */ 1584 lp = &cg_clustersum(cgp)[len - 1]; 1585 for (i = len - 1; i > 0; i--) 1586 if (*lp-- > 0) 1587 break; 1588 UFS_LOCK(ump); 1589 fs->fs_maxcluster[cg] = i; 1590 goto fail; 1591 } 1592 /* 1593 * Search the cluster map to find a big enough cluster. 1594 * We take the first one that we find, even if it is larger 1595 * than we need as we prefer to get one close to the previous 1596 * block allocation. We do not search before the current 1597 * preference point as we do not want to allocate a block 1598 * that is allocated before the previous one (as we will 1599 * then have to wait for another pass of the elevator 1600 * algorithm before it will be read). We prefer to fail and 1601 * be recalled to try an allocation in the next cylinder group. 1602 */ 1603 if (dtog(fs, bpref) != cg) 1604 bpref = 0; 1605 else 1606 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); 1607 mapp = &cg_clustersfree(cgp)[bpref / NBBY]; 1608 map = *mapp++; 1609 bit = 1 << (bpref % NBBY); 1610 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { 1611 if ((map & bit) == 0) { 1612 run = 0; 1613 } else { 1614 run++; 1615 if (run == len) 1616 break; 1617 } 1618 if ((got & (NBBY - 1)) != (NBBY - 1)) { 1619 bit <<= 1; 1620 } else { 1621 map = *mapp++; 1622 bit = 1; 1623 } 1624 } 1625 if (got >= cgp->cg_nclusterblks) 1626 goto fail_lock; 1627 /* 1628 * Allocate the cluster that we have found. 1629 */ 1630 blksfree = cg_blksfree(cgp); 1631 for (i = 1; i <= len; i++) 1632 if (!ffs_isblock(fs, blksfree, got - run + i)) 1633 panic("ffs_clusteralloc: map mismatch"); 1634 bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1); 1635 if (dtog(fs, bno) != cg) 1636 panic("ffs_clusteralloc: allocated out of group"); 1637 len = blkstofrags(fs, len); 1638 UFS_LOCK(ump); 1639 for (i = 0; i < len; i += fs->fs_frag) 1640 if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) 1641 panic("ffs_clusteralloc: lost block"); 1642 ACTIVECLEAR(fs, cg); 1643 UFS_UNLOCK(ump); 1644 bdwrite(bp); 1645 return (bno); 1646 1647 fail_lock: 1648 UFS_LOCK(ump); 1649 fail: 1650 brelse(bp); 1651 return (0); 1652 } 1653 1654 /* 1655 * Determine whether an inode can be allocated. 1656 * 1657 * Check to see if an inode is available, and if it is, 1658 * allocate it using the following policy: 1659 * 1) allocate the requested inode. 1660 * 2) allocate the next available inode after the requested 1661 * inode in the specified cylinder group. 1662 */ 1663 static ufs2_daddr_t 1664 ffs_nodealloccg(ip, cg, ipref, mode) 1665 struct inode *ip; 1666 int cg; 1667 ufs2_daddr_t ipref; 1668 int mode; 1669 { 1670 struct fs *fs; 1671 struct cg *cgp; 1672 struct buf *bp, *ibp; 1673 struct ufsmount *ump; 1674 u_int8_t *inosused; 1675 struct ufs2_dinode *dp2; 1676 int error, start, len, loc, map, i; 1677 1678 fs = ip->i_fs; 1679 ump = ip->i_ump; 1680 if (fs->fs_cs(fs, cg).cs_nifree == 0) 1681 return (0); 1682 UFS_UNLOCK(ump); 1683 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1684 (int)fs->fs_cgsize, NOCRED, &bp); 1685 if (error) { 1686 brelse(bp); 1687 UFS_LOCK(ump); 1688 return (0); 1689 } 1690 cgp = (struct cg *)bp->b_data; 1691 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { 1692 brelse(bp); 1693 UFS_LOCK(ump); 1694 return (0); 1695 } 1696 bp->b_xflags |= BX_BKGRDWRITE; 1697 cgp->cg_old_time = cgp->cg_time = time_second; 1698 inosused = cg_inosused(cgp); 1699 if (ipref) { 1700 ipref %= fs->fs_ipg; 1701 if (isclr(inosused, ipref)) 1702 goto gotit; 1703 } 1704 start = cgp->cg_irotor / NBBY; 1705 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); 1706 loc = skpc(0xff, len, &inosused[start]); 1707 if (loc == 0) { 1708 len = start + 1; 1709 start = 0; 1710 loc = skpc(0xff, len, &inosused[0]); 1711 if (loc == 0) { 1712 printf("cg = %d, irotor = %ld, fs = %s\n", 1713 cg, (long)cgp->cg_irotor, fs->fs_fsmnt); 1714 panic("ffs_nodealloccg: map corrupted"); 1715 /* NOTREACHED */ 1716 } 1717 } 1718 i = start + len - loc; 1719 map = inosused[i]; 1720 ipref = i * NBBY; 1721 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 1722 if ((map & i) == 0) { 1723 cgp->cg_irotor = ipref; 1724 goto gotit; 1725 } 1726 } 1727 printf("fs = %s\n", fs->fs_fsmnt); 1728 panic("ffs_nodealloccg: block not in map"); 1729 /* NOTREACHED */ 1730 gotit: 1731 /* 1732 * Check to see if we need to initialize more inodes. 1733 */ 1734 ibp = NULL; 1735 if (fs->fs_magic == FS_UFS2_MAGIC && 1736 ipref + INOPB(fs) > cgp->cg_initediblk && 1737 cgp->cg_initediblk < cgp->cg_niblk) { 1738 ibp = getblk(ip->i_devvp, fsbtodb(fs, 1739 ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)), 1740 (int)fs->fs_bsize, 0, 0, 0); 1741 bzero(ibp->b_data, (int)fs->fs_bsize); 1742 dp2 = (struct ufs2_dinode *)(ibp->b_data); 1743 for (i = 0; i < INOPB(fs); i++) { 1744 dp2->di_gen = arc4random() / 2 + 1; 1745 dp2++; 1746 } 1747 cgp->cg_initediblk += INOPB(fs); 1748 } 1749 UFS_LOCK(ump); 1750 ACTIVECLEAR(fs, cg); 1751 setbit(inosused, ipref); 1752 cgp->cg_cs.cs_nifree--; 1753 fs->fs_cstotal.cs_nifree--; 1754 fs->fs_cs(fs, cg).cs_nifree--; 1755 fs->fs_fmod = 1; 1756 if ((mode & IFMT) == IFDIR) { 1757 cgp->cg_cs.cs_ndir++; 1758 fs->fs_cstotal.cs_ndir++; 1759 fs->fs_cs(fs, cg).cs_ndir++; 1760 } 1761 UFS_UNLOCK(ump); 1762 if (DOINGSOFTDEP(ITOV(ip))) 1763 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); 1764 bdwrite(bp); 1765 if (ibp != NULL) 1766 bawrite(ibp); 1767 return (cg * fs->fs_ipg + ipref); 1768 } 1769 1770 /* 1771 * check if a block is free 1772 */ 1773 static int 1774 ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) 1775 { 1776 1777 switch ((int)fs->fs_frag) { 1778 case 8: 1779 return (cp[h] == 0); 1780 case 4: 1781 return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); 1782 case 2: 1783 return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); 1784 case 1: 1785 return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); 1786 default: 1787 panic("ffs_isfreeblock"); 1788 } 1789 return (0); 1790 } 1791 1792 /* 1793 * Free a block or fragment. 1794 * 1795 * The specified block or fragment is placed back in the 1796 * free map. If a fragment is deallocated, a possible 1797 * block reassembly is checked. 1798 */ 1799 void 1800 ffs_blkfree(ump, fs, devvp, bno, size, inum) 1801 struct ufsmount *ump; 1802 struct fs *fs; 1803 struct vnode *devvp; 1804 ufs2_daddr_t bno; 1805 long size; 1806 ino_t inum; 1807 { 1808 struct cg *cgp; 1809 struct buf *bp; 1810 ufs1_daddr_t fragno, cgbno; 1811 ufs2_daddr_t cgblkno; 1812 int i, cg, blk, frags, bbase; 1813 u_int8_t *blksfree; 1814 struct cdev *dev; 1815 1816 cg = dtog(fs, bno); 1817 if (devvp->v_type != VCHR) { 1818 /* devvp is a snapshot */ 1819 dev = VTOI(devvp)->i_devvp->v_rdev; 1820 cgblkno = fragstoblks(fs, cgtod(fs, cg)); 1821 } else { 1822 /* devvp is a normal disk device */ 1823 dev = devvp->v_rdev; 1824 cgblkno = fsbtodb(fs, cgtod(fs, cg)); 1825 ASSERT_VOP_LOCKED(devvp, "ffs_blkfree"); 1826 if ((devvp->v_vflag & VV_COPYONWRITE) && 1827 ffs_snapblkfree(fs, devvp, bno, size, inum)) 1828 return; 1829 } 1830 #ifdef DIAGNOSTIC 1831 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || 1832 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 1833 printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n", 1834 devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, 1835 size, fs->fs_fsmnt); 1836 panic("ffs_blkfree: bad size"); 1837 } 1838 #endif 1839 if ((u_int)bno >= fs->fs_size) { 1840 printf("bad block %jd, ino %lu\n", (intmax_t)bno, 1841 (u_long)inum); 1842 ffs_fserr(fs, inum, "bad block"); 1843 return; 1844 } 1845 if (bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp)) { 1846 brelse(bp); 1847 return; 1848 } 1849 cgp = (struct cg *)bp->b_data; 1850 if (!cg_chkmagic(cgp)) { 1851 brelse(bp); 1852 return; 1853 } 1854 bp->b_xflags |= BX_BKGRDWRITE; 1855 cgp->cg_old_time = cgp->cg_time = time_second; 1856 cgbno = dtogd(fs, bno); 1857 blksfree = cg_blksfree(cgp); 1858 UFS_LOCK(ump); 1859 if (size == fs->fs_bsize) { 1860 fragno = fragstoblks(fs, cgbno); 1861 if (!ffs_isfreeblock(fs, blksfree, fragno)) { 1862 if (devvp->v_type != VCHR) { 1863 UFS_UNLOCK(ump); 1864 /* devvp is a snapshot */ 1865 brelse(bp); 1866 return; 1867 } 1868 printf("dev = %s, block = %jd, fs = %s\n", 1869 devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); 1870 panic("ffs_blkfree: freeing free block"); 1871 } 1872 ffs_setblock(fs, blksfree, fragno); 1873 ffs_clusteracct(ump, fs, cgp, fragno, 1); 1874 cgp->cg_cs.cs_nbfree++; 1875 fs->fs_cstotal.cs_nbfree++; 1876 fs->fs_cs(fs, cg).cs_nbfree++; 1877 } else { 1878 bbase = cgbno - fragnum(fs, cgbno); 1879 /* 1880 * decrement the counts associated with the old frags 1881 */ 1882 blk = blkmap(fs, blksfree, bbase); 1883 ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 1884 /* 1885 * deallocate the fragment 1886 */ 1887 frags = numfrags(fs, size); 1888 for (i = 0; i < frags; i++) { 1889 if (isset(blksfree, cgbno + i)) { 1890 printf("dev = %s, block = %jd, fs = %s\n", 1891 devtoname(dev), (intmax_t)(bno + i), 1892 fs->fs_fsmnt); 1893 panic("ffs_blkfree: freeing free frag"); 1894 } 1895 setbit(blksfree, cgbno + i); 1896 } 1897 cgp->cg_cs.cs_nffree += i; 1898 fs->fs_cstotal.cs_nffree += i; 1899 fs->fs_cs(fs, cg).cs_nffree += i; 1900 /* 1901 * add back in counts associated with the new frags 1902 */ 1903 blk = blkmap(fs, blksfree, bbase); 1904 ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1905 /* 1906 * if a complete block has been reassembled, account for it 1907 */ 1908 fragno = fragstoblks(fs, bbase); 1909 if (ffs_isblock(fs, blksfree, fragno)) { 1910 cgp->cg_cs.cs_nffree -= fs->fs_frag; 1911 fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1912 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 1913 ffs_clusteracct(ump, fs, cgp, fragno, 1); 1914 cgp->cg_cs.cs_nbfree++; 1915 fs->fs_cstotal.cs_nbfree++; 1916 fs->fs_cs(fs, cg).cs_nbfree++; 1917 } 1918 } 1919 fs->fs_fmod = 1; 1920 ACTIVECLEAR(fs, cg); 1921 UFS_UNLOCK(ump); 1922 bdwrite(bp); 1923 } 1924 1925 #ifdef DIAGNOSTIC 1926 /* 1927 * Verify allocation of a block or fragment. Returns true if block or 1928 * fragment is allocated, false if it is free. 1929 */ 1930 static int 1931 ffs_checkblk(ip, bno, size) 1932 struct inode *ip; 1933 ufs2_daddr_t bno; 1934 long size; 1935 { 1936 struct fs *fs; 1937 struct cg *cgp; 1938 struct buf *bp; 1939 ufs1_daddr_t cgbno; 1940 int i, error, frags, free; 1941 u_int8_t *blksfree; 1942 1943 fs = ip->i_fs; 1944 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 1945 printf("bsize = %ld, size = %ld, fs = %s\n", 1946 (long)fs->fs_bsize, size, fs->fs_fsmnt); 1947 panic("ffs_checkblk: bad size"); 1948 } 1949 if ((u_int)bno >= fs->fs_size) 1950 panic("ffs_checkblk: bad block %jd", (intmax_t)bno); 1951 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), 1952 (int)fs->fs_cgsize, NOCRED, &bp); 1953 if (error) 1954 panic("ffs_checkblk: cg bread failed"); 1955 cgp = (struct cg *)bp->b_data; 1956 if (!cg_chkmagic(cgp)) 1957 panic("ffs_checkblk: cg magic mismatch"); 1958 bp->b_xflags |= BX_BKGRDWRITE; 1959 blksfree = cg_blksfree(cgp); 1960 cgbno = dtogd(fs, bno); 1961 if (size == fs->fs_bsize) { 1962 free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno)); 1963 } else { 1964 frags = numfrags(fs, size); 1965 for (free = 0, i = 0; i < frags; i++) 1966 if (isset(blksfree, cgbno + i)) 1967 free++; 1968 if (free != 0 && free != frags) 1969 panic("ffs_checkblk: partially free fragment"); 1970 } 1971 brelse(bp); 1972 return (!free); 1973 } 1974 #endif /* DIAGNOSTIC */ 1975 1976 /* 1977 * Free an inode. 1978 */ 1979 int 1980 ffs_vfree(pvp, ino, mode) 1981 struct vnode *pvp; 1982 ino_t ino; 1983 int mode; 1984 { 1985 struct inode *ip; 1986 1987 if (DOINGSOFTDEP(pvp)) { 1988 softdep_freefile(pvp, ino, mode); 1989 return (0); 1990 } 1991 ip = VTOI(pvp); 1992 return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode)); 1993 } 1994 1995 /* 1996 * Do the actual free operation. 1997 * The specified inode is placed back in the free map. 1998 */ 1999 int 2000 ffs_freefile(ump, fs, devvp, ino, mode) 2001 struct ufsmount *ump; 2002 struct fs *fs; 2003 struct vnode *devvp; 2004 ino_t ino; 2005 int mode; 2006 { 2007 struct cg *cgp; 2008 struct buf *bp; 2009 ufs2_daddr_t cgbno; 2010 int error, cg; 2011 u_int8_t *inosused; 2012 struct cdev *dev; 2013 2014 cg = ino_to_cg(fs, ino); 2015 if (devvp->v_type != VCHR) { 2016 /* devvp is a snapshot */ 2017 dev = VTOI(devvp)->i_devvp->v_rdev; 2018 cgbno = fragstoblks(fs, cgtod(fs, cg)); 2019 } else { 2020 /* devvp is a normal disk device */ 2021 dev = devvp->v_rdev; 2022 cgbno = fsbtodb(fs, cgtod(fs, cg)); 2023 } 2024 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 2025 panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s", 2026 devtoname(dev), (u_long)ino, fs->fs_fsmnt); 2027 if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { 2028 brelse(bp); 2029 return (error); 2030 } 2031 cgp = (struct cg *)bp->b_data; 2032 if (!cg_chkmagic(cgp)) { 2033 brelse(bp); 2034 return (0); 2035 } 2036 bp->b_xflags |= BX_BKGRDWRITE; 2037 cgp->cg_old_time = cgp->cg_time = time_second; 2038 inosused = cg_inosused(cgp); 2039 ino %= fs->fs_ipg; 2040 if (isclr(inosused, ino)) { 2041 printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev), 2042 (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt); 2043 if (fs->fs_ronly == 0) 2044 panic("ffs_freefile: freeing free inode"); 2045 } 2046 clrbit(inosused, ino); 2047 if (ino < cgp->cg_irotor) 2048 cgp->cg_irotor = ino; 2049 cgp->cg_cs.cs_nifree++; 2050 UFS_LOCK(ump); 2051 fs->fs_cstotal.cs_nifree++; 2052 fs->fs_cs(fs, cg).cs_nifree++; 2053 if ((mode & IFMT) == IFDIR) { 2054 cgp->cg_cs.cs_ndir--; 2055 fs->fs_cstotal.cs_ndir--; 2056 fs->fs_cs(fs, cg).cs_ndir--; 2057 } 2058 fs->fs_fmod = 1; 2059 ACTIVECLEAR(fs, cg); 2060 UFS_UNLOCK(ump); 2061 bdwrite(bp); 2062 return (0); 2063 } 2064 2065 /* 2066 * Check to see if a file is free. 2067 */ 2068 int 2069 ffs_checkfreefile(fs, devvp, ino) 2070 struct fs *fs; 2071 struct vnode *devvp; 2072 ino_t ino; 2073 { 2074 struct cg *cgp; 2075 struct buf *bp; 2076 ufs2_daddr_t cgbno; 2077 int ret, cg; 2078 u_int8_t *inosused; 2079 2080 cg = ino_to_cg(fs, ino); 2081 if (devvp->v_type != VCHR) { 2082 /* devvp is a snapshot */ 2083 cgbno = fragstoblks(fs, cgtod(fs, cg)); 2084 } else { 2085 /* devvp is a normal disk device */ 2086 cgbno = fsbtodb(fs, cgtod(fs, cg)); 2087 } 2088 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 2089 return (1); 2090 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) { 2091 brelse(bp); 2092 return (1); 2093 } 2094 cgp = (struct cg *)bp->b_data; 2095 if (!cg_chkmagic(cgp)) { 2096 brelse(bp); 2097 return (1); 2098 } 2099 inosused = cg_inosused(cgp); 2100 ino %= fs->fs_ipg; 2101 ret = isclr(inosused, ino); 2102 brelse(bp); 2103 return (ret); 2104 } 2105 2106 /* 2107 * Find a block of the specified size in the specified cylinder group. 2108 * 2109 * It is a panic if a request is made to find a block if none are 2110 * available. 2111 */ 2112 static ufs1_daddr_t 2113 ffs_mapsearch(fs, cgp, bpref, allocsiz) 2114 struct fs *fs; 2115 struct cg *cgp; 2116 ufs2_daddr_t bpref; 2117 int allocsiz; 2118 { 2119 ufs1_daddr_t bno; 2120 int start, len, loc, i; 2121 int blk, field, subfield, pos; 2122 u_int8_t *blksfree; 2123 2124 /* 2125 * find the fragment by searching through the free block 2126 * map for an appropriate bit pattern 2127 */ 2128 if (bpref) 2129 start = dtogd(fs, bpref) / NBBY; 2130 else 2131 start = cgp->cg_frotor / NBBY; 2132 blksfree = cg_blksfree(cgp); 2133 len = howmany(fs->fs_fpg, NBBY) - start; 2134 loc = scanc((u_int)len, (u_char *)&blksfree[start], 2135 fragtbl[fs->fs_frag], 2136 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 2137 if (loc == 0) { 2138 len = start + 1; 2139 start = 0; 2140 loc = scanc((u_int)len, (u_char *)&blksfree[0], 2141 fragtbl[fs->fs_frag], 2142 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 2143 if (loc == 0) { 2144 printf("start = %d, len = %d, fs = %s\n", 2145 start, len, fs->fs_fsmnt); 2146 panic("ffs_alloccg: map corrupted"); 2147 /* NOTREACHED */ 2148 } 2149 } 2150 bno = (start + len - loc) * NBBY; 2151 cgp->cg_frotor = bno; 2152 /* 2153 * found the byte in the map 2154 * sift through the bits to find the selected frag 2155 */ 2156 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 2157 blk = blkmap(fs, blksfree, bno); 2158 blk <<= 1; 2159 field = around[allocsiz]; 2160 subfield = inside[allocsiz]; 2161 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 2162 if ((blk & field) == subfield) 2163 return (bno + pos); 2164 field <<= 1; 2165 subfield <<= 1; 2166 } 2167 } 2168 printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); 2169 panic("ffs_alloccg: block not in map"); 2170 return (-1); 2171 } 2172 2173 /* 2174 * Update the cluster map because of an allocation or free. 2175 * 2176 * Cnt == 1 means free; cnt == -1 means allocating. 2177 */ 2178 void 2179 ffs_clusteracct(ump, fs, cgp, blkno, cnt) 2180 struct ufsmount *ump; 2181 struct fs *fs; 2182 struct cg *cgp; 2183 ufs1_daddr_t blkno; 2184 int cnt; 2185 { 2186 int32_t *sump; 2187 int32_t *lp; 2188 u_char *freemapp, *mapp; 2189 int i, start, end, forw, back, map, bit; 2190 2191 mtx_assert(UFS_MTX(ump), MA_OWNED); 2192 2193 if (fs->fs_contigsumsize <= 0) 2194 return; 2195 freemapp = cg_clustersfree(cgp); 2196 sump = cg_clustersum(cgp); 2197 /* 2198 * Allocate or clear the actual block. 2199 */ 2200 if (cnt > 0) 2201 setbit(freemapp, blkno); 2202 else 2203 clrbit(freemapp, blkno); 2204 /* 2205 * Find the size of the cluster going forward. 2206 */ 2207 start = blkno + 1; 2208 end = start + fs->fs_contigsumsize; 2209 if (end >= cgp->cg_nclusterblks) 2210 end = cgp->cg_nclusterblks; 2211 mapp = &freemapp[start / NBBY]; 2212 map = *mapp++; 2213 bit = 1 << (start % NBBY); 2214 for (i = start; i < end; i++) { 2215 if ((map & bit) == 0) 2216 break; 2217 if ((i & (NBBY - 1)) != (NBBY - 1)) { 2218 bit <<= 1; 2219 } else { 2220 map = *mapp++; 2221 bit = 1; 2222 } 2223 } 2224 forw = i - start; 2225 /* 2226 * Find the size of the cluster going backward. 2227 */ 2228 start = blkno - 1; 2229 end = start - fs->fs_contigsumsize; 2230 if (end < 0) 2231 end = -1; 2232 mapp = &freemapp[start / NBBY]; 2233 map = *mapp--; 2234 bit = 1 << (start % NBBY); 2235 for (i = start; i > end; i--) { 2236 if ((map & bit) == 0) 2237 break; 2238 if ((i & (NBBY - 1)) != 0) { 2239 bit >>= 1; 2240 } else { 2241 map = *mapp--; 2242 bit = 1 << (NBBY - 1); 2243 } 2244 } 2245 back = start - i; 2246 /* 2247 * Account for old cluster and the possibly new forward and 2248 * back clusters. 2249 */ 2250 i = back + forw + 1; 2251 if (i > fs->fs_contigsumsize) 2252 i = fs->fs_contigsumsize; 2253 sump[i] += cnt; 2254 if (back > 0) 2255 sump[back] -= cnt; 2256 if (forw > 0) 2257 sump[forw] -= cnt; 2258 /* 2259 * Update cluster summary information. 2260 */ 2261 lp = &sump[fs->fs_contigsumsize]; 2262 for (i = fs->fs_contigsumsize; i > 0; i--) 2263 if (*lp-- > 0) 2264 break; 2265 fs->fs_maxcluster[cgp->cg_cgx] = i; 2266 } 2267 2268 /* 2269 * Fserr prints the name of a filesystem with an error diagnostic. 2270 * 2271 * The form of the error message is: 2272 * fs: error message 2273 */ 2274 static void 2275 ffs_fserr(fs, inum, cp) 2276 struct fs *fs; 2277 ino_t inum; 2278 char *cp; 2279 { 2280 struct thread *td = curthread; /* XXX */ 2281 struct proc *p = td->td_proc; 2282 2283 log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n", 2284 p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp); 2285 } 2286 2287 /* 2288 * This function provides the capability for the fsck program to 2289 * update an active filesystem. Eleven operations are provided: 2290 * 2291 * adjrefcnt(inode, amt) - adjusts the reference count on the 2292 * specified inode by the specified amount. Under normal 2293 * operation the count should always go down. Decrementing 2294 * the count to zero will cause the inode to be freed. 2295 * adjblkcnt(inode, amt) - adjust the number of blocks used to 2296 * by the specifed amount. 2297 * adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) - 2298 * adjust the superblock summary. 2299 * freedirs(inode, count) - directory inodes [inode..inode + count - 1] 2300 * are marked as free. Inodes should never have to be marked 2301 * as in use. 2302 * freefiles(inode, count) - file inodes [inode..inode + count - 1] 2303 * are marked as free. Inodes should never have to be marked 2304 * as in use. 2305 * freeblks(blockno, size) - blocks [blockno..blockno + size - 1] 2306 * are marked as free. Blocks should never have to be marked 2307 * as in use. 2308 * setflags(flags, set/clear) - the fs_flags field has the specified 2309 * flags set (second parameter +1) or cleared (second parameter -1). 2310 */ 2311 2312 static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS); 2313 2314 SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT, 2315 0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count"); 2316 2317 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR, 2318 sysctl_ffs_fsck, "Adjust Inode Used Blocks Count"); 2319 2320 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR, 2321 sysctl_ffs_fsck, "Adjust number of directories"); 2322 2323 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NBFREE, adjnbfree, CTLFLAG_WR, 2324 sysctl_ffs_fsck, "Adjust number of free blocks"); 2325 2326 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NIFREE, adjnifree, CTLFLAG_WR, 2327 sysctl_ffs_fsck, "Adjust number of free inodes"); 2328 2329 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NFFREE, adjnffree, CTLFLAG_WR, 2330 sysctl_ffs_fsck, "Adjust number of free frags"); 2331 2332 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NUMCLUSTERS, adjnumclusters, CTLFLAG_WR, 2333 sysctl_ffs_fsck, "Adjust number of free clusters"); 2334 2335 static SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR, 2336 sysctl_ffs_fsck, "Free Range of Directory Inodes"); 2337 2338 static SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR, 2339 sysctl_ffs_fsck, "Free Range of File Inodes"); 2340 2341 static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR, 2342 sysctl_ffs_fsck, "Free Range of Blocks"); 2343 2344 static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR, 2345 sysctl_ffs_fsck, "Change Filesystem Flags"); 2346 2347 #ifdef DEBUG 2348 static int fsckcmds = 0; 2349 SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, ""); 2350 #endif /* DEBUG */ 2351 2352 static int 2353 sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) 2354 { 2355 struct fsck_cmd cmd; 2356 struct ufsmount *ump; 2357 struct vnode *vp; 2358 struct inode *ip; 2359 struct mount *mp; 2360 struct fs *fs; 2361 ufs2_daddr_t blkno; 2362 long blkcnt, blksize; 2363 struct file *fp; 2364 int filetype, error; 2365 2366 if (req->newlen > sizeof cmd) 2367 return (EBADRPC); 2368 if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0) 2369 return (error); 2370 if (cmd.version != FFS_CMD_VERSION) 2371 return (ERPCMISMATCH); 2372 if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0) 2373 return (error); 2374 vn_start_write(fp->f_data, &mp, V_WAIT); 2375 if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) { 2376 vn_finished_write(mp); 2377 fdrop(fp, curthread); 2378 return (EINVAL); 2379 } 2380 if (mp->mnt_flag & MNT_RDONLY) { 2381 vn_finished_write(mp); 2382 fdrop(fp, curthread); 2383 return (EROFS); 2384 } 2385 ump = VFSTOUFS(mp); 2386 fs = ump->um_fs; 2387 filetype = IFREG; 2388 2389 switch (oidp->oid_number) { 2390 2391 case FFS_SET_FLAGS: 2392 #ifdef DEBUG 2393 if (fsckcmds) 2394 printf("%s: %s flags\n", mp->mnt_stat.f_mntonname, 2395 cmd.size > 0 ? "set" : "clear"); 2396 #endif /* DEBUG */ 2397 if (cmd.size > 0) 2398 fs->fs_flags |= (long)cmd.value; 2399 else 2400 fs->fs_flags &= ~(long)cmd.value; 2401 break; 2402 2403 case FFS_ADJ_REFCNT: 2404 #ifdef DEBUG 2405 if (fsckcmds) { 2406 printf("%s: adjust inode %jd count by %jd\n", 2407 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2408 (intmax_t)cmd.size); 2409 } 2410 #endif /* DEBUG */ 2411 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2412 break; 2413 ip = VTOI(vp); 2414 ip->i_nlink += cmd.size; 2415 DIP_SET(ip, i_nlink, ip->i_nlink); 2416 ip->i_effnlink += cmd.size; 2417 ip->i_flag |= IN_CHANGE; 2418 if (DOINGSOFTDEP(vp)) 2419 softdep_change_linkcnt(ip); 2420 vput(vp); 2421 break; 2422 2423 case FFS_ADJ_BLKCNT: 2424 #ifdef DEBUG 2425 if (fsckcmds) { 2426 printf("%s: adjust inode %jd block count by %jd\n", 2427 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2428 (intmax_t)cmd.size); 2429 } 2430 #endif /* DEBUG */ 2431 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2432 break; 2433 ip = VTOI(vp); 2434 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size); 2435 ip->i_flag |= IN_CHANGE; 2436 vput(vp); 2437 break; 2438 2439 case FFS_DIR_FREE: 2440 filetype = IFDIR; 2441 /* fall through */ 2442 2443 case FFS_FILE_FREE: 2444 #ifdef DEBUG 2445 if (fsckcmds) { 2446 if (cmd.size == 1) 2447 printf("%s: free %s inode %d\n", 2448 mp->mnt_stat.f_mntonname, 2449 filetype == IFDIR ? "directory" : "file", 2450 (ino_t)cmd.value); 2451 else 2452 printf("%s: free %s inodes %d-%d\n", 2453 mp->mnt_stat.f_mntonname, 2454 filetype == IFDIR ? "directory" : "file", 2455 (ino_t)cmd.value, 2456 (ino_t)(cmd.value + cmd.size - 1)); 2457 } 2458 #endif /* DEBUG */ 2459 while (cmd.size > 0) { 2460 if ((error = ffs_freefile(ump, fs, ump->um_devvp, 2461 cmd.value, filetype))) 2462 break; 2463 cmd.size -= 1; 2464 cmd.value += 1; 2465 } 2466 break; 2467 2468 case FFS_BLK_FREE: 2469 #ifdef DEBUG 2470 if (fsckcmds) { 2471 if (cmd.size == 1) 2472 printf("%s: free block %jd\n", 2473 mp->mnt_stat.f_mntonname, 2474 (intmax_t)cmd.value); 2475 else 2476 printf("%s: free blocks %jd-%jd\n", 2477 mp->mnt_stat.f_mntonname, 2478 (intmax_t)cmd.value, 2479 (intmax_t)cmd.value + cmd.size - 1); 2480 } 2481 #endif /* DEBUG */ 2482 blkno = cmd.value; 2483 blkcnt = cmd.size; 2484 blksize = fs->fs_frag - (blkno % fs->fs_frag); 2485 while (blkcnt > 0) { 2486 if (blksize > blkcnt) 2487 blksize = blkcnt; 2488 ffs_blkfree(ump, fs, ump->um_devvp, blkno, 2489 blksize * fs->fs_fsize, ROOTINO); 2490 blkno += blksize; 2491 blkcnt -= blksize; 2492 blksize = fs->fs_frag; 2493 } 2494 break; 2495 2496 /* 2497 * Adjust superblock summaries. fsck(8) is expected to 2498 * submit deltas when necessary. 2499 */ 2500 case FFS_ADJ_NDIR: 2501 #ifdef DEBUG 2502 if (fsckcmds) { 2503 printf("%s: adjust number of directories by %jd\n", 2504 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2505 } 2506 #endif /* DEBUG */ 2507 fs->fs_cstotal.cs_ndir += cmd.value; 2508 break; 2509 case FFS_ADJ_NBFREE: 2510 #ifdef DEBUG 2511 if (fsckcmds) { 2512 printf("%s: adjust number of free blocks by %+jd\n", 2513 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2514 } 2515 #endif /* DEBUG */ 2516 fs->fs_cstotal.cs_nbfree += cmd.value; 2517 break; 2518 case FFS_ADJ_NIFREE: 2519 #ifdef DEBUG 2520 if (fsckcmds) { 2521 printf("%s: adjust number of free inodes by %+jd\n", 2522 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2523 } 2524 #endif /* DEBUG */ 2525 fs->fs_cstotal.cs_nifree += cmd.value; 2526 break; 2527 case FFS_ADJ_NFFREE: 2528 #ifdef DEBUG 2529 if (fsckcmds) { 2530 printf("%s: adjust number of free frags by %+jd\n", 2531 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2532 } 2533 #endif /* DEBUG */ 2534 fs->fs_cstotal.cs_nffree += cmd.value; 2535 break; 2536 case FFS_ADJ_NUMCLUSTERS: 2537 #ifdef DEBUG 2538 if (fsckcmds) { 2539 printf("%s: adjust number of free clusters by %+jd\n", 2540 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); 2541 } 2542 #endif /* DEBUG */ 2543 fs->fs_cstotal.cs_numclusters += cmd.value; 2544 break; 2545 2546 default: 2547 #ifdef DEBUG 2548 if (fsckcmds) { 2549 printf("Invalid request %d from fsck\n", 2550 oidp->oid_number); 2551 } 2552 #endif /* DEBUG */ 2553 error = EINVAL; 2554 break; 2555 2556 } 2557 fdrop(fp, curthread); 2558 vn_finished_write(mp); 2559 return (error); 2560 } 2561