/*-
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref,
				  int size);

static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs2_daddr_t
	      ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t);
#ifdef DIAGNOSTIC
static int	ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int);
static void	ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *,
		    ufs1_daddr_t, int);
static ino_t	ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int);
static void	ffs_fserr(struct fs *, ino_t, char *);
static ufs2_daddr_t	ffs_hashalloc
		(struct inode *, int, ufs2_daddr_t, int, allocfcn_t *);
static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int);
static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
static int	ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
static int	ffs_reallocblks_ufs2(struct vop_reallocblks_args *);

/*
 * Allocate a block in the filesystem.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 * 1) allocate the requested block.
 * 2) allocate a rotationally optimal block in the same cylinder.
 * 3) allocate a block in the same cylinder group.
 * 4) quadratically rehash into other cylinder groups, until an
 *    available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 * 1) allocate a block in the cylinder group that contains the
 *    inode for the file.
 * 2) quadratically rehash into other cylinder groups, until an
 *    available block is located.
 */
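/*
 * Illustrative note (hypothetical geometry): with fs_bsize = 16384 and
 * fs_fsize = 2048, the legal values for the "size" argument below are
 * 2048, 4096, ..., 16384, i.e. any value with fragoff(fs, size) == 0 and
 * size <= fs_bsize, matching the DIAGNOSTIC check at the top of
 * ffs_alloc().
 */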
int
ffs_alloc(ip, lbn, bpref, size, cred, bnp)
	struct inode *ip;
	ufs2_daddr_t lbn, bpref;
	int size;
	struct ucred *cred;
	ufs2_daddr_t *bnp;
{
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t bno;
	int cg, reclaimed;
#ifdef QUOTA
	int error;
#endif

	*bnp = 0;
	fs = ip->i_fs;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, size,
		    fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* DIAGNOSTIC */
	reclaimed = 0;
retry:
#ifdef QUOTA
	UFS_UNLOCK(ump);
	error = chkdq(ip, btodb(size), cred, 0);
	if (error)
		return (error);
	UFS_LOCK(ump);
#endif
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (suser_cred(cred, SUSER_ALLOWJAIL) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
		goto nospace;
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
	if (bno > 0) {
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#ifdef QUOTA
	UFS_UNLOCK(ump);
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(size), cred, FORCE);
	UFS_LOCK(ump);
#endif
nospace:
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, ITOV(ip));
		goto retry;
	}
	UFS_UNLOCK(ump);
	ffs_fserr(fs, ip->i_number, "filesystem full");
	mtx_lock(&Giant);
	uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt);
	mtx_unlock(&Giant);
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
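/*
 * Worked example (hypothetical sizes, fs_fsize = 2048): growing a file's
 * last partial block from osize = 4096 to nsize = 6144 first tries
 * ffs_fragextend() to claim the adjacent free fragment in place; if that
 * fails, a new allocation is made (a full block under FS_OPTTIME, an
 * exact three-fragment piece under FS_OPTSPACE), the buffer is remapped
 * to the new location, and the old fragments are freed.
 */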
int
ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, cred, bpp)
	struct inode *ip;
	ufs2_daddr_t lbprev;
	ufs2_daddr_t bprev;
	ufs2_daddr_t bpref;
	int osize, nsize;
	struct ucred *cred;
	struct buf **bpp;
{
	struct vnode *vp;
	struct fs *fs;
	struct buf *bp;
	struct ufsmount *ump;
	int cg, request, error, reclaimed;
	ufs2_daddr_t bno;

	*bpp = 0;
	vp = ITOV(ip);
	fs = ip->i_fs;
	bp = NULL;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef DIAGNOSTIC
	if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_realloccg: allocation on suspended filesystem");
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		"dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, osize,
		    nsize, fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* DIAGNOSTIC */
	reclaimed = 0;
retry:
	if (suser_cred(cred, SUSER_ALLOWJAIL) &&
	    freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) {
		goto nospace;
	}
	if (bprev == 0) {
		printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	UFS_UNLOCK(ump);
	/*
	 * Allocate the extra space in the buffer.
	 */
	error = bread(vp, lbprev, osize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (bp->b_blkno == bp->b_lblkno) {
		if (lbprev >= NDADDR)
			panic("ffs_realloccg: lbprev out of range");
		bp->b_blkno = fsbtodb(fs, bprev);
	}

#ifdef QUOTA
	error = chkdq(ip, btodb(nsize - osize), cred, 0);
	if (error) {
		brelse(bp);
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	UFS_LOCK(ump);
	bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
	if (bno) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("ffs_realloccg: bad blockno");
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(nsize - osize));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO)
			bzero((char *)bp->b_data + osize, nsize - osize);
		else
			vfs_bio_clrbuf(bp);
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree <= 5 ||
		    fs->fs_cstotal.cs_nffree >
		    (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
		    fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = %s, optim = %ld, fs = %s\n",
		    devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize,
			    ip->i_number);
		if (nsize < request)
			ffs_blkfree(ump, fs, ip->i_devvp,
			    bno + numfrags(fs, nsize),
			    (long)(request - nsize), ip->i_number);
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(nsize - osize));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO)
			bzero((char *)bp->b_data + osize, nsize - osize);
		else
			vfs_bio_clrbuf(bp);
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	UFS_UNLOCK(ump);
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
	UFS_LOCK(ump);
#endif
nospace:
	/*
	 * no space available
	 */
	if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
		reclaimed = 1;
		softdep_request_cleanup(fs, vp);
		UFS_UNLOCK(ump);
		if (bp)
			brelse(bp);
		UFS_LOCK(ump);
		goto retry;
	}
	UFS_UNLOCK(ump);
	if (bp)
		brelse(bp);
	ffs_fserr(fs, ip->i_number, "filesystem full");
	mtx_lock(&Giant);
	uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt);
	mtx_unlock(&Giant);
	return (ENOSPC);
}

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous is given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible
 * to the end of the allocation for the logical block immediately
 * preceding the current range. If successful, the physical block numbers
 * in the buffer pointers and in the inode are changed to reflect the new
 * allocation. If unsuccessful, the allocation is left unchanged. The
 * success in doing the reallocation is returned. Note that the error
 * return is not reflected back to the user. Rather the previous block
 * allocation will be used.
 */
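/*
 * Note: the knobs below appear as the sysctl variables
 * vfs.ffs.doasyncfree and vfs.ffs.doreallocblks; setting doreallocblks
 * to 0 disables block reallocation entirely (ffs_reallocblks() then
 * simply returns ENOSPC).
 */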
SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");

static int doasyncfree = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");

static int doreallocblks = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");

#ifdef DEBUG
static volatile int prtrealloc = 0;
#endif

int
ffs_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{

	if (doreallocblks == 0)
		return (ENOSPC);
	if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1)
		return (ffs_reallocblks_ufs1(ap));
	return (ffs_reallocblks_ufs2(ap));
}

static int
ffs_reallocblks_ufs1(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs1_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	struct ufsmount *ump;
	ufs_lbn_t start_lbn, end_lbn;
	ufs1_daddr_t soff, newblk, blkno;
	ufs2_daddr_t pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din1->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs1_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs1_daddr_t *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	UFS_LOCK(ump);
	pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0) {
		UFS_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", *bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din1->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din1->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ffs_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din1->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

static int
ffs_reallocblks_ufs2(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	ufs2_daddr_t *bap, *sbap, *ebap = 0;
	struct cluster_save *buflist;
	struct ufsmount *ump;
	ufs_lbn_t start_lbn, end_lbn;
	ufs2_daddr_t soff, newblk, blkno, pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 1");
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs_reallocblks: non-logical cluster");
	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din2->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (ufs2_daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (ufs2_daddr_t *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	UFS_LOCK(ump);
	pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0) {
		UFS_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
		    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)*bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din2->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din2->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ffs_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ump, fs, ip->i_devvp,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize, ip->i_number);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %jd,", (intmax_t)blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_din2->di_db[0])
		brelse(sbp);
	return (ENOSPC);
}

/*
 * Allocate an inode in the filesystem.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 * 1) allocate the preferred inode.
 * 2) allocate an inode in the same cylinder group.
 * 3) quadratically rehash into other cylinder groups, until an
 *    available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 * 1) allocate an inode in cylinder group 0.
 * 2) quadratically rehash into other cylinder groups, until an
 *    available inode is located.
 */
int
ffs_valloc(pvp, mode, cred, vpp)
	struct vnode *pvp;
	int mode;
	struct ucred *cred;
	struct vnode **vpp;
{
	struct inode *pip;
	struct fs *fs;
	struct inode *ip;
	struct timespec ts;
	struct ufsmount *ump;
	ino_t ino, ipref;
	int cg, error;

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	ump = pip->i_ump;

	UFS_LOCK(ump);
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track the number of dirs created one after another
	 * in the same cg without intervening file creations.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode,
					(allocfcn_t *)ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	error = ffs_vget(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
	if (error) {
		ffs_vfree(pvp, ino, mode);
		return (error);
	}
	ip = VTOI(*vpp);
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %lu, fs = %s\n",
		    ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}
	if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) {  /* XXX */
		printf("free inode %s/%lu had %ld blocks\n",
		    fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks));
		DIP_SET(ip, i_blocks, 0);
	}
	ip->i_flags = 0;
	DIP_SET(ip, i_flags, 0);
	/*
	 * Set up a new generation number for this inode.
	 */
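	/*
	 * Note: arc4random() / 2 + 1 yields a value in [1, 2^31], so a
	 * freshly chosen generation number is always nonzero; if the old
	 * number is zero (never initialized) or the increment below wraps
	 * to zero, a random nonzero value is substituted.
	 */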
	if (ip->i_gen == 0 || ++ip->i_gen == 0)
		ip->i_gen = arc4random() / 2 + 1;
	DIP_SET(ip, i_gen, ip->i_gen);
	if (fs->fs_magic == FS_UFS2_MAGIC) {
		vfs_timestamp(&ts);
		ip->i_din2->di_birthtime = ts.tv_sec;
		ip->i_din2->di_birthnsec = ts.tv_nsec;
	}
	return (0);
noinodes:
	UFS_UNLOCK(ump);
	ffs_fserr(fs, pip->i_number, "out of inodes");
	mtx_lock(&Giant);
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	mtx_unlock(&Giant);
	return (ENOSPC);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static ino_t
ffs_dirpref(pip)
	struct inode *pip;
{
	struct fs *fs;
	int cg, prefcg, dirsize, cgsize;
	int avgifree, avgbfree, avgndir, curdirsize;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	mtx_assert(UFS_MTX(pip->i_ump), MA_OWNED);
	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref");
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = arc4random() % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Compute the various limits used for the optimal allocation
	 * of a directory inode.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - avgifree / 4;
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - avgbfree / 4;
	if (minbfree < 1)
		minbfree = 1;
	cgsize = fs->fs_fsize * fs->fs_fpg;
	dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
	curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255);
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
		    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop when we have a deficit in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}

/*
 * Select the desired position for the next block in a file. The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is laid out
 * contiguously if possible.
 */
ufs2_daddr_t
ffs_blkpref_ufs1(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs1_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder group with a greater than average number
		 * of unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Same as above, but for UFS2.
 */
ufs2_daddr_t
ffs_blkpref_ufs2(ip, lbn, indx, bap)
	struct inode *ip;
	ufs_lbn_t lbn;
	int indx;
	ufs2_daddr_t *bap;
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder group with a greater than average number
		 * of unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 * 1) allocate the block in its requested cylinder group.
 * 2) quadratically rehash on the cylinder group number.
 * 3) brute force search for a free block.
 *
 * Must be called with the UFS lock held. Will release the lock on success
 * and return with it held on failure.
 */
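/*
 * For example, starting from cg "icg" with fs_ncg = 32, the quadratic
 * rehash below probes cgs icg+1, icg+3, icg+7, icg+15, and icg+31
 * (mod 32), i.e. offsets of 2^k - 1; the brute force pass then starts
 * at icg+2 and walks the remaining groups in order.
 */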
/*VARARGS5*/
static ufs2_daddr_t
ffs_hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	ufs2_daddr_t pref;
	int size;	/* size for data blocks, mode for inodes */
	allocfcn_t *allocator;
{
	struct fs *fs;
	ufs2_daddr_t result;
	int i, icg = cg;

	mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
#ifdef DIAGNOSTIC
	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
		panic("ffs_hashalloc: allocation on suspended filesystem");
#endif
	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
static ufs2_daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bprev;
	int osize, nsize;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	int nffree;
	long bno;
	int frags, bbase;
	int i, error;
	u_int8_t *blksfree;

	ump = ip->i_ump;
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (0);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	bno = dtogd(fs, bprev);
	blksfree = cg_blksfree(cgp);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(blksfree, bno + i))
			goto fail;
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(blksfree, bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize), nffree = 0; i < frags; i++) {
		clrbit(blksfree, bno + i);
		cgp->cg_cs.cs_nffree--;
		nffree++;
	}
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nffree -= nffree;
	fs->fs_cs(fs, cg).cs_nffree -= nffree;
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, bprev);
	bdwrite(bp);
	return (bprev);
fail:
	brelse(bp);
	UFS_LOCK(ump);
	return (0);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static ufs2_daddr_t
ffs_alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	int i, allocsiz, error, frags;
	u_int8_t *blksfree;

	ump = ip->i_ump;
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
		goto fail;
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	if (size == fs->fs_bsize) {
		UFS_LOCK(ump);
		blkno = ffs_alloccgblk(ip, bp, bpref);
		ACTIVECLEAR(fs, cg);
		UFS_UNLOCK(ump);
		bdwrite(bp);
		return (blkno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	blksfree = cg_blksfree(cgp);
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0)
			goto fail;
		UFS_LOCK(ump);
		blkno = ffs_alloccgblk(ip, bp, bpref);
		bno = dtogd(fs, blkno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(blksfree, bno + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
		ACTIVECLEAR(fs, cg);
		UFS_UNLOCK(ump);
		bdwrite(bp);
		return (blkno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0)
		goto fail;
	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	blkno = cgbase(fs, cg) + bno;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, blkno);
	bdwrite(bp);
	return (blkno);

fail:
	brelse(bp);
	UFS_LOCK(ump);
	return (0);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 * 1) allocate the requested block.
 * 2) allocate a rotationally optimal block in the same cylinder.
 * 3) allocate the next available block on the block rotor for the
 *    specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
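/*
 * Illustrative note: blknum() rounds a fragment address down to its block
 * boundary, so with fs_frag = 8 a preference of fragment 181 is treated
 * as block-start fragment 176; if that block is not entirely free, the
 * cg rotor supplies the starting point for the map search instead.
 */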
static ufs2_daddr_t
ffs_alloccgblk(ip, bp, bpref)
	struct inode *ip;
	struct buf *bp;
	ufs2_daddr_t bpref;
{
	struct fs *fs;
	struct cg *cgp;
	struct ufsmount *ump;
	ufs1_daddr_t bno;
	ufs2_daddr_t blkno;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;
	mtx_assert(UFS_MTX(ump), MA_OWNED);
	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp);
	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
	} else {
		bpref = blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
			goto gotit;
	}
	/*
	 * Take the next available block in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = bno;
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, (long)blkno);
	ffs_clusteracct(ump, fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	fs->fs_fmod = 1;
	blkno = cgbase(fs, cgp->cg_cgx) + bno;
	/* XXX Fixme. */
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, blkno);
	UFS_LOCK(ump);
	return (blkno);
}

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group. Instead we just
 * take the first one that we find following bpref.
 */
static ufs2_daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
	struct inode *ip;
	int cg;
	ufs2_daddr_t bpref;
	int len;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	int i, run, bit, map, got;
	ufs2_daddr_t bno;
	u_char *mapp;
	int32_t *lp;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_maxcluster[cg] < len)
		return (0);
	UFS_UNLOCK(ump);
	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
	    NOCRED, &bp))
		goto fail_lock;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		goto fail_lock;
	bp->b_xflags |= BX_BKGRDWRITE;
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &cg_clustersum(cgp)[len];
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->fs_contigsumsize) {
		/*
		 * This is the first time looking for a cluster in this
		 * cylinder group. Update the cluster summary information
		 * to reflect the true maximum sized cluster so that
		 * future cluster allocation requests can avoid reading
		 * the cylinder group map only to find no clusters.
		 */
		lp = &cg_clustersum(cgp)[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		UFS_LOCK(ump);
		fs->fs_maxcluster[cg] = i;
		goto fail;
	}
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need as we prefer to get one close to the previous
	 * block allocation.
	 * We do not search before the current
	 * preference point as we do not want to allocate a block
	 * that is allocated before the previous one (as we will
	 * then have to wait for another pass of the elevator
	 * algorithm before it will be read). We prefer to fail and
	 * be recalled to try an allocation in the next cylinder group.
	 */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	else
		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
	map = *mapp++;
	bit = 1 << (bpref % NBBY);
	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
		if ((map & bit) == 0) {
			run = 0;
		} else {
			run++;
			if (run == len)
				break;
		}
		if ((got & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	if (got >= cgp->cg_nclusterblks)
		goto fail_lock;
	/*
	 * Allocate the cluster that we have found.
	 */
	blksfree = cg_blksfree(cgp);
	for (i = 1; i <= len; i++)
		if (!ffs_isblock(fs, blksfree, got - run + i))
			panic("ffs_clusteralloc: map mismatch");
	bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1);
	if (dtog(fs, bno) != cg)
		panic("ffs_clusteralloc: allocated out of group");
	len = blkstofrags(fs, len);
	UFS_LOCK(ump);
	for (i = 0; i < len; i += fs->fs_frag)
		if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
			panic("ffs_clusteralloc: lost block");
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	bdwrite(bp);
	return (bno);

fail_lock:
	UFS_LOCK(ump);
fail:
	brelse(bp);
	return (0);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 * 1) allocate the requested inode.
 * 2) allocate the next available inode after the requested
 *    inode in the specified cylinder group.
 */
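/*
 * Note on the scan below: the cg_irotor rotor remembers where the last
 * inode search ended; skpc(0xff, len, cp) skips over map bytes whose
 * eight inosused bits are all set, so whole bytes of fully allocated
 * inodes are stepped over before the bit-by-bit search of the first
 * candidate byte.
 */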
static ufs2_daddr_t
ffs_nodealloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	ufs2_daddr_t ipref;
	int mode;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp, *ibp;
	struct ufsmount *ump;
	u_int8_t *inosused;
	struct ufs2_dinode *dp2;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	ump = ip->i_ump;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);
	UFS_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		UFS_LOCK(ump);
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		UFS_LOCK(ump);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	inosused = cg_inosused(cgp);
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(inosused, ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &inosused[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &inosused[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %ld, fs = %s\n",
			    cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = inosused[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */
gotit:
	/*
	 * Check to see if we need to initialize more inodes.
	 */
	ibp = NULL;
	if (fs->fs_magic == FS_UFS2_MAGIC &&
	    ipref + INOPB(fs) > cgp->cg_initediblk &&
	    cgp->cg_initediblk < cgp->cg_niblk) {
		ibp = getblk(ip->i_devvp, fsbtodb(fs,
		    ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
		    (int)fs->fs_bsize, 0, 0, 0);
		bzero(ibp->b_data, (int)fs->fs_bsize);
		dp2 = (struct ufs2_dinode *)(ibp->b_data);
		for (i = 0; i < INOPB(fs); i++) {
			dp2->di_gen = arc4random() / 2 + 1;
			dp2++;
		}
		cgp->cg_initediblk += INOPB(fs);
	}
	UFS_LOCK(ump);
	ACTIVECLEAR(fs, cg);
	setbit(inosused, ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	UFS_UNLOCK(ump);
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
	bdwrite(bp);
	if (ibp != NULL)
		bawrite(ibp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * check if a block is free
 */
static int
ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
{

	switch ((int)fs->fs_frag) {
	case 8:
		/* one map byte per block */
		return (cp[h] == 0);
	case 4:
		/* two blocks per byte; test the block's nibble */
		return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
	case 2:
		/* four blocks per byte; test the block's two bits */
		return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
	case 1:
		/* eight blocks per byte; test the block's bit */
		return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
	default:
		panic("ffs_isfreeblock");
	}
	return (0);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
void
ffs_blkfree(ump, fs, devvp, bno, size, inum)
	struct ufsmount *ump;
	struct fs *fs;
	struct vnode *devvp;
	ufs2_daddr_t bno;
	long size;
	ino_t inum;
{
	struct cg *cgp;
	struct buf *bp;
	ufs1_daddr_t fragno, cgbno;
	ufs2_daddr_t cgblkno;
	int i, cg, blk, frags, bbase;
	u_int8_t *blksfree;
	struct cdev *dev;

	cg = dtog(fs, bno);
	if (devvp->v_type != VCHR) {
		/* devvp is a snapshot */
		dev = VTOI(devvp)->i_devvp->v_rdev;
		cgblkno = fragstoblks(fs, cgtod(fs, cg));
	} else {
		/* devvp is a normal disk device */
		dev = devvp->v_rdev;
		cgblkno = fsbtodb(fs, cgtod(fs, cg));
		ASSERT_VOP_LOCKED(devvp, "ffs_blkfree");
		if ((devvp->v_vflag & VV_COPYONWRITE) &&
		    ffs_snapblkfree(fs, devvp, bno, size, inum))
			return;
	}
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
		printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n",
		    devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize,
		    size, fs->fs_fsmnt);
		panic("ffs_blkfree: bad size");
	}
#endif
	if ((u_int)bno >= fs->fs_size) {
		printf("bad block %jd, ino %lu\n", (intmax_t)bno,
		    (u_long)inum);
		ffs_fserr(fs, inum, "bad block");
		return;
	}
	if (bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp)) {
		brelse(bp);
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	cgbno = dtogd(fs, bno);
	blksfree = cg_blksfree(cgp);
	UFS_LOCK(ump);
	if (size == fs->fs_bsize) {
		fragno = fragstoblks(fs, cgbno);
		if (!ffs_isfreeblock(fs, blksfree, fragno)) {
			if (devvp->v_type != VCHR) {
				UFS_UNLOCK(ump);
				/* devvp is a snapshot */
				brelse(bp);
				return;
			}
			printf("dev = %s, block = %jd, fs = %s\n",
			    devtoname(dev), (intmax_t)bno, fs->fs_fsmnt);
			panic("ffs_blkfree: freeing free block");
		}
		ffs_setblock(fs, blksfree, fragno);
		ffs_clusteracct(ump, fs, cgp, fragno, 1);
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
	} else {
		bbase = cgbno - fragnum(fs, cgbno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(blksfree, cgbno + i)) {
				printf("dev = %s, block = %jd, fs = %s\n",
				    devtoname(dev), (intmax_t)(bno + i),
				    fs->fs_fsmnt);
				panic("ffs_blkfree: freeing free frag");
			}
			setbit(blksfree, cgbno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		fragno = fragstoblks(fs, bbase);
		if (ffs_isblock(fs, blksfree, fragno)) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(ump, fs, cgp, fragno, 1);
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
		}
	}
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	bdwrite(bp);
}

#ifdef DIAGNOSTIC
/*
 * Verify allocation of a block or fragment. Returns true if block or
 * fragment is allocated, false if it is free.
 */
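/*
 * Illustrative use (a sketch, not taken from the surrounding code):
 * DIAGNOSTIC callers can assert that a block they are about to
 * release is still marked allocated, e.g.
 *
 *	if (ffs_checkblk(ip, bno, size) == 0)
 *		panic("example: freeing unallocated block");
 */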
static int
ffs_checkblk(ip, bno, size)
	struct inode *ip;
	ufs2_daddr_t bno;
	long size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	ufs1_daddr_t cgbno;
	int i, error, frags, free;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("bsize = %ld, size = %ld, fs = %s\n",
		    (long)fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_checkblk: bad size");
	}
	if ((u_int)bno >= fs->fs_size)
		panic("ffs_checkblk: bad block %jd", (intmax_t)bno);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		panic("ffs_checkblk: cg bread failed");
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		panic("ffs_checkblk: cg magic mismatch");
	bp->b_xflags |= BX_BKGRDWRITE;
	blksfree = cg_blksfree(cgp);
	cgbno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno));
	} else {
		frags = numfrags(fs, size);
		for (free = 0, i = 0; i < frags; i++)
			if (isset(blksfree, cgbno + i))
				free++;
		if (free != 0 && free != frags)
			panic("ffs_checkblk: partially free fragment");
	}
	brelse(bp);
	return (!free);
}
#endif /* DIAGNOSTIC */

/*
 * Free an inode.
 */
int
ffs_vfree(pvp, ino, mode)
	struct vnode *pvp;
	ino_t ino;
	int mode;
{
	struct inode *ip;

	if (DOINGSOFTDEP(pvp)) {
		softdep_freefile(pvp, ino, mode);
		return (0);
	}
	ip = VTOI(pvp);
	return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode));
}

/*
 * Do the actual free operation.
 * The specified inode is placed back in the free map.
 */
int
ffs_freefile(ump, fs, devvp, ino, mode)
	struct ufsmount *ump;
	struct fs *fs;
	struct vnode *devvp;
	ino_t ino;
	int mode;
{
	struct cg *cgp;
	struct buf *bp;
	ufs2_daddr_t cgbno;
	int error, cg;
	u_int8_t *inosused;
	struct cdev *dev;

	cg = ino_to_cg(fs, ino);
	if (devvp->v_type != VCHR) {
		/* devvp is a snapshot */
		dev = VTOI(devvp)->i_devvp->v_rdev;
		cgbno = fragstoblks(fs, cgtod(fs, cg));
	} else {
		/* devvp is a normal disk device */
		dev = devvp->v_rdev;
		cgbno = fsbtodb(fs, cgtod(fs, cg));
	}
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s",
		    devtoname(dev), (u_long)ino, fs->fs_fsmnt);
	if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) {
		brelse(bp);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	inosused = cg_inosused(cgp);
	ino %= fs->fs_ipg;
	if (isclr(inosused, ino)) {
		printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev),
		    (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ffs_freefile: freeing free inode");
	}
	clrbit(inosused, ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	bdwrite(bp);
	return (0);
}

/*
 * Check to see if a file is free.
 */
int
ffs_checkfreefile(fs, devvp, ino)
	struct fs *fs;
	struct vnode *devvp;
	ino_t ino;
{
	struct cg *cgp;
	struct buf *bp;
	ufs2_daddr_t cgbno;
	int ret, cg;
	u_int8_t *inosused;

	cg = ino_to_cg(fs, ino);
	if (devvp->v_type != VCHR) {
		/* devvp is a snapshot */
		cgbno = fragstoblks(fs, cgtod(fs, cg));
	} else {
		/* devvp is a normal disk device */
		cgbno = fsbtodb(fs, cgtod(fs, cg));
	}
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		return (1);
	if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) {
		brelse(bp);
		return (1);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (1);
	}
	inosused = cg_inosused(cgp);
	ino %= fs->fs_ipg;
	ret = isclr(inosused, ino);
	brelse(bp);
	return (ret);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
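/*
 * How the search proceeds: fragtbl[fs->fs_frag] maps each possible
 * byte of the free map to a mask of the free-run lengths present in
 * that byte, so scanc() can reject whole bytes at a time until it
 * finds one containing a run of at least allocsiz free fragments.
 * The bit-sifting loop in the body then pinpoints the exact fragment
 * within that byte using the around[]/inside[] templates.
 */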
static ufs1_daddr_t
ffs_mapsearch(fs, cgp, bpref, allocsiz)
	struct fs *fs;
	struct cg *cgp;
	ufs2_daddr_t bpref;
	int allocsiz;
{
	ufs1_daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;
	u_int8_t *blksfree;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	blksfree = cg_blksfree(cgp);
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((u_int)len, (u_char *)&blksfree[start],
	    (u_char *)fragtbl[fs->fs_frag],
	    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len, (u_char *)&blksfree[0],
		    (u_char *)fragtbl[fs->fs_frag],
		    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("ffs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, blksfree, bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
	panic("ffs_alloccg: block not in map");
	return (-1);
}

/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 */
void
ffs_clusteracct(ump, fs, cgp, blkno, cnt)
	struct ufsmount *ump;
	struct fs *fs;
	struct cg *cgp;
	ufs1_daddr_t blkno;
	int cnt;
{
	int32_t *sump;
	int32_t *lp;
	u_char *freemapp, *mapp;
	int i, start, end, forw, back, map, bit;

	mtx_assert(UFS_MTX(ump), MA_OWNED);

	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp);
	sump = cg_clustersum(cgp);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= cgp->cg_nclusterblks)
		end = cgp->cg_nclusterblks;
	mapp = &freemapp[start / NBBY];
	map = *mapp++;
	bit = 1 << (start % NBBY);
	for (i = start; i < end; i++) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	mapp = &freemapp[start / NBBY];
	map = *mapp--;
	bit = 1 << (start % NBBY);
	for (i = start; i > end; i--) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != 0) {
			bit >>= 1;
		} else {
			map = *mapp--;
			bit = 1 << (NBBY - 1);
		}
	}
	back = start - i;
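	/*
	 * Worked example: with fs_contigsumsize == 3, freeing a block
	 * with a free run of 2 ahead of it (forw == 2) and a free run
	 * of 1 behind it (back == 1) merges the three pieces into one
	 * cluster of length 4, recorded below as sump[3] += cnt (the
	 * length is capped at fs_contigsumsize), sump[2] -= cnt, and
	 * sump[1] -= cnt.
	 */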
	/*
	 * Account for old cluster and the possibly new forward and
	 * back clusters.
	 */
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	sump[i] += cnt;
	if (back > 0)
		sump[back] -= cnt;
	if (forw > 0)
		sump[forw] -= cnt;
	/*
	 * Update cluster summary information.
	 */
	lp = &sump[fs->fs_contigsumsize];
	for (i = fs->fs_contigsumsize; i > 0; i--)
		if (*lp-- > 0)
			break;
	fs->fs_maxcluster[cgp->cg_cgx] = i;
}

/*
 * Fserr prints the name of a filesystem with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
static void
ffs_fserr(fs, inum, cp)
	struct fs *fs;
	ino_t inum;
	char *cp;
{
	struct thread *td = curthread;	/* XXX */
	struct proc *p = td->td_proc;

	log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n",
	    p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp);
}

/*
 * This function provides the capability for the fsck program to
 * update an active filesystem. Eleven operations are provided:
 *
 * adjrefcnt(inode, amt) - adjusts the reference count on the
 *	specified inode by the specified amount. Under normal
 *	operation the count should always go down. Decrementing
 *	the count to zero will cause the inode to be freed.
 * adjblkcnt(inode, amt) - adjusts the number of blocks used by the
 *	specified inode by the specified amount.
 * adjndir, adjnbfree, adjnifree, adjnffree, adjnumclusters(amt) -
 *	adjust the superblock summary.
 * freedirs(inode, count) - directory inodes [inode..inode + count - 1]
 *	are marked as free. Inodes should never have to be marked
 *	as in use.
 * freefiles(inode, count) - file inodes [inode..inode + count - 1]
 *	are marked as free. Inodes should never have to be marked
 *	as in use.
 * freeblks(blockno, size) - blocks [blockno..blockno + size - 1]
 *	are marked as free. Blocks should never have to be marked
 *	as in use.
 * setflags(flags, set/clear) - the fs_flags field has the specified
 *	flags set (second parameter +1) or cleared (second parameter -1).
 */
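/*
 * Illustrative sketch of the userland side (not part of this file;
 * the variable names fd and ino are hypothetical): a tool such as
 * fsck_ffs(8), holding an open descriptor on a file within the target
 * filesystem, would drive one of these operations roughly as follows:
 *
 *	struct fsck_cmd cmd;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.version = FFS_CMD_VERSION;
 *	cmd.handle = fd;	(descriptor identifying the filesystem)
 *	cmd.value = ino;	(inode whose reference count to adjust)
 *	cmd.size = -1;		(signed adjustment amount)
 *	if (sysctlbyname("vfs.ffs.adjrefcnt", NULL, NULL,
 *	    &cmd, sizeof(cmd)) == -1)
 *		err(1, "adjrefcnt");
 */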

static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);

SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT,
	0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust Inode Used Blocks Count");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of directories");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NBFREE, adjnbfree, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of free blocks");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NIFREE, adjnifree, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of free inodes");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NFFREE, adjnffree, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of free frags");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NUMCLUSTERS, adjnumclusters, CTLFLAG_WR,
	sysctl_ffs_fsck, "Adjust number of free clusters");

static SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR,
	sysctl_ffs_fsck, "Free Range of Directory Inodes");

static SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR,
	sysctl_ffs_fsck, "Free Range of File Inodes");

static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR,
	sysctl_ffs_fsck, "Free Range of Blocks");

static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR,
	sysctl_ffs_fsck, "Change Filesystem Flags");

#ifdef DEBUG
static int fsckcmds = 0;
SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
#endif /* DEBUG */

static int
sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
{
	struct fsck_cmd cmd;
	struct ufsmount *ump;
	struct vnode *vp;
	struct inode *ip;
	struct mount *mp;
	struct fs *fs;
	ufs2_daddr_t blkno;
	long blkcnt, blksize;
	struct file *fp;
	int filetype, error;

	if (req->newlen > sizeof cmd)
		return (EBADRPC);
	if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0)
		return (error);
	if (cmd.version != FFS_CMD_VERSION)
		return (ERPCMISMATCH);
	if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0)
		return (error);
	vn_start_write(fp->f_data, &mp, V_WAIT);
	if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
		vn_finished_write(mp);
		fdrop(fp, curthread);
		return (EINVAL);
	}
	if (mp->mnt_flag & MNT_RDONLY) {
		vn_finished_write(mp);
		fdrop(fp, curthread);
		return (EROFS);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	filetype = IFREG;

	switch (oidp->oid_number) {

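	/*
	 * oidp->oid_number identifies which of the vfs.ffs.* OIDs was
	 * written, so this single handler serves all of the operations
	 * declared above; cmd.value and cmd.size carry the arguments
	 * described in the comment preceding the OID declarations.
	 */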
"set" : "clear"); 2379 #endif /* DEBUG */ 2380 if (cmd.size > 0) 2381 fs->fs_flags |= (long)cmd.value; 2382 else 2383 fs->fs_flags &= ~(long)cmd.value; 2384 break; 2385 2386 case FFS_ADJ_REFCNT: 2387 #ifdef DEBUG 2388 if (fsckcmds) { 2389 printf("%s: adjust inode %jd count by %jd\n", 2390 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2391 (intmax_t)cmd.size); 2392 } 2393 #endif /* DEBUG */ 2394 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2395 break; 2396 ip = VTOI(vp); 2397 ip->i_nlink += cmd.size; 2398 DIP_SET(ip, i_nlink, ip->i_nlink); 2399 ip->i_effnlink += cmd.size; 2400 ip->i_flag |= IN_CHANGE; 2401 if (DOINGSOFTDEP(vp)) 2402 softdep_change_linkcnt(ip); 2403 vput(vp); 2404 break; 2405 2406 case FFS_ADJ_BLKCNT: 2407 #ifdef DEBUG 2408 if (fsckcmds) { 2409 printf("%s: adjust inode %jd block count by %jd\n", 2410 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2411 (intmax_t)cmd.size); 2412 } 2413 #endif /* DEBUG */ 2414 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2415 break; 2416 ip = VTOI(vp); 2417 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size); 2418 ip->i_flag |= IN_CHANGE; 2419 vput(vp); 2420 break; 2421 2422 case FFS_DIR_FREE: 2423 filetype = IFDIR; 2424 /* fall through */ 2425 2426 case FFS_FILE_FREE: 2427 #ifdef DEBUG 2428 if (fsckcmds) { 2429 if (cmd.size == 1) 2430 printf("%s: free %s inode %d\n", 2431 mp->mnt_stat.f_mntonname, 2432 filetype == IFDIR ? "directory" : "file", 2433 (ino_t)cmd.value); 2434 else 2435 printf("%s: free %s inodes %d-%d\n", 2436 mp->mnt_stat.f_mntonname, 2437 filetype == IFDIR ? "directory" : "file", 2438 (ino_t)cmd.value, 2439 (ino_t)(cmd.value + cmd.size - 1)); 2440 } 2441 #endif /* DEBUG */ 2442 while (cmd.size > 0) { 2443 if ((error = ffs_freefile(ump, fs, ump->um_devvp, 2444 cmd.value, filetype))) 2445 break; 2446 cmd.size -= 1; 2447 cmd.value += 1; 2448 } 2449 break; 2450 2451 case FFS_BLK_FREE: 2452 #ifdef DEBUG 2453 if (fsckcmds) { 2454 if (cmd.size == 1) 2455 printf("%s: free block %jd\n", 2456 mp->mnt_stat.f_mntonname, 2457 (intmax_t)cmd.value); 2458 else 2459 printf("%s: free blocks %jd-%jd\n", 2460 mp->mnt_stat.f_mntonname, 2461 (intmax_t)cmd.value, 2462 (intmax_t)cmd.value + cmd.size - 1); 2463 } 2464 #endif /* DEBUG */ 2465 blkno = cmd.value; 2466 blkcnt = cmd.size; 2467 blksize = fs->fs_frag - (blkno % fs->fs_frag); 2468 while (blkcnt > 0) { 2469 if (blksize > blkcnt) 2470 blksize = blkcnt; 2471 ffs_blkfree(ump, fs, ump->um_devvp, blkno, 2472 blksize * fs->fs_fsize, ROOTINO); 2473 blkno += blksize; 2474 blkcnt -= blksize; 2475 blksize = fs->fs_frag; 2476 } 2477 break; 2478 2479 /* 2480 * Adjust superblock summaries. fsck(8) is expected to 2481 * submit deltas when necessary. 
		while (blkcnt > 0) {
			if (blksize > blkcnt)
				blksize = blkcnt;
			ffs_blkfree(ump, fs, ump->um_devvp, blkno,
			    blksize * fs->fs_fsize, ROOTINO);
			blkno += blksize;
			blkcnt -= blksize;
			blksize = fs->fs_frag;
		}
		break;

	/*
	 * Adjust superblock summaries. fsck(8) is expected to
	 * submit deltas when necessary.
	 */
	case FFS_ADJ_NDIR:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of directories by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_ndir += cmd.value;
		break;
	case FFS_ADJ_NBFREE:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free blocks by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_nbfree += cmd.value;
		break;
	case FFS_ADJ_NIFREE:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free inodes by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_nifree += cmd.value;
		break;
	case FFS_ADJ_NFFREE:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free frags by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_nffree += cmd.value;
		break;
	case FFS_ADJ_NUMCLUSTERS:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free clusters by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_numclusters += cmd.value;
		break;

	default:
#ifdef DEBUG
		if (fsckcmds) {
			printf("Invalid request %d from fsck\n",
			    oidp->oid_number);
		}
#endif /* DEBUG */
		error = EINVAL;
		break;

	}
	fdrop(fp, curthread);
	vn_finished_write(mp);
	return (error);
}