/*-
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_quota.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <security/audit/audit.h>

#include <ufs/ufs/dir.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

typedef ufs2_daddr_t allocfcn_t(struct inode *ip, u_int cg, ufs2_daddr_t bpref,
                    int size);

static ufs2_daddr_t ffs_alloccg(struct inode *, u_int, ufs2_daddr_t, int);
static ufs2_daddr_t
        ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t);
#ifdef INVARIANTS
static int      ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int);
static void     ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *,
                    ufs1_daddr_t, int);
static ino_t    ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, u_int, ufs2_daddr_t,
                    int, int);
static void     ffs_fserr(struct fs *, ino_t, char *);
static ufs2_daddr_t ffs_hashalloc
                (struct inode *, u_int, ufs2_daddr_t, int, allocfcn_t *);
static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_int, ufs2_daddr_t, int);
static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
static int      ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
static int      ffs_reallocblks_ufs2(struct vop_reallocblks_args *);

/*
 * Allocate a block in the filesystem.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
int
ffs_alloc(ip, lbn, bpref, size, flags, cred, bnp)
        struct inode *ip;
        ufs2_daddr_t lbn, bpref;
        int size, flags;
        struct ucred *cred;
        ufs2_daddr_t *bnp;
{
        struct fs *fs;
        struct ufsmount *ump;
        ufs2_daddr_t bno;
        u_int cg, reclaimed;
        static struct timeval lastfail;
        static int curfail;
        int64_t delta;
#ifdef QUOTA
        int error;
#endif

        *bnp = 0;
        fs = ip->i_fs;
        ump = ip->i_ump;
        mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
        if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
                printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
                    devtoname(ip->i_dev), (long)fs->fs_bsize, size,
                    fs->fs_fsmnt);
                panic("ffs_alloc: bad size");
        }
        if (cred == NOCRED)
                panic("ffs_alloc: missing credential");
#endif /* INVARIANTS */
        reclaimed = 0;
retry:
#ifdef QUOTA
        UFS_UNLOCK(ump);
        error = chkdq(ip, btodb(size), cred, 0);
        if (error)
                return (error);
        UFS_LOCK(ump);
#endif
        if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
                goto nospace;
        if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0) &&
            freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
                goto nospace;
        if (bpref >= fs->fs_size)
                bpref = 0;
        if (bpref == 0)
                cg = ino_to_cg(fs, ip->i_number);
        else
                cg = dtog(fs, bpref);
        bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
        if (bno > 0) {
                delta = btodb(size);
                if (ip->i_flag & IN_SPACECOUNTED) {
                        UFS_LOCK(ump);
                        fs->fs_pendingblocks += delta;
                        UFS_UNLOCK(ump);
                }
                DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
                if (flags & IO_EXT)
                        ip->i_flag |= IN_CHANGE;
                else
                        ip->i_flag |= IN_CHANGE | IN_UPDATE;
                *bnp = bno;
                return (0);
        }
nospace:
#ifdef QUOTA
        UFS_UNLOCK(ump);
        /*
         * Restore user's disk quota because allocation failed.
         */
        (void) chkdq(ip, -btodb(size), cred, FORCE);
        UFS_LOCK(ump);
#endif
        if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
                reclaimed = 1;
                softdep_request_cleanup(fs, ITOV(ip));
                goto retry;
        }
        UFS_UNLOCK(ump);
        if (ppsratecheck(&lastfail, &curfail, 1)) {
                ffs_fserr(fs, ip->i_number, "filesystem full");
                uprintf("\n%s: write failed, filesystem is full\n",
                    fs->fs_fsmnt);
        }
        return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
int
ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp)
        struct inode *ip;
        ufs2_daddr_t lbprev;
        ufs2_daddr_t bprev;
        ufs2_daddr_t bpref;
        int osize, nsize, flags;
        struct ucred *cred;
        struct buf **bpp;
{
        struct vnode *vp;
        struct fs *fs;
        struct buf *bp;
        struct ufsmount *ump;
        u_int cg, request, reclaimed;
        int error;
        ufs2_daddr_t bno;
        static struct timeval lastfail;
        static int curfail;
        int64_t delta;

        *bpp = 0;
        vp = ITOV(ip);
        fs = ip->i_fs;
        bp = NULL;
        ump = ip->i_ump;
        mtx_assert(UFS_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
        if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
                panic("ffs_realloccg: allocation on suspended filesystem");
        if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
            (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
                printf(
                "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
                    devtoname(ip->i_dev), (long)fs->fs_bsize, osize,
                    nsize, fs->fs_fsmnt);
                panic("ffs_realloccg: bad size");
        }
        if (cred == NOCRED)
                panic("ffs_realloccg: missing credential");
#endif /* INVARIANTS */
        reclaimed = 0;
retry:
        if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0) &&
            freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) {
                goto nospace;
        }
        if (bprev == 0) {
                printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
                    devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev,
                    fs->fs_fsmnt);
                panic("ffs_realloccg: bad bprev");
        }
        UFS_UNLOCK(ump);
        /*
         * Allocate the extra space in the buffer.
         */
        error = bread(vp, lbprev, osize, NOCRED, &bp);
        if (error) {
                brelse(bp);
                return (error);
        }

        if (bp->b_blkno == bp->b_lblkno) {
                if (lbprev >= NDADDR)
                        panic("ffs_realloccg: lbprev out of range");
                bp->b_blkno = fsbtodb(fs, bprev);
        }

#ifdef QUOTA
        error = chkdq(ip, btodb(nsize - osize), cred, 0);
        if (error) {
                brelse(bp);
                return (error);
        }
#endif
        /*
         * Check for extension in the existing location.
         */
        cg = dtog(fs, bprev);
        UFS_LOCK(ump);
        bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
        if (bno) {
                if (bp->b_blkno != fsbtodb(fs, bno))
                        panic("ffs_realloccg: bad blockno");
                delta = btodb(nsize - osize);
                if (ip->i_flag & IN_SPACECOUNTED) {
                        UFS_LOCK(ump);
                        fs->fs_pendingblocks += delta;
                        UFS_UNLOCK(ump);
                }
                DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
                if (flags & IO_EXT)
                        ip->i_flag |= IN_CHANGE;
                else
                        ip->i_flag |= IN_CHANGE | IN_UPDATE;
                allocbuf(bp, nsize);
                bp->b_flags |= B_DONE;
                bzero(bp->b_data + osize, nsize - osize);
                if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
                        vfs_bio_set_valid(bp, osize, nsize - osize);
                *bpp = bp;
                return (0);
        }
        /*
         * Allocate a new disk location.
         */
        if (bpref >= fs->fs_size)
                bpref = 0;
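        /*
         * The switch below chooses between the two allocation policies.
         * FS_OPTSPACE extends files with exact-sized fragments, which
         * conserves space but may force copying as the file grows;
         * FS_OPTTIME allocates a whole block and frees the unused tail,
         * trading space for fewer relocations.
         */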
        switch ((int)fs->fs_optim) {
        case FS_OPTSPACE:
                /*
                 * Allocate an exact sized fragment. Although this makes
                 * best use of space, we will waste time relocating it if
                 * the file continues to grow. If the fragmentation is
                 * less than half of the minimum free reserve, we choose
                 * to begin optimizing for time.
                 */
                request = nsize;
                if (fs->fs_minfree <= 5 ||
                    fs->fs_cstotal.cs_nffree >
                    (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
                        break;
                log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
                        fs->fs_fsmnt);
                fs->fs_optim = FS_OPTTIME;
                break;
        case FS_OPTTIME:
                /*
                 * At this point we have discovered a file that is trying to
                 * grow a small fragment to a larger fragment. To save time,
                 * we allocate a full sized block, then free the unused portion.
                 * If the file continues to grow, the `ffs_fragextend' call
                 * above will be able to grow it in place without further
                 * copying. If aberrant programs cause disk fragmentation to
                 * grow within 2% of the free reserve, we choose to begin
                 * optimizing for space.
                 */
                request = fs->fs_bsize;
                if (fs->fs_cstotal.cs_nffree <
                    (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
                        break;
                log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
                    fs->fs_fsmnt);
                fs->fs_optim = FS_OPTSPACE;
                break;
        default:
                printf("dev = %s, optim = %ld, fs = %s\n",
                    devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt);
                panic("ffs_realloccg: bad optim");
                /* NOTREACHED */
        }
        bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
        if (bno > 0) {
                bp->b_blkno = fsbtodb(fs, bno);
                if (!DOINGSOFTDEP(vp))
                        ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize,
                            ip->i_number);
                if (nsize < request)
                        ffs_blkfree(ump, fs, ip->i_devvp,
                            bno + numfrags(fs, nsize),
                            (long)(request - nsize), ip->i_number);
                delta = btodb(nsize - osize);
                if (ip->i_flag & IN_SPACECOUNTED) {
                        UFS_LOCK(ump);
                        fs->fs_pendingblocks += delta;
                        UFS_UNLOCK(ump);
                }
                DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
                if (flags & IO_EXT)
                        ip->i_flag |= IN_CHANGE;
                else
                        ip->i_flag |= IN_CHANGE | IN_UPDATE;
                allocbuf(bp, nsize);
                bp->b_flags |= B_DONE;
                bzero(bp->b_data + osize, nsize - osize);
                if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
                        vfs_bio_set_valid(bp, osize, nsize - osize);
                *bpp = bp;
                return (0);
        }
#ifdef QUOTA
        UFS_UNLOCK(ump);
        /*
         * Restore user's disk quota because allocation failed.
         */
        (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
        UFS_LOCK(ump);
#endif
nospace:
        /*
         * no space available
         */
        if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
                reclaimed = 1;
                softdep_request_cleanup(fs, vp);
                UFS_UNLOCK(ump);
                if (bp) {
                        brelse(bp);
                        bp = NULL;
                }
                UFS_LOCK(ump);
                goto retry;
        }
        UFS_UNLOCK(ump);
        if (bp)
                brelse(bp);
        if (ppsratecheck(&lastfail, &curfail, 1)) {
                ffs_fserr(fs, ip->i_number, "filesystem full");
                uprintf("\n%s: write failed, filesystem is full\n",
                    fs->fs_fsmnt);
        }
        return (ENOSPC);
}

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous are given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible
 * from the end of the allocation for the logical block immediately
 * preceding the current range. If successful, the physical block numbers
 * in the buffer pointers and in the inode are changed to reflect the new
 * allocation. If unsuccessful, the allocation is left unchanged. The
 * success in doing the reallocation is returned. Note that the error
 * return is not reflected back to the user. Rather the previous block
 * allocation will be used.
 */

SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");

static int doasyncfree = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");

static int doreallocblks = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");

#ifdef DEBUG
static volatile int prtrealloc = 0;
#endif

int
ffs_reallocblks(ap)
        struct vop_reallocblks_args /* {
                struct vnode *a_vp;
                struct cluster_save *a_buflist;
        } */ *ap;
{

        if (doreallocblks == 0)
                return (ENOSPC);
        if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1)
                return (ffs_reallocblks_ufs1(ap));
        return (ffs_reallocblks_ufs2(ap));
}

static int
ffs_reallocblks_ufs1(ap)
        struct vop_reallocblks_args /* {
                struct vnode *a_vp;
                struct cluster_save *a_buflist;
        } */ *ap;
{
        struct fs *fs;
        struct inode *ip;
        struct vnode *vp;
        struct buf *sbp, *ebp;
        ufs1_daddr_t *bap, *sbap, *ebap = 0;
        struct cluster_save *buflist;
        struct ufsmount *ump;
        ufs_lbn_t start_lbn, end_lbn;
        ufs1_daddr_t soff, newblk, blkno;
        ufs2_daddr_t pref;
        struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
        int i, len, start_lvl, end_lvl, ssize;

        vp = ap->a_vp;
        ip = VTOI(vp);
        fs = ip->i_fs;
        ump = ip->i_ump;
        if (fs->fs_contigsumsize <= 0)
                return (ENOSPC);
        buflist = ap->a_buflist;
        len = buflist->bs_nchildren;
        start_lbn = buflist->bs_children[0]->b_lblkno;
        end_lbn = start_lbn + len - 1;
#ifdef INVARIANTS
        for (i = 0; i < len; i++)
                if (!ffs_checkblk(ip,
                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                        panic("ffs_reallocblks: unallocated block 1");
        for (i = 1; i < len; i++)
                if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
                        panic("ffs_reallocblks: non-logical cluster");
        blkno = buflist->bs_children[0]->b_blkno;
        ssize = fsbtodb(fs, fs->fs_frag);
        for (i = 1; i < len - 1; i++)
                if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
                        panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
        /*
         * If the latest allocation is in a new cylinder group, assume that
         * the filesystem has decided to move and do not force it back to
         * the previous cylinder group.
         */
        if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
            dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
                return (ENOSPC);
        if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
            ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
                return (ENOSPC);
        /*
         * Get the starting offset and block map for the first block.
         */
        if (start_lvl == 0) {
                sbap = &ip->i_din1->di_db[0];
                soff = start_lbn;
        } else {
                idp = &start_ap[start_lvl - 1];
                if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
                        brelse(sbp);
                        return (ENOSPC);
                }
                sbap = (ufs1_daddr_t *)sbp->b_data;
                soff = idp->in_off;
        }
        /*
         * If the block range spans two block maps, get the second map.
         */
        if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
                ssize = len;
        } else {
#ifdef INVARIANTS
                if (start_lvl > 0 &&
                    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
                        panic("ffs_reallocblk: start == end");
#endif
                ssize = len - (idp->in_off + 1);
                if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
                        goto fail;
                ebap = (ufs1_daddr_t *)ebp->b_data;
        }
        /*
         * Find the preferred location for the cluster.
         */
        UFS_LOCK(ump);
        pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
        /*
         * Search the block map looking for an allocation of the desired size.
         */
        if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
            len, ffs_clusteralloc)) == 0) {
                UFS_UNLOCK(ump);
                goto fail;
        }
        /*
         * We have found a new contiguous block.
         *
         * First we have to replace the old block pointers with the new
         * block pointers in the inode and indirect blocks associated
         * with the file.
         */
#ifdef DEBUG
        if (prtrealloc)
                printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
                    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
        blkno = newblk;
        for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
                if (i == ssize) {
                        bap = ebap;
                        soff = -i;
                }
#ifdef INVARIANTS
                if (!ffs_checkblk(ip,
                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                        panic("ffs_reallocblks: unallocated block 2");
                if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
                        panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
                if (prtrealloc)
                        printf(" %d,", *bap);
#endif
                if (DOINGSOFTDEP(vp)) {
                        if (sbap == &ip->i_din1->di_db[0] && i < ssize)
                                softdep_setup_allocdirect(ip, start_lbn + i,
                                    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
                                    buflist->bs_children[i]);
                        else
                                softdep_setup_allocindir_page(ip, start_lbn + i,
                                    i < ssize ? sbp : ebp, soff + i, blkno,
                                    *bap, buflist->bs_children[i]);
                }
                *bap++ = blkno;
        }
        /*
         * Next we must write out the modified inode and indirect blocks.
         * For strict correctness, the writes should be synchronous since
         * the old block values may have been written to disk. In practice
         * they are almost never written, but if we are concerned about
         * strict correctness, the `doasyncfree' flag should be set to zero.
         *
         * The test on `doasyncfree' should be changed to test a flag
         * that shows whether the associated buffers and inodes have
         * been written. The flag should be set when the cluster is
         * started and cleared whenever the buffer or inode is flushed.
         * We can then check below to see if it is set, and do the
         * synchronous write only when it has been cleared.
         */
        if (sbap != &ip->i_din1->di_db[0]) {
                if (doasyncfree)
                        bdwrite(sbp);
                else
                        bwrite(sbp);
        } else {
                ip->i_flag |= IN_CHANGE | IN_UPDATE;
                if (!doasyncfree)
                        ffs_update(vp, 1);
        }
        if (ssize < len) {
                if (doasyncfree)
                        bdwrite(ebp);
                else
                        bwrite(ebp);
        }
        /*
         * Last, free the old blocks and assign the new blocks to the buffers.
         */
#ifdef DEBUG
        if (prtrealloc)
                printf("\n\tnew:");
#endif
        for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
                if (!DOINGSOFTDEP(vp))
                        ffs_blkfree(ump, fs, ip->i_devvp,
                            dbtofsb(fs, buflist->bs_children[i]->b_blkno),
                            fs->fs_bsize, ip->i_number);
                buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
                if (!ffs_checkblk(ip,
                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                        panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
                if (prtrealloc)
                        printf(" %d,", blkno);
#endif
        }
#ifdef DEBUG
        if (prtrealloc) {
                prtrealloc--;
                printf("\n");
        }
#endif
        return (0);

fail:
        if (ssize < len)
                brelse(ebp);
        if (sbap != &ip->i_din1->di_db[0])
                brelse(sbp);
        return (ENOSPC);
}

static int
ffs_reallocblks_ufs2(ap)
        struct vop_reallocblks_args /* {
                struct vnode *a_vp;
                struct cluster_save *a_buflist;
        } */ *ap;
{
        struct fs *fs;
        struct inode *ip;
        struct vnode *vp;
        struct buf *sbp, *ebp;
        ufs2_daddr_t *bap, *sbap, *ebap = 0;
        struct cluster_save *buflist;
        struct ufsmount *ump;
        ufs_lbn_t start_lbn, end_lbn;
        ufs2_daddr_t soff, newblk, blkno, pref;
        struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
        int i, len, start_lvl, end_lvl, ssize;

        vp = ap->a_vp;
        ip = VTOI(vp);
        fs = ip->i_fs;
        ump = ip->i_ump;
        if (fs->fs_contigsumsize <= 0)
                return (ENOSPC);
        buflist = ap->a_buflist;
        len = buflist->bs_nchildren;
        start_lbn = buflist->bs_children[0]->b_lblkno;
        end_lbn = start_lbn + len - 1;
#ifdef INVARIANTS
        for (i = 0; i < len; i++)
                if (!ffs_checkblk(ip,
                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                        panic("ffs_reallocblks: unallocated block 1");
        for (i = 1; i < len; i++)
                if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
                        panic("ffs_reallocblks: non-logical cluster");
        blkno = buflist->bs_children[0]->b_blkno;
        ssize = fsbtodb(fs, fs->fs_frag);
        for (i = 1; i < len - 1; i++)
                if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
                        panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
        /*
         * If the latest allocation is in a new cylinder group, assume that
         * the filesystem has decided to move and do not force it back to
         * the previous cylinder group.
         */
        if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
            dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
                return (ENOSPC);
        if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
            ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
                return (ENOSPC);
        /*
         * Get the starting offset and block map for the first block.
         */
        if (start_lvl == 0) {
                sbap = &ip->i_din2->di_db[0];
                soff = start_lbn;
        } else {
                idp = &start_ap[start_lvl - 1];
                if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
                        brelse(sbp);
                        return (ENOSPC);
                }
                sbap = (ufs2_daddr_t *)sbp->b_data;
                soff = idp->in_off;
        }
        /*
         * If the block range spans two block maps, get the second map.
         */
        if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
                ssize = len;
        } else {
#ifdef INVARIANTS
                if (start_lvl > 0 &&
                    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
                        panic("ffs_reallocblk: start == end");
#endif
                ssize = len - (idp->in_off + 1);
                if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
                        goto fail;
                ebap = (ufs2_daddr_t *)ebp->b_data;
        }
        /*
         * Find the preferred location for the cluster.
         */
        UFS_LOCK(ump);
        pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
        /*
         * Search the block map looking for an allocation of the desired size.
         */
        if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
            len, ffs_clusteralloc)) == 0) {
                UFS_UNLOCK(ump);
                goto fail;
        }
        /*
         * We have found a new contiguous block.
         *
         * First we have to replace the old block pointers with the new
         * block pointers in the inode and indirect blocks associated
         * with the file.
         */
#ifdef DEBUG
        if (prtrealloc)
                printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
                    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif
        blkno = newblk;
        for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
                if (i == ssize) {
                        bap = ebap;
                        soff = -i;
                }
#ifdef INVARIANTS
                if (!ffs_checkblk(ip,
                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                        panic("ffs_reallocblks: unallocated block 2");
                if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
                        panic("ffs_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
                if (prtrealloc)
                        printf(" %jd,", (intmax_t)*bap);
#endif
                if (DOINGSOFTDEP(vp)) {
                        if (sbap == &ip->i_din2->di_db[0] && i < ssize)
                                softdep_setup_allocdirect(ip, start_lbn + i,
                                    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
                                    buflist->bs_children[i]);
                        else
                                softdep_setup_allocindir_page(ip, start_lbn + i,
                                    i < ssize ? sbp : ebp, soff + i, blkno,
                                    *bap, buflist->bs_children[i]);
                }
                *bap++ = blkno;
        }
        /*
         * Next we must write out the modified inode and indirect blocks.
         * For strict correctness, the writes should be synchronous since
         * the old block values may have been written to disk. In practice
         * they are almost never written, but if we are concerned about
         * strict correctness, the `doasyncfree' flag should be set to zero.
         *
         * The test on `doasyncfree' should be changed to test a flag
         * that shows whether the associated buffers and inodes have
         * been written. The flag should be set when the cluster is
         * started and cleared whenever the buffer or inode is flushed.
         * We can then check below to see if it is set, and do the
         * synchronous write only when it has been cleared.
         */
        if (sbap != &ip->i_din2->di_db[0]) {
                if (doasyncfree)
                        bdwrite(sbp);
                else
                        bwrite(sbp);
        } else {
                ip->i_flag |= IN_CHANGE | IN_UPDATE;
                if (!doasyncfree)
                        ffs_update(vp, 1);
        }
        if (ssize < len) {
                if (doasyncfree)
                        bdwrite(ebp);
                else
                        bwrite(ebp);
        }
        /*
         * Last, free the old blocks and assign the new blocks to the buffers.
         */
#ifdef DEBUG
        if (prtrealloc)
                printf("\n\tnew:");
#endif
        for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
                if (!DOINGSOFTDEP(vp))
                        ffs_blkfree(ump, fs, ip->i_devvp,
                            dbtofsb(fs, buflist->bs_children[i]->b_blkno),
                            fs->fs_bsize, ip->i_number);
                buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
                if (!ffs_checkblk(ip,
                    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                        panic("ffs_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
                if (prtrealloc)
                        printf(" %jd,", (intmax_t)blkno);
#endif
        }
#ifdef DEBUG
        if (prtrealloc) {
                prtrealloc--;
                printf("\n");
        }
#endif
        return (0);

fail:
        if (ssize < len)
                brelse(ebp);
        if (sbap != &ip->i_din2->di_db[0])
                brelse(sbp);
        return (ENOSPC);
}

/*
 * Allocate an inode in the filesystem.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
int
ffs_valloc(pvp, mode, cred, vpp)
        struct vnode *pvp;
        int mode;
        struct ucred *cred;
        struct vnode **vpp;
{
        struct inode *pip;
        struct fs *fs;
        struct inode *ip;
        struct timespec ts;
        struct ufsmount *ump;
        ino_t ino, ipref;
        u_int cg;
        int error, error1;
        static struct timeval lastfail;
        static int curfail;

        *vpp = NULL;
        pip = VTOI(pvp);
        fs = pip->i_fs;
        ump = pip->i_ump;

        UFS_LOCK(ump);
        if (fs->fs_cstotal.cs_nifree == 0)
                goto noinodes;

        if ((mode & IFMT) == IFDIR)
                ipref = ffs_dirpref(pip);
        else
                ipref = pip->i_number;
        if (ipref >= fs->fs_ncg * fs->fs_ipg)
                ipref = 0;
        cg = ino_to_cg(fs, ipref);
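        /*
         * fs_contigdirs[cg] is the count consulted by ffs_dirpref()
         * below: it limits how many directories may be created in the
         * same cylinder group before new directories spill elsewhere.
         */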
        /*
         * Track the number of directories created in succession in the
         * same cylinder group, without intervening creation of regular
         * files.
         */
        if ((mode & IFMT) == IFDIR) {
                if (fs->fs_contigdirs[cg] < 255)
                        fs->fs_contigdirs[cg]++;
        } else {
                if (fs->fs_contigdirs[cg] > 0)
                        fs->fs_contigdirs[cg]--;
        }
        ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode,
            (allocfcn_t *)ffs_nodealloccg);
        if (ino == 0)
                goto noinodes;
        error = ffs_vget(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
        if (error) {
                error1 = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp,
                    FFSV_FORCEINSMQ);
                ffs_vfree(pvp, ino, mode);
                if (error1 == 0) {
                        ip = VTOI(*vpp);
                        if (ip->i_mode)
                                goto dup_alloc;
                        ip->i_flag |= IN_MODIFIED;
                        vput(*vpp);
                }
                return (error);
        }
        ip = VTOI(*vpp);
        if (ip->i_mode) {
dup_alloc:
                printf("mode = 0%o, inum = %lu, fs = %s\n",
                    ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt);
                panic("ffs_valloc: dup alloc");
        }
        if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) {  /* XXX */
                printf("free inode %s/%lu had %ld blocks\n",
                    fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks));
                DIP_SET(ip, i_blocks, 0);
        }
        ip->i_flags = 0;
        DIP_SET(ip, i_flags, 0);
        /*
         * Set up a new generation number for this inode.
         */
        if (ip->i_gen == 0 || ++ip->i_gen == 0)
                ip->i_gen = arc4random() / 2 + 1;
        DIP_SET(ip, i_gen, ip->i_gen);
        if (fs->fs_magic == FS_UFS2_MAGIC) {
                vfs_timestamp(&ts);
                ip->i_din2->di_birthtime = ts.tv_sec;
                ip->i_din2->di_birthnsec = ts.tv_nsec;
        }
        ip->i_flag = 0;
        vnode_destroy_vobject(*vpp);
        (*vpp)->v_type = VNON;
        if (fs->fs_magic == FS_UFS2_MAGIC)
                (*vpp)->v_op = &ffs_vnodeops2;
        else
                (*vpp)->v_op = &ffs_vnodeops1;
        return (0);
noinodes:
        UFS_UNLOCK(ump);
        if (ppsratecheck(&lastfail, &curfail, 1)) {
                ffs_fserr(fs, pip->i_number, "out of inodes");
                uprintf("\n%s: create/symlink failed, no inodes free\n",
                    fs->fs_fsmnt);
        }
        return (ENOSPC);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static ino_t
ffs_dirpref(pip)
        struct inode *pip;
{
        struct fs *fs;
        u_int cg, prefcg, dirsize, cgsize;
        u_int avgifree, avgbfree, avgndir, curdirsize;
        u_int minifree, minbfree, maxndir;
        u_int mincg, minndir;
        u_int maxcontigdirs;

        mtx_assert(UFS_MTX(pip->i_ump), MA_OWNED);
        fs = pip->i_fs;

        avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
        avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
        avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

        /*
         * Force allocation in another cg if creating a first level dir.
         */
        ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref");
        if (ITOV(pip)->v_vflag & VV_ROOT) {
                prefcg = arc4random() % fs->fs_ncg;
                mincg = prefcg;
                minndir = fs->fs_ipg;
                for (cg = prefcg; cg < fs->fs_ncg; cg++)
                        if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
                            fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
                            fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                                mincg = cg;
                                minndir = fs->fs_cs(fs, cg).cs_ndir;
                        }
                for (cg = 0; cg < prefcg; cg++)
                        if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
                            fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
                            fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                                mincg = cg;
                                minndir = fs->fs_cs(fs, cg).cs_ndir;
                        }
                return ((ino_t)(fs->fs_ipg * mincg));
        }

        /*
         * Compute the various limits used for the optimal allocation
         * of a directory inode.
         */
        maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
        minifree = avgifree - avgifree / 4;
        if (minifree < 1)
                minifree = 1;
        minbfree = avgbfree - avgbfree / 4;
        if (minbfree < 1)
                minbfree = 1;
        cgsize = fs->fs_fsize * fs->fs_fpg;
        dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
        curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
        if (dirsize < curdirsize)
                dirsize = curdirsize;
        if (dirsize <= 0)
                maxcontigdirs = 0;              /* dirsize overflowed */
        else
                maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255);
        if (fs->fs_avgfpdir > 0)
                maxcontigdirs = min(maxcontigdirs,
                    fs->fs_ipg / fs->fs_avgfpdir);
        if (maxcontigdirs == 0)
                maxcontigdirs = 1;

        /*
         * Limit number of dirs in one cg and reserve space for
         * regular files, but only if we have no deficit in
         * inodes or space.
         */
        prefcg = ino_to_cg(fs, pip->i_number);
        for (cg = prefcg; cg < fs->fs_ncg; cg++)
                if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
                    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
                    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
                        if (fs->fs_contigdirs[cg] < maxcontigdirs)
                                return ((ino_t)(fs->fs_ipg * cg));
                }
        for (cg = 0; cg < prefcg; cg++)
                if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
                    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
                    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
                        if (fs->fs_contigdirs[cg] < maxcontigdirs)
                                return ((ino_t)(fs->fs_ipg * cg));
                }
        /*
         * This is a backstop when we have a deficit in space.
         */
        for (cg = prefcg; cg < fs->fs_ncg; cg++)
                if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
                        return ((ino_t)(fs->fs_ipg * cg));
        for (cg = 0; cg < prefcg; cg++)
                if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
                        break;
        return ((ino_t)(fs->fs_ipg * cg));
}
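
/*
 * Worked example for the maxcontigdirs limit in ffs_dirpref() (the
 * numbers are illustrative, not taken from any particular filesystem):
 * with fs_bsize = 16384, avgbfree = 1000 blocks, and an expected
 * directory footprint (dirsize) of 1 MB, maxcontigdirs is
 * min(1000 * 16384 / 1048576, 255) = 15, further capped by
 * fs_ipg / fs_avgfpdir.
 */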

/*
 * Select the desired position for the next block in a file. The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is laid out
 * contiguously if possible.
 */
ufs2_daddr_t
ffs_blkpref_ufs1(ip, lbn, indx, bap)
        struct inode *ip;
        ufs_lbn_t lbn;
        int indx;
        ufs1_daddr_t *bap;
{
        struct fs *fs;
        u_int cg;
        u_int avgbfree, startcg;

        mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
        fs = ip->i_fs;
        if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
                if (lbn < NDADDR + NINDIR(fs)) {
                        cg = ino_to_cg(fs, ip->i_number);
                        return (cgbase(fs, cg) + fs->fs_frag);
                }
                /*
                 * Find a cylinder group with a greater than average
                 * number of unused data blocks.
                 */
                if (indx == 0 || bap[indx - 1] == 0)
                        startcg =
                            ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
                else
                        startcg = dtog(fs, bap[indx - 1]) + 1;
                startcg %= fs->fs_ncg;
                avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
                for (cg = startcg; cg < fs->fs_ncg; cg++)
                        if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                                fs->fs_cgrotor = cg;
                                return (cgbase(fs, cg) + fs->fs_frag);
                        }
                for (cg = 0; cg <= startcg; cg++)
                        if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                                fs->fs_cgrotor = cg;
                                return (cgbase(fs, cg) + fs->fs_frag);
                        }
                return (0);
        }
        /*
         * We just always try to lay things out contiguously.
         */
        return (bap[indx - 1] + fs->fs_frag);
}

/*
 * Same as above, but for UFS2.
 */
ufs2_daddr_t
ffs_blkpref_ufs2(ip, lbn, indx, bap)
        struct inode *ip;
        ufs_lbn_t lbn;
        int indx;
        ufs2_daddr_t *bap;
{
        struct fs *fs;
        u_int cg;
        u_int avgbfree, startcg;

        mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
        fs = ip->i_fs;
        if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
                if (lbn < NDADDR + NINDIR(fs)) {
                        cg = ino_to_cg(fs, ip->i_number);
                        return (cgbase(fs, cg) + fs->fs_frag);
                }
                /*
                 * Find a cylinder group with a greater than average
                 * number of unused data blocks.
                 */
                if (indx == 0 || bap[indx - 1] == 0)
                        startcg =
                            ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
                else
                        startcg = dtog(fs, bap[indx - 1]) + 1;
                startcg %= fs->fs_ncg;
                avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
                for (cg = startcg; cg < fs->fs_ncg; cg++)
                        if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                                fs->fs_cgrotor = cg;
                                return (cgbase(fs, cg) + fs->fs_frag);
                        }
                for (cg = 0; cg <= startcg; cg++)
                        if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                                fs->fs_cgrotor = cg;
                                return (cgbase(fs, cg) + fs->fs_frag);
                        }
                return (0);
        }
        /*
         * We just always try to lay things out contiguously.
         */
        return (bap[indx - 1] + fs->fs_frag);
}
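
/*
 * The two ffs_blkpref variants above are intentionally identical except
 * for the width of the block-pointer array they walk (ufs1_daddr_t
 * versus ufs2_daddr_t); any policy change must be applied to both.
 */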

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 *
 * Must be called with the UFS lock held. Will release the lock on success
 * and return with it held on failure.
 */
/*VARARGS5*/
static ufs2_daddr_t
ffs_hashalloc(ip, cg, pref, size, allocator)
        struct inode *ip;
        u_int cg;
        ufs2_daddr_t pref;
        int size;       /* size for data blocks, mode for inodes */
        allocfcn_t *allocator;
{
        struct fs *fs;
        ufs2_daddr_t result;
        u_int i, icg = cg;

        mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
#ifdef INVARIANTS
        if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
                panic("ffs_hashalloc: allocation on suspended filesystem");
#endif
        fs = ip->i_fs;
        /*
         * 1: preferred cylinder group
         */
        result = (*allocator)(ip, cg, pref, size);
        if (result)
                return (result);
        /*
         * 2: quadratic rehash
         */
        for (i = 1; i < fs->fs_ncg; i *= 2) {
                cg += i;
                if (cg >= fs->fs_ncg)
                        cg -= fs->fs_ncg;
                result = (*allocator)(ip, cg, 0, size);
                if (result)
                        return (result);
        }
        /*
         * 3: brute force search
         * Note that we start at i == 2, since 0 was checked initially,
         * and 1 is always checked in the quadratic rehash.
         */
        cg = (icg + 2) % fs->fs_ncg;
        for (i = 2; i < fs->fs_ncg; i++) {
                result = (*allocator)(ip, cg, 0, size);
                if (result)
                        return (result);
                cg++;
                if (cg == fs->fs_ncg)
                        cg = 0;
        }
        return (0);
}
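
/*
 * Probe order of ffs_hashalloc(), starting from group icg: the
 * quadratic rehash visits icg + 1, icg + 3, icg + 7, icg + 15, ...
 * (mod fs_ncg, the offsets growing by successive powers of two),
 * after which the brute-force pass sweeps the remaining groups in
 * order beginning at icg + 2.
 */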

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
static ufs2_daddr_t
ffs_fragextend(ip, cg, bprev, osize, nsize)
        struct inode *ip;
        u_int cg;
        ufs2_daddr_t bprev;
        int osize, nsize;
{
        struct fs *fs;
        struct cg *cgp;
        struct buf *bp;
        struct ufsmount *ump;
        int nffree;
        long bno;
        int frags, bbase;
        int i, error;
        u_int8_t *blksfree;

        ump = ip->i_ump;
        fs = ip->i_fs;
        if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
                return (0);
        frags = numfrags(fs, nsize);
        bbase = fragnum(fs, bprev);
        if (bbase > fragnum(fs, (bprev + frags - 1))) {
                /* cannot extend across a block boundary */
                return (0);
        }
        UFS_UNLOCK(ump);
        error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
            (int)fs->fs_cgsize, NOCRED, &bp);
        if (error)
                goto fail;
        cgp = (struct cg *)bp->b_data;
        if (!cg_chkmagic(cgp))
                goto fail;
        bp->b_xflags |= BX_BKGRDWRITE;
        cgp->cg_old_time = cgp->cg_time = time_second;
        bno = dtogd(fs, bprev);
        blksfree = cg_blksfree(cgp);
        for (i = numfrags(fs, osize); i < frags; i++)
                if (isclr(blksfree, bno + i))
                        goto fail;
        /*
         * The current fragment can be extended:
         * deduct the count on the fragment being extended into,
         * increase the count on the remaining fragment (if any),
         * and allocate the extended piece.
         */
        for (i = frags; i < fs->fs_frag - bbase; i++)
                if (isclr(blksfree, bno + i))
                        break;
        cgp->cg_frsum[i - numfrags(fs, osize)]--;
        if (i != frags)
                cgp->cg_frsum[i - frags]++;
        for (i = numfrags(fs, osize), nffree = 0; i < frags; i++) {
                clrbit(blksfree, bno + i);
                cgp->cg_cs.cs_nffree--;
                nffree++;
        }
        UFS_LOCK(ump);
        fs->fs_cstotal.cs_nffree -= nffree;
        fs->fs_cs(fs, cg).cs_nffree -= nffree;
        fs->fs_fmod = 1;
        ACTIVECLEAR(fs, cg);
        UFS_UNLOCK(ump);
        if (DOINGSOFTDEP(ITOV(ip)))
                softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev);
        bdwrite(bp);
        return (bprev);

fail:
        brelse(bp);
        UFS_LOCK(ump);
        return (0);
}
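
/*
 * cg_frsum[n] counts the free runs of exactly n fragments in the
 * group, so the bookkeeping in ffs_fragextend() removes one run of
 * the length found after the fragment being extended and, when that
 * run was longer than the extension needs, adds back a run for the
 * remainder.
 */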

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static ufs2_daddr_t
ffs_alloccg(ip, cg, bpref, size)
        struct inode *ip;
        u_int cg;
        ufs2_daddr_t bpref;
        int size;
{
        struct fs *fs;
        struct cg *cgp;
        struct buf *bp;
        struct ufsmount *ump;
        ufs1_daddr_t bno;
        ufs2_daddr_t blkno;
        int i, allocsiz, error, frags;
        u_int8_t *blksfree;

        ump = ip->i_ump;
        fs = ip->i_fs;
        if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
                return (0);
        UFS_UNLOCK(ump);
        error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
            (int)fs->fs_cgsize, NOCRED, &bp);
        if (error)
                goto fail;
        cgp = (struct cg *)bp->b_data;
        if (!cg_chkmagic(cgp) ||
            (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
                goto fail;
        bp->b_xflags |= BX_BKGRDWRITE;
        cgp->cg_old_time = cgp->cg_time = time_second;
        if (size == fs->fs_bsize) {
                UFS_LOCK(ump);
                blkno = ffs_alloccgblk(ip, bp, bpref);
                ACTIVECLEAR(fs, cg);
                UFS_UNLOCK(ump);
                bdwrite(bp);
                return (blkno);
        }
        /*
         * Check to see if any fragments are already available.
         * allocsiz is the size which will be allocated, hacking
         * it down to a smaller size if necessary.
         */
        blksfree = cg_blksfree(cgp);
        frags = numfrags(fs, size);
        for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
                if (cgp->cg_frsum[allocsiz] != 0)
                        break;
        if (allocsiz == fs->fs_frag) {
                /*
                 * No fragments were available, so a block will be
                 * allocated and hacked up.
                 */
                if (cgp->cg_cs.cs_nbfree == 0)
                        goto fail;
                UFS_LOCK(ump);
                blkno = ffs_alloccgblk(ip, bp, bpref);
                bno = dtogd(fs, blkno);
                for (i = frags; i < fs->fs_frag; i++)
                        setbit(blksfree, bno + i);
                i = fs->fs_frag - frags;
                cgp->cg_cs.cs_nffree += i;
                fs->fs_cstotal.cs_nffree += i;
                fs->fs_cs(fs, cg).cs_nffree += i;
                fs->fs_fmod = 1;
                cgp->cg_frsum[i]++;
                ACTIVECLEAR(fs, cg);
                UFS_UNLOCK(ump);
                bdwrite(bp);
                return (blkno);
        }
        bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
        if (bno < 0)
                goto fail;
        for (i = 0; i < frags; i++)
                clrbit(blksfree, bno + i);
        cgp->cg_cs.cs_nffree -= frags;
        cgp->cg_frsum[allocsiz]--;
        if (frags != allocsiz)
                cgp->cg_frsum[allocsiz - frags]++;
        UFS_LOCK(ump);
        fs->fs_cstotal.cs_nffree -= frags;
        fs->fs_cs(fs, cg).cs_nffree -= frags;
        fs->fs_fmod = 1;
        blkno = cgbase(fs, cg) + bno;
        ACTIVECLEAR(fs, cg);
        UFS_UNLOCK(ump);
        if (DOINGSOFTDEP(ITOV(ip)))
                softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
        bdwrite(bp);
        return (blkno);

fail:
        brelse(bp);
        UFS_LOCK(ump);
        return (0);
}
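
/*
 * Fragment example for ffs_alloccg() (illustrative sizes): on a
 * filesystem with 8 fragments per block, a 3-fragment request first
 * looks for an existing free run via cg_frsum[3] through cg_frsum[7];
 * only when none exists is a whole block split, leaving a 5-fragment
 * run that is recorded in cg_frsum[5].
 */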

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
static ufs2_daddr_t
ffs_alloccgblk(ip, bp, bpref)
        struct inode *ip;
        struct buf *bp;
        ufs2_daddr_t bpref;
{
        struct fs *fs;
        struct cg *cgp;
        struct ufsmount *ump;
        ufs1_daddr_t bno;
        ufs2_daddr_t blkno;
        u_int8_t *blksfree;

        fs = ip->i_fs;
        ump = ip->i_ump;
        mtx_assert(UFS_MTX(ump), MA_OWNED);
        cgp = (struct cg *)bp->b_data;
        blksfree = cg_blksfree(cgp);
        if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
                bpref = cgp->cg_rotor;
        } else {
                bpref = blknum(fs, bpref);
                bno = dtogd(fs, bpref);
                /*
                 * if the requested block is available, use it
                 */
                if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
                        goto gotit;
        }
        /*
         * Take the next available block in this cylinder group.
         */
        bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
        if (bno < 0)
                return (0);
        cgp->cg_rotor = bno;
gotit:
        blkno = fragstoblks(fs, bno);
        ffs_clrblock(fs, blksfree, (long)blkno);
        ffs_clusteracct(ump, fs, cgp, blkno, -1);
        cgp->cg_cs.cs_nbfree--;
        fs->fs_cstotal.cs_nbfree--;
        fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
        fs->fs_fmod = 1;
        blkno = cgbase(fs, cgp->cg_cgx) + bno;
        /* XXX Fixme. */
        UFS_UNLOCK(ump);
        if (DOINGSOFTDEP(ITOV(ip)))
                softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
        UFS_LOCK(ump);
        return (blkno);
}

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group. Instead we just
 * take the first one that we find following bpref.
 */
static ufs2_daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
        struct inode *ip;
        u_int cg;
        ufs2_daddr_t bpref;
        int len;
{
        struct fs *fs;
        struct cg *cgp;
        struct buf *bp;
        struct ufsmount *ump;
        int i, run, bit, map, got;
        ufs2_daddr_t bno;
        u_char *mapp;
        int32_t *lp;
        u_int8_t *blksfree;

        fs = ip->i_fs;
        ump = ip->i_ump;
        if (fs->fs_maxcluster[cg] < len)
                return (0);
        UFS_UNLOCK(ump);
        if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
            NOCRED, &bp))
                goto fail_lock;
        cgp = (struct cg *)bp->b_data;
        if (!cg_chkmagic(cgp))
                goto fail_lock;
        bp->b_xflags |= BX_BKGRDWRITE;
        /*
         * Check to see if a cluster of the needed size (or bigger) is
         * available in this cylinder group.
         */
        lp = &cg_clustersum(cgp)[len];
        for (i = len; i <= fs->fs_contigsumsize; i++)
                if (*lp++ > 0)
                        break;
        if (i > fs->fs_contigsumsize) {
                /*
                 * This is the first time looking for a cluster in this
                 * cylinder group. Update the cluster summary information
                 * to reflect the true maximum sized cluster so that
                 * future cluster allocation requests can avoid reading
                 * the cylinder group map only to find no clusters.
                 */
                lp = &cg_clustersum(cgp)[len - 1];
                for (i = len - 1; i > 0; i--)
                        if (*lp-- > 0)
                                break;
                UFS_LOCK(ump);
                fs->fs_maxcluster[cg] = i;
                goto fail;
        }
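        /*
         * cg_clustersum[i] counts the free block runs of length i
         * (capped at fs_contigsumsize), so the scan above succeeds as
         * soon as a run of len or more blocks exists, and the fallback
         * just taken records the true maximum run length so future
         * callers can skip this group without reading its map.
         */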
        /*
         * Search the cluster map to find a big enough cluster.
         * We take the first one that we find, even if it is larger
         * than we need as we prefer to get one close to the previous
         * block allocation. We do not search before the current
         * preference point as we do not want to allocate a block
         * that is allocated before the previous one (as we will
         * then have to wait for another pass of the elevator
         * algorithm before it will be read). We prefer to fail and
         * be recalled to try an allocation in the next cylinder group.
         */
        if (dtog(fs, bpref) != cg)
                bpref = 0;
        else
                bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
        mapp = &cg_clustersfree(cgp)[bpref / NBBY];
        map = *mapp++;
        bit = 1 << (bpref % NBBY);
        for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
                if ((map & bit) == 0) {
                        run = 0;
                } else {
                        run++;
                        if (run == len)
                                break;
                }
                if ((got & (NBBY - 1)) != (NBBY - 1)) {
                        bit <<= 1;
                } else {
                        map = *mapp++;
                        bit = 1;
                }
        }
        if (got >= cgp->cg_nclusterblks)
                goto fail_lock;
        /*
         * Allocate the cluster that we have found.
         */
        blksfree = cg_blksfree(cgp);
        for (i = 1; i <= len; i++)
                if (!ffs_isblock(fs, blksfree, got - run + i))
                        panic("ffs_clusteralloc: map mismatch");
        bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1);
        if (dtog(fs, bno) != cg)
                panic("ffs_clusteralloc: allocated out of group");
        len = blkstofrags(fs, len);
        UFS_LOCK(ump);
        for (i = 0; i < len; i += fs->fs_frag)
                if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
                        panic("ffs_clusteralloc: lost block");
        ACTIVECLEAR(fs, cg);
        UFS_UNLOCK(ump);
        bdwrite(bp);
        return (bno);

fail_lock:
        UFS_LOCK(ump);
fail:
        brelse(bp);
        return (0);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
static ufs2_daddr_t
ffs_nodealloccg(ip, cg, ipref, mode)
        struct inode *ip;
        u_int cg;
        ufs2_daddr_t ipref;
        int mode;
{
        struct fs *fs;
        struct cg *cgp;
        struct buf *bp, *ibp;
        struct ufsmount *ump;
        u_int8_t *inosused;
        struct ufs2_dinode *dp2;
        int error, start, len, loc, map, i;

        fs = ip->i_fs;
        ump = ip->i_ump;
        if (fs->fs_cs(fs, cg).cs_nifree == 0)
                return (0);
        UFS_UNLOCK(ump);
        error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
            (int)fs->fs_cgsize, NOCRED, &bp);
        if (error) {
                brelse(bp);
                UFS_LOCK(ump);
                return (0);
        }
        cgp = (struct cg *)bp->b_data;
        if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
                brelse(bp);
                UFS_LOCK(ump);
                return (0);
        }
        bp->b_xflags |= BX_BKGRDWRITE;
        cgp->cg_old_time = cgp->cg_time = time_second;
        inosused = cg_inosused(cgp);
        if (ipref) {
                ipref %= fs->fs_ipg;
                if (isclr(inosused, ipref))
                        goto gotit;
        }
        start = cgp->cg_irotor / NBBY;
        len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
        loc = skpc(0xff, len, &inosused[start]);
        if (loc == 0) {
                len = start + 1;
                start = 0;
                loc = skpc(0xff, len, &inosused[0]);
                if (loc == 0) {
                        printf("cg = %d, irotor = %ld, fs = %s\n",
                            cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
                        panic("ffs_nodealloccg: map corrupted");
                        /* NOTREACHED */
                }
        }
        i = start + len - loc;
        map = inosused[i];
        ipref = i * NBBY;
        for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
                if ((map & i) == 0) {
                        cgp->cg_irotor = ipref;
                        goto gotit;
                }
        }
        printf("fs = %s\n", fs->fs_fsmnt);
        panic("ffs_nodealloccg: block not in map");
        /* NOTREACHED */
gotit:
        /*
         * Check to see if we need to initialize more inodes.
         */
        ibp = NULL;
        if (fs->fs_magic == FS_UFS2_MAGIC &&
            ipref + INOPB(fs) > cgp->cg_initediblk &&
            cgp->cg_initediblk < cgp->cg_niblk) {
                ibp = getblk(ip->i_devvp, fsbtodb(fs,
                    ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
                    (int)fs->fs_bsize, 0, 0, 0);
                bzero(ibp->b_data, (int)fs->fs_bsize);
                dp2 = (struct ufs2_dinode *)(ibp->b_data);
                for (i = 0; i < INOPB(fs); i++) {
                        dp2->di_gen = arc4random() / 2 + 1;
                        dp2++;
                }
                cgp->cg_initediblk += INOPB(fs);
        }
        UFS_LOCK(ump);
        ACTIVECLEAR(fs, cg);
        setbit(inosused, ipref);
        cgp->cg_cs.cs_nifree--;
        fs->fs_cstotal.cs_nifree--;
        fs->fs_cs(fs, cg).cs_nifree--;
        fs->fs_fmod = 1;
        if ((mode & IFMT) == IFDIR) {
                cgp->cg_cs.cs_ndir++;
                fs->fs_cstotal.cs_ndir++;
                fs->fs_cs(fs, cg).cs_ndir++;
        }
        UFS_UNLOCK(ump);
        if (DOINGSOFTDEP(ITOV(ip)))
                softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
        bdwrite(bp);
        if (ibp != NULL)
                bawrite(ibp);
        return ((ino_t)(cg * fs->fs_ipg + ipref));
}

/*
 * Check whether any fragment of a block is marked free in the map;
 * return non-zero only when every fragment of the block is still
 * allocated, i.e. when the block may safely be freed as a unit.
 */
static int
ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
{

        switch ((int)fs->fs_frag) {
        case 8:
                return (cp[h] == 0);
        case 4:
                return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
        case 2:
                return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
        case 1:
                return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
        default:
                panic("ffs_isfreeblock");
        }
        return (0);
}
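
/*
 * The cases above reflect how blocks pack into the fragment bitmap:
 * with 8 fragments per block each block occupies a full byte of the
 * map, with 4 a nibble, with 2 a bit pair, and with 1 a single bit.
 */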
1846 * 1847 * The specified block or fragment is placed back in the 1848 * free map. If a fragment is deallocated, a possible 1849 * block reassembly is checked. 1850 */ 1851 void 1852 ffs_blkfree(ump, fs, devvp, bno, size, inum) 1853 struct ufsmount *ump; 1854 struct fs *fs; 1855 struct vnode *devvp; 1856 ufs2_daddr_t bno; 1857 long size; 1858 ino_t inum; 1859 { 1860 struct cg *cgp; 1861 struct buf *bp; 1862 ufs1_daddr_t fragno, cgbno; 1863 ufs2_daddr_t cgblkno; 1864 int i, blk, frags, bbase; 1865 u_int cg; 1866 u_int8_t *blksfree; 1867 struct cdev *dev; 1868 1869 cg = dtog(fs, bno); 1870 if (devvp->v_type == VREG) { 1871 /* devvp is a snapshot */ 1872 dev = VTOI(devvp)->i_devvp->v_rdev; 1873 cgblkno = fragstoblks(fs, cgtod(fs, cg)); 1874 } else { 1875 /* devvp is a normal disk device */ 1876 dev = devvp->v_rdev; 1877 cgblkno = fsbtodb(fs, cgtod(fs, cg)); 1878 ASSERT_VOP_LOCKED(devvp, "ffs_blkfree"); 1879 if ((devvp->v_vflag & VV_COPYONWRITE) && 1880 ffs_snapblkfree(fs, devvp, bno, size, inum)) 1881 return; 1882 } 1883 #ifdef INVARIANTS 1884 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || 1885 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 1886 printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n", 1887 devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, 1888 size, fs->fs_fsmnt); 1889 panic("ffs_blkfree: bad size"); 1890 } 1891 #endif 1892 if ((u_int)bno >= fs->fs_size) { 1893 printf("bad block %jd, ino %lu\n", (intmax_t)bno, 1894 (u_long)inum); 1895 ffs_fserr(fs, inum, "bad block"); 1896 return; 1897 } 1898 if (bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp)) { 1899 brelse(bp); 1900 return; 1901 } 1902 cgp = (struct cg *)bp->b_data; 1903 if (!cg_chkmagic(cgp)) { 1904 brelse(bp); 1905 return; 1906 } 1907 bp->b_xflags |= BX_BKGRDWRITE; 1908 cgp->cg_old_time = cgp->cg_time = time_second; 1909 cgbno = dtogd(fs, bno); 1910 blksfree = cg_blksfree(cgp); 1911 UFS_LOCK(ump); 1912 if (size == fs->fs_bsize) { 1913 fragno = fragstoblks(fs, cgbno); 1914 if (!ffs_isfreeblock(fs, blksfree, fragno)) { 1915 if (devvp->v_type == VREG) { 1916 UFS_UNLOCK(ump); 1917 /* devvp is a snapshot */ 1918 brelse(bp); 1919 return; 1920 } 1921 printf("dev = %s, block = %jd, fs = %s\n", 1922 devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); 1923 panic("ffs_blkfree: freeing free block"); 1924 } 1925 ffs_setblock(fs, blksfree, fragno); 1926 ffs_clusteracct(ump, fs, cgp, fragno, 1); 1927 cgp->cg_cs.cs_nbfree++; 1928 fs->fs_cstotal.cs_nbfree++; 1929 fs->fs_cs(fs, cg).cs_nbfree++; 1930 } else { 1931 bbase = cgbno - fragnum(fs, cgbno); 1932 /* 1933 * decrement the counts associated with the old frags 1934 */ 1935 blk = blkmap(fs, blksfree, bbase); 1936 ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 1937 /* 1938 * deallocate the fragment 1939 */ 1940 frags = numfrags(fs, size); 1941 for (i = 0; i < frags; i++) { 1942 if (isset(blksfree, cgbno + i)) { 1943 printf("dev = %s, block = %jd, fs = %s\n", 1944 devtoname(dev), (intmax_t)(bno + i), 1945 fs->fs_fsmnt); 1946 panic("ffs_blkfree: freeing free frag"); 1947 } 1948 setbit(blksfree, cgbno + i); 1949 } 1950 cgp->cg_cs.cs_nffree += i; 1951 fs->fs_cstotal.cs_nffree += i; 1952 fs->fs_cs(fs, cg).cs_nffree += i; 1953 /* 1954 * add back in counts associated with the new frags 1955 */ 1956 blk = blkmap(fs, blksfree, bbase); 1957 ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1958 /* 1959 * if a complete block has been reassembled, account for it 1960 */ 1961 fragno = fragstoblks(fs, bbase); 1962 if (ffs_isblock(fs, blksfree, fragno)) { 1963 cgp->cg_cs.cs_nffree -= 
			    fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(ump, fs, cgp, fragno, 1);
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
		}
	}
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	bdwrite(bp);
}

#ifdef INVARIANTS
/*
 * Verify allocation of a block or fragment. Returns true if block or
 * fragment is allocated, false if it is free.
 */
static int
ffs_checkblk(ip, bno, size)
	struct inode *ip;
	ufs2_daddr_t bno;
	long size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	ufs1_daddr_t cgbno;
	int i, error, frags, free;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("bsize = %ld, size = %ld, fs = %s\n",
		    (long)fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_checkblk: bad size");
	}
	if ((u_int)bno >= fs->fs_size)
		panic("ffs_checkblk: bad block %jd", (intmax_t)bno);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		panic("ffs_checkblk: cg bread failed");
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		panic("ffs_checkblk: cg magic mismatch");
	bp->b_xflags |= BX_BKGRDWRITE;
	blksfree = cg_blksfree(cgp);
	cgbno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno));
	} else {
		frags = numfrags(fs, size);
		for (free = 0, i = 0; i < frags; i++)
			if (isset(blksfree, cgbno + i))
				free++;
		if (free != 0 && free != frags)
			panic("ffs_checkblk: partially free fragment");
	}
	brelse(bp);
	return (!free);
}
#endif /* INVARIANTS */

/*
 * Free an inode.
 */
int
ffs_vfree(pvp, ino, mode)
	struct vnode *pvp;
	ino_t ino;
	int mode;
{
	struct inode *ip;

	if (DOINGSOFTDEP(pvp)) {
		softdep_freefile(pvp, ino, mode);
		return (0);
	}
	ip = VTOI(pvp);
	return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode));
}
/*
 * Do the actual free operation.
 * The specified inode is placed back in the free map.
 */
int
ffs_freefile(ump, fs, devvp, ino, mode)
	struct ufsmount *ump;
	struct fs *fs;
	struct vnode *devvp;
	ino_t ino;
	int mode;
{
	struct cg *cgp;
	struct buf *bp;
	ufs2_daddr_t cgbno;
	int error;
	u_int cg;
	u_int8_t *inosused;
	struct cdev *dev;

	cg = ino_to_cg(fs, ino);
	if (devvp->v_type == VREG) {
		/* devvp is a snapshot */
		dev = VTOI(devvp)->i_devvp->v_rdev;
		cgbno = fragstoblks(fs, cgtod(fs, cg));
	} else {
		/* devvp is a normal disk device */
		dev = devvp->v_rdev;
		cgbno = fsbtodb(fs, cgtod(fs, cg));
	}
	if (ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s",
		    devtoname(dev), (u_long)ino, fs->fs_fsmnt);
	if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) {
		brelse(bp);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (0);
	}
	bp->b_xflags |= BX_BKGRDWRITE;
	cgp->cg_old_time = cgp->cg_time = time_second;
	inosused = cg_inosused(cgp);
	ino %= fs->fs_ipg;
	if (isclr(inosused, ino)) {
		printf("dev = %s, ino = %u, fs = %s\n", devtoname(dev),
		    ino + cg * fs->fs_ipg, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ffs_freefile: freeing free inode");
	}
	clrbit(inosused, ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	UFS_LOCK(ump);
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod = 1;
	ACTIVECLEAR(fs, cg);
	UFS_UNLOCK(ump);
	bdwrite(bp);
	return (0);
}

/*
 * Check to see if a file is free.
 */
int
ffs_checkfreefile(fs, devvp, ino)
	struct fs *fs;
	struct vnode *devvp;
	ino_t ino;
{
	struct cg *cgp;
	struct buf *bp;
	ufs2_daddr_t cgbno;
	int ret;
	u_int cg;
	u_int8_t *inosused;

	cg = ino_to_cg(fs, ino);
	if (devvp->v_type == VREG) {
		/* devvp is a snapshot */
		cgbno = fragstoblks(fs, cgtod(fs, cg));
	} else {
		/* devvp is a normal disk device */
		cgbno = fsbtodb(fs, cgtod(fs, cg));
	}
	if (ino >= fs->fs_ipg * fs->fs_ncg)
		return (1);
	if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) {
		brelse(bp);
		return (1);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (1);
	}
	inosused = cg_inosused(cgp);
	ino %= fs->fs_ipg;
	ret = isclr(inosused, ino);
	brelse(bp);
	return (ret);
}
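/*
 * Worked example (illustrative) for the ino_to_cg()/modulo mapping
 * used by ffs_freefile() and ffs_checkfreefile() above: inode numbers
 * map to cylinder groups by simple division.  With fs_ipg == 32768
 * inodes per group, inode 70000 lives in cg 2, and after the
 * "ino %= fs->fs_ipg" step it is bit 4464 (70000 - 2 * 32768) of that
 * group's inosused map.
 */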
/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
static ufs1_daddr_t
ffs_mapsearch(fs, cgp, bpref, allocsiz)
	struct fs *fs;
	struct cg *cgp;
	ufs2_daddr_t bpref;
	int allocsiz;
{
	ufs1_daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;
	u_int8_t *blksfree;

	/*
	 * Find the fragment by searching through the free block map
	 * for an appropriate bit pattern, starting at the preferred
	 * byte or at the rotor where the previous search left off.
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	blksfree = cg_blksfree(cgp);
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((u_int)len, (u_char *)&blksfree[start],
	    fragtbl[fs->fs_frag],
	    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		/* Wrap around and rescan from the start of the map. */
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len, (u_char *)&blksfree[0],
		    fragtbl[fs->fs_frag],
		    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("ffs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, blksfree, bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
	panic("ffs_alloccg: block not in map");
	return (-1);
}

/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 */
void
ffs_clusteracct(ump, fs, cgp, blkno, cnt)
	struct ufsmount *ump;
	struct fs *fs;
	struct cg *cgp;
	ufs1_daddr_t blkno;
	int cnt;
{
	int32_t *sump;
	int32_t *lp;
	u_char *freemapp, *mapp;
	int i, start, end, forw, back, map, bit;

	mtx_assert(UFS_MTX(ump), MA_OWNED);

	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp);
	sump = cg_clustersum(cgp);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= cgp->cg_nclusterblks)
		end = cgp->cg_nclusterblks;
	mapp = &freemapp[start / NBBY];
	map = *mapp++;
	bit = 1 << (start % NBBY);
	for (i = start; i < end; i++) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	mapp = &freemapp[start / NBBY];
	map = *mapp--;
	bit = 1 << (start % NBBY);
	for (i = start; i > end; i--) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != 0) {
			bit >>= 1;
		} else {
			map = *mapp--;
			bit = 1 << (NBBY - 1);
		}
	}
	back = start - i;
	/*
	 * Account for old cluster and the possibly new forward and
	 * back clusters.
	 */
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	sump[i] += cnt;
	if (back > 0)
		sump[back] -= cnt;
	if (forw > 0)
		sump[forw] -= cnt;
	/*
	 * Update cluster summary information.
	 */
	lp = &sump[fs->fs_contigsumsize];
	for (i = fs->fs_contigsumsize; i > 0; i--)
		if (*lp-- > 0)
			break;
	fs->fs_maxcluster[cgp->cg_cgx] = i;
}
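/*
 * Worked example for the accounting above (illustrative): when a block
 * is freed (cnt == 1) with a free run of back == 2 blocks behind it
 * and forw == 3 blocks ahead of it, the three pieces merge into one
 * cluster of 6 blocks: sump[6]++ records the merged cluster (capped at
 * fs_contigsumsize) while sump[2]-- and sump[3]-- retire the two
 * clusters it absorbed.  An allocation (cnt == -1) applies the same
 * deltas with the opposite sign.
 */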
/*
 * Fserr prints the name of a filesystem with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
static void
ffs_fserr(fs, inum, cp)
	struct fs *fs;
	ino_t inum;
	char *cp;
{
	struct thread *td = curthread;	/* XXX */
	struct proc *p = td->td_proc;

	log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n",
	    p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp);
}
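/*
 * Illustrative rendering of the message format above, assuming the
 * "filesystem full" diagnostic that the allocators in this file pass
 * in (example values only):
 *
 *	pid 734 (dd), uid 1001 inumber 11874 on /usr: filesystem full
 */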
/*
 * This function provides the capability for the fsck program to
 * update an active filesystem. Fourteen operations are provided:
 *
 * adjrefcnt(inode, amt) - adjusts the reference count on the
 *	specified inode by the specified amount. Under normal
 *	operation the count should always go down. Decrementing
 *	the count to zero will cause the inode to be freed.
 * adjblkcnt(inode, amt) - adjust the number of blocks used by the
 *	specified inode by the specified amount.
 * adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) -
 *	adjust the superblock summary.
 * freedirs(inode, count) - directory inodes [inode..inode + count - 1]
 *	are marked as free. Inodes should never have to be marked
 *	as in use.
 * freefiles(inode, count) - file inodes [inode..inode + count - 1]
 *	are marked as free. Inodes should never have to be marked
 *	as in use.
 * freeblks(blockno, size) - blocks [blockno..blockno + size - 1]
 *	are marked as free. Blocks should never have to be marked
 *	as in use.
 * setflags(flags, set/clear) - the fs_flags field has the specified
 *	flags set (second parameter +1) or cleared (second parameter -1).
 * setcwd(dirinode) - set the current directory to dirinode in the
 *	filesystem associated with the snapshot.
 * setdotdot(oldvalue, newvalue) - Verify that the inode number for ".."
 *	in the current directory is oldvalue then change it to newvalue.
 * unlink(nameptr, oldvalue) - Verify that the inode number associated
 *	with nameptr in the current directory is oldvalue then unlink it.
 *
 * (An illustrative sketch of driving this interface from userland
 * follows sysctl_ffs_fsck() below.)
 */

static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);

SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT,
    0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR,
    sysctl_ffs_fsck, "Adjust Inode Used Blocks Count");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR,
    sysctl_ffs_fsck, "Adjust number of directories");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NBFREE, adjnbfree, CTLFLAG_WR,
    sysctl_ffs_fsck, "Adjust number of free blocks");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NIFREE, adjnifree, CTLFLAG_WR,
    sysctl_ffs_fsck, "Adjust number of free inodes");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NFFREE, adjnffree, CTLFLAG_WR,
    sysctl_ffs_fsck, "Adjust number of free frags");

static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NUMCLUSTERS, adjnumclusters, CTLFLAG_WR,
    sysctl_ffs_fsck, "Adjust number of free clusters");

static SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR,
    sysctl_ffs_fsck, "Free Range of Directory Inodes");

static SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR,
    sysctl_ffs_fsck, "Free Range of File Inodes");

static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR,
    sysctl_ffs_fsck, "Free Range of Blocks");

static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR,
    sysctl_ffs_fsck, "Change Filesystem Flags");

static SYSCTL_NODE(_vfs_ffs, FFS_SET_CWD, setcwd, CTLFLAG_WR,
    sysctl_ffs_fsck, "Set Current Working Directory");

static SYSCTL_NODE(_vfs_ffs, FFS_SET_DOTDOT, setdotdot, CTLFLAG_WR,
    sysctl_ffs_fsck, "Change Value of .. Entry");

static SYSCTL_NODE(_vfs_ffs, FFS_UNLINK, unlink, CTLFLAG_WR,
    sysctl_ffs_fsck, "Unlink a Duplicate Name");

#ifdef DEBUG
static int fsckcmds = 0;
SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
#endif /* DEBUG */
"set" : "clear"); 2473 #endif /* DEBUG */ 2474 if (cmd.size > 0) 2475 fs->fs_flags |= (long)cmd.value; 2476 else 2477 fs->fs_flags &= ~(long)cmd.value; 2478 break; 2479 2480 case FFS_ADJ_REFCNT: 2481 #ifdef DEBUG 2482 if (fsckcmds) { 2483 printf("%s: adjust inode %jd count by %jd\n", 2484 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2485 (intmax_t)cmd.size); 2486 } 2487 #endif /* DEBUG */ 2488 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2489 break; 2490 ip = VTOI(vp); 2491 ip->i_nlink += cmd.size; 2492 DIP_SET(ip, i_nlink, ip->i_nlink); 2493 ip->i_effnlink += cmd.size; 2494 ip->i_flag |= IN_CHANGE; 2495 if (DOINGSOFTDEP(vp)) 2496 softdep_change_linkcnt(ip); 2497 vput(vp); 2498 break; 2499 2500 case FFS_ADJ_BLKCNT: 2501 #ifdef DEBUG 2502 if (fsckcmds) { 2503 printf("%s: adjust inode %jd block count by %jd\n", 2504 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2505 (intmax_t)cmd.size); 2506 } 2507 #endif /* DEBUG */ 2508 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2509 break; 2510 ip = VTOI(vp); 2511 if (ip->i_flag & IN_SPACECOUNTED) { 2512 UFS_LOCK(ump); 2513 fs->fs_pendingblocks += cmd.size; 2514 UFS_UNLOCK(ump); 2515 } 2516 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size); 2517 ip->i_flag |= IN_CHANGE; 2518 vput(vp); 2519 break; 2520 2521 case FFS_DIR_FREE: 2522 filetype = IFDIR; 2523 /* fall through */ 2524 2525 case FFS_FILE_FREE: 2526 #ifdef DEBUG 2527 if (fsckcmds) { 2528 if (cmd.size == 1) 2529 printf("%s: free %s inode %d\n", 2530 mp->mnt_stat.f_mntonname, 2531 filetype == IFDIR ? "directory" : "file", 2532 (ino_t)cmd.value); 2533 else 2534 printf("%s: free %s inodes %d-%d\n", 2535 mp->mnt_stat.f_mntonname, 2536 filetype == IFDIR ? "directory" : "file", 2537 (ino_t)cmd.value, 2538 (ino_t)(cmd.value + cmd.size - 1)); 2539 } 2540 #endif /* DEBUG */ 2541 while (cmd.size > 0) { 2542 if ((error = ffs_freefile(ump, fs, ump->um_devvp, 2543 cmd.value, filetype))) 2544 break; 2545 cmd.size -= 1; 2546 cmd.value += 1; 2547 } 2548 break; 2549 2550 case FFS_BLK_FREE: 2551 #ifdef DEBUG 2552 if (fsckcmds) { 2553 if (cmd.size == 1) 2554 printf("%s: free block %jd\n", 2555 mp->mnt_stat.f_mntonname, 2556 (intmax_t)cmd.value); 2557 else 2558 printf("%s: free blocks %jd-%jd\n", 2559 mp->mnt_stat.f_mntonname, 2560 (intmax_t)cmd.value, 2561 (intmax_t)cmd.value + cmd.size - 1); 2562 } 2563 #endif /* DEBUG */ 2564 blkno = cmd.value; 2565 blkcnt = cmd.size; 2566 blksize = fs->fs_frag - (blkno % fs->fs_frag); 2567 while (blkcnt > 0) { 2568 if (blksize > blkcnt) 2569 blksize = blkcnt; 2570 ffs_blkfree(ump, fs, ump->um_devvp, blkno, 2571 blksize * fs->fs_fsize, ROOTINO); 2572 blkno += blksize; 2573 blkcnt -= blksize; 2574 blksize = fs->fs_frag; 2575 } 2576 break; 2577 2578 /* 2579 * Adjust superblock summaries. fsck(8) is expected to 2580 * submit deltas when necessary. 
	/*
	 * Adjust superblock summaries. fsck(8) is expected to
	 * submit deltas when necessary.
	 */
	case FFS_ADJ_NDIR:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of directories by %jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_ndir += cmd.value;
		break;

	case FFS_ADJ_NBFREE:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free blocks by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_nbfree += cmd.value;
		break;

	case FFS_ADJ_NIFREE:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free inodes by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_nifree += cmd.value;
		break;

	case FFS_ADJ_NFFREE:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free frags by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_nffree += cmd.value;
		break;

	case FFS_ADJ_NUMCLUSTERS:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: adjust number of free clusters by %+jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		fs->fs_cstotal.cs_numclusters += cmd.value;
		break;

	case FFS_SET_CWD:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: set current directory to inode %jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
		}
#endif /* DEBUG */
		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_SHARED, &vp)))
			break;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		AUDIT_ARG_VNODE1(vp);
		if ((error = change_dir(vp, td)) != 0) {
			vput(vp);
			VFS_UNLOCK_GIANT(vfslocked);
			break;
		}
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		fdp = td->td_proc->p_fd;
		FILEDESC_XLOCK(fdp);
		vpold = fdp->fd_cdir;
		fdp->fd_cdir = vp;
		FILEDESC_XUNLOCK(fdp);
		vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
		vrele(vpold);
		VFS_UNLOCK_GIANT(vfslocked);
		break;

	case FFS_SET_DOTDOT:
#ifdef DEBUG
		if (fsckcmds) {
			printf("%s: change .. in cwd from %jd to %jd\n",
			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
			    (intmax_t)cmd.size);
		}
#endif /* DEBUG */
		/*
		 * First we have to get and lock the parent directory
		 * to which ".." points.
		 */
		error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &fdvp);
		if (error)
			break;
		/*
		 * Now we get and lock the child directory containing "..".
		 */
		FILEDESC_SLOCK(td->td_proc->p_fd);
		dvp = td->td_proc->p_fd->fd_cdir;
		FILEDESC_SUNLOCK(td->td_proc->p_fd);
		if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
			vput(fdvp);
			break;
		}
		dp = VTOI(dvp);
		dp->i_offset = 12;	/* XXX mastertemplate.dot_reclen */
		error = ufs_dirrewrite(dp, VTOI(fdvp), (ino_t)cmd.size,
		    DT_DIR, 0);
		cache_purge(fdvp);
		cache_purge(dvp);
		vput(dvp);
		vput(fdvp);
		break;
	case FFS_UNLINK:
#ifdef DEBUG
		if (fsckcmds) {
			char buf[32];

			if (copyinstr((char *)(intptr_t)cmd.value, buf, 32,
			    NULL))
				strncpy(buf, "Name_too_long", 32);
			printf("%s: unlink %s (inode %jd)\n",
			    mp->mnt_stat.f_mntonname, buf, (intmax_t)cmd.size);
		}
#endif /* DEBUG */
		/*
		 * kern_unlinkat will do its own start/finish writes and
		 * they do not nest, so drop ours here. Setting mp == NULL
		 * indicates that vn_finished_write is not needed down below.
		 */
		vn_finished_write(mp);
		mp = NULL;
		error = kern_unlinkat(td, AT_FDCWD, (char *)(intptr_t)cmd.value,
		    UIO_USERSPACE, (ino_t)cmd.size);
		break;

	default:
#ifdef DEBUG
		if (fsckcmds) {
			printf("Invalid request %d from fsck\n",
			    oidp->oid_number);
		}
#endif /* DEBUG */
		error = EINVAL;
		break;

	}
	fdrop(fp, td);
	vn_finished_write(mp);
	return (error);
}
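/*
 * Illustrative userland sketch (example only, not part of this file):
 * fsck(8) drives the interface above by filling in a struct fsck_cmd
 * and writing it to the matching vfs.ffs.* sysctl.  The handle is a
 * descriptor for a file or directory in the target filesystem (see the
 * getvnode() check in sysctl_ffs_fsck() above); value and size carry
 * the operands listed in the big comment before the sysctl
 * declarations.  The error handling here is only a sketch.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <ufs/ffs/fs.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void
adjust_refcnt(const char *mntpt, ino_t inum, int64_t amt)
{
	struct fsck_cmd cmd;
	int fd;

	if ((fd = open(mntpt, O_RDONLY)) < 0)
		err(1, "%s", mntpt);
	memset(&cmd, 0, sizeof(cmd));
	cmd.version = FFS_CMD_VERSION;	/* checked by sysctl_ffs_fsck() */
	cmd.handle = fd;		/* identifies the mounted filesystem */
	cmd.value = inum;		/* inode whose link count is adjusted */
	cmd.size = amt;			/* signed delta applied to i_nlink */
	if (sysctlbyname("vfs.ffs.adjrefcnt", NULL, NULL,
	    &cmd, sizeof(cmd)) < 0)
		err(1, "adjrefcnt");
	close(fd);
}
#endif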