1 /* 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Copyright (c) 1982, 1986, 1989, 1993 12 * The Regents of the University of California. All rights reserved. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 43 * $FreeBSD$ 44 */ 45 46 #include "opt_quota.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/bio.h> 51 #include <sys/buf.h> 52 #include <sys/conf.h> 53 #include <sys/file.h> 54 #include <sys/filedesc.h> 55 #include <sys/proc.h> 56 #include <sys/vnode.h> 57 #include <sys/mount.h> 58 #include <sys/kernel.h> 59 #include <sys/sysctl.h> 60 #include <sys/syslog.h> 61 62 #include <ufs/ufs/extattr.h> 63 #include <ufs/ufs/quota.h> 64 #include <ufs/ufs/inode.h> 65 #include <ufs/ufs/ufs_extern.h> 66 #include <ufs/ufs/ufsmount.h> 67 68 #include <ufs/ffs/fs.h> 69 #include <ufs/ffs/ffs_extern.h> 70 71 typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref, 72 int size); 73 74 static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int); 75 static ufs2_daddr_t 76 ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t); 77 #ifdef DIAGNOSTIC 78 static int ffs_checkblk(struct inode *, ufs2_daddr_t, long); 79 #endif 80 static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int); 81 static ino_t ffs_dirpref(struct inode *); 82 static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int); 83 static void ffs_fserr(struct fs *, ino_t, char *); 84 static ufs2_daddr_t ffs_hashalloc 85 (struct inode *, int, ufs2_daddr_t, int, allocfcn_t *); 86 static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int); 87 static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int); 88 static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *); 89 static int ffs_reallocblks_ufs2(struct vop_reallocblks_args *); 90 91 /* 92 * Allocate a block in the filesystem. 93 * 94 * The size of the requested block is given, which must be some 95 * multiple of fs_fsize and <= fs_bsize. 96 * A preference may be optionally specified. If a preference is given 97 * the following hierarchy is used to allocate a block: 98 * 1) allocate the requested block. 99 * 2) allocate a rotationally optimal block in the same cylinder. 100 * 3) allocate a block in the same cylinder group. 101 * 4) quadradically rehash into other cylinder groups, until an 102 * available block is located. 103 * If no block preference is given the following heirarchy is used 104 * to allocate a block: 105 * 1) allocate a block in the cylinder group that contains the 106 * inode for the file. 107 * 2) quadradically rehash into other cylinder groups, until an 108 * available block is located. 109 */ 110 int 111 ffs_alloc(ip, lbn, bpref, size, cred, bnp) 112 struct inode *ip; 113 ufs2_daddr_t lbn, bpref; 114 int size; 115 struct ucred *cred; 116 ufs2_daddr_t *bnp; 117 { 118 struct fs *fs; 119 ufs2_daddr_t bno; 120 int cg, reclaimed; 121 #ifdef QUOTA 122 int error; 123 #endif 124 125 *bnp = 0; 126 fs = ip->i_fs; 127 #ifdef DIAGNOSTIC 128 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 129 printf("dev = %s, bsize = %ld, size = %d, fs = %s\n", 130 devtoname(ip->i_dev), (long)fs->fs_bsize, size, 131 fs->fs_fsmnt); 132 panic("ffs_alloc: bad size"); 133 } 134 if (cred == NOCRED) 135 panic("ffs_alloc: missing credential"); 136 #endif /* DIAGNOSTIC */ 137 reclaimed = 0; 138 retry: 139 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 140 goto nospace; 141 if (suser_cred(cred, PRISON_ROOT) && 142 freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) 143 goto nospace; 144 #ifdef QUOTA 145 error = chkdq(ip, btodb(size), cred, 0); 146 if (error) 147 return (error); 148 #endif 149 if (bpref >= fs->fs_size) 150 bpref = 0; 151 if (bpref == 0) 152 cg = ino_to_cg(fs, ip->i_number); 153 else 154 cg = dtog(fs, bpref); 155 bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg); 156 if (bno > 0) { 157 DIP(ip, i_blocks) += btodb(size); 158 ip->i_flag |= IN_CHANGE | IN_UPDATE; 159 *bnp = bno; 160 return (0); 161 } 162 #ifdef QUOTA 163 /* 164 * Restore user's disk quota because allocation failed. 165 */ 166 (void) chkdq(ip, -btodb(size), cred, FORCE); 167 #endif 168 nospace: 169 if (fs->fs_pendingblocks > 0 && reclaimed == 0) { 170 reclaimed = 1; 171 softdep_request_cleanup(fs, ITOV(ip)); 172 goto retry; 173 } 174 ffs_fserr(fs, ip->i_number, "filesystem full"); 175 uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); 176 return (ENOSPC); 177 } 178 179 /* 180 * Reallocate a fragment to a bigger size 181 * 182 * The number and size of the old block is given, and a preference 183 * and new size is also specified. The allocator attempts to extend 184 * the original block. Failing that, the regular block allocator is 185 * invoked to get an appropriate block. 186 */ 187 int 188 ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, cred, bpp) 189 struct inode *ip; 190 ufs2_daddr_t lbprev; 191 ufs2_daddr_t bprev; 192 ufs2_daddr_t bpref; 193 int osize, nsize; 194 struct ucred *cred; 195 struct buf **bpp; 196 { 197 struct vnode *vp; 198 struct fs *fs; 199 struct buf *bp; 200 int cg, request, error, reclaimed; 201 ufs2_daddr_t bno; 202 203 *bpp = 0; 204 vp = ITOV(ip); 205 fs = ip->i_fs; 206 #ifdef DIAGNOSTIC 207 if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) 208 panic("ffs_realloccg: allocation on suspended filesystem"); 209 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 210 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 211 printf( 212 "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n", 213 devtoname(ip->i_dev), (long)fs->fs_bsize, osize, 214 nsize, fs->fs_fsmnt); 215 panic("ffs_realloccg: bad size"); 216 } 217 if (cred == NOCRED) 218 panic("ffs_realloccg: missing credential"); 219 #endif /* DIAGNOSTIC */ 220 reclaimed = 0; 221 retry: 222 if (suser_cred(cred, PRISON_ROOT) && 223 freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) 224 goto nospace; 225 if (bprev == 0) { 226 printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n", 227 devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev, 228 fs->fs_fsmnt); 229 panic("ffs_realloccg: bad bprev"); 230 } 231 /* 232 * Allocate the extra space in the buffer. 233 */ 234 error = bread(vp, lbprev, osize, NOCRED, &bp); 235 if (error) { 236 brelse(bp); 237 return (error); 238 } 239 240 if (bp->b_blkno == bp->b_lblkno) { 241 if (lbprev >= NDADDR) 242 panic("ffs_realloccg: lbprev out of range"); 243 bp->b_blkno = fsbtodb(fs, bprev); 244 } 245 246 #ifdef QUOTA 247 error = chkdq(ip, btodb(nsize - osize), cred, 0); 248 if (error) { 249 brelse(bp); 250 return (error); 251 } 252 #endif 253 /* 254 * Check for extension in the existing location. 255 */ 256 cg = dtog(fs, bprev); 257 bno = ffs_fragextend(ip, cg, bprev, osize, nsize); 258 if (bno) { 259 if (bp->b_blkno != fsbtodb(fs, bno)) 260 panic("ffs_realloccg: bad blockno"); 261 DIP(ip, i_blocks) += btodb(nsize - osize); 262 ip->i_flag |= IN_CHANGE | IN_UPDATE; 263 allocbuf(bp, nsize); 264 bp->b_flags |= B_DONE; 265 bzero((char *)bp->b_data + osize, (u_int)nsize - osize); 266 *bpp = bp; 267 return (0); 268 } 269 /* 270 * Allocate a new disk location. 271 */ 272 if (bpref >= fs->fs_size) 273 bpref = 0; 274 switch ((int)fs->fs_optim) { 275 case FS_OPTSPACE: 276 /* 277 * Allocate an exact sized fragment. Although this makes 278 * best use of space, we will waste time relocating it if 279 * the file continues to grow. If the fragmentation is 280 * less than half of the minimum free reserve, we choose 281 * to begin optimizing for time. 282 */ 283 request = nsize; 284 if (fs->fs_minfree <= 5 || 285 fs->fs_cstotal.cs_nffree > 286 (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100)) 287 break; 288 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", 289 fs->fs_fsmnt); 290 fs->fs_optim = FS_OPTTIME; 291 break; 292 case FS_OPTTIME: 293 /* 294 * At this point we have discovered a file that is trying to 295 * grow a small fragment to a larger fragment. To save time, 296 * we allocate a full sized block, then free the unused portion. 297 * If the file continues to grow, the `ffs_fragextend' call 298 * above will be able to grow it in place without further 299 * copying. If aberrant programs cause disk fragmentation to 300 * grow within 2% of the free reserve, we choose to begin 301 * optimizing for space. 302 */ 303 request = fs->fs_bsize; 304 if (fs->fs_cstotal.cs_nffree < 305 (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100) 306 break; 307 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", 308 fs->fs_fsmnt); 309 fs->fs_optim = FS_OPTSPACE; 310 break; 311 default: 312 printf("dev = %s, optim = %ld, fs = %s\n", 313 devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt); 314 panic("ffs_realloccg: bad optim"); 315 /* NOTREACHED */ 316 } 317 bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg); 318 if (bno > 0) { 319 bp->b_blkno = fsbtodb(fs, bno); 320 if (!DOINGSOFTDEP(vp)) 321 ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize, 322 ip->i_number); 323 if (nsize < request) 324 ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize), 325 (long)(request - nsize), ip->i_number); 326 DIP(ip, i_blocks) += btodb(nsize - osize); 327 ip->i_flag |= IN_CHANGE | IN_UPDATE; 328 allocbuf(bp, nsize); 329 bp->b_flags |= B_DONE; 330 bzero((char *)bp->b_data + osize, (u_int)nsize - osize); 331 *bpp = bp; 332 return (0); 333 } 334 #ifdef QUOTA 335 /* 336 * Restore user's disk quota because allocation failed. 337 */ 338 (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE); 339 #endif 340 brelse(bp); 341 nospace: 342 /* 343 * no space available 344 */ 345 if (fs->fs_pendingblocks > 0 && reclaimed == 0) { 346 reclaimed = 1; 347 softdep_request_cleanup(fs, vp); 348 goto retry; 349 } 350 ffs_fserr(fs, ip->i_number, "filesystem full"); 351 uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); 352 return (ENOSPC); 353 } 354 355 /* 356 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 357 * 358 * The vnode and an array of buffer pointers for a range of sequential 359 * logical blocks to be made contiguous is given. The allocator attempts 360 * to find a range of sequential blocks starting as close as possible 361 * from the end of the allocation for the logical block immediately 362 * preceding the current range. If successful, the physical block numbers 363 * in the buffer pointers and in the inode are changed to reflect the new 364 * allocation. If unsuccessful, the allocation is left unchanged. The 365 * success in doing the reallocation is returned. Note that the error 366 * return is not reflected back to the user. Rather the previous block 367 * allocation will be used. 368 */ 369 370 SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); 371 372 static int doasyncfree = 1; 373 SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); 374 375 static int doreallocblks = 1; 376 SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 377 378 #ifdef DEBUG 379 static volatile int prtrealloc = 0; 380 #endif 381 382 int 383 ffs_reallocblks(ap) 384 struct vop_reallocblks_args /* { 385 struct vnode *a_vp; 386 struct cluster_save *a_buflist; 387 } */ *ap; 388 { 389 390 if (doreallocblks == 0) 391 return (ENOSPC); 392 if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1) 393 return (ffs_reallocblks_ufs1(ap)); 394 return (ffs_reallocblks_ufs2(ap)); 395 } 396 397 static int 398 ffs_reallocblks_ufs1(ap) 399 struct vop_reallocblks_args /* { 400 struct vnode *a_vp; 401 struct cluster_save *a_buflist; 402 } */ *ap; 403 { 404 struct fs *fs; 405 struct inode *ip; 406 struct vnode *vp; 407 struct buf *sbp, *ebp; 408 ufs1_daddr_t *bap, *sbap, *ebap = 0; 409 struct cluster_save *buflist; 410 ufs_lbn_t start_lbn, end_lbn; 411 ufs1_daddr_t soff, newblk, blkno; 412 ufs2_daddr_t pref; 413 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 414 int i, len, start_lvl, end_lvl, ssize; 415 416 vp = ap->a_vp; 417 ip = VTOI(vp); 418 fs = ip->i_fs; 419 if (fs->fs_contigsumsize <= 0) 420 return (ENOSPC); 421 buflist = ap->a_buflist; 422 len = buflist->bs_nchildren; 423 start_lbn = buflist->bs_children[0]->b_lblkno; 424 end_lbn = start_lbn + len - 1; 425 #ifdef DIAGNOSTIC 426 for (i = 0; i < len; i++) 427 if (!ffs_checkblk(ip, 428 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 429 panic("ffs_reallocblks: unallocated block 1"); 430 for (i = 1; i < len; i++) 431 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 432 panic("ffs_reallocblks: non-logical cluster"); 433 blkno = buflist->bs_children[0]->b_blkno; 434 ssize = fsbtodb(fs, fs->fs_frag); 435 for (i = 1; i < len - 1; i++) 436 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) 437 panic("ffs_reallocblks: non-physical cluster %d", i); 438 #endif 439 /* 440 * If the latest allocation is in a new cylinder group, assume that 441 * the filesystem has decided to move and do not force it back to 442 * the previous cylinder group. 443 */ 444 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 445 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 446 return (ENOSPC); 447 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 448 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 449 return (ENOSPC); 450 /* 451 * Get the starting offset and block map for the first block. 452 */ 453 if (start_lvl == 0) { 454 sbap = &ip->i_din1->di_db[0]; 455 soff = start_lbn; 456 } else { 457 idp = &start_ap[start_lvl - 1]; 458 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { 459 brelse(sbp); 460 return (ENOSPC); 461 } 462 sbap = (ufs1_daddr_t *)sbp->b_data; 463 soff = idp->in_off; 464 } 465 /* 466 * Find the preferred location for the cluster. 467 */ 468 pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap); 469 /* 470 * If the block range spans two block maps, get the second map. 471 */ 472 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 473 ssize = len; 474 } else { 475 #ifdef DIAGNOSTIC 476 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 477 panic("ffs_reallocblk: start == end"); 478 #endif 479 ssize = len - (idp->in_off + 1); 480 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) 481 goto fail; 482 ebap = (ufs1_daddr_t *)ebp->b_data; 483 } 484 /* 485 * Search the block map looking for an allocation of the desired size. 486 */ 487 if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, 488 len, ffs_clusteralloc)) == 0) 489 goto fail; 490 /* 491 * We have found a new contiguous block. 492 * 493 * First we have to replace the old block pointers with the new 494 * block pointers in the inode and indirect blocks associated 495 * with the file. 496 */ 497 #ifdef DEBUG 498 if (prtrealloc) 499 printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, 500 (intmax_t)start_lbn, (intmax_t)end_lbn); 501 #endif 502 blkno = newblk; 503 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { 504 if (i == ssize) { 505 bap = ebap; 506 soff = -i; 507 } 508 #ifdef DIAGNOSTIC 509 if (!ffs_checkblk(ip, 510 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 511 panic("ffs_reallocblks: unallocated block 2"); 512 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) 513 panic("ffs_reallocblks: alloc mismatch"); 514 #endif 515 #ifdef DEBUG 516 if (prtrealloc) 517 printf(" %d,", *bap); 518 #endif 519 if (DOINGSOFTDEP(vp)) { 520 if (sbap == &ip->i_din1->di_db[0] && i < ssize) 521 softdep_setup_allocdirect(ip, start_lbn + i, 522 blkno, *bap, fs->fs_bsize, fs->fs_bsize, 523 buflist->bs_children[i]); 524 else 525 softdep_setup_allocindir_page(ip, start_lbn + i, 526 i < ssize ? sbp : ebp, soff + i, blkno, 527 *bap, buflist->bs_children[i]); 528 } 529 *bap++ = blkno; 530 } 531 /* 532 * Next we must write out the modified inode and indirect blocks. 533 * For strict correctness, the writes should be synchronous since 534 * the old block values may have been written to disk. In practise 535 * they are almost never written, but if we are concerned about 536 * strict correctness, the `doasyncfree' flag should be set to zero. 537 * 538 * The test on `doasyncfree' should be changed to test a flag 539 * that shows whether the associated buffers and inodes have 540 * been written. The flag should be set when the cluster is 541 * started and cleared whenever the buffer or inode is flushed. 542 * We can then check below to see if it is set, and do the 543 * synchronous write only when it has been cleared. 544 */ 545 if (sbap != &ip->i_din1->di_db[0]) { 546 if (doasyncfree) 547 bdwrite(sbp); 548 else 549 bwrite(sbp); 550 } else { 551 ip->i_flag |= IN_CHANGE | IN_UPDATE; 552 if (!doasyncfree) 553 UFS_UPDATE(vp, 1); 554 } 555 if (ssize < len) { 556 if (doasyncfree) 557 bdwrite(ebp); 558 else 559 bwrite(ebp); 560 } 561 /* 562 * Last, free the old blocks and assign the new blocks to the buffers. 563 */ 564 #ifdef DEBUG 565 if (prtrealloc) 566 printf("\n\tnew:"); 567 #endif 568 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { 569 if (!DOINGSOFTDEP(vp)) 570 ffs_blkfree(fs, ip->i_devvp, 571 dbtofsb(fs, buflist->bs_children[i]->b_blkno), 572 fs->fs_bsize, ip->i_number); 573 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 574 #ifdef DIAGNOSTIC 575 if (!ffs_checkblk(ip, 576 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 577 panic("ffs_reallocblks: unallocated block 3"); 578 #endif 579 #ifdef DEBUG 580 if (prtrealloc) 581 printf(" %d,", blkno); 582 #endif 583 } 584 #ifdef DEBUG 585 if (prtrealloc) { 586 prtrealloc--; 587 printf("\n"); 588 } 589 #endif 590 return (0); 591 592 fail: 593 if (ssize < len) 594 brelse(ebp); 595 if (sbap != &ip->i_din1->di_db[0]) 596 brelse(sbp); 597 return (ENOSPC); 598 } 599 600 static int 601 ffs_reallocblks_ufs2(ap) 602 struct vop_reallocblks_args /* { 603 struct vnode *a_vp; 604 struct cluster_save *a_buflist; 605 } */ *ap; 606 { 607 struct fs *fs; 608 struct inode *ip; 609 struct vnode *vp; 610 struct buf *sbp, *ebp; 611 ufs2_daddr_t *bap, *sbap, *ebap = 0; 612 struct cluster_save *buflist; 613 ufs_lbn_t start_lbn, end_lbn; 614 ufs2_daddr_t soff, newblk, blkno, pref; 615 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 616 int i, len, start_lvl, end_lvl, ssize; 617 618 vp = ap->a_vp; 619 ip = VTOI(vp); 620 fs = ip->i_fs; 621 if (fs->fs_contigsumsize <= 0) 622 return (ENOSPC); 623 buflist = ap->a_buflist; 624 len = buflist->bs_nchildren; 625 start_lbn = buflist->bs_children[0]->b_lblkno; 626 end_lbn = start_lbn + len - 1; 627 #ifdef DIAGNOSTIC 628 for (i = 0; i < len; i++) 629 if (!ffs_checkblk(ip, 630 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 631 panic("ffs_reallocblks: unallocated block 1"); 632 for (i = 1; i < len; i++) 633 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 634 panic("ffs_reallocblks: non-logical cluster"); 635 blkno = buflist->bs_children[0]->b_blkno; 636 ssize = fsbtodb(fs, fs->fs_frag); 637 for (i = 1; i < len - 1; i++) 638 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) 639 panic("ffs_reallocblks: non-physical cluster %d", i); 640 #endif 641 /* 642 * If the latest allocation is in a new cylinder group, assume that 643 * the filesystem has decided to move and do not force it back to 644 * the previous cylinder group. 645 */ 646 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 647 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 648 return (ENOSPC); 649 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 650 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 651 return (ENOSPC); 652 /* 653 * Get the starting offset and block map for the first block. 654 */ 655 if (start_lvl == 0) { 656 sbap = &ip->i_din2->di_db[0]; 657 soff = start_lbn; 658 } else { 659 idp = &start_ap[start_lvl - 1]; 660 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { 661 brelse(sbp); 662 return (ENOSPC); 663 } 664 sbap = (ufs2_daddr_t *)sbp->b_data; 665 soff = idp->in_off; 666 } 667 /* 668 * Find the preferred location for the cluster. 669 */ 670 pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap); 671 /* 672 * If the block range spans two block maps, get the second map. 673 */ 674 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 675 ssize = len; 676 } else { 677 #ifdef DIAGNOSTIC 678 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 679 panic("ffs_reallocblk: start == end"); 680 #endif 681 ssize = len - (idp->in_off + 1); 682 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) 683 goto fail; 684 ebap = (ufs2_daddr_t *)ebp->b_data; 685 } 686 /* 687 * Search the block map looking for an allocation of the desired size. 688 */ 689 if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, 690 len, ffs_clusteralloc)) == 0) 691 goto fail; 692 /* 693 * We have found a new contiguous block. 694 * 695 * First we have to replace the old block pointers with the new 696 * block pointers in the inode and indirect blocks associated 697 * with the file. 698 */ 699 #ifdef DEBUG 700 if (prtrealloc) 701 printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, 702 (intmax_t)start_lbn, (intmax_t)end_lbn); 703 #endif 704 blkno = newblk; 705 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { 706 if (i == ssize) { 707 bap = ebap; 708 soff = -i; 709 } 710 #ifdef DIAGNOSTIC 711 if (!ffs_checkblk(ip, 712 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 713 panic("ffs_reallocblks: unallocated block 2"); 714 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) 715 panic("ffs_reallocblks: alloc mismatch"); 716 #endif 717 #ifdef DEBUG 718 if (prtrealloc) 719 printf(" %jd,", (intmax_t)*bap); 720 #endif 721 if (DOINGSOFTDEP(vp)) { 722 if (sbap == &ip->i_din2->di_db[0] && i < ssize) 723 softdep_setup_allocdirect(ip, start_lbn + i, 724 blkno, *bap, fs->fs_bsize, fs->fs_bsize, 725 buflist->bs_children[i]); 726 else 727 softdep_setup_allocindir_page(ip, start_lbn + i, 728 i < ssize ? sbp : ebp, soff + i, blkno, 729 *bap, buflist->bs_children[i]); 730 } 731 *bap++ = blkno; 732 } 733 /* 734 * Next we must write out the modified inode and indirect blocks. 735 * For strict correctness, the writes should be synchronous since 736 * the old block values may have been written to disk. In practise 737 * they are almost never written, but if we are concerned about 738 * strict correctness, the `doasyncfree' flag should be set to zero. 739 * 740 * The test on `doasyncfree' should be changed to test a flag 741 * that shows whether the associated buffers and inodes have 742 * been written. The flag should be set when the cluster is 743 * started and cleared whenever the buffer or inode is flushed. 744 * We can then check below to see if it is set, and do the 745 * synchronous write only when it has been cleared. 746 */ 747 if (sbap != &ip->i_din2->di_db[0]) { 748 if (doasyncfree) 749 bdwrite(sbp); 750 else 751 bwrite(sbp); 752 } else { 753 ip->i_flag |= IN_CHANGE | IN_UPDATE; 754 if (!doasyncfree) 755 UFS_UPDATE(vp, 1); 756 } 757 if (ssize < len) { 758 if (doasyncfree) 759 bdwrite(ebp); 760 else 761 bwrite(ebp); 762 } 763 /* 764 * Last, free the old blocks and assign the new blocks to the buffers. 765 */ 766 #ifdef DEBUG 767 if (prtrealloc) 768 printf("\n\tnew:"); 769 #endif 770 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { 771 if (!DOINGSOFTDEP(vp)) 772 ffs_blkfree(fs, ip->i_devvp, 773 dbtofsb(fs, buflist->bs_children[i]->b_blkno), 774 fs->fs_bsize, ip->i_number); 775 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 776 #ifdef DIAGNOSTIC 777 if (!ffs_checkblk(ip, 778 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 779 panic("ffs_reallocblks: unallocated block 3"); 780 #endif 781 #ifdef DEBUG 782 if (prtrealloc) 783 printf(" %jd,", (intmax_t)blkno); 784 #endif 785 } 786 #ifdef DEBUG 787 if (prtrealloc) { 788 prtrealloc--; 789 printf("\n"); 790 } 791 #endif 792 return (0); 793 794 fail: 795 if (ssize < len) 796 brelse(ebp); 797 if (sbap != &ip->i_din2->di_db[0]) 798 brelse(sbp); 799 return (ENOSPC); 800 } 801 802 /* 803 * Allocate an inode in the filesystem. 804 * 805 * If allocating a directory, use ffs_dirpref to select the inode. 806 * If allocating in a directory, the following hierarchy is followed: 807 * 1) allocate the preferred inode. 808 * 2) allocate an inode in the same cylinder group. 809 * 3) quadradically rehash into other cylinder groups, until an 810 * available inode is located. 811 * If no inode preference is given the following heirarchy is used 812 * to allocate an inode: 813 * 1) allocate an inode in cylinder group 0. 814 * 2) quadradically rehash into other cylinder groups, until an 815 * available inode is located. 816 */ 817 int 818 ffs_valloc(pvp, mode, cred, vpp) 819 struct vnode *pvp; 820 int mode; 821 struct ucred *cred; 822 struct vnode **vpp; 823 { 824 struct inode *pip; 825 struct fs *fs; 826 struct inode *ip; 827 struct timespec ts; 828 ino_t ino, ipref; 829 int cg, error; 830 831 *vpp = NULL; 832 pip = VTOI(pvp); 833 fs = pip->i_fs; 834 if (fs->fs_cstotal.cs_nifree == 0) 835 goto noinodes; 836 837 if ((mode & IFMT) == IFDIR) 838 ipref = ffs_dirpref(pip); 839 else 840 ipref = pip->i_number; 841 if (ipref >= fs->fs_ncg * fs->fs_ipg) 842 ipref = 0; 843 cg = ino_to_cg(fs, ipref); 844 /* 845 * Track number of dirs created one after another 846 * in a same cg without intervening by files. 847 */ 848 if ((mode & IFMT) == IFDIR) { 849 if (fs->fs_contigdirs[cg] < 255) 850 fs->fs_contigdirs[cg]++; 851 } else { 852 if (fs->fs_contigdirs[cg] > 0) 853 fs->fs_contigdirs[cg]--; 854 } 855 ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 856 (allocfcn_t *)ffs_nodealloccg); 857 if (ino == 0) 858 goto noinodes; 859 error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp); 860 if (error) { 861 UFS_VFREE(pvp, ino, mode); 862 return (error); 863 } 864 ip = VTOI(*vpp); 865 if (ip->i_mode) { 866 printf("mode = 0%o, inum = %lu, fs = %s\n", 867 ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt); 868 panic("ffs_valloc: dup alloc"); 869 } 870 if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) { /* XXX */ 871 printf("free inode %s/%lu had %ld blocks\n", 872 fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks)); 873 DIP(ip, i_blocks) = 0; 874 } 875 ip->i_flags = 0; 876 DIP(ip, i_flags) = 0; 877 /* 878 * Set up a new generation number for this inode. 879 */ 880 if (ip->i_gen == 0 || ++ip->i_gen == 0) 881 ip->i_gen = arc4random() / 2 + 1; 882 DIP(ip, i_gen) = ip->i_gen; 883 if (fs->fs_magic == FS_UFS2_MAGIC) { 884 vfs_timestamp(&ts); 885 ip->i_din2->di_birthtime = ts.tv_sec; 886 ip->i_din2->di_birthnsec = ts.tv_nsec; 887 } 888 return (0); 889 noinodes: 890 ffs_fserr(fs, pip->i_number, "out of inodes"); 891 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); 892 return (ENOSPC); 893 } 894 895 /* 896 * Find a cylinder group to place a directory. 897 * 898 * The policy implemented by this algorithm is to allocate a 899 * directory inode in the same cylinder group as its parent 900 * directory, but also to reserve space for its files inodes 901 * and data. Restrict the number of directories which may be 902 * allocated one after another in the same cylinder group 903 * without intervening allocation of files. 904 * 905 * If we allocate a first level directory then force allocation 906 * in another cylinder group. 907 */ 908 static ino_t 909 ffs_dirpref(pip) 910 struct inode *pip; 911 { 912 struct fs *fs; 913 int cg, prefcg, dirsize, cgsize; 914 int avgifree, avgbfree, avgndir, curdirsize; 915 int minifree, minbfree, maxndir; 916 int mincg, minndir; 917 int maxcontigdirs; 918 919 fs = pip->i_fs; 920 921 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; 922 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 923 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; 924 925 /* 926 * Force allocation in another cg if creating a first level dir. 927 */ 928 ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref"); 929 if (ITOV(pip)->v_vflag & VV_ROOT) { 930 prefcg = arc4random() % fs->fs_ncg; 931 mincg = prefcg; 932 minndir = fs->fs_ipg; 933 for (cg = prefcg; cg < fs->fs_ncg; cg++) 934 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 935 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 936 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 937 mincg = cg; 938 minndir = fs->fs_cs(fs, cg).cs_ndir; 939 } 940 for (cg = 0; cg < prefcg; cg++) 941 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 942 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 943 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 944 mincg = cg; 945 minndir = fs->fs_cs(fs, cg).cs_ndir; 946 } 947 return ((ino_t)(fs->fs_ipg * mincg)); 948 } 949 950 /* 951 * Count various limits which used for 952 * optimal allocation of a directory inode. 953 */ 954 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); 955 minifree = avgifree - fs->fs_ipg / 4; 956 if (minifree < 0) 957 minifree = 0; 958 minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4; 959 if (minbfree < 0) 960 minbfree = 0; 961 cgsize = fs->fs_fsize * fs->fs_fpg; 962 dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir; 963 curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0; 964 if (dirsize < curdirsize) 965 dirsize = curdirsize; 966 maxcontigdirs = min(cgsize / dirsize, 255); 967 if (fs->fs_avgfpdir > 0) 968 maxcontigdirs = min(maxcontigdirs, 969 fs->fs_ipg / fs->fs_avgfpdir); 970 if (maxcontigdirs == 0) 971 maxcontigdirs = 1; 972 973 /* 974 * Limit number of dirs in one cg and reserve space for 975 * regular files, but only if we have no deficit in 976 * inodes or space. 977 */ 978 prefcg = ino_to_cg(fs, pip->i_number); 979 for (cg = prefcg; cg < fs->fs_ncg; cg++) 980 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 981 fs->fs_cs(fs, cg).cs_nifree >= minifree && 982 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 983 if (fs->fs_contigdirs[cg] < maxcontigdirs) 984 return ((ino_t)(fs->fs_ipg * cg)); 985 } 986 for (cg = 0; cg < prefcg; cg++) 987 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 988 fs->fs_cs(fs, cg).cs_nifree >= minifree && 989 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 990 if (fs->fs_contigdirs[cg] < maxcontigdirs) 991 return ((ino_t)(fs->fs_ipg * cg)); 992 } 993 /* 994 * This is a backstop when we have deficit in space. 995 */ 996 for (cg = prefcg; cg < fs->fs_ncg; cg++) 997 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 998 return ((ino_t)(fs->fs_ipg * cg)); 999 for (cg = 0; cg < prefcg; cg++) 1000 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 1001 break; 1002 return ((ino_t)(fs->fs_ipg * cg)); 1003 } 1004 1005 /* 1006 * Select the desired position for the next block in a file. The file is 1007 * logically divided into sections. The first section is composed of the 1008 * direct blocks. Each additional section contains fs_maxbpg blocks. 1009 * 1010 * If no blocks have been allocated in the first section, the policy is to 1011 * request a block in the same cylinder group as the inode that describes 1012 * the file. If no blocks have been allocated in any other section, the 1013 * policy is to place the section in a cylinder group with a greater than 1014 * average number of free blocks. An appropriate cylinder group is found 1015 * by using a rotor that sweeps the cylinder groups. When a new group of 1016 * blocks is needed, the sweep begins in the cylinder group following the 1017 * cylinder group from which the previous allocation was made. The sweep 1018 * continues until a cylinder group with greater than the average number 1019 * of free blocks is found. If the allocation is for the first block in an 1020 * indirect block, the information on the previous allocation is unavailable; 1021 * here a best guess is made based upon the logical block number being 1022 * allocated. 1023 * 1024 * If a section is already partially allocated, the policy is to 1025 * contiguously allocate fs_maxcontig blocks. The end of one of these 1026 * contiguous blocks and the beginning of the next is laid out 1027 * contiguously if possible. 1028 */ 1029 ufs2_daddr_t 1030 ffs_blkpref_ufs1(ip, lbn, indx, bap) 1031 struct inode *ip; 1032 ufs_lbn_t lbn; 1033 int indx; 1034 ufs1_daddr_t *bap; 1035 { 1036 struct fs *fs; 1037 int cg; 1038 int avgbfree, startcg; 1039 1040 fs = ip->i_fs; 1041 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 1042 if (lbn < NDADDR + NINDIR(fs)) { 1043 cg = ino_to_cg(fs, ip->i_number); 1044 return (fs->fs_fpg * cg + fs->fs_frag); 1045 } 1046 /* 1047 * Find a cylinder with greater than average number of 1048 * unused data blocks. 1049 */ 1050 if (indx == 0 || bap[indx - 1] == 0) 1051 startcg = 1052 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 1053 else 1054 startcg = dtog(fs, bap[indx - 1]) + 1; 1055 startcg %= fs->fs_ncg; 1056 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 1057 for (cg = startcg; cg < fs->fs_ncg; cg++) 1058 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1059 fs->fs_cgrotor = cg; 1060 return (fs->fs_fpg * cg + fs->fs_frag); 1061 } 1062 for (cg = 0; cg <= startcg; cg++) 1063 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1064 fs->fs_cgrotor = cg; 1065 return (fs->fs_fpg * cg + fs->fs_frag); 1066 } 1067 return (0); 1068 } 1069 /* 1070 * We just always try to lay things out contiguously. 1071 */ 1072 return (bap[indx - 1] + fs->fs_frag); 1073 } 1074 1075 /* 1076 * Same as above, but for UFS2 1077 */ 1078 ufs2_daddr_t 1079 ffs_blkpref_ufs2(ip, lbn, indx, bap) 1080 struct inode *ip; 1081 ufs_lbn_t lbn; 1082 int indx; 1083 ufs2_daddr_t *bap; 1084 { 1085 struct fs *fs; 1086 int cg; 1087 int avgbfree, startcg; 1088 1089 fs = ip->i_fs; 1090 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 1091 if (lbn < NDADDR + NINDIR(fs)) { 1092 cg = ino_to_cg(fs, ip->i_number); 1093 return (fs->fs_fpg * cg + fs->fs_frag); 1094 } 1095 /* 1096 * Find a cylinder with greater than average number of 1097 * unused data blocks. 1098 */ 1099 if (indx == 0 || bap[indx - 1] == 0) 1100 startcg = 1101 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 1102 else 1103 startcg = dtog(fs, bap[indx - 1]) + 1; 1104 startcg %= fs->fs_ncg; 1105 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 1106 for (cg = startcg; cg < fs->fs_ncg; cg++) 1107 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1108 fs->fs_cgrotor = cg; 1109 return (fs->fs_fpg * cg + fs->fs_frag); 1110 } 1111 for (cg = 0; cg <= startcg; cg++) 1112 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 1113 fs->fs_cgrotor = cg; 1114 return (fs->fs_fpg * cg + fs->fs_frag); 1115 } 1116 return (0); 1117 } 1118 /* 1119 * We just always try to lay things out contiguously. 1120 */ 1121 return (bap[indx - 1] + fs->fs_frag); 1122 } 1123 1124 /* 1125 * Implement the cylinder overflow algorithm. 1126 * 1127 * The policy implemented by this algorithm is: 1128 * 1) allocate the block in its requested cylinder group. 1129 * 2) quadradically rehash on the cylinder group number. 1130 * 3) brute force search for a free block. 1131 */ 1132 /*VARARGS5*/ 1133 static ufs2_daddr_t 1134 ffs_hashalloc(ip, cg, pref, size, allocator) 1135 struct inode *ip; 1136 int cg; 1137 ufs2_daddr_t pref; 1138 int size; /* size for data blocks, mode for inodes */ 1139 allocfcn_t *allocator; 1140 { 1141 struct fs *fs; 1142 ufs2_daddr_t result; 1143 int i, icg = cg; 1144 1145 #ifdef DIAGNOSTIC 1146 if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED) 1147 panic("ffs_hashalloc: allocation on suspended filesystem"); 1148 #endif 1149 fs = ip->i_fs; 1150 /* 1151 * 1: preferred cylinder group 1152 */ 1153 result = (*allocator)(ip, cg, pref, size); 1154 if (result) 1155 return (result); 1156 /* 1157 * 2: quadratic rehash 1158 */ 1159 for (i = 1; i < fs->fs_ncg; i *= 2) { 1160 cg += i; 1161 if (cg >= fs->fs_ncg) 1162 cg -= fs->fs_ncg; 1163 result = (*allocator)(ip, cg, 0, size); 1164 if (result) 1165 return (result); 1166 } 1167 /* 1168 * 3: brute force search 1169 * Note that we start at i == 2, since 0 was checked initially, 1170 * and 1 is always checked in the quadratic rehash. 1171 */ 1172 cg = (icg + 2) % fs->fs_ncg; 1173 for (i = 2; i < fs->fs_ncg; i++) { 1174 result = (*allocator)(ip, cg, 0, size); 1175 if (result) 1176 return (result); 1177 cg++; 1178 if (cg == fs->fs_ncg) 1179 cg = 0; 1180 } 1181 return (0); 1182 } 1183 1184 /* 1185 * Determine whether a fragment can be extended. 1186 * 1187 * Check to see if the necessary fragments are available, and 1188 * if they are, allocate them. 1189 */ 1190 static ufs2_daddr_t 1191 ffs_fragextend(ip, cg, bprev, osize, nsize) 1192 struct inode *ip; 1193 int cg; 1194 ufs2_daddr_t bprev; 1195 int osize, nsize; 1196 { 1197 struct fs *fs; 1198 struct cg *cgp; 1199 struct buf *bp; 1200 long bno; 1201 int frags, bbase; 1202 int i, error; 1203 u_int8_t *blksfree; 1204 1205 fs = ip->i_fs; 1206 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) 1207 return (0); 1208 frags = numfrags(fs, nsize); 1209 bbase = fragnum(fs, bprev); 1210 if (bbase > fragnum(fs, (bprev + frags - 1))) { 1211 /* cannot extend across a block boundary */ 1212 return (0); 1213 } 1214 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1215 (int)fs->fs_cgsize, NOCRED, &bp); 1216 if (error) { 1217 brelse(bp); 1218 return (0); 1219 } 1220 cgp = (struct cg *)bp->b_data; 1221 if (!cg_chkmagic(cgp)) { 1222 brelse(bp); 1223 return (0); 1224 } 1225 bp->b_xflags |= BX_BKGRDWRITE; 1226 cgp->cg_old_time = cgp->cg_time = time_second; 1227 bno = dtogd(fs, bprev); 1228 blksfree = cg_blksfree(cgp); 1229 for (i = numfrags(fs, osize); i < frags; i++) 1230 if (isclr(blksfree, bno + i)) { 1231 brelse(bp); 1232 return (0); 1233 } 1234 /* 1235 * the current fragment can be extended 1236 * deduct the count on fragment being extended into 1237 * increase the count on the remaining fragment (if any) 1238 * allocate the extended piece 1239 */ 1240 for (i = frags; i < fs->fs_frag - bbase; i++) 1241 if (isclr(blksfree, bno + i)) 1242 break; 1243 cgp->cg_frsum[i - numfrags(fs, osize)]--; 1244 if (i != frags) 1245 cgp->cg_frsum[i - frags]++; 1246 for (i = numfrags(fs, osize); i < frags; i++) { 1247 clrbit(blksfree, bno + i); 1248 cgp->cg_cs.cs_nffree--; 1249 fs->fs_cstotal.cs_nffree--; 1250 fs->fs_cs(fs, cg).cs_nffree--; 1251 } 1252 fs->fs_fmod = 1; 1253 if (DOINGSOFTDEP(ITOV(ip))) 1254 softdep_setup_blkmapdep(bp, fs, bprev); 1255 if (fs->fs_active != 0) 1256 atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 1257 bdwrite(bp); 1258 return (bprev); 1259 } 1260 1261 /* 1262 * Determine whether a block can be allocated. 1263 * 1264 * Check to see if a block of the appropriate size is available, 1265 * and if it is, allocate it. 1266 */ 1267 static ufs2_daddr_t 1268 ffs_alloccg(ip, cg, bpref, size) 1269 struct inode *ip; 1270 int cg; 1271 ufs2_daddr_t bpref; 1272 int size; 1273 { 1274 struct fs *fs; 1275 struct cg *cgp; 1276 struct buf *bp; 1277 ufs1_daddr_t bno; 1278 ufs2_daddr_t blkno; 1279 int i, allocsiz, error, frags; 1280 u_int8_t *blksfree; 1281 1282 fs = ip->i_fs; 1283 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) 1284 return (0); 1285 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1286 (int)fs->fs_cgsize, NOCRED, &bp); 1287 if (error) { 1288 brelse(bp); 1289 return (0); 1290 } 1291 cgp = (struct cg *)bp->b_data; 1292 if (!cg_chkmagic(cgp) || 1293 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { 1294 brelse(bp); 1295 return (0); 1296 } 1297 bp->b_xflags |= BX_BKGRDWRITE; 1298 cgp->cg_old_time = cgp->cg_time = time_second; 1299 if (size == fs->fs_bsize) { 1300 blkno = ffs_alloccgblk(ip, bp, bpref); 1301 if (fs->fs_active != 0) 1302 atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 1303 bdwrite(bp); 1304 return (blkno); 1305 } 1306 /* 1307 * check to see if any fragments are already available 1308 * allocsiz is the size which will be allocated, hacking 1309 * it down to a smaller size if necessary 1310 */ 1311 blksfree = cg_blksfree(cgp); 1312 frags = numfrags(fs, size); 1313 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) 1314 if (cgp->cg_frsum[allocsiz] != 0) 1315 break; 1316 if (allocsiz == fs->fs_frag) { 1317 /* 1318 * no fragments were available, so a block will be 1319 * allocated, and hacked up 1320 */ 1321 if (cgp->cg_cs.cs_nbfree == 0) { 1322 brelse(bp); 1323 return (0); 1324 } 1325 blkno = ffs_alloccgblk(ip, bp, bpref); 1326 bno = dtogd(fs, blkno); 1327 for (i = frags; i < fs->fs_frag; i++) 1328 setbit(blksfree, bno + i); 1329 i = fs->fs_frag - frags; 1330 cgp->cg_cs.cs_nffree += i; 1331 fs->fs_cstotal.cs_nffree += i; 1332 fs->fs_cs(fs, cg).cs_nffree += i; 1333 fs->fs_fmod = 1; 1334 cgp->cg_frsum[i]++; 1335 if (fs->fs_active != 0) 1336 atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 1337 bdwrite(bp); 1338 return (blkno); 1339 } 1340 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); 1341 if (bno < 0) { 1342 brelse(bp); 1343 return (0); 1344 } 1345 for (i = 0; i < frags; i++) 1346 clrbit(blksfree, bno + i); 1347 cgp->cg_cs.cs_nffree -= frags; 1348 fs->fs_cstotal.cs_nffree -= frags; 1349 fs->fs_cs(fs, cg).cs_nffree -= frags; 1350 fs->fs_fmod = 1; 1351 cgp->cg_frsum[allocsiz]--; 1352 if (frags != allocsiz) 1353 cgp->cg_frsum[allocsiz - frags]++; 1354 blkno = cg * fs->fs_fpg + bno; 1355 if (DOINGSOFTDEP(ITOV(ip))) 1356 softdep_setup_blkmapdep(bp, fs, blkno); 1357 if (fs->fs_active != 0) 1358 atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 1359 bdwrite(bp); 1360 return (blkno); 1361 } 1362 1363 /* 1364 * Allocate a block in a cylinder group. 1365 * 1366 * This algorithm implements the following policy: 1367 * 1) allocate the requested block. 1368 * 2) allocate a rotationally optimal block in the same cylinder. 1369 * 3) allocate the next available block on the block rotor for the 1370 * specified cylinder group. 1371 * Note that this routine only allocates fs_bsize blocks; these 1372 * blocks may be fragmented by the routine that allocates them. 1373 */ 1374 static ufs2_daddr_t 1375 ffs_alloccgblk(ip, bp, bpref) 1376 struct inode *ip; 1377 struct buf *bp; 1378 ufs2_daddr_t bpref; 1379 { 1380 struct fs *fs; 1381 struct cg *cgp; 1382 ufs1_daddr_t bno; 1383 ufs2_daddr_t blkno; 1384 u_int8_t *blksfree; 1385 1386 fs = ip->i_fs; 1387 cgp = (struct cg *)bp->b_data; 1388 blksfree = cg_blksfree(cgp); 1389 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { 1390 bpref = cgp->cg_rotor; 1391 } else { 1392 bpref = blknum(fs, bpref); 1393 bno = dtogd(fs, bpref); 1394 /* 1395 * if the requested block is available, use it 1396 */ 1397 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) 1398 goto gotit; 1399 } 1400 /* 1401 * Take the next available block in this cylinder group. 1402 */ 1403 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); 1404 if (bno < 0) 1405 return (0); 1406 cgp->cg_rotor = bno; 1407 gotit: 1408 blkno = fragstoblks(fs, bno); 1409 ffs_clrblock(fs, blksfree, (long)blkno); 1410 ffs_clusteracct(fs, cgp, blkno, -1); 1411 cgp->cg_cs.cs_nbfree--; 1412 fs->fs_cstotal.cs_nbfree--; 1413 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; 1414 fs->fs_fmod = 1; 1415 blkno = cgp->cg_cgx * fs->fs_fpg + bno; 1416 if (DOINGSOFTDEP(ITOV(ip))) 1417 softdep_setup_blkmapdep(bp, fs, blkno); 1418 return (blkno); 1419 } 1420 1421 /* 1422 * Determine whether a cluster can be allocated. 1423 * 1424 * We do not currently check for optimal rotational layout if there 1425 * are multiple choices in the same cylinder group. Instead we just 1426 * take the first one that we find following bpref. 1427 */ 1428 static ufs2_daddr_t 1429 ffs_clusteralloc(ip, cg, bpref, len) 1430 struct inode *ip; 1431 int cg; 1432 ufs2_daddr_t bpref; 1433 int len; 1434 { 1435 struct fs *fs; 1436 struct cg *cgp; 1437 struct buf *bp; 1438 int i, run, bit, map, got; 1439 ufs2_daddr_t bno; 1440 u_char *mapp; 1441 int32_t *lp; 1442 u_int8_t *blksfree; 1443 1444 fs = ip->i_fs; 1445 if (fs->fs_maxcluster[cg] < len) 1446 return (0); 1447 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, 1448 NOCRED, &bp)) 1449 goto fail; 1450 cgp = (struct cg *)bp->b_data; 1451 if (!cg_chkmagic(cgp)) 1452 goto fail; 1453 bp->b_xflags |= BX_BKGRDWRITE; 1454 /* 1455 * Check to see if a cluster of the needed size (or bigger) is 1456 * available in this cylinder group. 1457 */ 1458 lp = &cg_clustersum(cgp)[len]; 1459 for (i = len; i <= fs->fs_contigsumsize; i++) 1460 if (*lp++ > 0) 1461 break; 1462 if (i > fs->fs_contigsumsize) { 1463 /* 1464 * This is the first time looking for a cluster in this 1465 * cylinder group. Update the cluster summary information 1466 * to reflect the true maximum sized cluster so that 1467 * future cluster allocation requests can avoid reading 1468 * the cylinder group map only to find no clusters. 1469 */ 1470 lp = &cg_clustersum(cgp)[len - 1]; 1471 for (i = len - 1; i > 0; i--) 1472 if (*lp-- > 0) 1473 break; 1474 fs->fs_maxcluster[cg] = i; 1475 goto fail; 1476 } 1477 /* 1478 * Search the cluster map to find a big enough cluster. 1479 * We take the first one that we find, even if it is larger 1480 * than we need as we prefer to get one close to the previous 1481 * block allocation. We do not search before the current 1482 * preference point as we do not want to allocate a block 1483 * that is allocated before the previous one (as we will 1484 * then have to wait for another pass of the elevator 1485 * algorithm before it will be read). We prefer to fail and 1486 * be recalled to try an allocation in the next cylinder group. 1487 */ 1488 if (dtog(fs, bpref) != cg) 1489 bpref = 0; 1490 else 1491 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); 1492 mapp = &cg_clustersfree(cgp)[bpref / NBBY]; 1493 map = *mapp++; 1494 bit = 1 << (bpref % NBBY); 1495 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { 1496 if ((map & bit) == 0) { 1497 run = 0; 1498 } else { 1499 run++; 1500 if (run == len) 1501 break; 1502 } 1503 if ((got & (NBBY - 1)) != (NBBY - 1)) { 1504 bit <<= 1; 1505 } else { 1506 map = *mapp++; 1507 bit = 1; 1508 } 1509 } 1510 if (got >= cgp->cg_nclusterblks) 1511 goto fail; 1512 /* 1513 * Allocate the cluster that we have found. 1514 */ 1515 blksfree = cg_blksfree(cgp); 1516 for (i = 1; i <= len; i++) 1517 if (!ffs_isblock(fs, blksfree, got - run + i)) 1518 panic("ffs_clusteralloc: map mismatch"); 1519 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); 1520 if (dtog(fs, bno) != cg) 1521 panic("ffs_clusteralloc: allocated out of group"); 1522 len = blkstofrags(fs, len); 1523 for (i = 0; i < len; i += fs->fs_frag) 1524 if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) 1525 panic("ffs_clusteralloc: lost block"); 1526 if (fs->fs_active != 0) 1527 atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 1528 bdwrite(bp); 1529 return (bno); 1530 1531 fail: 1532 brelse(bp); 1533 return (0); 1534 } 1535 1536 /* 1537 * Determine whether an inode can be allocated. 1538 * 1539 * Check to see if an inode is available, and if it is, 1540 * allocate it using the following policy: 1541 * 1) allocate the requested inode. 1542 * 2) allocate the next available inode after the requested 1543 * inode in the specified cylinder group. 1544 */ 1545 static ufs2_daddr_t 1546 ffs_nodealloccg(ip, cg, ipref, mode) 1547 struct inode *ip; 1548 int cg; 1549 ufs2_daddr_t ipref; 1550 int mode; 1551 { 1552 struct fs *fs; 1553 struct cg *cgp; 1554 struct buf *bp, *ibp; 1555 u_int8_t *inosused; 1556 struct ufs2_dinode *dp2; 1557 int error, start, len, loc, map, i; 1558 1559 fs = ip->i_fs; 1560 if (fs->fs_cs(fs, cg).cs_nifree == 0) 1561 return (0); 1562 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1563 (int)fs->fs_cgsize, NOCRED, &bp); 1564 if (error) { 1565 brelse(bp); 1566 return (0); 1567 } 1568 cgp = (struct cg *)bp->b_data; 1569 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { 1570 brelse(bp); 1571 return (0); 1572 } 1573 bp->b_xflags |= BX_BKGRDWRITE; 1574 cgp->cg_old_time = cgp->cg_time = time_second; 1575 inosused = cg_inosused(cgp); 1576 if (ipref) { 1577 ipref %= fs->fs_ipg; 1578 if (isclr(inosused, ipref)) 1579 goto gotit; 1580 } 1581 start = cgp->cg_irotor / NBBY; 1582 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); 1583 loc = skpc(0xff, len, &inosused[start]); 1584 if (loc == 0) { 1585 len = start + 1; 1586 start = 0; 1587 loc = skpc(0xff, len, &inosused[0]); 1588 if (loc == 0) { 1589 printf("cg = %d, irotor = %ld, fs = %s\n", 1590 cg, (long)cgp->cg_irotor, fs->fs_fsmnt); 1591 panic("ffs_nodealloccg: map corrupted"); 1592 /* NOTREACHED */ 1593 } 1594 } 1595 i = start + len - loc; 1596 map = inosused[i]; 1597 ipref = i * NBBY; 1598 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 1599 if ((map & i) == 0) { 1600 cgp->cg_irotor = ipref; 1601 goto gotit; 1602 } 1603 } 1604 printf("fs = %s\n", fs->fs_fsmnt); 1605 panic("ffs_nodealloccg: block not in map"); 1606 /* NOTREACHED */ 1607 gotit: 1608 if (DOINGSOFTDEP(ITOV(ip))) 1609 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); 1610 setbit(inosused, ipref); 1611 cgp->cg_cs.cs_nifree--; 1612 fs->fs_cstotal.cs_nifree--; 1613 fs->fs_cs(fs, cg).cs_nifree--; 1614 fs->fs_fmod = 1; 1615 if ((mode & IFMT) == IFDIR) { 1616 cgp->cg_cs.cs_ndir++; 1617 fs->fs_cstotal.cs_ndir++; 1618 fs->fs_cs(fs, cg).cs_ndir++; 1619 } 1620 /* 1621 * Check to see if we need to initialize more inodes. 1622 */ 1623 if (fs->fs_magic == FS_UFS2_MAGIC && 1624 ipref + INOPB(fs) > cgp->cg_initediblk && 1625 cgp->cg_initediblk < cgp->cg_niblk) { 1626 ibp = getblk(ip->i_devvp, fsbtodb(fs, 1627 ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)), 1628 (int)fs->fs_bsize, 0, 0, 0); 1629 bzero(ibp->b_data, (int)fs->fs_bsize); 1630 dp2 = (struct ufs2_dinode *)(ibp->b_data); 1631 for (i = 0; i < INOPB(fs); i++) { 1632 dp2->di_gen = arc4random() / 2 + 1; 1633 dp2++; 1634 } 1635 bawrite(ibp); 1636 cgp->cg_initediblk += INOPB(fs); 1637 } 1638 if (fs->fs_active != 0) 1639 atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 1640 bdwrite(bp); 1641 return (cg * fs->fs_ipg + ipref); 1642 } 1643 1644 /* 1645 * check if a block is free 1646 */ 1647 static int 1648 ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) 1649 { 1650 1651 switch ((int)fs->fs_frag) { 1652 case 8: 1653 return (cp[h] == 0); 1654 case 4: 1655 return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); 1656 case 2: 1657 return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); 1658 case 1: 1659 return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); 1660 default: 1661 panic("ffs_isfreeblock"); 1662 } 1663 return (0); 1664 } 1665 1666 /* 1667 * Free a block or fragment. 1668 * 1669 * The specified block or fragment is placed back in the 1670 * free map. If a fragment is deallocated, a possible 1671 * block reassembly is checked. 1672 */ 1673 void 1674 ffs_blkfree(fs, devvp, bno, size, inum) 1675 struct fs *fs; 1676 struct vnode *devvp; 1677 ufs2_daddr_t bno; 1678 long size; 1679 ino_t inum; 1680 { 1681 struct cg *cgp; 1682 struct buf *bp; 1683 ufs1_daddr_t fragno, cgbno; 1684 ufs2_daddr_t cgblkno; 1685 int i, error, cg, blk, frags, bbase; 1686 u_int8_t *blksfree; 1687 dev_t dev; 1688 1689 cg = dtog(fs, bno); 1690 if (devvp->v_type != VCHR) { 1691 /* devvp is a snapshot */ 1692 dev = VTOI(devvp)->i_devvp->v_rdev; 1693 cgblkno = fragstoblks(fs, cgtod(fs, cg)); 1694 } else { 1695 /* devvp is a normal disk device */ 1696 dev = devvp->v_rdev; 1697 cgblkno = fsbtodb(fs, cgtod(fs, cg)); 1698 ASSERT_VOP_LOCKED(devvp, "ffs_blkfree"); 1699 if ((devvp->v_vflag & VV_COPYONWRITE) && 1700 ffs_snapblkfree(fs, devvp, bno, size, inum)) 1701 return; 1702 VOP_FREEBLKS(devvp, fsbtodb(fs, bno), size); 1703 } 1704 #ifdef DIAGNOSTIC 1705 if (dev->si_mountpoint && 1706 (dev->si_mountpoint->mnt_kern_flag & MNTK_SUSPENDED)) 1707 panic("ffs_blkfree: deallocation on suspended filesystem"); 1708 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || 1709 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 1710 printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n", 1711 devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, 1712 size, fs->fs_fsmnt); 1713 panic("ffs_blkfree: bad size"); 1714 } 1715 #endif 1716 if ((u_int)bno >= fs->fs_size) { 1717 printf("bad block %jd, ino %lu\n", (intmax_t)bno, 1718 (u_long)inum); 1719 ffs_fserr(fs, inum, "bad block"); 1720 return; 1721 } 1722 if ((error = bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp))) { 1723 brelse(bp); 1724 return; 1725 } 1726 cgp = (struct cg *)bp->b_data; 1727 if (!cg_chkmagic(cgp)) { 1728 brelse(bp); 1729 return; 1730 } 1731 bp->b_xflags |= BX_BKGRDWRITE; 1732 cgp->cg_old_time = cgp->cg_time = time_second; 1733 cgbno = dtogd(fs, bno); 1734 blksfree = cg_blksfree(cgp); 1735 if (size == fs->fs_bsize) { 1736 fragno = fragstoblks(fs, cgbno); 1737 if (!ffs_isfreeblock(fs, blksfree, fragno)) { 1738 if (devvp->v_type != VCHR) { 1739 /* devvp is a snapshot */ 1740 brelse(bp); 1741 return; 1742 } 1743 printf("dev = %s, block = %jd, fs = %s\n", 1744 devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); 1745 panic("ffs_blkfree: freeing free block"); 1746 } 1747 ffs_setblock(fs, blksfree, fragno); 1748 ffs_clusteracct(fs, cgp, fragno, 1); 1749 cgp->cg_cs.cs_nbfree++; 1750 fs->fs_cstotal.cs_nbfree++; 1751 fs->fs_cs(fs, cg).cs_nbfree++; 1752 } else { 1753 bbase = cgbno - fragnum(fs, cgbno); 1754 /* 1755 * decrement the counts associated with the old frags 1756 */ 1757 blk = blkmap(fs, blksfree, bbase); 1758 ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 1759 /* 1760 * deallocate the fragment 1761 */ 1762 frags = numfrags(fs, size); 1763 for (i = 0; i < frags; i++) { 1764 if (isset(blksfree, cgbno + i)) { 1765 printf("dev = %s, block = %jd, fs = %s\n", 1766 devtoname(dev), (intmax_t)(bno + i), 1767 fs->fs_fsmnt); 1768 panic("ffs_blkfree: freeing free frag"); 1769 } 1770 setbit(blksfree, cgbno + i); 1771 } 1772 cgp->cg_cs.cs_nffree += i; 1773 fs->fs_cstotal.cs_nffree += i; 1774 fs->fs_cs(fs, cg).cs_nffree += i; 1775 /* 1776 * add back in counts associated with the new frags 1777 */ 1778 blk = blkmap(fs, blksfree, bbase); 1779 ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1780 /* 1781 * if a complete block has been reassembled, account for it 1782 */ 1783 fragno = fragstoblks(fs, bbase); 1784 if (ffs_isblock(fs, blksfree, fragno)) { 1785 cgp->cg_cs.cs_nffree -= fs->fs_frag; 1786 fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1787 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 1788 ffs_clusteracct(fs, cgp, fragno, 1); 1789 cgp->cg_cs.cs_nbfree++; 1790 fs->fs_cstotal.cs_nbfree++; 1791 fs->fs_cs(fs, cg).cs_nbfree++; 1792 } 1793 } 1794 fs->fs_fmod = 1; 1795 if (fs->fs_active != 0) 1796 atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 1797 bdwrite(bp); 1798 } 1799 1800 #ifdef DIAGNOSTIC 1801 /* 1802 * Verify allocation of a block or fragment. Returns true if block or 1803 * fragment is allocated, false if it is free. 1804 */ 1805 static int 1806 ffs_checkblk(ip, bno, size) 1807 struct inode *ip; 1808 ufs2_daddr_t bno; 1809 long size; 1810 { 1811 struct fs *fs; 1812 struct cg *cgp; 1813 struct buf *bp; 1814 ufs1_daddr_t cgbno; 1815 int i, error, frags, free; 1816 u_int8_t *blksfree; 1817 1818 fs = ip->i_fs; 1819 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 1820 printf("bsize = %ld, size = %ld, fs = %s\n", 1821 (long)fs->fs_bsize, size, fs->fs_fsmnt); 1822 panic("ffs_checkblk: bad size"); 1823 } 1824 if ((u_int)bno >= fs->fs_size) 1825 panic("ffs_checkblk: bad block %jd", (intmax_t)bno); 1826 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), 1827 (int)fs->fs_cgsize, NOCRED, &bp); 1828 if (error) 1829 panic("ffs_checkblk: cg bread failed"); 1830 cgp = (struct cg *)bp->b_data; 1831 if (!cg_chkmagic(cgp)) 1832 panic("ffs_checkblk: cg magic mismatch"); 1833 bp->b_xflags |= BX_BKGRDWRITE; 1834 blksfree = cg_blksfree(cgp); 1835 cgbno = dtogd(fs, bno); 1836 if (size == fs->fs_bsize) { 1837 free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno)); 1838 } else { 1839 frags = numfrags(fs, size); 1840 for (free = 0, i = 0; i < frags; i++) 1841 if (isset(blksfree, cgbno + i)) 1842 free++; 1843 if (free != 0 && free != frags) 1844 panic("ffs_checkblk: partially free fragment"); 1845 } 1846 brelse(bp); 1847 return (!free); 1848 } 1849 #endif /* DIAGNOSTIC */ 1850 1851 /* 1852 * Free an inode. 1853 */ 1854 int 1855 ffs_vfree(pvp, ino, mode) 1856 struct vnode *pvp; 1857 ino_t ino; 1858 int mode; 1859 { 1860 if (DOINGSOFTDEP(pvp)) { 1861 softdep_freefile(pvp, ino, mode); 1862 return (0); 1863 } 1864 return (ffs_freefile(VTOI(pvp)->i_fs, VTOI(pvp)->i_devvp, ino, mode)); 1865 } 1866 1867 /* 1868 * Do the actual free operation. 1869 * The specified inode is placed back in the free map. 1870 */ 1871 int 1872 ffs_freefile(fs, devvp, ino, mode) 1873 struct fs *fs; 1874 struct vnode *devvp; 1875 ino_t ino; 1876 int mode; 1877 { 1878 struct cg *cgp; 1879 struct buf *bp; 1880 ufs2_daddr_t cgbno; 1881 int error, cg; 1882 u_int8_t *inosused; 1883 dev_t dev; 1884 1885 cg = ino_to_cg(fs, ino); 1886 if (devvp->v_type != VCHR) { 1887 /* devvp is a snapshot */ 1888 dev = VTOI(devvp)->i_devvp->v_rdev; 1889 cgbno = fragstoblks(fs, cgtod(fs, cg)); 1890 } else { 1891 /* devvp is a normal disk device */ 1892 dev = devvp->v_rdev; 1893 cgbno = fsbtodb(fs, cgtod(fs, cg)); 1894 } 1895 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 1896 panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s", 1897 devtoname(dev), (u_long)ino, fs->fs_fsmnt); 1898 if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { 1899 brelse(bp); 1900 return (error); 1901 } 1902 cgp = (struct cg *)bp->b_data; 1903 if (!cg_chkmagic(cgp)) { 1904 brelse(bp); 1905 return (0); 1906 } 1907 bp->b_xflags |= BX_BKGRDWRITE; 1908 cgp->cg_old_time = cgp->cg_time = time_second; 1909 inosused = cg_inosused(cgp); 1910 ino %= fs->fs_ipg; 1911 if (isclr(inosused, ino)) { 1912 printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev), 1913 (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt); 1914 if (fs->fs_ronly == 0) 1915 panic("ffs_freefile: freeing free inode"); 1916 } 1917 clrbit(inosused, ino); 1918 if (ino < cgp->cg_irotor) 1919 cgp->cg_irotor = ino; 1920 cgp->cg_cs.cs_nifree++; 1921 fs->fs_cstotal.cs_nifree++; 1922 fs->fs_cs(fs, cg).cs_nifree++; 1923 if ((mode & IFMT) == IFDIR) { 1924 cgp->cg_cs.cs_ndir--; 1925 fs->fs_cstotal.cs_ndir--; 1926 fs->fs_cs(fs, cg).cs_ndir--; 1927 } 1928 fs->fs_fmod = 1; 1929 if (fs->fs_active != 0) 1930 atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); 1931 bdwrite(bp); 1932 return (0); 1933 } 1934 1935 /* 1936 * Check to see if a file is free. 1937 */ 1938 int 1939 ffs_checkfreefile(fs, devvp, ino) 1940 struct fs *fs; 1941 struct vnode *devvp; 1942 ino_t ino; 1943 { 1944 struct cg *cgp; 1945 struct buf *bp; 1946 ufs2_daddr_t cgbno; 1947 int error, ret, cg; 1948 u_int8_t *inosused; 1949 1950 cg = ino_to_cg(fs, ino); 1951 if (devvp->v_type != VCHR) { 1952 /* devvp is a snapshot */ 1953 cgbno = fragstoblks(fs, cgtod(fs, cg)); 1954 } else { 1955 /* devvp is a normal disk device */ 1956 cgbno = fsbtodb(fs, cgtod(fs, cg)); 1957 } 1958 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 1959 return (1); 1960 if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { 1961 brelse(bp); 1962 return (1); 1963 } 1964 cgp = (struct cg *)bp->b_data; 1965 if (!cg_chkmagic(cgp)) { 1966 brelse(bp); 1967 return (1); 1968 } 1969 inosused = cg_inosused(cgp); 1970 ino %= fs->fs_ipg; 1971 ret = isclr(inosused, ino); 1972 brelse(bp); 1973 return (ret); 1974 } 1975 1976 /* 1977 * Find a block of the specified size in the specified cylinder group. 1978 * 1979 * It is a panic if a request is made to find a block if none are 1980 * available. 1981 */ 1982 static ufs1_daddr_t 1983 ffs_mapsearch(fs, cgp, bpref, allocsiz) 1984 struct fs *fs; 1985 struct cg *cgp; 1986 ufs2_daddr_t bpref; 1987 int allocsiz; 1988 { 1989 ufs1_daddr_t bno; 1990 int start, len, loc, i; 1991 int blk, field, subfield, pos; 1992 u_int8_t *blksfree; 1993 1994 /* 1995 * find the fragment by searching through the free block 1996 * map for an appropriate bit pattern 1997 */ 1998 if (bpref) 1999 start = dtogd(fs, bpref) / NBBY; 2000 else 2001 start = cgp->cg_frotor / NBBY; 2002 blksfree = cg_blksfree(cgp); 2003 len = howmany(fs->fs_fpg, NBBY) - start; 2004 loc = scanc((u_int)len, (u_char *)&blksfree[start], 2005 (u_char *)fragtbl[fs->fs_frag], 2006 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 2007 if (loc == 0) { 2008 len = start + 1; 2009 start = 0; 2010 loc = scanc((u_int)len, (u_char *)&blksfree[0], 2011 (u_char *)fragtbl[fs->fs_frag], 2012 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 2013 if (loc == 0) { 2014 printf("start = %d, len = %d, fs = %s\n", 2015 start, len, fs->fs_fsmnt); 2016 panic("ffs_alloccg: map corrupted"); 2017 /* NOTREACHED */ 2018 } 2019 } 2020 bno = (start + len - loc) * NBBY; 2021 cgp->cg_frotor = bno; 2022 /* 2023 * found the byte in the map 2024 * sift through the bits to find the selected frag 2025 */ 2026 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 2027 blk = blkmap(fs, blksfree, bno); 2028 blk <<= 1; 2029 field = around[allocsiz]; 2030 subfield = inside[allocsiz]; 2031 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 2032 if ((blk & field) == subfield) 2033 return (bno + pos); 2034 field <<= 1; 2035 subfield <<= 1; 2036 } 2037 } 2038 printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); 2039 panic("ffs_alloccg: block not in map"); 2040 return (-1); 2041 } 2042 2043 /* 2044 * Update the cluster map because of an allocation or free. 2045 * 2046 * Cnt == 1 means free; cnt == -1 means allocating. 2047 */ 2048 void 2049 ffs_clusteracct(fs, cgp, blkno, cnt) 2050 struct fs *fs; 2051 struct cg *cgp; 2052 ufs1_daddr_t blkno; 2053 int cnt; 2054 { 2055 int32_t *sump; 2056 int32_t *lp; 2057 u_char *freemapp, *mapp; 2058 int i, start, end, forw, back, map, bit; 2059 2060 if (fs->fs_contigsumsize <= 0) 2061 return; 2062 freemapp = cg_clustersfree(cgp); 2063 sump = cg_clustersum(cgp); 2064 /* 2065 * Allocate or clear the actual block. 2066 */ 2067 if (cnt > 0) 2068 setbit(freemapp, blkno); 2069 else 2070 clrbit(freemapp, blkno); 2071 /* 2072 * Find the size of the cluster going forward. 2073 */ 2074 start = blkno + 1; 2075 end = start + fs->fs_contigsumsize; 2076 if (end >= cgp->cg_nclusterblks) 2077 end = cgp->cg_nclusterblks; 2078 mapp = &freemapp[start / NBBY]; 2079 map = *mapp++; 2080 bit = 1 << (start % NBBY); 2081 for (i = start; i < end; i++) { 2082 if ((map & bit) == 0) 2083 break; 2084 if ((i & (NBBY - 1)) != (NBBY - 1)) { 2085 bit <<= 1; 2086 } else { 2087 map = *mapp++; 2088 bit = 1; 2089 } 2090 } 2091 forw = i - start; 2092 /* 2093 * Find the size of the cluster going backward. 2094 */ 2095 start = blkno - 1; 2096 end = start - fs->fs_contigsumsize; 2097 if (end < 0) 2098 end = -1; 2099 mapp = &freemapp[start / NBBY]; 2100 map = *mapp--; 2101 bit = 1 << (start % NBBY); 2102 for (i = start; i > end; i--) { 2103 if ((map & bit) == 0) 2104 break; 2105 if ((i & (NBBY - 1)) != 0) { 2106 bit >>= 1; 2107 } else { 2108 map = *mapp--; 2109 bit = 1 << (NBBY - 1); 2110 } 2111 } 2112 back = start - i; 2113 /* 2114 * Account for old cluster and the possibly new forward and 2115 * back clusters. 2116 */ 2117 i = back + forw + 1; 2118 if (i > fs->fs_contigsumsize) 2119 i = fs->fs_contigsumsize; 2120 sump[i] += cnt; 2121 if (back > 0) 2122 sump[back] -= cnt; 2123 if (forw > 0) 2124 sump[forw] -= cnt; 2125 /* 2126 * Update cluster summary information. 2127 */ 2128 lp = &sump[fs->fs_contigsumsize]; 2129 for (i = fs->fs_contigsumsize; i > 0; i--) 2130 if (*lp-- > 0) 2131 break; 2132 fs->fs_maxcluster[cgp->cg_cgx] = i; 2133 } 2134 2135 /* 2136 * Fserr prints the name of a filesystem with an error diagnostic. 2137 * 2138 * The form of the error message is: 2139 * fs: error message 2140 */ 2141 static void 2142 ffs_fserr(fs, inum, cp) 2143 struct fs *fs; 2144 ino_t inum; 2145 char *cp; 2146 { 2147 struct thread *td = curthread; /* XXX */ 2148 struct proc *p = td->td_proc; 2149 2150 log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n", 2151 p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp); 2152 } 2153 2154 /* 2155 * This function provides the capability for the fsck program to 2156 * update an active filesystem. Six operations are provided: 2157 * 2158 * adjrefcnt(inode, amt) - adjusts the reference count on the 2159 * specified inode by the specified amount. Under normal 2160 * operation the count should always go down. Decrementing 2161 * the count to zero will cause the inode to be freed. 2162 * adjblkcnt(inode, amt) - adjust the number of blocks used to 2163 * by the specifed amount. 2164 * freedirs(inode, count) - directory inodes [inode..inode + count - 1] 2165 * are marked as free. Inodes should never have to be marked 2166 * as in use. 2167 * freefiles(inode, count) - file inodes [inode..inode + count - 1] 2168 * are marked as free. Inodes should never have to be marked 2169 * as in use. 2170 * freeblks(blockno, size) - blocks [blockno..blockno + size - 1] 2171 * are marked as free. Blocks should never have to be marked 2172 * as in use. 2173 * setflags(flags, set/clear) - the fs_flags field has the specified 2174 * flags set (second parameter +1) or cleared (second parameter -1). 2175 */ 2176 2177 static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS); 2178 2179 SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT, 2180 0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count"); 2181 2182 SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR, 2183 sysctl_ffs_fsck, "Adjust Inode Used Blocks Count"); 2184 2185 SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR, 2186 sysctl_ffs_fsck, "Free Range of Directory Inodes"); 2187 2188 SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR, 2189 sysctl_ffs_fsck, "Free Range of File Inodes"); 2190 2191 SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR, 2192 sysctl_ffs_fsck, "Free Range of Blocks"); 2193 2194 SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR, 2195 sysctl_ffs_fsck, "Change Filesystem Flags"); 2196 2197 #ifdef DEBUG 2198 static int fsckcmds = 0; 2199 SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, ""); 2200 #endif /* DEBUG */ 2201 2202 static int 2203 sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) 2204 { 2205 struct fsck_cmd cmd; 2206 struct ufsmount *ump; 2207 struct vnode *vp; 2208 struct inode *ip; 2209 struct mount *mp; 2210 struct fs *fs; 2211 ufs2_daddr_t blkno; 2212 long blkcnt, blksize; 2213 struct file *fp; 2214 int filetype, error; 2215 2216 if (req->newlen > sizeof cmd) 2217 return (EBADRPC); 2218 if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0) 2219 return (error); 2220 if (cmd.version != FFS_CMD_VERSION) 2221 return (ERPCMISMATCH); 2222 if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0) 2223 return (error); 2224 vn_start_write(fp->f_data, &mp, V_WAIT); 2225 if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) { 2226 vn_finished_write(mp); 2227 fdrop(fp, curthread); 2228 return (EINVAL); 2229 } 2230 if (mp->mnt_flag & MNT_RDONLY) { 2231 vn_finished_write(mp); 2232 fdrop(fp, curthread); 2233 return (EROFS); 2234 } 2235 ump = VFSTOUFS(mp); 2236 fs = ump->um_fs; 2237 filetype = IFREG; 2238 2239 switch (oidp->oid_number) { 2240 2241 case FFS_SET_FLAGS: 2242 #ifdef DEBUG 2243 if (fsckcmds) 2244 printf("%s: %s flags\n", mp->mnt_stat.f_mntonname, 2245 cmd.size > 0 ? "set" : "clear"); 2246 #endif /* DEBUG */ 2247 if (cmd.size > 0) 2248 fs->fs_flags |= (long)cmd.value; 2249 else 2250 fs->fs_flags &= ~(long)cmd.value; 2251 break; 2252 2253 case FFS_ADJ_REFCNT: 2254 #ifdef DEBUG 2255 if (fsckcmds) { 2256 printf("%s: adjust inode %jd count by %jd\n", 2257 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2258 (intmax_t)cmd.size); 2259 } 2260 #endif /* DEBUG */ 2261 if ((error = VFS_VGET(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2262 break; 2263 ip = VTOI(vp); 2264 ip->i_nlink += cmd.size; 2265 DIP(ip, i_nlink) = ip->i_nlink; 2266 ip->i_effnlink += cmd.size; 2267 ip->i_flag |= IN_CHANGE; 2268 if (DOINGSOFTDEP(vp)) 2269 softdep_change_linkcnt(ip); 2270 vput(vp); 2271 break; 2272 2273 case FFS_ADJ_BLKCNT: 2274 #ifdef DEBUG 2275 if (fsckcmds) { 2276 printf("%s: adjust inode %jd block count by %jd\n", 2277 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, 2278 (intmax_t)cmd.size); 2279 } 2280 #endif /* DEBUG */ 2281 if ((error = VFS_VGET(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) 2282 break; 2283 ip = VTOI(vp); 2284 DIP(ip, i_blocks) += cmd.size; 2285 ip->i_flag |= IN_CHANGE; 2286 vput(vp); 2287 break; 2288 2289 case FFS_DIR_FREE: 2290 filetype = IFDIR; 2291 /* fall through */ 2292 2293 case FFS_FILE_FREE: 2294 #ifdef DEBUG 2295 if (fsckcmds) { 2296 if (cmd.size == 1) 2297 printf("%s: free %s inode %d\n", 2298 mp->mnt_stat.f_mntonname, 2299 filetype == IFDIR ? "directory" : "file", 2300 (ino_t)cmd.value); 2301 else 2302 printf("%s: free %s inodes %d-%d\n", 2303 mp->mnt_stat.f_mntonname, 2304 filetype == IFDIR ? "directory" : "file", 2305 (ino_t)cmd.value, 2306 (ino_t)(cmd.value + cmd.size - 1)); 2307 } 2308 #endif /* DEBUG */ 2309 while (cmd.size > 0) { 2310 if ((error = ffs_freefile(fs, ump->um_devvp, cmd.value, 2311 filetype))) 2312 break; 2313 cmd.size -= 1; 2314 cmd.value += 1; 2315 } 2316 break; 2317 2318 case FFS_BLK_FREE: 2319 #ifdef DEBUG 2320 if (fsckcmds) { 2321 if (cmd.size == 1) 2322 printf("%s: free block %jd\n", 2323 mp->mnt_stat.f_mntonname, 2324 (intmax_t)cmd.value); 2325 else 2326 printf("%s: free blocks %jd-%jd\n", 2327 mp->mnt_stat.f_mntonname, 2328 (intmax_t)cmd.value, 2329 (intmax_t)cmd.value + cmd.size - 1); 2330 } 2331 #endif /* DEBUG */ 2332 blkno = cmd.value; 2333 blkcnt = cmd.size; 2334 blksize = fs->fs_frag - (blkno % fs->fs_frag); 2335 while (blkcnt > 0) { 2336 if (blksize > blkcnt) 2337 blksize = blkcnt; 2338 ffs_blkfree(fs, ump->um_devvp, blkno, 2339 blksize * fs->fs_fsize, ROOTINO); 2340 blkno += blksize; 2341 blkcnt -= blksize; 2342 blksize = fs->fs_frag; 2343 } 2344 break; 2345 2346 default: 2347 #ifdef DEBUG 2348 if (fsckcmds) { 2349 printf("Invalid request %d from fsck\n", 2350 oidp->oid_number); 2351 } 2352 #endif /* DEBUG */ 2353 error = EINVAL; 2354 break; 2355 2356 } 2357 fdrop(fp, curthread); 2358 vn_finished_write(mp); 2359 return (error); 2360 } 2361