/*-
 * modified for Lites 1.1
 *
 * Aug 1995, Godmar Back (gback@cs.utah.edu)
 * University of Utah, Department of Computer Science
 */
/*-
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/buf.h>

#include <fs/ext2fs/inode.h>
#include <fs/ext2fs/ext2_mount.h>
#include <fs/ext2fs/ext2fs.h>
#include <fs/ext2fs/fs.h>
#include <fs/ext2fs/ext2_extern.h>

static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
static daddr_t	ext2_clusteralloc(struct inode *, int, daddr_t, int);
static u_long	ext2_dirpref(struct inode *);
static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
static u_long	ext2_hashalloc(struct inode *, int, long, int,
    daddr_t (*)(struct inode *, int, daddr_t, int));
static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
static daddr_t	ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);

/*
 * Allocate a block in the file system.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
int
ext2_alloc(ip, lbn, bpref, size, cred, bnp)
	struct inode *ip;
	int32_t lbn, bpref;
	int size;
	struct ucred *cred;
	int32_t *bnp;
{
	struct m_ext2fs *fs;
	struct ext2mount *ump;
	int32_t bno;
	int cg;

	*bnp = 0;
	fs = ip->i_e2fs;
	ump = ip->i_ump;
	mtx_assert(EXT2_MTX(ump), MA_OWNED);
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
		panic("ext2_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ext2_alloc: missing credential");
#endif	/* DIAGNOSTIC */
	if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
		goto nospace;
	if (cred->cr_uid != 0 &&
	    fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
		goto nospace;
	if (bpref >= fs->e2fs->e2fs_bcount)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
	    ext2_alloccg);
	if (bno > 0) {
		/* set next_alloc fields as done in block_getblk */
		ip->i_next_alloc_block = lbn;
		ip->i_next_alloc_goal = bno;

		ip->i_blocks += btodb(fs->e2fs_bsize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
nospace:
	EXT2_UNLOCK(ump);
	ext2_fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
	return (ENOSPC);
}
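
/*
 * Locking note: as the paths above suggest, ext2_alloc() is entered with
 * the per-mount EXT2 mutex held (see the mtx_assert) and returns with it
 * released on every path; the cylinder group allocator drops it on
 * success, and the nospace path drops it on failure.
 */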

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous are given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible to
 * an fs_rotdelay offset from the end of the allocation for the logical
 * block immediately preceding the current range. If successful, the
 * physical block numbers in the buffer pointers and in the inode are
 * changed to reflect the new allocation. If unsuccessful, the allocation
 * is left unchanged. The success of the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
 */

static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");

static int doasyncfree = 1;
SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
    "Use asynchronous writes to update block pointers when freeing blocks");

static int doreallocblks = 1;
SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0,
    "Enable block reallocation to produce contiguous file clusters");
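
/*
 * These knobs are exposed as vfs.ext2fs.doasyncfree and
 * vfs.ext2fs.doreallocblks; for example, "sysctl vfs.ext2fs.doreallocblks=0"
 * disables reallocation, making ext2_reallocblks() below return ENOSPC
 * immediately.
 */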

int
ext2_reallocblks(ap)
	struct vop_reallocblks_args /* {
		struct vnode *a_vp;
		struct cluster_save *a_buflist;
	} */ *ap;
{
	struct m_ext2fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	int32_t *bap, *sbap, *ebap = 0;
	struct ext2mount *ump;
	struct cluster_save *buflist;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int32_t start_lbn, end_lbn, soff, newblk, blkno;
	int i, len, start_lvl, end_lvl, pref, ssize;

	if (doreallocblks == 0)
		return (ENOSPC);

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_e2fs;
	ump = ip->i_ump;

	if (fs->e2fs_contigsumsize <= 0)
		return (ENOSPC);

	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ext2_reallocblks: non-cluster");
#endif
	/*
	 * If the cluster crosses the boundary for the first indirect
	 * block, leave space for the indirect block. Indirect blocks
	 * are initially laid out in a position after the last direct
	 * block. Block reallocation would usually destroy locality by
	 * moving the indirect block out of the way to make room for
	 * data blocks if we didn't compensate here. We should also do
	 * this for other indirect block boundaries, but it is only
	 * important for the first one.
	 */
	if (start_lbn < NDADDR && end_lbn >= NDADDR)
		return (ENOSPC);
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (int32_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
			panic("ext2_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (int32_t *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	EXT2_LOCK(ump);
	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
	    len, ext2_clusteralloc)) == 0) {
		EXT2_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
	    (intmax_t)start_lbn, (intmax_t)end_lbn);
#endif	/* DEBUG */
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
			panic("ext2_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		printf(" %d,", *bap);
#endif	/* DEBUG */
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ext2_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	printf("\n\tnew:");
#endif	/* DEBUG */
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
		    fs->e2fs_bsize);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DEBUG
		printf(" %d,", blkno);
#endif	/* DEBUG */
	}
#ifdef DEBUG
	printf("\n");
#endif	/* DEBUG */
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_db[0])
		brelse(sbp);
	return (ENOSPC);
}

/*
 * Allocate an inode in the file system.
 */
int
ext2_valloc(pvp, mode, cred, vpp)
	struct vnode *pvp;
	int mode;
	struct ucred *cred;
	struct vnode **vpp;
{
	struct timespec ts;
	struct inode *pip;
	struct m_ext2fs *fs;
	struct inode *ip;
	struct ext2mount *ump;
	ino_t ino, ipref;
	int i, error, cg;

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_e2fs;
	ump = pip->i_ump;

	EXT2_LOCK(ump);
	if (fs->e2fs->e2fs_ficount == 0)
		goto noinodes;
	/*
	 * If it is a directory then obtain a cylinder group based on
	 * ext2_dirpref; otherwise obtain it using ino_to_cg. The preferred
	 * inode is always the next inode.
	 */
	if ((mode & IFMT) == IFDIR) {
		cg = ext2_dirpref(pip);
		if (fs->e2fs_contigdirs[cg] < 255)
			fs->e2fs_contigdirs[cg]++;
	} else {
		cg = ino_to_cg(fs, pip->i_number);
		if (fs->e2fs_contigdirs[cg] > 0)
			fs->e2fs_contigdirs[cg]--;
	}
	ipref = cg * fs->e2fs->e2fs_ipg + 1;
	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode,
	    ext2_nodealloccg);

	if (ino == 0)
		goto noinodes;
	error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
	if (error) {
		ext2_vfree(pvp, ino, mode);
		return (error);
	}
	ip = VTOI(*vpp);

	/*
	 * The question is whether using VGET was such a good idea at all:
	 * Linux doesn't read the old inode in when it is allocating a
	 * new one. I will set at least i_size and i_blocks to zero.
	 */
	ip->i_size = 0;
	ip->i_blocks = 0;
	ip->i_mode = 0;
	ip->i_flags = 0;
	/* now we want to make sure that the block pointers are zeroed out */
	for (i = 0; i < NDADDR; i++)
		ip->i_db[i] = 0;
	for (i = 0; i < NIADDR; i++)
		ip->i_ib[i] = 0;

	/*
	 * Set up a new generation number for this inode.
	 * XXX check if this makes sense in ext2
	 */
	if (ip->i_gen == 0 || ++ip->i_gen == 0)
		ip->i_gen = random() / 2 + 1;

	vfs_timestamp(&ts);
	ip->i_birthtime = ts.tv_sec;
	ip->i_birthnsec = ts.tv_nsec;

	/*
	printf("ext2_valloc: allocated inode %d\n", ino);
	*/
	return (0);
noinodes:
	EXT2_UNLOCK(ump);
	ext2_fserr(fs, cred->cr_uid, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static u_long
ext2_dirpref(struct inode *pip)
{
	struct m_ext2fs *fs;
	int cg, prefcg, dirsize, cgsize;
	int avgifree, avgbfree, avgndir, curdirsize;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
	fs = pip->i_e2fs;

	avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
	avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
	avgndir = fs->e2fs_total_dir / fs->e2fs_gcount;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = arc4random() % fs->e2fs_gcount;
		mincg = prefcg;
		minndir = fs->e2fs_ipg;
		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
			    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
			    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
			    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
			    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
			}

		return (mincg);
	}

	/*
	 * Count various limits which are used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
	minifree = avgifree - avgifree / 4;
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - avgbfree / 4;
	if (minbfree < 1)
		minbfree = 1;
	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
	dirsize = AVGDIRSIZE;
	curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	if (dirsize <= 0)
		maxcontigdirs = 0;	/* dirsize overflowed */
	else
		maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
		    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
				return (cg);
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
		    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
				return (cg);
		}
	/*
	 * This is a backstop when we have a deficit in space.
	 */
	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
			return (cg);
	for (cg = 0; cg < prefcg; cg++)
		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
			break;
	return (cg);
}
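
/*
 * Illustrative example for the final fallback in ext2_blkpref() below
 * (the numbers are assumptions, not taken from this file): with
 * EXT2_BLOCKS_PER_GROUP(fs) == 8192 and e2fs_first_dblock == 1, an inode
 * in block group 3 that has no previous block to cluster with gets a goal
 * of 3 * 8192 + 1 = 24577, i.e. the first data block of its own group.
 */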

/*
 * Select the desired position for the next block in a file.
 *
 * We try to mimic what Remy does in inode_getblk/block_getblk.
 *
 * We note: blocknr == 0 means that we're about to allocate either
 * a direct block or a pointer block at the first level of indirection
 * (in other words, stuff that will go in i_db[] or i_ib[]).
 *
 * blocknr != 0 means that we're allocating a block that is none
 * of the above. Then, blocknr tells us the number of the block
 * that will hold the pointer.
 */
int32_t
ext2_blkpref(ip, lbn, indx, bap, blocknr)
	struct inode *ip;
	int32_t lbn;
	int indx;
	int32_t *bap;
	int32_t blocknr;
{
	int tmp;

	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);

	/*
	 * If the next block is actually what we thought it is, then set
	 * the goal to what we thought it should be.
	 */
	if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
		return (ip->i_next_alloc_goal);

	/*
	 * Now check whether we were provided with an array that basically
	 * tells us previous blocks to which we want to stay close by.
	 */
	if (bap)
		for (tmp = indx - 1; tmp >= 0; tmp--)
			if (bap[tmp])
				return (bap[tmp]);

	/*
	 * Else fall back to the blocknr or, if there is none, follow the
	 * rule that a block should be allocated near its inode.
	 */
	return (blocknr ? blocknr :
	    (int32_t)(ip->i_block_group *
	    EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
	    ip->i_e2fs->e2fs->e2fs_first_dblock);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
static u_long
ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
    daddr_t (*allocator)(struct inode *, int, daddr_t, int))
{
	struct m_ext2fs *fs;
	ino_t result;
	int i, icg = cg;

	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_e2fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
		cg += i;
		if (cg >= fs->e2fs_gcount)
			cg -= fs->e2fs_gcount;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->e2fs_gcount;
	for (i = 2; i < fs->e2fs_gcount; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->e2fs_gcount)
			cg = 0;
	}
	return (0);
}
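
/*
 * Illustrative probe order for ext2_hashalloc() above (assuming
 * e2fs_gcount == 32; the numbers are only an example): starting from
 * icg == 5, step 2 tries groups 6, 8, 12, 20 and 4 (5 + 16 wraps around
 * to 4), and step 3 then sweeps the remaining groups starting at
 * (5 + 2) % 32 == 7.
 */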

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static daddr_t
ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
{
	struct m_ext2fs *fs;
	struct buf *bp;
	struct ext2mount *ump;
	daddr_t bno, runstart, runlen;
	int bit, loc, end, error, start;
	char *bbp;
	/* XXX ondisk32 */

	fs = ip->i_e2fs;
	ump = ip->i_ump;
	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
		return (0);
	EXT2_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs,
	    fs->e2fs_gd[cg].ext2bgd_b_bitmap),
	    (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		EXT2_LOCK(ump);
		return (0);
	}
	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) {
		/*
		 * Another thread allocated the last block in this
		 * group while we were waiting for the buffer.
		 */
		brelse(bp);
		EXT2_LOCK(ump);
		return (0);
	}
	bbp = (char *)bp->b_data;

	if (dtog(fs, bpref) != cg)
		bpref = 0;
	if (bpref != 0) {
		bpref = dtogd(fs, bpref);
		/*
		 * If the requested block is available, use it.
		 */
		if (isclr(bbp, bpref)) {
			bno = bpref;
			goto gotit;
		}
	}
	/*
	 * No blocks in the requested cylinder, so take the next
	 * available one in this cylinder group.
	 * First try to get 8 contiguous blocks, then fall back to a
	 * single block.
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = 0;
	end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
retry:
	runlen = 0;
	runstart = 0;
	for (loc = start; loc < end; loc++) {
		if (bbp[loc] == (char)0xff) {
			runlen = 0;
			continue;
		}

		/* Start of a run, find the number of high clear bits. */
		if (runlen == 0) {
			bit = fls(bbp[loc]);
			runlen = NBBY - bit;
			runstart = loc * NBBY + bit;
		} else if (bbp[loc] == 0) {
			/* Continue a run. */
			runlen += NBBY;
		} else {
			/*
			 * Finish the current run. If it isn't long
			 * enough, start a new one.
			 */
			bit = ffs(bbp[loc]) - 1;
			runlen += bit;
			if (runlen >= 8) {
				bno = runstart;
				goto gotit;
			}

			/* Run was too short, start a new one. */
			bit = fls(bbp[loc]);
			runlen = NBBY - bit;
			runstart = loc * NBBY + bit;
		}

		/* If the current run is long enough, use it. */
		if (runlen >= 8) {
			bno = runstart;
			goto gotit;
		}
	}
	if (start != 0) {
		end = start;
		start = 0;
		goto retry;
	}

	bno = ext2_mapsearch(fs, bbp, bpref);
	if (bno < 0) {
		brelse(bp);
		EXT2_LOCK(ump);
		return (0);
	}
gotit:
#ifdef DIAGNOSTIC
	if (isset(bbp, bno)) {
		printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
		    cg, (intmax_t)bno, fs->e2fs_fsmnt);
		panic("ext2fs_alloccg: dup alloc");
	}
#endif
	setbit(bbp, bno);
	EXT2_LOCK(ump);
	ext2_clusteracct(fs, bbp, cg, bno, -1);
	fs->e2fs->e2fs_fbcount--;
	fs->e2fs_gd[cg].ext2bgd_nbfree--;
	fs->e2fs_fmod = 1;
	EXT2_UNLOCK(ump);
	bdwrite(bp);
	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
}
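
/*
 * The return value above converts a group-relative bit number into an
 * absolute filesystem block: cg * e2fs_fpg + e2fs_first_dblock + bno.
 * For example (illustrative numbers only), bit 100 in group 3 of a
 * filesystem with 8192 blocks per group and first data block 1 maps to
 * block 3 * 8192 + 1 + 100 = 24677. ext2_clusteralloc() below uses the
 * same conversion.
 */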

/*
 * Determine whether a cluster can be allocated.
 */
static daddr_t
ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
{
	struct m_ext2fs *fs;
	struct ext2mount *ump;
	struct buf *bp;
	char *bbp;
	int bit, error, got, i, loc, run;
	int32_t *lp;
	daddr_t bno;

	fs = ip->i_e2fs;
	ump = ip->i_ump;

	if (fs->e2fs_maxcluster[cg] < len)
		return (0);

	EXT2_UNLOCK(ump);
	error = bread(ip->i_devvp,
	    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
	    (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error)
		goto fail_lock;

	bbp = (char *)bp->b_data;
	bp->b_xflags |= BX_BKGRDWRITE;

	EXT2_LOCK(ump);
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &fs->e2fs_clustersum[cg].cs_sum[len];
	for (i = len; i <= fs->e2fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->e2fs_contigsumsize) {
		/*
		 * Update the cluster summary information to reflect
		 * the true maximum-sized cluster so that future cluster
		 * allocation requests can avoid reading the bitmap only
		 * to find no cluster.
		 */
		lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		fs->e2fs_maxcluster[cg] = i;
		goto fail;
	}
	EXT2_UNLOCK(ump);

	/* Search the bitmap to find a big enough cluster like in FFS. */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	if (bpref != 0)
		bpref = dtogd(fs, bpref);
	loc = bpref / NBBY;
	bit = 1 << (bpref % NBBY);
	for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) {
		if ((bbp[loc] & bit) != 0)
			run = 0;
		else {
			run++;
			if (run == len)
				break;
		}
		if ((got & (NBBY - 1)) != (NBBY - 1))
			bit <<= 1;
		else {
			loc++;
			bit = 1;
		}
	}

	if (got >= fs->e2fs->e2fs_fpg)
		goto fail_lock;

	/* Allocate the cluster that we found. */
	for (i = 1; i < len; i++)
		if (!isclr(bbp, got - run + i))
			panic("ext2_clusteralloc: map mismatch");

	bno = got - run + 1;
	if (bno >= fs->e2fs->e2fs_fpg)
		panic("ext2_clusteralloc: allocated out of group");

	EXT2_LOCK(ump);
	for (i = 0; i < len; i += fs->e2fs_fpb) {
		setbit(bbp, bno + i);
		ext2_clusteracct(fs, bbp, cg, bno + i, -1);
		fs->e2fs->e2fs_fbcount--;
		fs->e2fs_gd[cg].ext2bgd_nbfree--;
	}
	fs->e2fs_fmod = 1;
	EXT2_UNLOCK(ump);

	bdwrite(bp);
	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);

fail_lock:
	EXT2_LOCK(ump);
fail:
	brelse(bp);
	return (0);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it in the specified cylinder group.
 */
static daddr_t
ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
{
	struct m_ext2fs *fs;
	struct buf *bp;
	struct ext2mount *ump;
	int error, start, len;
	char *ibp, *loc;

	ipref--;	/* to avoid a lot of (ipref - 1) */
	if (ipref == -1)
		ipref = 0;
	fs = ip->i_e2fs;
	ump = ip->i_ump;
	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
		return (0);
	EXT2_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs,
	    fs->e2fs_gd[cg].ext2bgd_i_bitmap),
	    (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		EXT2_LOCK(ump);
		return (0);
	}
	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) {
		/*
		 * Another thread allocated the last i-node in this
		 * group while we were waiting for the buffer.
		 */
		brelse(bp);
		EXT2_LOCK(ump);
		return (0);
	}
	ibp = (char *)bp->b_data;
	if (ipref) {
		ipref %= fs->e2fs->e2fs_ipg;
		if (isclr(ibp, ipref))
			goto gotit;
	}
	start = ipref / NBBY;
	len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
	loc = memcchr(&ibp[start], 0xff, len);
	if (loc == NULL) {
		len = start + 1;
		start = 0;
		loc = memcchr(&ibp[start], 0xff, len);
		if (loc == NULL) {
			printf("cg = %d, ipref = %lld, fs = %s\n",
			    cg, (long long)ipref, fs->e2fs_fsmnt);
			panic("ext2fs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1;
gotit:
	setbit(ibp, ipref);
	EXT2_LOCK(ump);
	fs->e2fs_gd[cg].ext2bgd_nifree--;
	fs->e2fs->e2fs_ficount--;
	fs->e2fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		fs->e2fs_gd[cg].ext2bgd_ndirs++;
		fs->e2fs_total_dir++;
	}
	EXT2_UNLOCK(ump);
	bdwrite(bp);
	return (cg * fs->e2fs->e2fs_ipg + ipref + 1);
}
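
/*
 * Note on the return value above: ext2 inode numbers are 1-based (inode 1
 * is reserved and the root directory is inode 2), which is why ipref is
 * decremented on entry and the final result is cg * e2fs_ipg + ipref + 1.
 */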

/*
 * Free a block or fragment.
 */
void
ext2_blkfree(ip, bno, size)
	struct inode *ip;
	int32_t bno;
	long size;
{
	struct m_ext2fs *fs;
	struct buf *bp;
	struct ext2mount *ump;
	int cg, error;
	char *bbp;

	fs = ip->i_e2fs;
	ump = ip->i_ump;
	cg = dtog(fs, bno);
	if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
		printf("bad block %lld, ino %llu\n", (long long)bno,
		    (unsigned long long)ip->i_number);
		ext2_fserr(fs, ip->i_uid, "bad block");
		return;
	}
	error = bread(ip->i_devvp,
	    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
	    (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	bbp = (char *)bp->b_data;
	bno = dtogd(fs, bno);
	if (isclr(bbp, bno)) {
		printf("block = %lld, fs = %s\n",
		    (long long)bno, fs->e2fs_fsmnt);
		panic("blkfree: freeing free block");
	}
	clrbit(bbp, bno);
	EXT2_LOCK(ump);
	ext2_clusteracct(fs, bbp, cg, bno, 1);
	fs->e2fs->e2fs_fbcount++;
	fs->e2fs_gd[cg].ext2bgd_nbfree++;
	fs->e2fs_fmod = 1;
	EXT2_UNLOCK(ump);
	bdwrite(bp);
}

/*
 * Free an inode.
 */
int
ext2_vfree(pvp, ino, mode)
	struct vnode *pvp;
	ino_t ino;
	int mode;
{
	struct m_ext2fs *fs;
	struct inode *pip;
	struct buf *bp;
	struct ext2mount *ump;
	int error, cg;
	char *ibp;
	/* mode_t save_i_mode; */

	pip = VTOI(pvp);
	fs = pip->i_e2fs;
	ump = pip->i_ump;
	if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
		panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s",
		    pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt);

	cg = ino_to_cg(fs, ino);
	error = bread(pip->i_devvp,
	    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
	    (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (0);
	}
	ibp = (char *)bp->b_data;
	ino = (ino - 1) % fs->e2fs->e2fs_ipg;
	if (isclr(ibp, ino)) {
		printf("ino = %llu, fs = %s\n",
		    (unsigned long long)ino, fs->e2fs_fsmnt);
		if (fs->e2fs_ronly == 0)
			panic("ifree: freeing free inode");
	}
	clrbit(ibp, ino);
	EXT2_LOCK(ump);
	fs->e2fs->e2fs_ficount++;
	fs->e2fs_gd[cg].ext2bgd_nifree++;
	if ((mode & IFMT) == IFDIR) {
		fs->e2fs_gd[cg].ext2bgd_ndirs--;
		fs->e2fs_total_dir--;
	}
	fs->e2fs_fmod = 1;
	EXT2_UNLOCK(ump);
	bdwrite(bp);
	return (0);
}

/*
 * Find a block in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block and
 * none are available.
 */
static daddr_t
ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
{
	char *loc;
	int start, len;

	/*
	 * Find the fragment by searching through the free block
	 * map for an appropriate bit pattern.
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = 0;
	len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
	loc = memcchr(&bbp[start], 0xff, len);
	if (loc == NULL) {
		len = start + 1;
		start = 0;
		loc = memcchr(&bbp[start], 0xff, len);
		if (loc == NULL) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->e2fs_fsmnt);
			panic("ext2fs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	return ((loc - bbp) * NBBY + ffs(~*loc) - 1);
}
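
/*
 * How the bitmap search above resolves a bit index (illustrative byte
 * value only): memcchr() returns the first byte that is not 0xff, and
 * ffs(~*loc) - 1 yields the lowest clear bit within it. For a byte of
 * 0xf7 (binary 11110111), ~0xf7 has bit 3 set, so ffs() returns 4 and the
 * free bit index within the byte is 3; (loc - bbp) * NBBY then adds the
 * byte offset in bits.
 */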

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
static void
ext2_fserr(fs, uid, cp)
	struct m_ext2fs *fs;
	uid_t uid;
	char *cp;
{

	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
}

/*
 * Return 1 if block group i carries a backup copy of the superblock and
 * group descriptors, 0 otherwise; that is, groups 0 and 1 and the powers
 * of 3, 5 and 7, matching ext2's sparse superblock layout.
 */
int
cg_has_sb(int i)
{
	int a3, a5, a7;

	if (i == 0 || i == 1)
		return (1);
	for (a3 = 3, a5 = 5, a7 = 7;
	    a3 <= i || a5 <= i || a7 <= i;
	    a3 *= 3, a5 *= 5, a7 *= 7)
		if (i == a3 || i == a5 || i == a7)
			return (1);
	return (0);
}