1 /*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/conf.h> 44 #include <sys/vnode.h> 45 #include <sys/sdt.h> 46 #include <sys/stat.h> 47 #include <sys/mount.h> 48 #include <sys/sysctl.h> 49 #include <sys/syslog.h> 50 #include <sys/buf.h> 51 #include <sys/endian.h> 52 53 #include <fs/ext2fs/fs.h> 54 #include <fs/ext2fs/inode.h> 55 #include <fs/ext2fs/ext2_mount.h> 56 #include <fs/ext2fs/ext2fs.h> 57 #include <fs/ext2fs/ext2_extern.h> 58 59 SDT_PROVIDER_DEFINE(ext2fs); 60 /* 61 * ext2fs trace probe: 62 * arg0: verbosity. Higher numbers give more verbose messages 63 * arg1: Textual message 64 */ 65 SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*"); 66 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc, 67 "ino_t", "e2fs_lbn_t", "e2fs_lbn_t"); 68 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t"); 69 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t"); 70 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int"); 71 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 72 "int", "daddr_t", "char*"); 73 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t"); 74 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t"); 75 76 static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); 77 static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); 78 static u_long ext2_dirpref(struct inode *); 79 static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int, 80 daddr_t (*)(struct inode *, int, daddr_t, 81 int)); 82 static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); 83 static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); 84 85 /* 86 * Allocate a block in the filesystem. 87 * 88 * A preference may be optionally specified. If a preference is given 89 * the following hierarchy is used to allocate a block: 90 * 1) allocate the requested block. 91 * 2) allocate a rotationally optimal block in the same cylinder. 92 * 3) allocate a block in the same cylinder group. 93 * 4) quadradically rehash into other cylinder groups, until an 94 * available block is located. 95 * If no block preference is given the following hierarchy is used 96 * to allocate a block: 97 * 1) allocate a block in the cylinder group that contains the 98 * inode for the file. 99 * 2) quadradically rehash into other cylinder groups, until an 100 * available block is located. 101 */ 102 int 103 ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size, 104 struct ucred *cred, e4fs_daddr_t *bnp) 105 { 106 struct m_ext2fs *fs; 107 struct ext2mount *ump; 108 e4fs_daddr_t bno; 109 int cg; 110 111 *bnp = 0; 112 fs = ip->i_e2fs; 113 ump = ip->i_ump; 114 mtx_assert(EXT2_MTX(ump), MA_OWNED); 115 #ifdef INVARIANTS 116 if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { 117 vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n", 118 (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); 119 panic("ext2_alloc: bad size"); 120 } 121 if (cred == NOCRED) 122 panic("ext2_alloc: missing credential"); 123 #endif /* INVARIANTS */ 124 if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0) 125 goto nospace; 126 if (cred->cr_uid != 0 && 127 fs->e2fs_fbcount < fs->e2fs_rbcount) 128 goto nospace; 129 if (bpref >= fs->e2fs_bcount) 130 bpref = 0; 131 if (bpref == 0) 132 cg = ino_to_cg(fs, ip->i_number); 133 else 134 cg = dtog(fs, bpref); 135 bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 136 ext2_alloccg); 137 if (bno > 0) { 138 /* set next_alloc fields as done in block_getblk */ 139 ip->i_next_alloc_block = lbn; 140 ip->i_next_alloc_goal = bno; 141 142 ip->i_blocks += btodb(fs->e2fs_bsize); 143 ip->i_flag |= IN_CHANGE | IN_UPDATE; 144 *bnp = bno; 145 return (0); 146 } 147 nospace: 148 EXT2_UNLOCK(ump); 149 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block"); 150 return (ENOSPC); 151 } 152 153 /* 154 * Allocate EA's block for inode. 155 */ 156 e4fs_daddr_t 157 ext2_alloc_meta(struct inode *ip) 158 { 159 struct m_ext2fs *fs; 160 daddr_t blk; 161 162 fs = ip->i_e2fs; 163 164 EXT2_LOCK(ip->i_ump); 165 blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize, 166 ext2_alloccg); 167 if (0 == blk) { 168 EXT2_UNLOCK(ip->i_ump); 169 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block"); 170 } 171 172 return (blk); 173 } 174 175 /* 176 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 177 * 178 * The vnode and an array of buffer pointers for a range of sequential 179 * logical blocks to be made contiguous is given. The allocator attempts 180 * to find a range of sequential blocks starting as close as possible to 181 * an fs_rotdelay offset from the end of the allocation for the logical 182 * block immediately preceding the current range. If successful, the 183 * physical block numbers in the buffer pointers and in the inode are 184 * changed to reflect the new allocation. If unsuccessful, the allocation 185 * is left unchanged. The success in doing the reallocation is returned. 186 * Note that the error return is not reflected back to the user. Rather 187 * the previous block allocation will be used. 188 */ 189 190 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem"); 191 192 static int doasyncfree = 1; 193 194 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, 195 "Use asychronous writes to update block pointers when freeing blocks"); 196 197 static int doreallocblks = 0; 198 199 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 200 201 int 202 ext2_reallocblks(struct vop_reallocblks_args *ap) 203 { 204 struct m_ext2fs *fs; 205 struct inode *ip; 206 struct vnode *vp; 207 struct buf *sbp, *ebp; 208 uint32_t *bap, *sbap, *ebap; 209 struct ext2mount *ump; 210 struct cluster_save *buflist; 211 struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp; 212 e2fs_lbn_t start_lbn, end_lbn; 213 int soff; 214 e2fs_daddr_t newblk, blkno; 215 int i, len, start_lvl, end_lvl, pref, ssize; 216 217 if (doreallocblks == 0) 218 return (ENOSPC); 219 220 vp = ap->a_vp; 221 ip = VTOI(vp); 222 fs = ip->i_e2fs; 223 ump = ip->i_ump; 224 225 if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) 226 return (ENOSPC); 227 228 buflist = ap->a_buflist; 229 len = buflist->bs_nchildren; 230 start_lbn = buflist->bs_children[0]->b_lblkno; 231 end_lbn = start_lbn + len - 1; 232 #ifdef INVARIANTS 233 for (i = 1; i < len; i++) 234 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 235 panic("ext2_reallocblks: non-cluster"); 236 #endif 237 /* 238 * If the cluster crosses the boundary for the first indirect 239 * block, leave space for the indirect block. Indirect blocks 240 * are initially laid out in a position after the last direct 241 * block. Block reallocation would usually destroy locality by 242 * moving the indirect block out of the way to make room for 243 * data blocks if we didn't compensate here. We should also do 244 * this for other indirect block boundaries, but it is only 245 * important for the first one. 246 */ 247 if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR) 248 return (ENOSPC); 249 /* 250 * If the latest allocation is in a new cylinder group, assume that 251 * the filesystem has decided to move and do not force it back to 252 * the previous cylinder group. 253 */ 254 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 255 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 256 return (ENOSPC); 257 if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || 258 ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) 259 return (ENOSPC); 260 /* 261 * Get the starting offset and block map for the first block. 262 */ 263 if (start_lvl == 0) { 264 sbap = &ip->i_db[0]; 265 soff = start_lbn; 266 } else { 267 idp = &start_ap[start_lvl - 1]; 268 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) { 269 brelse(sbp); 270 return (ENOSPC); 271 } 272 sbap = (u_int *)sbp->b_data; 273 soff = idp->in_off; 274 } 275 /* 276 * If the block range spans two block maps, get the second map. 277 */ 278 ebap = NULL; 279 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 280 ssize = len; 281 } else { 282 #ifdef INVARIANTS 283 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) 284 panic("ext2_reallocblks: start == end"); 285 #endif 286 ssize = len - (idp->in_off + 1); 287 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)) 288 goto fail; 289 ebap = (u_int *)ebp->b_data; 290 } 291 /* 292 * Find the preferred location for the cluster. 293 */ 294 EXT2_LOCK(ump); 295 pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); 296 /* 297 * Search the block map looking for an allocation of the desired size. 298 */ 299 if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, 300 len, ext2_clusteralloc)) == 0) { 301 EXT2_UNLOCK(ump); 302 goto fail; 303 } 304 /* 305 * We have found a new contiguous block. 306 * 307 * First we have to replace the old block pointers with the new 308 * block pointers in the inode and indirect blocks associated 309 * with the file. 310 */ 311 SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc, 312 ip->i_number, start_lbn, end_lbn); 313 blkno = newblk; 314 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 315 if (i == ssize) { 316 bap = ebap; 317 soff = -i; 318 } 319 #ifdef INVARIANTS 320 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) 321 panic("ext2_reallocblks: alloc mismatch"); 322 #endif 323 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap); 324 *bap++ = blkno; 325 } 326 /* 327 * Next we must write out the modified inode and indirect blocks. 328 * For strict correctness, the writes should be synchronous since 329 * the old block values may have been written to disk. In practise 330 * they are almost never written, but if we are concerned about 331 * strict correctness, the `doasyncfree' flag should be set to zero. 332 * 333 * The test on `doasyncfree' should be changed to test a flag 334 * that shows whether the associated buffers and inodes have 335 * been written. The flag should be set when the cluster is 336 * started and cleared whenever the buffer or inode is flushed. 337 * We can then check below to see if it is set, and do the 338 * synchronous write only when it has been cleared. 339 */ 340 if (sbap != &ip->i_db[0]) { 341 if (doasyncfree) 342 bdwrite(sbp); 343 else 344 bwrite(sbp); 345 } else { 346 ip->i_flag |= IN_CHANGE | IN_UPDATE; 347 if (!doasyncfree) 348 ext2_update(vp, 1); 349 } 350 if (ssize < len) { 351 if (doasyncfree) 352 bdwrite(ebp); 353 else 354 bwrite(ebp); 355 } 356 /* 357 * Last, free the old blocks and assign the new blocks to the buffers. 358 */ 359 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 360 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), 361 fs->e2fs_bsize); 362 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 363 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno); 364 } 365 366 return (0); 367 368 fail: 369 if (ssize < len) 370 brelse(ebp); 371 if (sbap != &ip->i_db[0]) 372 brelse(sbp); 373 return (ENOSPC); 374 } 375 376 /* 377 * Allocate an inode in the filesystem. 378 * 379 */ 380 int 381 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 382 { 383 struct timespec ts; 384 struct m_ext2fs *fs; 385 struct ext2mount *ump; 386 struct inode *pip; 387 struct inode *ip; 388 struct vnode *vp; 389 struct thread *td; 390 ino_t ino, ipref; 391 int error, cg; 392 393 *vpp = NULL; 394 pip = VTOI(pvp); 395 fs = pip->i_e2fs; 396 ump = pip->i_ump; 397 398 EXT2_LOCK(ump); 399 if (fs->e2fs->e2fs_ficount == 0) 400 goto noinodes; 401 /* 402 * If it is a directory then obtain a cylinder group based on 403 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is 404 * always the next inode. 405 */ 406 if ((mode & IFMT) == IFDIR) { 407 cg = ext2_dirpref(pip); 408 if (fs->e2fs_contigdirs[cg] < 255) 409 fs->e2fs_contigdirs[cg]++; 410 } else { 411 cg = ino_to_cg(fs, pip->i_number); 412 if (fs->e2fs_contigdirs[cg] > 0) 413 fs->e2fs_contigdirs[cg]--; 414 } 415 ipref = cg * fs->e2fs->e2fs_ipg + 1; 416 ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); 417 if (ino == 0) 418 goto noinodes; 419 420 td = curthread; 421 error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 422 if (error || *vpp != NULL) { 423 return (error); 424 } 425 426 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 427 if (ip == NULL) { 428 return (ENOMEM); 429 } 430 431 /* Allocate a new vnode/inode. */ 432 if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) { 433 free(ip, M_EXT2NODE); 434 return (error); 435 } 436 437 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 438 vp->v_data = ip; 439 ip->i_vnode = vp; 440 ip->i_e2fs = fs = ump->um_e2fs; 441 ip->i_ump = ump; 442 ip->i_number = ino; 443 ip->i_block_group = ino_to_cg(fs, ino); 444 ip->i_next_alloc_block = 0; 445 ip->i_next_alloc_goal = 0; 446 447 error = insmntque(vp, ump->um_mountp); 448 if (error) { 449 free(ip, M_EXT2NODE); 450 return (error); 451 } 452 453 error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 454 if (error || *vpp != NULL) { 455 *vpp = NULL; 456 free(ip, M_EXT2NODE); 457 return (error); 458 } 459 460 if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) { 461 vput(vp); 462 *vpp = NULL; 463 free(ip, M_EXT2NODE); 464 return (error); 465 } 466 467 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS) 468 && (S_ISREG(mode) || S_ISDIR(mode))) 469 ext4_ext_tree_init(ip); 470 else 471 memset(ip->i_data, 0, sizeof(ip->i_data)); 472 473 474 /* 475 * Set up a new generation number for this inode. 476 * Avoid zero values. 477 */ 478 do { 479 ip->i_gen = arc4random(); 480 } while (ip->i_gen == 0); 481 482 vfs_timestamp(&ts); 483 ip->i_birthtime = ts.tv_sec; 484 ip->i_birthnsec = ts.tv_nsec; 485 486 *vpp = vp; 487 488 return (0); 489 490 noinodes: 491 EXT2_UNLOCK(ump); 492 SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes"); 493 return (ENOSPC); 494 } 495 496 /* 497 * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h 498 */ 499 uint64_t 500 e2fs_gd_get_b_bitmap(struct ext2_gd *gd) 501 { 502 503 return (((uint64_t)(gd->ext4bgd_b_bitmap_hi) << 32) | 504 gd->ext2bgd_b_bitmap); 505 } 506 507 uint64_t 508 e2fs_gd_get_i_bitmap(struct ext2_gd *gd) 509 { 510 511 return (((uint64_t)(gd->ext4bgd_i_bitmap_hi) << 32) | 512 gd->ext2bgd_i_bitmap); 513 } 514 515 uint64_t 516 e2fs_gd_get_i_tables(struct ext2_gd *gd) 517 { 518 519 return (((uint64_t)(gd->ext4bgd_i_tables_hi) << 32) | 520 gd->ext2bgd_i_tables); 521 } 522 523 static uint32_t 524 e2fs_gd_get_nbfree(struct ext2_gd *gd) 525 { 526 527 return (((uint32_t)(gd->ext4bgd_nbfree_hi) << 16) | 528 gd->ext2bgd_nbfree); 529 } 530 531 static void 532 e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val) 533 { 534 535 gd->ext2bgd_nbfree = val & 0xffff; 536 gd->ext4bgd_nbfree_hi = val >> 16; 537 } 538 539 static uint32_t 540 e2fs_gd_get_nifree(struct ext2_gd *gd) 541 { 542 543 return (((uint32_t)(gd->ext4bgd_nifree_hi) << 16) | 544 gd->ext2bgd_nifree); 545 } 546 547 static void 548 e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val) 549 { 550 551 gd->ext2bgd_nifree = val & 0xffff; 552 gd->ext4bgd_nifree_hi = val >> 16; 553 } 554 555 uint32_t 556 e2fs_gd_get_ndirs(struct ext2_gd *gd) 557 { 558 559 return (((uint32_t)(gd->ext4bgd_ndirs_hi) << 16) | 560 gd->ext2bgd_ndirs); 561 } 562 563 static void 564 e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val) 565 { 566 567 gd->ext2bgd_ndirs = val & 0xffff; 568 gd->ext4bgd_ndirs_hi = val >> 16; 569 } 570 571 static uint32_t 572 e2fs_gd_get_i_unused(struct ext2_gd *gd) 573 { 574 return (((uint32_t)(gd->ext4bgd_i_unused_hi) << 16) | 575 gd->ext4bgd_i_unused); 576 } 577 578 static void 579 e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val) 580 { 581 582 gd->ext4bgd_i_unused = val & 0xffff; 583 gd->ext4bgd_i_unused_hi = val >> 16; 584 } 585 586 /* 587 * Find a cylinder to place a directory. 588 * 589 * The policy implemented by this algorithm is to allocate a 590 * directory inode in the same cylinder group as its parent 591 * directory, but also to reserve space for its files inodes 592 * and data. Restrict the number of directories which may be 593 * allocated one after another in the same cylinder group 594 * without intervening allocation of files. 595 * 596 * If we allocate a first level directory then force allocation 597 * in another cylinder group. 598 * 599 */ 600 static u_long 601 ext2_dirpref(struct inode *pip) 602 { 603 struct m_ext2fs *fs; 604 int cg, prefcg, cgsize; 605 uint64_t avgbfree, minbfree; 606 u_int avgifree, avgndir, curdirsize; 607 u_int minifree, maxndir; 608 u_int mincg, minndir; 609 u_int dirsize, maxcontigdirs; 610 611 mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); 612 fs = pip->i_e2fs; 613 614 avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount; 615 avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount; 616 avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; 617 618 /* 619 * Force allocation in another cg if creating a first level dir. 620 */ 621 ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); 622 if (ITOV(pip)->v_vflag & VV_ROOT) { 623 prefcg = arc4random() % fs->e2fs_gcount; 624 mincg = prefcg; 625 minndir = fs->e2fs_ipg; 626 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 627 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 628 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 629 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 630 mincg = cg; 631 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 632 } 633 for (cg = 0; cg < prefcg; cg++) 634 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 635 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 636 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 637 mincg = cg; 638 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 639 } 640 return (mincg); 641 } 642 /* 643 * Count various limits which used for 644 * optimal allocation of a directory inode. 645 */ 646 maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); 647 minifree = avgifree - avgifree / 4; 648 if (minifree < 1) 649 minifree = 1; 650 minbfree = avgbfree - avgbfree / 4; 651 if (minbfree < 1) 652 minbfree = 1; 653 cgsize = fs->e2fs_fsize * fs->e2fs_fpg; 654 dirsize = AVGDIRSIZE; 655 curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; 656 if (dirsize < curdirsize) 657 dirsize = curdirsize; 658 maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); 659 maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); 660 if (maxcontigdirs == 0) 661 maxcontigdirs = 1; 662 663 /* 664 * Limit number of dirs in one cg and reserve space for 665 * regular files, but only if we have no deficit in 666 * inodes or space. 667 */ 668 prefcg = ino_to_cg(fs, pip->i_number); 669 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 670 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 671 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 672 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 673 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 674 return (cg); 675 } 676 for (cg = 0; cg < prefcg; cg++) 677 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 678 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 679 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 680 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 681 return (cg); 682 } 683 /* 684 * This is a backstop when we have deficit in space. 685 */ 686 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 687 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 688 return (cg); 689 for (cg = 0; cg < prefcg; cg++) 690 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 691 break; 692 return (cg); 693 } 694 695 /* 696 * Select the desired position for the next block in a file. 697 * 698 * we try to mimic what Remy does in inode_getblk/block_getblk 699 * 700 * we note: blocknr == 0 means that we're about to allocate either 701 * a direct block or a pointer block at the first level of indirection 702 * (In other words, stuff that will go in i_db[] or i_ib[]) 703 * 704 * blocknr != 0 means that we're allocating a block that is none 705 * of the above. Then, blocknr tells us the number of the block 706 * that will hold the pointer 707 */ 708 e4fs_daddr_t 709 ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, 710 e2fs_daddr_t blocknr) 711 { 712 struct m_ext2fs *fs; 713 int tmp; 714 715 fs = ip->i_e2fs; 716 717 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 718 719 /* 720 * If the next block is actually what we thought it is, then set the 721 * goal to what we thought it should be. 722 */ 723 if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) 724 return ip->i_next_alloc_goal; 725 726 /* 727 * Now check whether we were provided with an array that basically 728 * tells us previous blocks to which we want to stay close. 729 */ 730 if (bap) 731 for (tmp = indx - 1; tmp >= 0; tmp--) 732 if (bap[tmp]) 733 return bap[tmp]; 734 735 /* 736 * Else lets fall back to the blocknr or, if there is none, follow 737 * the rule that a block should be allocated near its inode. 738 */ 739 return (blocknr ? blocknr : 740 (e2fs_daddr_t)(ip->i_block_group * 741 EXT2_BLOCKS_PER_GROUP(fs)) + fs->e2fs->e2fs_first_dblock); 742 } 743 744 /* 745 * Implement the cylinder overflow algorithm. 746 * 747 * The policy implemented by this algorithm is: 748 * 1) allocate the block in its requested cylinder group. 749 * 2) quadradically rehash on the cylinder group number. 750 * 3) brute force search for a free block. 751 */ 752 static e4fs_daddr_t 753 ext2_hashalloc(struct inode *ip, int cg, long pref, int size, 754 daddr_t (*allocator) (struct inode *, int, daddr_t, int)) 755 { 756 struct m_ext2fs *fs; 757 e4fs_daddr_t result; 758 int i, icg = cg; 759 760 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 761 fs = ip->i_e2fs; 762 /* 763 * 1: preferred cylinder group 764 */ 765 result = (*allocator)(ip, cg, pref, size); 766 if (result) 767 return (result); 768 /* 769 * 2: quadratic rehash 770 */ 771 for (i = 1; i < fs->e2fs_gcount; i *= 2) { 772 cg += i; 773 if (cg >= fs->e2fs_gcount) 774 cg -= fs->e2fs_gcount; 775 result = (*allocator)(ip, cg, 0, size); 776 if (result) 777 return (result); 778 } 779 /* 780 * 3: brute force search 781 * Note that we start at i == 2, since 0 was checked initially, 782 * and 1 is always checked in the quadratic rehash. 783 */ 784 cg = (icg + 2) % fs->e2fs_gcount; 785 for (i = 2; i < fs->e2fs_gcount; i++) { 786 result = (*allocator)(ip, cg, 0, size); 787 if (result) 788 return (result); 789 cg++; 790 if (cg == fs->e2fs_gcount) 791 cg = 0; 792 } 793 return (0); 794 } 795 796 static uint64_t 797 ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg) 798 { 799 800 if (!ext2_cg_has_sb(fs, cg)) 801 return (0); 802 803 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG)) 804 return (fs->e2fs->e3fs_first_meta_bg); 805 806 return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) / 807 EXT2_DESCS_PER_BLOCK(fs)); 808 } 809 810 static uint64_t 811 ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg) 812 { 813 unsigned long metagroup; 814 int first, last; 815 816 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 817 first = metagroup * EXT2_DESCS_PER_BLOCK(fs); 818 last = first + EXT2_DESCS_PER_BLOCK(fs) - 1; 819 820 if (cg == first || cg == first + 1 || cg == last) 821 return (1); 822 823 return (0); 824 } 825 826 uint64_t 827 ext2_cg_number_gdb(struct m_ext2fs *fs, int cg) 828 { 829 unsigned long first_meta_bg, metagroup; 830 831 first_meta_bg = fs->e2fs->e3fs_first_meta_bg; 832 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 833 834 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 835 metagroup < first_meta_bg) 836 return (ext2_cg_number_gdb_nometa(fs, cg)); 837 838 return ext2_cg_number_gdb_meta(fs, cg); 839 } 840 841 static int 842 ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg) 843 { 844 int number; 845 846 number = ext2_cg_has_sb(fs, cg); 847 848 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 849 cg < fs->e2fs->e3fs_first_meta_bg * EXT2_DESCS_PER_BLOCK(fs)) { 850 if (number) { 851 number += ext2_cg_number_gdb(fs, cg); 852 number += fs->e2fs->e2fs_reserved_ngdb; 853 } 854 } else { 855 number += ext2_cg_number_gdb(fs, cg); 856 } 857 858 return (number); 859 } 860 861 static void 862 ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 863 { 864 int i; 865 866 if (start_bit >= end_bit) 867 return; 868 869 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 870 setbit(bitmap, i); 871 if (i < end_bit) 872 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 873 } 874 875 static int 876 ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block) 877 { 878 879 return ((block - fs->e2fs->e2fs_first_dblock) / fs->e2fs_bsize); 880 } 881 882 static int 883 ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg) 884 { 885 886 return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0); 887 } 888 889 static int 890 ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp) 891 { 892 int bit, bit_max, inodes_per_block; 893 uint64_t start, tmp; 894 895 if (!(fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_BLOCK_UNINIT)) 896 return (0); 897 898 memset(bp->b_data, 0, fs->e2fs_bsize); 899 900 bit_max = ext2_number_base_meta_blocks(fs, cg); 901 if ((bit_max >> 3) >= fs->e2fs_bsize) 902 return (EINVAL); 903 904 for (bit = 0; bit < bit_max; bit++) 905 setbit(bp->b_data, bit); 906 907 start = (uint64_t)cg * fs->e2fs->e2fs_bpg + fs->e2fs->e2fs_first_dblock; 908 909 /* Set bits for block and inode bitmaps, and inode table. */ 910 tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]); 911 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 912 ext2_block_in_group(fs, tmp, cg)) 913 setbit(bp->b_data, tmp - start); 914 915 tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]); 916 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 917 ext2_block_in_group(fs, tmp, cg)) 918 setbit(bp->b_data, tmp - start); 919 920 tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]); 921 inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs); 922 while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + 923 fs->e2fs->e2fs_ipg / inodes_per_block ) { 924 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 925 ext2_block_in_group(fs, tmp, cg)) 926 setbit(bp->b_data, tmp - start); 927 tmp++; 928 } 929 930 /* 931 * Also if the number of blocks within the group is less than 932 * the blocksize * 8 ( which is the size of bitmap ), set rest 933 * of the block bitmap to 1 934 */ 935 ext2_mark_bitmap_end(fs->e2fs->e2fs_bpg, fs->e2fs_bsize * 8, 936 bp->b_data); 937 938 /* Clean the flag */ 939 fs->e2fs_gd[cg].ext4bgd_flags &= ~EXT2_BG_BLOCK_UNINIT; 940 941 return (0); 942 } 943 944 static int 945 ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg) 946 { 947 struct ext2_gd *gd; 948 uint64_t group_first_block; 949 unsigned int offset, max_bit; 950 951 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) { 952 /* 953 * It is not possible to check block bitmap in case of this feature, 954 * because the inode and block bitmaps and inode table 955 * blocks may not be in the group at all. 956 * So, skip check in this case. 957 */ 958 return (0); 959 } 960 961 gd = &fs->e2fs_gd[cg]; 962 max_bit = fs->e2fs_fpg; 963 group_first_block = ((uint64_t)cg) * fs->e2fs->e2fs_fpg + 964 fs->e2fs->e2fs_first_dblock; 965 966 /* Check block bitmap block number */ 967 offset = e2fs_gd_get_b_bitmap(gd) - group_first_block; 968 if (offset >= max_bit || !isset(bp->b_data, offset)) { 969 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 970 "bad block bitmap, group", cg); 971 return (EINVAL); 972 } 973 974 /* Check inode bitmap block number */ 975 offset = e2fs_gd_get_i_bitmap(gd) - group_first_block; 976 if (offset >= max_bit || !isset(bp->b_data, offset)) { 977 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 978 "bad inode bitmap", cg); 979 return (EINVAL); 980 } 981 982 /* Check inode table */ 983 offset = e2fs_gd_get_i_tables(gd) - group_first_block; 984 if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) { 985 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 986 "bad inode table, group", cg); 987 return (EINVAL); 988 } 989 990 return (0); 991 } 992 993 /* 994 * Determine whether a block can be allocated. 995 * 996 * Check to see if a block of the appropriate size is available, 997 * and if it is, allocate it. 998 */ 999 static daddr_t 1000 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 1001 { 1002 struct m_ext2fs *fs; 1003 struct buf *bp; 1004 struct ext2mount *ump; 1005 daddr_t bno, runstart, runlen; 1006 int bit, loc, end, error, start; 1007 char *bbp; 1008 /* XXX ondisk32 */ 1009 fs = ip->i_e2fs; 1010 ump = ip->i_ump; 1011 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1012 return (0); 1013 1014 EXT2_UNLOCK(ump); 1015 error = bread(ip->i_devvp, fsbtodb(fs, 1016 e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1017 (int)fs->e2fs_bsize, NOCRED, &bp); 1018 if (error) 1019 goto fail; 1020 1021 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1022 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1023 error = ext2_cg_block_bitmap_init(fs, cg, bp); 1024 if (error) 1025 goto fail; 1026 1027 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1028 } 1029 error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp); 1030 if (error) 1031 goto fail; 1032 1033 error = ext2_b_bitmap_validate(fs,bp, cg); 1034 if (error) 1035 goto fail; 1036 1037 /* 1038 * Check, that another thread did not not allocate the last block in this 1039 * group while we were waiting for the buffer. 1040 */ 1041 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1042 goto fail; 1043 1044 bbp = (char *)bp->b_data; 1045 1046 if (dtog(fs, bpref) != cg) 1047 bpref = 0; 1048 if (bpref != 0) { 1049 bpref = dtogd(fs, bpref); 1050 /* 1051 * if the requested block is available, use it 1052 */ 1053 if (isclr(bbp, bpref)) { 1054 bno = bpref; 1055 goto gotit; 1056 } 1057 } 1058 /* 1059 * no blocks in the requested cylinder, so take next 1060 * available one in this cylinder group. 1061 * first try to get 8 contigous blocks, then fall back to a single 1062 * block. 1063 */ 1064 if (bpref) 1065 start = dtogd(fs, bpref) / NBBY; 1066 else 1067 start = 0; 1068 end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; 1069 retry: 1070 runlen = 0; 1071 runstart = 0; 1072 for (loc = start; loc < end; loc++) { 1073 if (bbp[loc] == (char)0xff) { 1074 runlen = 0; 1075 continue; 1076 } 1077 1078 /* Start of a run, find the number of high clear bits. */ 1079 if (runlen == 0) { 1080 bit = fls(bbp[loc]); 1081 runlen = NBBY - bit; 1082 runstart = loc * NBBY + bit; 1083 } else if (bbp[loc] == 0) { 1084 /* Continue a run. */ 1085 runlen += NBBY; 1086 } else { 1087 /* 1088 * Finish the current run. If it isn't long 1089 * enough, start a new one. 1090 */ 1091 bit = ffs(bbp[loc]) - 1; 1092 runlen += bit; 1093 if (runlen >= 8) { 1094 bno = runstart; 1095 goto gotit; 1096 } 1097 1098 /* Run was too short, start a new one. */ 1099 bit = fls(bbp[loc]); 1100 runlen = NBBY - bit; 1101 runstart = loc * NBBY + bit; 1102 } 1103 1104 /* If the current run is long enough, use it. */ 1105 if (runlen >= 8) { 1106 bno = runstart; 1107 goto gotit; 1108 } 1109 } 1110 if (start != 0) { 1111 end = start; 1112 start = 0; 1113 goto retry; 1114 } 1115 bno = ext2_mapsearch(fs, bbp, bpref); 1116 if (bno < 0) 1117 goto fail; 1118 1119 gotit: 1120 #ifdef INVARIANTS 1121 if (isset(bbp, bno)) { 1122 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n", 1123 cg, (intmax_t)bno, fs->e2fs_fsmnt); 1124 panic("ext2fs_alloccg: dup alloc"); 1125 } 1126 #endif 1127 setbit(bbp, bno); 1128 EXT2_LOCK(ump); 1129 ext2_clusteracct(fs, bbp, cg, bno, -1); 1130 fs->e2fs_fbcount--; 1131 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1132 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1133 fs->e2fs_fmod = 1; 1134 EXT2_UNLOCK(ump); 1135 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1136 bdwrite(bp); 1137 return (((uint64_t)cg) * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); 1138 1139 fail: 1140 brelse(bp); 1141 EXT2_LOCK(ump); 1142 return (0); 1143 } 1144 1145 /* 1146 * Determine whether a cluster can be allocated. 1147 */ 1148 static daddr_t 1149 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) 1150 { 1151 struct m_ext2fs *fs; 1152 struct ext2mount *ump; 1153 struct buf *bp; 1154 char *bbp; 1155 int bit, error, got, i, loc, run; 1156 int32_t *lp; 1157 daddr_t bno; 1158 1159 fs = ip->i_e2fs; 1160 ump = ip->i_ump; 1161 1162 if (fs->e2fs_maxcluster[cg] < len) 1163 return (0); 1164 1165 EXT2_UNLOCK(ump); 1166 error = bread(ip->i_devvp, 1167 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1168 (int)fs->e2fs_bsize, NOCRED, &bp); 1169 if (error) 1170 goto fail_lock; 1171 1172 bbp = (char *)bp->b_data; 1173 EXT2_LOCK(ump); 1174 /* 1175 * Check to see if a cluster of the needed size (or bigger) is 1176 * available in this cylinder group. 1177 */ 1178 lp = &fs->e2fs_clustersum[cg].cs_sum[len]; 1179 for (i = len; i <= fs->e2fs_contigsumsize; i++) 1180 if (*lp++ > 0) 1181 break; 1182 if (i > fs->e2fs_contigsumsize) { 1183 /* 1184 * Update the cluster summary information to reflect 1185 * the true maximum-sized cluster so that future cluster 1186 * allocation requests can avoid reading the bitmap only 1187 * to find no cluster. 1188 */ 1189 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; 1190 for (i = len - 1; i > 0; i--) 1191 if (*lp-- > 0) 1192 break; 1193 fs->e2fs_maxcluster[cg] = i; 1194 goto fail; 1195 } 1196 EXT2_UNLOCK(ump); 1197 1198 /* Search the bitmap to find a big enough cluster like in FFS. */ 1199 if (dtog(fs, bpref) != cg) 1200 bpref = 0; 1201 if (bpref != 0) 1202 bpref = dtogd(fs, bpref); 1203 loc = bpref / NBBY; 1204 bit = 1 << (bpref % NBBY); 1205 for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) { 1206 if ((bbp[loc] & bit) != 0) 1207 run = 0; 1208 else { 1209 run++; 1210 if (run == len) 1211 break; 1212 } 1213 if ((got & (NBBY - 1)) != (NBBY - 1)) 1214 bit <<= 1; 1215 else { 1216 loc++; 1217 bit = 1; 1218 } 1219 } 1220 1221 if (got >= fs->e2fs->e2fs_fpg) 1222 goto fail_lock; 1223 1224 /* Allocate the cluster that we found. */ 1225 for (i = 1; i < len; i++) 1226 if (!isclr(bbp, got - run + i)) 1227 panic("ext2_clusteralloc: map mismatch"); 1228 1229 bno = got - run + 1; 1230 if (bno >= fs->e2fs->e2fs_fpg) 1231 panic("ext2_clusteralloc: allocated out of group"); 1232 1233 EXT2_LOCK(ump); 1234 for (i = 0; i < len; i += fs->e2fs_fpb) { 1235 setbit(bbp, bno + i); 1236 ext2_clusteracct(fs, bbp, cg, bno + i, -1); 1237 fs->e2fs_fbcount--; 1238 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1239 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1240 } 1241 fs->e2fs_fmod = 1; 1242 EXT2_UNLOCK(ump); 1243 1244 bdwrite(bp); 1245 return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); 1246 1247 fail_lock: 1248 EXT2_LOCK(ump); 1249 fail: 1250 brelse(bp); 1251 return (0); 1252 } 1253 1254 static int 1255 ext2_zero_inode_table(struct inode *ip, int cg) 1256 { 1257 struct m_ext2fs *fs; 1258 struct buf *bp; 1259 int i, all_blks, used_blks; 1260 1261 fs = ip->i_e2fs; 1262 1263 if (fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_INODE_ZEROED) 1264 return (0); 1265 1266 all_blks = fs->e2fs->e2fs_inode_size * fs->e2fs->e2fs_ipg / 1267 fs->e2fs_bsize; 1268 1269 used_blks = howmany(fs->e2fs->e2fs_ipg - 1270 e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]), 1271 fs->e2fs_bsize / EXT2_INODE_SIZE(fs)); 1272 1273 for (i = 0; i < all_blks - used_blks; i++) { 1274 bp = getblk(ip->i_devvp, fsbtodb(fs, 1275 e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i), 1276 fs->e2fs_bsize, 0, 0, 0); 1277 if (!bp) 1278 return (EIO); 1279 1280 vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize); 1281 bawrite(bp); 1282 } 1283 1284 fs->e2fs_gd[cg].ext4bgd_flags |= EXT2_BG_INODE_ZEROED; 1285 1286 return (0); 1287 } 1288 1289 /* 1290 * Determine whether an inode can be allocated. 1291 * 1292 * Check to see if an inode is available, and if it is, 1293 * allocate it using tode in the specified cylinder group. 1294 */ 1295 static daddr_t 1296 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 1297 { 1298 struct m_ext2fs *fs; 1299 struct buf *bp; 1300 struct ext2mount *ump; 1301 int error, start, len, ifree; 1302 char *ibp, *loc; 1303 1304 ipref--; /* to avoid a lot of (ipref -1) */ 1305 if (ipref == -1) 1306 ipref = 0; 1307 fs = ip->i_e2fs; 1308 ump = ip->i_ump; 1309 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) 1310 return (0); 1311 EXT2_UNLOCK(ump); 1312 error = bread(ip->i_devvp, fsbtodb(fs, 1313 e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1314 (int)fs->e2fs_bsize, NOCRED, &bp); 1315 if (error) { 1316 brelse(bp); 1317 EXT2_LOCK(ump); 1318 return (0); 1319 } 1320 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1321 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1322 if (fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_INODE_UNINIT) { 1323 memset(bp->b_data, 0, fs->e2fs_bsize); 1324 fs->e2fs_gd[cg].ext4bgd_flags &= ~EXT2_BG_INODE_UNINIT; 1325 } 1326 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1327 error = ext2_zero_inode_table(ip, cg); 1328 if (error) { 1329 brelse(bp); 1330 EXT2_LOCK(ump); 1331 return (0); 1332 } 1333 } 1334 error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp); 1335 if (error) { 1336 brelse(bp); 1337 EXT2_LOCK(ump); 1338 return (0); 1339 } 1340 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) { 1341 /* 1342 * Another thread allocated the last i-node in this 1343 * group while we were waiting for the buffer. 1344 */ 1345 brelse(bp); 1346 EXT2_LOCK(ump); 1347 return (0); 1348 } 1349 ibp = (char *)bp->b_data; 1350 if (ipref) { 1351 ipref %= fs->e2fs->e2fs_ipg; 1352 if (isclr(ibp, ipref)) 1353 goto gotit; 1354 } 1355 start = ipref / NBBY; 1356 len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY); 1357 loc = memcchr(&ibp[start], 0xff, len); 1358 if (loc == NULL) { 1359 len = start + 1; 1360 start = 0; 1361 loc = memcchr(&ibp[start], 0xff, len); 1362 if (loc == NULL) { 1363 SDT_PROBE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 1364 cg, ipref, fs->e2fs_fsmnt); 1365 brelse(bp); 1366 EXT2_LOCK(ump); 1367 return (0); 1368 } 1369 } 1370 ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; 1371 gotit: 1372 setbit(ibp, ipref); 1373 EXT2_LOCK(ump); 1374 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1375 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1); 1376 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1377 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1378 ifree = fs->e2fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]); 1379 if (ipref + 1 > ifree) 1380 e2fs_gd_set_i_unused(&fs->e2fs_gd[cg], 1381 fs->e2fs->e2fs_ipg - (ipref + 1)); 1382 } 1383 fs->e2fs->e2fs_ficount--; 1384 fs->e2fs_fmod = 1; 1385 if ((mode & IFMT) == IFDIR) { 1386 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1387 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1); 1388 fs->e2fs_total_dir++; 1389 } 1390 EXT2_UNLOCK(ump); 1391 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1392 bdwrite(bp); 1393 return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1); 1394 } 1395 1396 /* 1397 * Free a block or fragment. 1398 * 1399 */ 1400 void 1401 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 1402 { 1403 struct m_ext2fs *fs; 1404 struct buf *bp; 1405 struct ext2mount *ump; 1406 int cg, error; 1407 char *bbp; 1408 1409 fs = ip->i_e2fs; 1410 ump = ip->i_ump; 1411 cg = dtog(fs, bno); 1412 if (bno >= fs->e2fs_bcount) { 1413 SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, ip->i_number, bno); 1414 return; 1415 } 1416 error = bread(ip->i_devvp, 1417 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1418 (int)fs->e2fs_bsize, NOCRED, &bp); 1419 if (error) { 1420 brelse(bp); 1421 return; 1422 } 1423 bbp = (char *)bp->b_data; 1424 bno = dtogd(fs, bno); 1425 if (isclr(bbp, bno)) { 1426 panic("ext2_blkfree: freeing free block %lld, fs=%s", 1427 (long long)bno, fs->e2fs_fsmnt); 1428 } 1429 clrbit(bbp, bno); 1430 EXT2_LOCK(ump); 1431 ext2_clusteracct(fs, bbp, cg, bno, 1); 1432 fs->e2fs_fbcount++; 1433 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1434 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); 1435 fs->e2fs_fmod = 1; 1436 EXT2_UNLOCK(ump); 1437 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1438 bdwrite(bp); 1439 } 1440 1441 /* 1442 * Free an inode. 1443 * 1444 */ 1445 int 1446 ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1447 { 1448 struct m_ext2fs *fs; 1449 struct inode *pip; 1450 struct buf *bp; 1451 struct ext2mount *ump; 1452 int error, cg; 1453 char *ibp; 1454 1455 pip = VTOI(pvp); 1456 fs = pip->i_e2fs; 1457 ump = pip->i_ump; 1458 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1459 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1460 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1461 1462 cg = ino_to_cg(fs, ino); 1463 error = bread(pip->i_devvp, 1464 fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1465 (int)fs->e2fs_bsize, NOCRED, &bp); 1466 if (error) { 1467 brelse(bp); 1468 return (0); 1469 } 1470 ibp = (char *)bp->b_data; 1471 ino = (ino - 1) % fs->e2fs->e2fs_ipg; 1472 if (isclr(ibp, ino)) { 1473 SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, 1474 fs->e2fs_fsmnt, ino); 1475 if (fs->e2fs_ronly == 0) 1476 panic("ext2_vfree: freeing free inode"); 1477 } 1478 clrbit(ibp, ino); 1479 EXT2_LOCK(ump); 1480 fs->e2fs->e2fs_ficount++; 1481 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1482 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); 1483 if ((mode & IFMT) == IFDIR) { 1484 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1485 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); 1486 fs->e2fs_total_dir--; 1487 } 1488 fs->e2fs_fmod = 1; 1489 EXT2_UNLOCK(ump); 1490 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1491 bdwrite(bp); 1492 return (0); 1493 } 1494 1495 /* 1496 * Find a block in the specified cylinder group. 1497 * 1498 * It is a panic if a request is made to find a block if none are 1499 * available. 1500 */ 1501 static daddr_t 1502 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1503 { 1504 char *loc; 1505 int start, len; 1506 1507 /* 1508 * find the fragment by searching through the free block 1509 * map for an appropriate bit pattern 1510 */ 1511 if (bpref) 1512 start = dtogd(fs, bpref) / NBBY; 1513 else 1514 start = 0; 1515 len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; 1516 loc = memcchr(&bbp[start], 0xff, len); 1517 if (loc == NULL) { 1518 len = start + 1; 1519 start = 0; 1520 loc = memcchr(&bbp[start], 0xff, len); 1521 if (loc == NULL) { 1522 panic("ext2_mapsearch: map corrupted: start=%d, len=%d, fs=%s", 1523 start, len, fs->e2fs_fsmnt); 1524 /* NOTREACHED */ 1525 } 1526 } 1527 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1528 } 1529 1530 int 1531 ext2_cg_has_sb(struct m_ext2fs *fs, int cg) 1532 { 1533 int a3, a5, a7; 1534 1535 if (cg == 0) 1536 return (1); 1537 1538 if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { 1539 if (cg == fs->e2fs->e4fs_backup_bgs[0] || 1540 cg == fs->e2fs->e4fs_backup_bgs[1]) 1541 return (1); 1542 return (0); 1543 } 1544 1545 if ((cg <= 1) || 1546 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) 1547 return (1); 1548 1549 if (!(cg & 1)) 1550 return (0); 1551 1552 for (a3 = 3, a5 = 5, a7 = 7; 1553 a3 <= cg || a5 <= cg || a7 <= cg; 1554 a3 *= 3, a5 *= 5, a7 *= 7) 1555 if (cg == a3 || cg == a5 || cg == a7) 1556 return (1); 1557 return (0); 1558 } 1559