1 /*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 
 *
 *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/buf.h>
#include <sys/endian.h>

#include <fs/ext2fs/fs.h>
#include <fs/ext2fs/inode.h>
#include <fs/ext2fs/ext2_mount.h>
#include <fs/ext2fs/ext2fs.h>
#include <fs/ext2fs/ext2_extern.h>

SDT_PROVIDER_DEFINE(ext2fs);
/*
 * ext2fs trace probe:
 * arg0: verbosity. Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*");
SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc,
    "ino_t", "e2fs_lbn_t", "e2fs_lbn_t");
SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t");
SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t");
SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int");
SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted,
    "int", "daddr_t", "char*");
SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t");
SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t");

/*
 * Per-cylinder-group allocators; the first four are dispatched through
 * ext2_hashalloc(), which implements the cylinder-group overflow policy.
 */
static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
static daddr_t	ext2_clusteralloc(struct inode *, int, daddr_t, int);
static u_long	ext2_dirpref(struct inode *);
static e4fs_daddr_t	ext2_hashalloc(struct inode *, int, long, int,
			    daddr_t (*)(struct inode *, int, daddr_t,
						int));
static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
static daddr_t	ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);

/*
 * Allocate a block in the filesystem.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 *
 * Called with the per-mount allocation mutex held; on success the
 * allocator drops it (inside ext2_alloccg), on failure it is dropped
 * here before returning ENOSPC.
 */
int
ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size,
    struct ucred *cred, e4fs_daddr_t *bnp)
{
	struct m_ext2fs *fs;
	struct ext2mount *ump;
	e4fs_daddr_t bno;
	int cg;

	*bnp = 0;
	fs = ip->i_e2fs;
	ump = ip->i_ump;
	mtx_assert(EXT2_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
	/* The request must be exactly one filesystem block. */
	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
		panic("ext2_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ext2_alloc: missing credential");
#endif /* INVARIANTS */
	if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0)
		goto nospace;
	/* Non-root callers may not dip into the reserved block pool. */
	if (cred->cr_uid != 0 &&
	    fs->e2fs_fbcount < fs->e2fs_rbcount)
		goto nospace;
	if (bpref >= fs->e2fs_bcount)
		bpref = 0;
	/* Without a preference, start in the group that holds the inode. */
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
	    ext2_alloccg);
	if (bno > 0) {
		/* set next_alloc fields as done in block_getblk */
		ip->i_next_alloc_block = lbn;
		ip->i_next_alloc_goal = bno;

		ip->i_blocks += btodb(fs->e2fs_bsize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
nospace:
	EXT2_UNLOCK(ump);
	SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block");
	return (ENOSPC);
}

/*
 * Allocate EA's block for inode.
 *
 * Returns the new block number, or 0 on failure.  Takes and (via the
 * allocator, or explicitly on failure) releases the per-mount lock.
 */
e4fs_daddr_t
ext2_alloc_meta(struct inode *ip)
{
	struct m_ext2fs *fs;
	daddr_t blk;

	fs = ip->i_e2fs;

	EXT2_LOCK(ip->i_ump);
	blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize,
	    ext2_alloccg);
	if (0 == blk) {
		EXT2_UNLOCK(ip->i_ump);
		SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block");
	}

	return (blk);
}

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous is given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible to
 * an fs_rotdelay offset from the end of the allocation for the logical
 * block immediately preceding the current range. If successful, the
 * physical block numbers in the buffer pointers and in the inode are
 * changed to reflect the new allocation. If unsuccessful, the allocation
 * is left unchanged. The success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
 */

static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "EXT2FS filesystem");

static int doasyncfree = 1;

SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
    "Use asychronous writes to update block pointers when freeing blocks");

/* Block reallocation (clustering) is disabled by default. */
static int doreallocblks = 0;

SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");

int
ext2_reallocblks(struct vop_reallocblks_args *ap)
{
	struct m_ext2fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	uint32_t *bap, *sbap, *ebap;
	struct ext2mount *ump;
	struct cluster_save *buflist;
	struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp;
	e2fs_lbn_t start_lbn, end_lbn;
	int soff;
	e2fs_daddr_t newblk, blkno;
	int i, len, start_lvl, end_lvl, pref, ssize;

	if (doreallocblks == 0)
		return (ENOSPC);

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_e2fs;
	ump = ip->i_ump;

	/* Clustering requires cluster summaries and is bypassed for extents. */
	if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS)
		return (ENOSPC);

	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;
#ifdef INVARIANTS
	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ext2_reallocblks: non-cluster");
#endif
	/*
	 * If the cluster crosses the boundary for the first indirect
	 * block, leave space for the indirect block. Indirect blocks
	 * are initially laid out in a position after the last direct
	 * block. Block reallocation would usually destroy locality by
	 * moving the indirect block out of the way to make room for
	 * data blocks if we didn't compensate here. We should also do
	 * this for other indirect block boundaries, but it is only
	 * important for the first one.
	 */
	if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR)
		return (ENOSPC);
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 * Level 0 means the pointers live directly in the inode (i_db);
	 * otherwise read the innermost indirect block that holds them.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (u_int *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	ebap = NULL;
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef INVARIANTS
		if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
			panic("ext2_reallocblks: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
			goto fail;
		ebap = (u_int *)ebp->b_data;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	EXT2_LOCK(ump);
	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
	    len, ext2_clusteralloc)) == 0) {
		EXT2_UNLOCK(ump);
		goto fail;
	}
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
	SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc,
	    ip->i_number, start_lbn, end_lbn);
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
		/* Switch to the second map once the first one is exhausted. */
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef INVARIANTS
		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
			panic("ext2_reallocblks: alloc mismatch");
#endif
		SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap);
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practise
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ext2_update(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
		    fs->e2fs_bsize);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
		SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno);
	}

	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_db[0])
		brelse(sbp);
	return (ENOSPC);
}

/*
 * Allocate an inode in the filesystem.
 *
 * Directories are placed using ext2_dirpref(); other inodes go in the
 * parent's cylinder group.  On success *vpp holds a locked, initialized
 * vnode for the new inode.
 */
int
ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp)
{
	struct timespec ts;
	struct m_ext2fs *fs;
	struct ext2mount *ump;
	struct inode *pip;
	struct inode *ip;
	struct vnode *vp;
	struct thread *td;
	ino_t ino, ipref;
	int error, cg;

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_e2fs;
	ump = pip->i_ump;

	EXT2_LOCK(ump);
	if (fs->e2fs->e2fs_ficount == 0)
		goto noinodes;
	/*
	 * If it is a directory then obtain a cylinder group based on
	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
	 * always the next inode.
	 */
	if ((mode & IFMT) == IFDIR) {
		cg = ext2_dirpref(pip);
		if (fs->e2fs_contigdirs[cg] < 255)
			fs->e2fs_contigdirs[cg]++;
	} else {
		cg = ino_to_cg(fs, pip->i_number);
		if (fs->e2fs_contigdirs[cg] > 0)
			fs->e2fs_contigdirs[cg]--;
	}
	ipref = cg * fs->e2fs->e2fs_ipg + 1;
	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
	if (ino == 0)
		goto noinodes;

	td = curthread;
	error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL);
	if (error || *vpp != NULL) {
		return (error);
	}

	ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);
	/*
	 * NOTE(review): with M_WAITOK, malloc(9) never returns NULL, so
	 * this check appears to be dead code — confirm before relying on
	 * the ENOMEM path.
	 */
	if (ip == NULL) {
		return (ENOMEM);
	}

	/*
	 * Allocate a new vnode/inode.
	 */
	if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) {
		free(ip, M_EXT2NODE);
		return (error);
	}

	/* Wire the in-memory inode to the vnode and seed allocation state. */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_e2fs = fs = ump->um_e2fs;
	ip->i_ump = ump;
	ip->i_number = ino;
	ip->i_block_group = ino_to_cg(fs, ino);
	ip->i_next_alloc_block = 0;
	ip->i_next_alloc_goal = 0;

	/*
	 * NOTE(review): on the error paths below, vp->v_data still points
	 * at ip while ip is freed; verify that the vnode teardown path
	 * cannot also free it (double free) — confirm against insmntque(9)
	 * failure semantics.
	 */
	error = insmntque(vp, ump->um_mountp);
	if (error) {
		free(ip, M_EXT2NODE);
		return (error);
	}

	error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL);
	if (error || *vpp != NULL) {
		*vpp = NULL;
		free(ip, M_EXT2NODE);
		return (error);
	}

	if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) {
		vput(vp);
		*vpp = NULL;
		free(ip, M_EXT2NODE);
		return (error);
	}

	/* Extent-mapped files get an extent tree; others a zeroed block map. */
	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS)
	    && (S_ISREG(mode) || S_ISDIR(mode)))
		ext4_ext_tree_init(ip);
	else
		memset(ip->i_data, 0, sizeof(ip->i_data));

	/*
	 * Set up a new generation number for this inode.
	 * Avoid zero values.
	 */
	do {
		ip->i_gen = arc4random();
	} while (ip->i_gen == 0);

	vfs_timestamp(&ts);
	ip->i_birthtime = ts.tv_sec;
	ip->i_birthnsec = ts.tv_nsec;

	*vpp = vp;

	return (0);

noinodes:
	EXT2_UNLOCK(ump);
	SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes");
	return (ENOSPC);
}

/*
 * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h
 *
 * Block/inode-bitmap and inode-table addresses combine a 32-bit low word
 * with a 32-bit "_hi" word; the free/used counters combine 16-bit halves.
 */
uint64_t
e2fs_gd_get_b_bitmap(struct ext2_gd *gd)
{

	return (((uint64_t)(gd->ext4bgd_b_bitmap_hi) << 32) |
	    gd->ext2bgd_b_bitmap);
}

uint64_t
e2fs_gd_get_i_bitmap(struct ext2_gd *gd)
{

	return (((uint64_t)(gd->ext4bgd_i_bitmap_hi) << 32) |
	    gd->ext2bgd_i_bitmap);
}

uint64_t
e2fs_gd_get_i_tables(struct ext2_gd *gd)
{

	return (((uint64_t)(gd->ext4bgd_i_tables_hi) << 32) |
	    gd->ext2bgd_i_tables);
}

static uint32_t
e2fs_gd_get_nbfree(struct ext2_gd *gd)
{

	return (((uint32_t)(gd->ext4bgd_nbfree_hi) << 16) |
	    gd->ext2bgd_nbfree);
}

static void
e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val)
{

	gd->ext2bgd_nbfree = val & 0xffff;
	gd->ext4bgd_nbfree_hi = val >> 16;
}

static uint32_t
e2fs_gd_get_nifree(struct ext2_gd *gd)
{

	return (((uint32_t)(gd->ext4bgd_nifree_hi) << 16) |
	    gd->ext2bgd_nifree);
}

static void
e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val)
{

	gd->ext2bgd_nifree = val & 0xffff;
	gd->ext4bgd_nifree_hi = val >> 16;
}

uint32_t
e2fs_gd_get_ndirs(struct ext2_gd *gd)
{

	return (((uint32_t)(gd->ext4bgd_ndirs_hi) << 16) |
	    gd->ext2bgd_ndirs);
}

static void
e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val)
{

	gd->ext2bgd_ndirs = val & 0xffff;
	gd->ext4bgd_ndirs_hi = val >> 16;
}

static uint32_t
e2fs_gd_get_i_unused(struct ext2_gd *gd)
{
	return
	    (((uint32_t)(gd->ext4bgd_i_unused_hi) << 16) |
	    gd->ext4bgd_i_unused);
}

static void
e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val)
{

	gd->ext4bgd_i_unused = val & 0xffff;
	gd->ext4bgd_i_unused_hi = val >> 16;
}

/*
 * Find a cylinder to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static u_long
ext2_dirpref(struct inode *pip)
{
	struct m_ext2fs *fs;
	int cg, prefcg, cgsize;
	uint64_t avgbfree, minbfree;
	u_int avgifree, avgndir, curdirsize;
	u_int minifree, maxndir;
	u_int mincg, minndir;
	u_int dirsize, maxcontigdirs;

	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
	fs = pip->i_e2fs;

	/* Filesystem-wide averages per cylinder group. */
	avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
	avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount;
	avgndir = fs->e2fs_total_dir / fs->e2fs_gcount;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 * Pick the group with the fewest directories among those with at
	 * least average free inodes and blocks, scanning from a random
	 * starting group and wrapping around.
	 */
	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = arc4random() % fs->e2fs_gcount;
		mincg = prefcg;
		minndir = fs->e2fs_ipg;
		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
			if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
			    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
			    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
				mincg = cg;
				minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
			}
		for (cg = 0; cg < prefcg; cg++)
			if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
			    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
			    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
				mincg = cg;
				minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
			}
		return (mincg);
	}
	/*
	 * Count various limits which used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
	minifree = avgifree - avgifree / 4;
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - avgbfree / 4;
	if (minbfree < 1)
		minbfree = 1;
	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
	dirsize = AVGDIRSIZE;
	curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
		if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
		    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
		    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
				return (cg);
		}
	for (cg = 0; cg < prefcg; cg++)
		if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
		    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
		    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
				return (cg);
		}
	/*
	 * This is a backstop when we have deficit in space.
	 */
	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
		if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
			return (cg);
	for (cg = 0; cg < prefcg; cg++)
		if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
			break;
	return (cg);
}

/*
 * Select the desired position for the next block in a file.
 *
 * we try to mimic what Remy does in inode_getblk/block_getblk
 *
 * we note: blocknr == 0 means that we're about to allocate either
 * a direct block or a pointer block at the first level of indirection
 * (In other words, stuff that will go in i_db[] or i_ib[])
 *
 * blocknr != 0 means that we're allocating a block that is none
 * of the above. Then, blocknr tells us the number of the block
 * that will hold the pointer
 */
e4fs_daddr_t
ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap,
    e2fs_daddr_t blocknr)
{
	struct m_ext2fs *fs;
	int tmp;

	fs = ip->i_e2fs;

	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);

	/*
	 * If the next block is actually what we thought it is, then set the
	 * goal to what we thought it should be.
	 */
	if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
		return ip->i_next_alloc_goal;

	/*
	 * Now check whether we were provided with an array that basically
	 * tells us previous blocks to which we want to stay close.
	 */
	if (bap)
		for (tmp = indx - 1; tmp >= 0; tmp--)
			if (bap[tmp])
				return bap[tmp];

	/*
	 * Else lets fall back to the blocknr or, if there is none, follow
	 * the rule that a block should be allocated near its inode.
	 */
	return (blocknr ? blocknr :
	    (e2fs_daddr_t)(ip->i_block_group *
	    EXT2_BLOCKS_PER_GROUP(fs)) + fs->e2fs->e2fs_first_dblock);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
static e4fs_daddr_t
ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
    daddr_t (*allocator) (struct inode *, int, daddr_t, int))
{
	struct m_ext2fs *fs;
	e4fs_daddr_t result;
	int i, icg = cg;

	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_e2fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
		cg += i;
		if (cg >= fs->e2fs_gcount)
			cg -= fs->e2fs_gcount;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->e2fs_gcount;
	for (i = 2; i < fs->e2fs_gcount; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->e2fs_gcount)
			cg = 0;
	}
	return (0);
}

/*
 * Number of group-descriptor blocks backed up in group `cg' when the
 * META_BG layout is not in effect for it (only groups carrying a
 * superblock copy have any).
 */
static uint64_t
ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg)
{

	if (!ext2_cg_has_sb(fs, cg))
		return (0);

	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG))
		return (fs->e2fs->e3fs_first_meta_bg);

	return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) /
	    EXT2_DESCS_PER_BLOCK(fs));
}

/*
 * Number of group-descriptor blocks in group `cg' under META_BG:
 * only the first, second and last group of each metagroup keep a copy.
 */
static uint64_t
ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg)
{
	unsigned long metagroup;
	int first, last;

	metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);
	first = metagroup * EXT2_DESCS_PER_BLOCK(fs);
	last = first + EXT2_DESCS_PER_BLOCK(fs) - 1;

	if (cg == first || cg == first + 1 || cg == last)
		return (1);

	return (0);
}

/*
 * Number of group-descriptor blocks stored in group `cg', dispatching
 * on whether the group falls in the META_BG region.
 */
uint64_t
ext2_cg_number_gdb(struct m_ext2fs *fs, int cg)
{
	unsigned long first_meta_bg, metagroup;

	first_meta_bg = fs->e2fs->e3fs_first_meta_bg;
	metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);

	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
	    metagroup < first_meta_bg)
		return (ext2_cg_number_gdb_nometa(fs, cg));

	return ext2_cg_number_gdb_meta(fs, cg);
}

/*
 * Count the metadata blocks (superblock copy, group descriptors and
 * reserved GDT blocks) at the start of group `cg'.
 */
static int
ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg)
{
	int number;

	number = ext2_cg_has_sb(fs, cg);

	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
	    cg < fs->e2fs->e3fs_first_meta_bg * EXT2_DESCS_PER_BLOCK(fs)) {
		if (number) {
			number += ext2_cg_number_gdb(fs, cg);
			number += fs->e2fs->e2fs_reserved_ngdb;
		}
	} else {
		number += ext2_cg_number_gdb(fs, cg);
	}

	return (number);
}

/*
 * Set bits [start_bit, end_bit) in `bitmap'; used to mark the tail of a
 * block bitmap (past the last real block) as in-use.
 */
static void
ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
int i; 866 867 if (start_bit >= end_bit) 868 return; 869 870 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 871 setbit(bitmap, i); 872 if (i < end_bit) 873 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 874 } 875 876 static int 877 ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block) 878 { 879 880 return ((block - fs->e2fs->e2fs_first_dblock) / fs->e2fs_bsize); 881 } 882 883 static int 884 ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg) 885 { 886 887 return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0); 888 } 889 890 static int 891 ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp) 892 { 893 int bit, bit_max, inodes_per_block; 894 uint64_t start, tmp; 895 896 if (!(fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_BLOCK_UNINIT)) 897 return (0); 898 899 memset(bp->b_data, 0, fs->e2fs_bsize); 900 901 bit_max = ext2_number_base_meta_blocks(fs, cg); 902 if ((bit_max >> 3) >= fs->e2fs_bsize) 903 return (EINVAL); 904 905 for (bit = 0; bit < bit_max; bit++) 906 setbit(bp->b_data, bit); 907 908 start = (uint64_t)cg * fs->e2fs->e2fs_bpg + fs->e2fs->e2fs_first_dblock; 909 910 /* Set bits for block and inode bitmaps, and inode table. 
*/ 911 tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]); 912 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 913 ext2_block_in_group(fs, tmp, cg)) 914 setbit(bp->b_data, tmp - start); 915 916 tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]); 917 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 918 ext2_block_in_group(fs, tmp, cg)) 919 setbit(bp->b_data, tmp - start); 920 921 tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]); 922 inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs); 923 while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + 924 fs->e2fs->e2fs_ipg / inodes_per_block ) { 925 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 926 ext2_block_in_group(fs, tmp, cg)) 927 setbit(bp->b_data, tmp - start); 928 tmp++; 929 } 930 931 /* 932 * Also if the number of blocks within the group is less than 933 * the blocksize * 8 ( which is the size of bitmap ), set rest 934 * of the block bitmap to 1 935 */ 936 ext2_mark_bitmap_end(fs->e2fs->e2fs_bpg, fs->e2fs_bsize * 8, 937 bp->b_data); 938 939 /* Clean the flag */ 940 fs->e2fs_gd[cg].ext4bgd_flags &= ~EXT2_BG_BLOCK_UNINIT; 941 942 return (0); 943 } 944 945 static int 946 ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg) 947 { 948 struct ext2_gd *gd; 949 uint64_t group_first_block; 950 unsigned int offset, max_bit; 951 952 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) { 953 /* 954 * It is not possible to check block bitmap in case of this feature, 955 * because the inode and block bitmaps and inode table 956 * blocks may not be in the group at all. 957 * So, skip check in this case. 
958 */ 959 return (0); 960 } 961 962 gd = &fs->e2fs_gd[cg]; 963 max_bit = fs->e2fs_fpg; 964 group_first_block = ((uint64_t)cg) * fs->e2fs->e2fs_fpg + 965 fs->e2fs->e2fs_first_dblock; 966 967 /* Check block bitmap block number */ 968 offset = e2fs_gd_get_b_bitmap(gd) - group_first_block; 969 if (offset >= max_bit || !isset(bp->b_data, offset)) { 970 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 971 "bad block bitmap, group", cg); 972 return (EINVAL); 973 } 974 975 /* Check inode bitmap block number */ 976 offset = e2fs_gd_get_i_bitmap(gd) - group_first_block; 977 if (offset >= max_bit || !isset(bp->b_data, offset)) { 978 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 979 "bad inode bitmap", cg); 980 return (EINVAL); 981 } 982 983 /* Check inode table */ 984 offset = e2fs_gd_get_i_tables(gd) - group_first_block; 985 if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) { 986 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 987 "bad inode table, group", cg); 988 return (EINVAL); 989 } 990 991 return (0); 992 } 993 994 /* 995 * Determine whether a block can be allocated. 996 * 997 * Check to see if a block of the appropriate size is available, 998 * and if it is, allocate it. 
 */
static daddr_t
ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
{
	struct m_ext2fs *fs;
	struct buf *bp;
	struct ext2mount *ump;
	daddr_t bno, runstart, runlen;
	int bit, loc, end, error, start;
	char *bbp;
	/* XXX ondisk32 */
	fs = ip->i_e2fs;
	ump = ip->i_ump;
	if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
		return (0);

	/* Drop the allocation mutex across the bitmap read. */
	EXT2_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs,
	    e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
	    (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error)
		goto fail;

	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		error = ext2_cg_block_bitmap_init(fs, cg, bp);
		if (error)
			goto fail;

		ext2_gd_b_bitmap_csum_set(fs, cg, bp);
	}
	error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp);
	if (error)
		goto fail;

	error = ext2_b_bitmap_validate(fs, bp, cg);
	if (error)
		goto fail;

	/*
	 * Check that another thread did not allocate the last block in this
	 * group while we were waiting for the buffer.
	 */
	if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
		goto fail;

	bbp = (char *)bp->b_data;

	if (dtog(fs, bpref) != cg)
		bpref = 0;
	if (bpref != 0) {
		bpref = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (isclr(bbp, bpref)) {
			bno = bpref;
			goto gotit;
		}
	}
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 * first try to get 8 contigous blocks, then fall back to a single
	 * block.
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = 0;
	end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
retry:
	/*
	 * Byte-wise scan for a run of at least 8 clear bits.  runstart
	 * tracks the bit index where the current run began.
	 */
	runlen = 0;
	runstart = 0;
	for (loc = start; loc < end; loc++) {
		if (bbp[loc] == (char)0xff) {
			runlen = 0;
			continue;
		}

		/* Start of a run, find the number of high clear bits. */
		if (runlen == 0) {
			bit = fls(bbp[loc]);
			runlen = NBBY - bit;
			runstart = loc * NBBY + bit;
		} else if (bbp[loc] == 0) {
			/* Continue a run. */
			runlen += NBBY;
		} else {
			/*
			 * Finish the current run.  If it isn't long
			 * enough, start a new one.
			 */
			bit = ffs(bbp[loc]) - 1;
			runlen += bit;
			if (runlen >= 8) {
				bno = runstart;
				goto gotit;
			}

			/* Run was too short, start a new one. */
			bit = fls(bbp[loc]);
			runlen = NBBY - bit;
			runstart = loc * NBBY + bit;
		}

		/* If the current run is long enough, use it. */
		if (runlen >= 8) {
			bno = runstart;
			goto gotit;
		}
	}
	/* Wrap around and scan the bytes before the preferred start. */
	if (start != 0) {
		end = start;
		start = 0;
		goto retry;
	}
	/* No 8-block run anywhere; fall back to any single free block. */
	bno = ext2_mapsearch(fs, bbp, bpref);
	if (bno < 0)
		goto fail;

gotit:
#ifdef INVARIANTS
	if (isset(bbp, bno)) {
		printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
		    cg, (intmax_t)bno, fs->e2fs_fsmnt);
		panic("ext2fs_alloccg: dup alloc");
	}
#endif
	setbit(bbp, bno);
	EXT2_LOCK(ump);
	ext2_clusteracct(fs, bbp, cg, bno, -1);
	fs->e2fs_fbcount--;
	e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
	    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1);
	fs->e2fs_fmod = 1;
	EXT2_UNLOCK(ump);
	ext2_gd_b_bitmap_csum_set(fs, cg, bp);
	bdwrite(bp);
	return (((uint64_t)cg) * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);

fail:
	/* Caller expects the mutex held again on the 0 (failure) return. */
	brelse(bp);
	EXT2_LOCK(ump);
	return (0);
}

/*
 * Determine whether a cluster can be
 * allocated.
 */
/*
 * Allocate a run of "len" contiguous blocks in cylinder group "cg",
 * preferably starting at "bpref".  Called with the ext2mount lock held;
 * the lock is also held again on return, success or failure.  Returns
 * the filesystem-relative block number of the first block of the
 * cluster, or 0 if no suitable cluster was found.
 */
static daddr_t
ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
{
	struct m_ext2fs *fs;
	struct ext2mount *ump;
	struct buf *bp;
	char *bbp;
	int bit, error, got, i, loc, run;
	int32_t *lp;
	daddr_t bno;

	fs = ip->i_e2fs;
	ump = ip->i_ump;

	/* Cached per-group maximum: cheap rejection without bitmap I/O. */
	if (fs->e2fs_maxcluster[cg] < len)
		return (0);

	/* Drop the mount lock across the (sleeping) bitmap read. */
	EXT2_UNLOCK(ump);
	error = bread(ip->i_devvp,
	    fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
	    (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error)
		goto fail_lock;

	bbp = (char *)bp->b_data;
	EXT2_LOCK(ump);
	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &fs->e2fs_clustersum[cg].cs_sum[len];
	for (i = len; i <= fs->e2fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->e2fs_contigsumsize) {
		/*
		 * Update the cluster summary information to reflect
		 * the true maximum-sized cluster so that future cluster
		 * allocation requests can avoid reading the bitmap only
		 * to find no cluster.
		 */
		lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		fs->e2fs_maxcluster[cg] = i;
		goto fail;
	}
	EXT2_UNLOCK(ump);

	/* Search the bitmap to find a big enough cluster like in FFS. */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	if (bpref != 0)
		bpref = dtogd(fs, bpref);
	loc = bpref / NBBY;
	bit = 1 << (bpref % NBBY);
	/*
	 * "run" counts consecutive clear (free) bits; "got" is the bit
	 * index currently being examined.  Stop as soon as a run of the
	 * requested length is found.
	 */
	for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) {
		if ((bbp[loc] & bit) != 0)
			run = 0;
		else {
			run++;
			if (run == len)
				break;
		}
		/* Advance the bit cursor, moving to the next byte on wrap. */
		if ((got & (NBBY - 1)) != (NBBY - 1))
			bit <<= 1;
		else {
			loc++;
			bit = 1;
		}
	}

	if (got >= fs->e2fs->e2fs_fpg)
		goto fail_lock;

	/* Allocate the cluster that we found. */
	for (i = 1; i < len; i++)
		/* Sanity check: every bit of the run must still be clear. */
		if (!isclr(bbp, got - run + i))
			panic("ext2_clusteralloc: map mismatch");

	/* "got" indexes the last bit of the run, so its first bit is: */
	bno = got - run + 1;
	if (bno >= fs->e2fs->e2fs_fpg)
		panic("ext2_clusteralloc: allocated out of group");

	/* Mark the blocks allocated and update the free counters. */
	EXT2_LOCK(ump);
	for (i = 0; i < len; i += fs->e2fs_fpb) {
		setbit(bbp, bno + i);
		ext2_clusteracct(fs, bbp, cg, bno + i, -1);
		fs->e2fs_fbcount--;
		e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
		    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1);
	}
	fs->e2fs_fmod = 1;
	EXT2_UNLOCK(ump);

	bdwrite(bp);
	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);

fail_lock:
	EXT2_LOCK(ump);
fail:
	/* Both failure paths return 0 with the mount lock held. */
	brelse(bp);
	return (0);
}

/*
 * Zero the not-yet-used tail of cylinder group cg's on-disk inode
 * table, as required once per group when the uninit-group feature is
 * in use.  The leading blocks that already hold in-use inodes (per the
 * group's unused-inode count) are left untouched.  Sets the
 * EXT2_BG_INODE_ZEROED flag when done, making later calls no-ops.
 * Returns 0 on success or EIO if a buffer could not be obtained.
 */
static int
ext2_zero_inode_table(struct inode *ip, int cg)
{
	struct m_ext2fs *fs;
	struct buf *bp;
	int i, all_blks, used_blks;

	fs = ip->i_e2fs;

	if (fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_INODE_ZEROED)
		return (0);

	/* Total inode-table blocks in a group... */
	all_blks = fs->e2fs->e2fs_inode_size * fs->e2fs->e2fs_ipg /
	    fs->e2fs_bsize;

	/* ...and how many of them hold inodes already accounted as used. */
	used_blks = howmany(fs->e2fs->e2fs_ipg -
	    e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]),
	    fs->e2fs_bsize / EXT2_INODE_SIZE(fs));

	for (i = 0; i < all_blks - used_blks; i++) {
		bp = getblk(ip->i_devvp, fsbtodb(fs,
		    e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i),
		    fs->e2fs_bsize, 0, 0, 0);
		if (!bp)
			return (EIO);

		/* Write a zero-filled block asynchronously. */
		vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize);
		bawrite(bp);
	}

	fs->e2fs_gd[cg].ext4bgd_flags |= EXT2_BG_INODE_ZEROED;

	return (0);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it in the specified cylinder group.
 */
/*
 * Allocate an inode in cylinder group cg, preferring inode number
 * ipref.  Called with the ext2mount lock held; the lock is dropped
 * around the inode-bitmap I/O and re-taken for the counter updates.
 * Returns the allocated (1-based) inode number, or 0 on failure.
 */
static daddr_t
ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
{
	struct m_ext2fs *fs;
	struct buf *bp;
	struct ext2mount *ump;
	int error, start, len, ifree;
	char *ibp, *loc;

	ipref--; /* to avoid a lot of (ipref -1); work 0-based from here */
	if (ipref == -1)
		ipref = 0;
	fs = ip->i_e2fs;
	ump = ip->i_ump;
	if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0)
		return (0);
	EXT2_UNLOCK(ump);
	error = bread(ip->i_devvp, fsbtodb(fs,
	    e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])),
	    (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error) {
		EXT2_LOCK(ump);
		return (0);
	}
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		/*
		 * An uninitialized inode bitmap reads as garbage: clear it
		 * and drop the UNINIT flag before using it.
		 */
		if (fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_INODE_UNINIT) {
			memset(bp->b_data, 0, fs->e2fs_bsize);
			fs->e2fs_gd[cg].ext4bgd_flags &= ~EXT2_BG_INODE_UNINIT;
		}
		ext2_gd_i_bitmap_csum_set(fs, cg, bp);
		/* Zero the unused tail of the inode table on first use. */
		error = ext2_zero_inode_table(ip, cg);
		if (error) {
			brelse(bp);
			EXT2_LOCK(ump);
			return (0);
		}
	}
	error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp);
	if (error) {
		brelse(bp);
		EXT2_LOCK(ump);
		return (0);
	}
	if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) {
		/*
		 * Another thread allocated the last i-node in this
		 * group while we were waiting for the buffer.
		 */
		brelse(bp);
		EXT2_LOCK(ump);
		return (0);
	}
	ibp = (char *)bp->b_data;
	if (ipref) {
		ipref %= fs->e2fs->e2fs_ipg;
		/* The preferred inode is free: take it. */
		if (isclr(ibp, ipref))
			goto gotit;
	}
	/*
	 * Find the first bitmap byte that is not all-ones, scanning from
	 * the preferred position to the end of the map, then wrapping
	 * around to cover [0, start].
	 */
	start = ipref / NBBY;
	len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
	loc = memcchr(&ibp[start], 0xff, len);
	if (loc == NULL) {
		len = start + 1;
		start = 0;
		loc = memcchr(&ibp[start], 0xff, len);
		if (loc == NULL) {
			SDT_PROBE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted,
			    cg, ipref, fs->e2fs_fsmnt);
			brelse(bp);
			EXT2_LOCK(ump);
			return (0);
		}
	}
	/* The lowest clear bit in *loc is the free inode. */
	ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1;
gotit:
	setbit(ibp, ipref);
	EXT2_LOCK(ump);
	e2fs_gd_set_nifree(&fs->e2fs_gd[cg],
	    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1);
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		/*
		 * Keep the group's unused-inode high-water mark current so
		 * the tail of the inode table can remain uninitialized.
		 */
		ifree = fs->e2fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]);
		if (ipref + 1 > ifree)
			e2fs_gd_set_i_unused(&fs->e2fs_gd[cg],
			    fs->e2fs->e2fs_ipg - (ipref + 1));
	}
	fs->e2fs->e2fs_ficount--;
	fs->e2fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		/* New directory: account it in the group and fs totals. */
		e2fs_gd_set_ndirs(&fs->e2fs_gd[cg],
		    e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1);
		fs->e2fs_total_dir++;
	}
	EXT2_UNLOCK(ump);
	ext2_gd_i_bitmap_csum_set(fs, cg, bp);
	bdwrite(bp);
	/* Convert the group-relative bit index back to a 1-based ino. */
	return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1);
}

/*
 * Free a block or fragment.
1398 * 1399 */ 1400 void 1401 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 1402 { 1403 struct m_ext2fs *fs; 1404 struct buf *bp; 1405 struct ext2mount *ump; 1406 int cg, error; 1407 char *bbp; 1408 1409 fs = ip->i_e2fs; 1410 ump = ip->i_ump; 1411 cg = dtog(fs, bno); 1412 if (bno >= fs->e2fs_bcount) { 1413 SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, ip->i_number, bno); 1414 return; 1415 } 1416 error = bread(ip->i_devvp, 1417 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1418 (int)fs->e2fs_bsize, NOCRED, &bp); 1419 if (error) { 1420 return; 1421 } 1422 bbp = (char *)bp->b_data; 1423 bno = dtogd(fs, bno); 1424 if (isclr(bbp, bno)) { 1425 panic("ext2_blkfree: freeing free block %lld, fs=%s", 1426 (long long)bno, fs->e2fs_fsmnt); 1427 } 1428 clrbit(bbp, bno); 1429 EXT2_LOCK(ump); 1430 ext2_clusteracct(fs, bbp, cg, bno, 1); 1431 fs->e2fs_fbcount++; 1432 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1433 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); 1434 fs->e2fs_fmod = 1; 1435 EXT2_UNLOCK(ump); 1436 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1437 bdwrite(bp); 1438 } 1439 1440 /* 1441 * Free an inode. 
1442 * 1443 */ 1444 int 1445 ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1446 { 1447 struct m_ext2fs *fs; 1448 struct inode *pip; 1449 struct buf *bp; 1450 struct ext2mount *ump; 1451 int error, cg; 1452 char *ibp; 1453 1454 pip = VTOI(pvp); 1455 fs = pip->i_e2fs; 1456 ump = pip->i_ump; 1457 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1458 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1459 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1460 1461 cg = ino_to_cg(fs, ino); 1462 error = bread(pip->i_devvp, 1463 fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1464 (int)fs->e2fs_bsize, NOCRED, &bp); 1465 if (error) { 1466 return (0); 1467 } 1468 ibp = (char *)bp->b_data; 1469 ino = (ino - 1) % fs->e2fs->e2fs_ipg; 1470 if (isclr(ibp, ino)) { 1471 SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, 1472 fs->e2fs_fsmnt, ino); 1473 if (fs->e2fs_ronly == 0) 1474 panic("ext2_vfree: freeing free inode"); 1475 } 1476 clrbit(ibp, ino); 1477 EXT2_LOCK(ump); 1478 fs->e2fs->e2fs_ficount++; 1479 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1480 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); 1481 if ((mode & IFMT) == IFDIR) { 1482 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1483 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); 1484 fs->e2fs_total_dir--; 1485 } 1486 fs->e2fs_fmod = 1; 1487 EXT2_UNLOCK(ump); 1488 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1489 bdwrite(bp); 1490 return (0); 1491 } 1492 1493 /* 1494 * Find a block in the specified cylinder group. 1495 * 1496 * It is a panic if a request is made to find a block if none are 1497 * available. 
1498 */ 1499 static daddr_t 1500 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1501 { 1502 char *loc; 1503 int start, len; 1504 1505 /* 1506 * find the fragment by searching through the free block 1507 * map for an appropriate bit pattern 1508 */ 1509 if (bpref) 1510 start = dtogd(fs, bpref) / NBBY; 1511 else 1512 start = 0; 1513 len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; 1514 loc = memcchr(&bbp[start], 0xff, len); 1515 if (loc == NULL) { 1516 len = start + 1; 1517 start = 0; 1518 loc = memcchr(&bbp[start], 0xff, len); 1519 if (loc == NULL) { 1520 panic("ext2_mapsearch: map corrupted: start=%d, len=%d, fs=%s", 1521 start, len, fs->e2fs_fsmnt); 1522 /* NOTREACHED */ 1523 } 1524 } 1525 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1526 } 1527 1528 int 1529 ext2_cg_has_sb(struct m_ext2fs *fs, int cg) 1530 { 1531 int a3, a5, a7; 1532 1533 if (cg == 0) 1534 return (1); 1535 1536 if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { 1537 if (cg == fs->e2fs->e4fs_backup_bgs[0] || 1538 cg == fs->e2fs->e4fs_backup_bgs[1]) 1539 return (1); 1540 return (0); 1541 } 1542 1543 if ((cg <= 1) || 1544 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) 1545 return (1); 1546 1547 if (!(cg & 1)) 1548 return (0); 1549 1550 for (a3 = 3, a5 = 5, a7 = 7; 1551 a3 <= cg || a5 <= cg || a7 <= cg; 1552 a3 *= 3, a5 *= 5, a7 *= 7) 1553 if (cg == a3 || cg == a5 || cg == a7) 1554 return (1); 1555 return (0); 1556 } 1557