1 /*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 
36 * 37 * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/conf.h> 44 #include <sys/vnode.h> 45 #include <sys/sdt.h> 46 #include <sys/stat.h> 47 #include <sys/mount.h> 48 #include <sys/sysctl.h> 49 #include <sys/syslog.h> 50 #include <sys/buf.h> 51 #include <sys/endian.h> 52 53 #include <fs/ext2fs/fs.h> 54 #include <fs/ext2fs/inode.h> 55 #include <fs/ext2fs/ext2_mount.h> 56 #include <fs/ext2fs/ext2fs.h> 57 #include <fs/ext2fs/ext2_extern.h> 58 59 SDT_PROVIDER_DEFINE(ext2fs); 60 /* 61 * ext2fs trace probe: 62 * arg0: verbosity. Higher numbers give more verbose messages 63 * arg1: Textual message 64 */ 65 SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*"); 66 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc, 67 "ino_t", "e2fs_lbn_t", "e2fs_lbn_t"); 68 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t"); 69 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t"); 70 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int"); 71 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 72 "int", "daddr_t", "char*"); 73 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t"); 74 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t"); 75 76 static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); 77 static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); 78 static u_long ext2_dirpref(struct inode *); 79 static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int, 80 daddr_t (*)(struct inode *, int, daddr_t, 81 int)); 82 static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); 83 static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); 84 85 /* 86 * Allocate a block in the filesystem. 87 * 88 * A preference may be optionally specified. 
 * If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *        available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *        inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *        available block is located.
 */
/*
 * Locking: entered with the ext2mount mutex held (asserted below); the
 * mutex is released on both paths — the nospace path unlocks explicitly,
 * and the successful allocator path drops it inside ext2_alloccg.
 * On success *bnp receives the new filesystem block number and 0 is
 * returned; on failure *bnp stays 0 and ENOSPC is returned.
 */
int
ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size,
    struct ucred *cred, e4fs_daddr_t *bnp)
{
	struct m_ext2fs *fs;
	struct ext2mount *ump;
	e4fs_daddr_t bno;
	int cg;

	*bnp = 0;
	fs = ip->i_e2fs;
	ump = ip->i_ump;
	mtx_assert(EXT2_MTX(ump), MA_OWNED);
#ifdef INVARIANTS
	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
		panic("ext2_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ext2_alloc: missing credential");
#endif /* INVARIANTS */
	/* Full-block request cannot succeed when no free blocks remain. */
	if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0)
		goto nospace;
	/* Non-root callers may not dip into the reserved block pool. */
	if (cred->cr_uid != 0 &&
	    fs->e2fs_fbcount < fs->e2fs_rbcount)
		goto nospace;
	if (bpref >= fs->e2fs_bcount)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
	    ext2_alloccg);
	if (bno > 0) {
		/* set next_alloc fields as done in block_getblk */
		ip->i_next_alloc_block = lbn;
		ip->i_next_alloc_goal = bno;

		ip->i_blocks += btodb(fs->e2fs_bsize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
nospace:
	EXT2_UNLOCK(ump);
	SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block");
	return (ENOSPC);
}

/*
 * Allocate EA's block for inode.
 *
 * Takes the ext2mount mutex itself (unlike ext2_alloc above); the
 * allocator releases it on success, and the failure path unlocks
 * explicitly.  Returns the new block number, or 0 on failure.
 */
e4fs_daddr_t
ext2_alloc_meta(struct inode *ip)
{
	struct m_ext2fs *fs;
	daddr_t blk;

	fs = ip->i_e2fs;

	EXT2_LOCK(ip->i_ump);
	blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize,
	    ext2_alloccg);
	if (0 == blk) {
		EXT2_UNLOCK(ip->i_ump);
		SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block");
	}

	return (blk);
}

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous is given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible to
 * an fs_rotdelay offset from the end of the allocation for the logical
 * block immediately preceding the current range. If successful, the
 * physical block numbers in the buffer pointers and in the inode are
 * changed to reflect the new allocation. If unsuccessful, the allocation
 * is left unchanged. The success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
188 */ 189 190 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 191 "EXT2FS filesystem"); 192 193 static int doasyncfree = 1; 194 195 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, 196 "Use asychronous writes to update block pointers when freeing blocks"); 197 198 static int doreallocblks = 0; 199 200 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 201 202 int 203 ext2_reallocblks(struct vop_reallocblks_args *ap) 204 { 205 struct m_ext2fs *fs; 206 struct inode *ip; 207 struct vnode *vp; 208 struct buf *sbp, *ebp; 209 uint32_t *bap, *sbap, *ebap; 210 struct ext2mount *ump; 211 struct cluster_save *buflist; 212 struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp; 213 e2fs_lbn_t start_lbn, end_lbn; 214 int soff; 215 e2fs_daddr_t newblk, blkno; 216 int i, len, start_lvl, end_lvl, pref, ssize; 217 218 if (doreallocblks == 0) 219 return (ENOSPC); 220 221 vp = ap->a_vp; 222 ip = VTOI(vp); 223 fs = ip->i_e2fs; 224 ump = ip->i_ump; 225 226 if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) 227 return (ENOSPC); 228 229 buflist = ap->a_buflist; 230 len = buflist->bs_nchildren; 231 start_lbn = buflist->bs_children[0]->b_lblkno; 232 end_lbn = start_lbn + len - 1; 233 #ifdef INVARIANTS 234 for (i = 1; i < len; i++) 235 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 236 panic("ext2_reallocblks: non-cluster"); 237 #endif 238 /* 239 * If the cluster crosses the boundary for the first indirect 240 * block, leave space for the indirect block. Indirect blocks 241 * are initially laid out in a position after the last direct 242 * block. Block reallocation would usually destroy locality by 243 * moving the indirect block out of the way to make room for 244 * data blocks if we didn't compensate here. We should also do 245 * this for other indirect block boundaries, but it is only 246 * important for the first one. 
247 */ 248 if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR) 249 return (ENOSPC); 250 /* 251 * If the latest allocation is in a new cylinder group, assume that 252 * the filesystem has decided to move and do not force it back to 253 * the previous cylinder group. 254 */ 255 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 256 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 257 return (ENOSPC); 258 if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || 259 ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) 260 return (ENOSPC); 261 /* 262 * Get the starting offset and block map for the first block. 263 */ 264 if (start_lvl == 0) { 265 sbap = &ip->i_db[0]; 266 soff = start_lbn; 267 } else { 268 idp = &start_ap[start_lvl - 1]; 269 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) { 270 brelse(sbp); 271 return (ENOSPC); 272 } 273 sbap = (u_int *)sbp->b_data; 274 soff = idp->in_off; 275 } 276 /* 277 * If the block range spans two block maps, get the second map. 278 */ 279 ebap = NULL; 280 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 281 ssize = len; 282 } else { 283 #ifdef INVARIANTS 284 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) 285 panic("ext2_reallocblks: start == end"); 286 #endif 287 ssize = len - (idp->in_off + 1); 288 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)) 289 goto fail; 290 ebap = (u_int *)ebp->b_data; 291 } 292 /* 293 * Find the preferred location for the cluster. 294 */ 295 EXT2_LOCK(ump); 296 pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); 297 /* 298 * Search the block map looking for an allocation of the desired size. 299 */ 300 if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, 301 len, ext2_clusteralloc)) == 0) { 302 EXT2_UNLOCK(ump); 303 goto fail; 304 } 305 /* 306 * We have found a new contiguous block. 
307 * 308 * First we have to replace the old block pointers with the new 309 * block pointers in the inode and indirect blocks associated 310 * with the file. 311 */ 312 SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc, 313 ip->i_number, start_lbn, end_lbn); 314 blkno = newblk; 315 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 316 if (i == ssize) { 317 bap = ebap; 318 soff = -i; 319 } 320 #ifdef INVARIANTS 321 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) 322 panic("ext2_reallocblks: alloc mismatch"); 323 #endif 324 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap); 325 *bap++ = blkno; 326 } 327 /* 328 * Next we must write out the modified inode and indirect blocks. 329 * For strict correctness, the writes should be synchronous since 330 * the old block values may have been written to disk. In practise 331 * they are almost never written, but if we are concerned about 332 * strict correctness, the `doasyncfree' flag should be set to zero. 333 * 334 * The test on `doasyncfree' should be changed to test a flag 335 * that shows whether the associated buffers and inodes have 336 * been written. The flag should be set when the cluster is 337 * started and cleared whenever the buffer or inode is flushed. 338 * We can then check below to see if it is set, and do the 339 * synchronous write only when it has been cleared. 340 */ 341 if (sbap != &ip->i_db[0]) { 342 if (doasyncfree) 343 bdwrite(sbp); 344 else 345 bwrite(sbp); 346 } else { 347 ip->i_flag |= IN_CHANGE | IN_UPDATE; 348 if (!doasyncfree) 349 ext2_update(vp, 1); 350 } 351 if (ssize < len) { 352 if (doasyncfree) 353 bdwrite(ebp); 354 else 355 bwrite(ebp); 356 } 357 /* 358 * Last, free the old blocks and assign the new blocks to the buffers. 
359 */ 360 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 361 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), 362 fs->e2fs_bsize); 363 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 364 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno); 365 } 366 367 return (0); 368 369 fail: 370 if (ssize < len) 371 brelse(ebp); 372 if (sbap != &ip->i_db[0]) 373 brelse(sbp); 374 return (ENOSPC); 375 } 376 377 /* 378 * Allocate an inode in the filesystem. 379 * 380 */ 381 int 382 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 383 { 384 struct timespec ts; 385 struct m_ext2fs *fs; 386 struct ext2mount *ump; 387 struct inode *pip; 388 struct inode *ip; 389 struct vnode *vp; 390 struct thread *td; 391 ino_t ino, ipref; 392 int error, cg; 393 394 *vpp = NULL; 395 pip = VTOI(pvp); 396 fs = pip->i_e2fs; 397 ump = pip->i_ump; 398 399 EXT2_LOCK(ump); 400 if (fs->e2fs_ficount == 0) 401 goto noinodes; 402 /* 403 * If it is a directory then obtain a cylinder group based on 404 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is 405 * always the next inode. 406 */ 407 if ((mode & IFMT) == IFDIR) { 408 cg = ext2_dirpref(pip); 409 if (fs->e2fs_contigdirs[cg] < 255) 410 fs->e2fs_contigdirs[cg]++; 411 } else { 412 cg = ino_to_cg(fs, pip->i_number); 413 if (fs->e2fs_contigdirs[cg] > 0) 414 fs->e2fs_contigdirs[cg]--; 415 } 416 ipref = cg * fs->e2fs_ipg + 1; 417 ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); 418 if (ino == 0) 419 goto noinodes; 420 421 td = curthread; 422 error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 423 if (error || *vpp != NULL) { 424 return (error); 425 } 426 427 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 428 if (ip == NULL) { 429 return (ENOMEM); 430 } 431 432 /* Allocate a new vnode/inode. 
 */
	if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) {
		free(ip, M_EXT2NODE);
		return (error);
	}

	/* Tie the new in-memory inode to the fresh vnode. */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_e2fs = fs = ump->um_e2fs;
	ip->i_ump = ump;
	ip->i_number = ino;
	ip->i_block_group = ino_to_cg(fs, ino);
	ip->i_next_alloc_block = 0;
	ip->i_next_alloc_goal = 0;

	/*
	 * NOTE(review): on insmntque() failure the vnode is reclaimed with
	 * v_data still pointing at ip, which is then freed here — confirm
	 * the reclaim path does not touch v_data in that window.
	 */
	error = insmntque(vp, ump->um_mountp);
	if (error) {
		free(ip, M_EXT2NODE);
		return (error);
	}

	error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL);
	if (error || *vpp != NULL) {
		*vpp = NULL;
		free(ip, M_EXT2NODE);
		return (error);
	}

	if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) {
		vput(vp);
		*vpp = NULL;
		free(ip, M_EXT2NODE);
		return (error);
	}

	/* Extent-mapped files get an extent tree root; others a zeroed map. */
	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS)
	    && (S_ISREG(mode) || S_ISDIR(mode)))
		ext4_ext_tree_init(ip);
	else
		memset(ip->i_data, 0, sizeof(ip->i_data));


	/*
	 * Set up a new generation number for this inode.
	 * Avoid zero values.
	 */
	do {
		ip->i_gen = arc4random();
	} while (ip->i_gen == 0);

	vfs_timestamp(&ts);
	ip->i_birthtime = ts.tv_sec;
	ip->i_birthnsec = ts.tv_nsec;

	*vpp = vp;

	return (0);

noinodes:
	EXT2_UNLOCK(ump);
	SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes");
	return (ENOSPC);
}

/*
 * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h
 *
 * Each value is stored on disk as a little-endian low field plus a
 * separate high field (used with the 64BIT feature); these helpers
 * combine and split them in host byte order.
 */
uint64_t
e2fs_gd_get_b_bitmap(struct ext2_gd *gd)
{

	return (((uint64_t)(le32toh(gd->ext4bgd_b_bitmap_hi)) << 32) |
	    le32toh(gd->ext2bgd_b_bitmap));
}

uint64_t
e2fs_gd_get_i_bitmap(struct ext2_gd *gd)
{

	return (((uint64_t)(le32toh(gd->ext4bgd_i_bitmap_hi)) << 32) |
	    le32toh(gd->ext2bgd_i_bitmap));
}

uint64_t
e2fs_gd_get_i_tables(struct ext2_gd *gd)
{

	return (((uint64_t)(le32toh(gd->ext4bgd_i_tables_hi)) << 32) |
	    le32toh(gd->ext2bgd_i_tables));
}

static uint32_t
e2fs_gd_get_nbfree(struct ext2_gd *gd)
{

	return (((uint32_t)(le16toh(gd->ext4bgd_nbfree_hi)) << 16) |
	    le16toh(gd->ext2bgd_nbfree));
}

static void
e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val)
{

	gd->ext2bgd_nbfree = htole16(val & 0xffff);
	gd->ext4bgd_nbfree_hi = htole16(val >> 16);
}

static uint32_t
e2fs_gd_get_nifree(struct ext2_gd *gd)
{

	return (((uint32_t)(le16toh(gd->ext4bgd_nifree_hi)) << 16) |
	    le16toh(gd->ext2bgd_nifree));
}

static void
e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val)
{

	gd->ext2bgd_nifree = htole16(val & 0xffff);
	gd->ext4bgd_nifree_hi = htole16(val >> 16);
}

uint32_t
e2fs_gd_get_ndirs(struct ext2_gd *gd)
{

	return (((uint32_t)(le16toh(gd->ext4bgd_ndirs_hi)) << 16) |
	    le16toh(gd->ext2bgd_ndirs));
}

static void
e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val)
{

	gd->ext2bgd_ndirs =
htole16(val & 0xffff); 569 gd->ext4bgd_ndirs_hi = htole16(val >> 16); 570 } 571 572 static uint32_t 573 e2fs_gd_get_i_unused(struct ext2_gd *gd) 574 { 575 return ((uint32_t)(le16toh(gd->ext4bgd_i_unused_hi) << 16) | 576 le16toh(gd->ext4bgd_i_unused)); 577 } 578 579 static void 580 e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val) 581 { 582 583 gd->ext4bgd_i_unused = htole16(val & 0xffff); 584 gd->ext4bgd_i_unused_hi = htole16(val >> 16); 585 } 586 587 /* 588 * Find a cylinder to place a directory. 589 * 590 * The policy implemented by this algorithm is to allocate a 591 * directory inode in the same cylinder group as its parent 592 * directory, but also to reserve space for its files inodes 593 * and data. Restrict the number of directories which may be 594 * allocated one after another in the same cylinder group 595 * without intervening allocation of files. 596 * 597 * If we allocate a first level directory then force allocation 598 * in another cylinder group. 599 * 600 */ 601 static u_long 602 ext2_dirpref(struct inode *pip) 603 { 604 struct m_ext2fs *fs; 605 int cg, prefcg, cgsize; 606 uint64_t avgbfree, minbfree; 607 u_int avgifree, avgndir, curdirsize; 608 u_int minifree, maxndir; 609 u_int mincg, minndir; 610 u_int dirsize, maxcontigdirs; 611 612 mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); 613 fs = pip->i_e2fs; 614 615 avgifree = fs->e2fs_ficount / fs->e2fs_gcount; 616 avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount; 617 avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; 618 619 /* 620 * Force allocation in another cg if creating a first level dir. 
	 */
	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		/*
		 * Children of the root: start from a random group and pick
		 * the group with the fewest directories that still has at
		 * least average free inodes and blocks.
		 */
		prefcg = arc4random() % fs->e2fs_gcount;
		mincg = prefcg;
		minndir = fs->e2fs_ipg;
		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
			if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
			    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
			    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
				mincg = cg;
				minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
			}
		/* Wrap around and scan the groups before the random start. */
		for (cg = 0; cg < prefcg; cg++)
			if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
			    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
			    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
				mincg = cg;
				minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
			}
		return (mincg);
	}
	/*
	 * Count various limits which used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
	minifree = avgifree - avgifree / 4;
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - avgbfree / 4;
	if (minbfree < 1)
		minbfree = 1;
	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
	dirsize = AVGDIRSIZE;
	/* Estimate actual average directory footprint from used space. */
	curdirsize = avgndir ?
	    (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
		if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
		    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
		    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
				return (cg);
		}
	for (cg = 0; cg < prefcg; cg++)
		if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
		    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
		    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
				return (cg);
		}
	/*
	 * This is a backstop when we have deficit in space.
	 */
	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
		if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
			return (cg);
	for (cg = 0; cg < prefcg; cg++)
		if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
			break;
	return (cg);
}

/*
 * Select the desired position for the next block in a file.
 *
 * we try to mimic what Remy does in inode_getblk/block_getblk
 *
 * we note: blocknr == 0 means that we're about to allocate either
 * a direct block or a pointer block at the first level of indirection
 * (In other words, stuff that will go in i_db[] or i_ib[])
 *
 * blocknr != 0 means that we're allocating a block that is none
 * of the above. Then, blocknr tells us the number of the block
 * that will hold the pointer
 */
e4fs_daddr_t
ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap,
    e2fs_daddr_t blocknr)
{
	struct m_ext2fs *fs;
	int tmp;

	fs = ip->i_e2fs;

	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);

	/*
	 * If the next block is actually what we thought it is, then set the
	 * goal to what we thought it should be.
	 */
	if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
		return ip->i_next_alloc_goal;

	/*
	 * Now check whether we were provided with an array that basically
	 * tells us previous blocks to which we want to stay close.
	 */
	if (bap)
		for (tmp = indx - 1; tmp >= 0; tmp--)
			if (bap[tmp])
				return (le32toh(bap[tmp]));

	/*
	 * Else lets fall back to the blocknr or, if there is none, follow
	 * the rule that a block should be allocated near its inode.
	 */
	return (blocknr ? blocknr :
	    (e2fs_daddr_t)(ip->i_block_group *
	    EXT2_BLOCKS_PER_GROUP(fs)) + le32toh(fs->e2fs->e2fs_first_dblock));
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 *
 * Entered with the ext2mount mutex held; the allocator callback is
 * expected to return with the mutex held on failure and released on
 * success, so this function inherits the same contract.
 */
static e4fs_daddr_t
ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
    daddr_t (*allocator) (struct inode *, int, daddr_t, int))
{
	struct m_ext2fs *fs;
	e4fs_daddr_t result;
	int i, icg = cg;

	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
	fs = ip->i_e2fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
		cg += i;
		if (cg >= fs->e2fs_gcount)
			cg -= fs->e2fs_gcount;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->e2fs_gcount;
	for (i = 2; i < fs->e2fs_gcount; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->e2fs_gcount)
			cg = 0;
	}
	return (0);
}

/*
 * Number of group-descriptor blocks occupied in group `cg' when the
 * META_BG feature is absent (or for groups before the first meta group).
 * Only groups carrying a superblock backup hold descriptor copies.
 */
static uint64_t
ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg)
{

	if (!ext2_cg_has_sb(fs, cg))
		return (0);

	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG))
		return (le32toh(fs->e2fs->e3fs_first_meta_bg));

	return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) /
	    EXT2_DESCS_PER_BLOCK(fs));
}

/*
 * META_BG layout: each metagroup keeps descriptor copies only in its
 * first, second and last member group.
 */
static uint64_t
ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg)
{
	unsigned long metagroup;
	int first, last;

	metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);
	first = metagroup * EXT2_DESCS_PER_BLOCK(fs);
	last = first + EXT2_DESCS_PER_BLOCK(fs) - 1;

	if (cg == first || cg == first + 1 || cg == last)
		return (1);

	return (0);
}

/*
 * Number of group-descriptor blocks in group `cg', dispatching on
 * whether the group lies in the META_BG region.
 */
uint64_t
ext2_cg_number_gdb(struct m_ext2fs *fs, int cg)
{
	unsigned long first_meta_bg, metagroup;

	first_meta_bg = le32toh(fs->e2fs->e3fs_first_meta_bg);
	metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);

	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
	    metagroup < first_meta_bg)
		return (ext2_cg_number_gdb_nometa(fs, cg));

	return ext2_cg_number_gdb_meta(fs, cg);
}

/*
 * Count the blocks at the start of group `cg' occupied by filesystem
 * metadata: superblock backup, group descriptors and reserved GDT
 * blocks where applicable.
 */
static int
ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg)
{
	int number;

	number = ext2_cg_has_sb(fs, cg);

	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
	    cg < le32toh(fs->e2fs->e3fs_first_meta_bg) *
	    EXT2_DESCS_PER_BLOCK(fs)) {
		if (number) {
			number += ext2_cg_number_gdb(fs, cg);
			number += le16toh(fs->e2fs->e2fs_reserved_ngdb);
		}
	} else {
		number += ext2_cg_number_gdb(fs, cg);
	}

	return (number);
}

/* Mark the tail of a bitmap (bits beyond the group) as in-use. */
static void
ext2_mark_bitmap_end(int
 start_bit, int end_bit, char *bitmap)
{
	int i;

	if (start_bit >= end_bit)
		return;

	/* Set individual bits up to the next byte boundary... */
	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
		setbit(bitmap, i);
	/* ...then fill the remaining whole bytes at once. */
	if (i < end_bit)
		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
}

/*
 * Map an absolute filesystem block number to a group index.
 *
 * NOTE(review): the divisor here is e2fs_bsize (block size), not a
 * blocks-per-group quantity such as e2fs_bpg/e2fs_fpg as used for the
 * group base in ext2_cg_block_bitmap_init() below — this looks
 * suspicious; confirm against the on-disk layout before changing.
 */
static int
ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block)
{

	return ((block - le32toh(fs->e2fs->e2fs_first_dblock)) /
	    fs->e2fs_bsize);
}

/* 1 if `block' falls inside group `cg' (per ext2_get_group_number). */
static int
ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg)
{

	return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0);
}

/*
 * Construct the block bitmap of an uninitialized group (BLOCK_UNINIT):
 * mark the leading metadata blocks, the group's own bitmaps and inode
 * table, and the unusable tail past the group's last block, then clear
 * the BLOCK_UNINIT flag.  Returns 0 or EINVAL on a corrupt layout.
 */
static int
ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp)
{
	int bit, bit_max, inodes_per_block;
	uint64_t start, tmp;

	if (!(le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_BLOCK_UNINIT))
		return (0);

	memset(bp->b_data, 0, fs->e2fs_bsize);

	bit_max = ext2_number_base_meta_blocks(fs, cg);
	if ((bit_max >> 3) >= fs->e2fs_bsize)
		return (EINVAL);

	for (bit = 0; bit < bit_max; bit++)
		setbit(bp->b_data, bit);

	/* First block of this group. */
	start = (uint64_t)cg * fs->e2fs_bpg +
	    le32toh(fs->e2fs->e2fs_first_dblock);

	/* Set bits for block and inode bitmaps, and inode table. */
	tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]);
	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
	    ext2_block_in_group(fs, tmp, cg))
		setbit(bp->b_data, tmp - start);

	tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]);
	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
	    ext2_block_in_group(fs, tmp, cg))
		setbit(bp->b_data, tmp - start);

	tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]);
	inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs);
	while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) +
	    fs->e2fs_ipg / inodes_per_block ) {
		if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
		    ext2_block_in_group(fs, tmp, cg))
			setbit(bp->b_data, tmp - start);
		tmp++;
	}

	/*
	 * Also if the number of blocks within the group is less than
	 * the blocksize * 8 ( which is the size of bitmap ), set rest
	 * of the block bitmap to 1
	 */
	ext2_mark_bitmap_end(fs->e2fs_bpg, fs->e2fs_bsize * 8,
	    bp->b_data);

	/* Clean the flag */
	fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
	    fs->e2fs_gd[cg].ext4bgd_flags) & ~EXT2_BG_BLOCK_UNINIT);

	return (0);
}

/*
 * Sanity-check a group's block bitmap: the group's own metadata blocks
 * must lie inside the group and be marked in-use.  Returns 0 if the
 * bitmap looks consistent, EINVAL otherwise.
 */
static int
ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg)
{
	struct ext2_gd *gd;
	uint64_t group_first_block;
	unsigned int offset, max_bit;

	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) {
		/*
		 * It is not possible to check block bitmap in case of this
		 * feature, because the inode and block bitmaps and inode table
		 * blocks may not be in the group at all.
		 * So, skip check in this case.
		 */
		return (0);
	}

	gd = &fs->e2fs_gd[cg];
	max_bit = fs->e2fs_fpg;
	group_first_block = ((uint64_t)cg) * fs->e2fs_fpg +
	    le32toh(fs->e2fs->e2fs_first_dblock);

	/* Check block bitmap block number */
	offset = e2fs_gd_get_b_bitmap(gd) - group_first_block;
	if (offset >= max_bit || !isset(bp->b_data, offset)) {
		SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
		    "bad block bitmap, group", cg);
		return (EINVAL);
	}

	/* Check inode bitmap block number */
	offset = e2fs_gd_get_i_bitmap(gd) - group_first_block;
	if (offset >= max_bit || !isset(bp->b_data, offset)) {
		SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
		    "bad inode bitmap", cg);
		return (EINVAL);
	}

	/* Check inode table */
	offset = e2fs_gd_get_i_tables(gd) - group_first_block;
	if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) {
		SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
		    "bad inode table, group", cg);
		return (EINVAL);
	}

	return (0);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
/*
 * Per-group block allocator.  Entered with the ext2mount mutex held;
 * the mutex is dropped around the bitmap I/O, reacquired for the
 * accounting update, and released again before returning a block.
 * Returns the absolute block number on success; on failure returns 0
 * with the mutex re-held (as ext2_hashalloc expects).
 */
static daddr_t
ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
{
	struct m_ext2fs *fs;
	struct buf *bp;
	struct ext2mount *ump;
	daddr_t bno, runstart, runlen;
	int bit, loc, end, error, start;
	char *bbp;
	/* XXX ondisk32 */
	fs = ip->i_e2fs;
	ump = ip->i_ump;
	if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
		return (0);

	EXT2_UNLOCK(ump);
	/*
	 * NOTE(review): on bread() failure we goto fail and brelse(bp) —
	 * verify bp is a valid buffer (not NULL) on the error path of
	 * this bread() implementation.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs,
	    e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
	    (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error)
		goto fail;

	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		/* Lazily materialize an uninitialized group's bitmap. */
		error = ext2_cg_block_bitmap_init(fs, cg, bp);
		if (error)
			goto fail;

		ext2_gd_b_bitmap_csum_set(fs, cg, bp);
	}
	error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp);
	if (error)
		goto fail;

	error = ext2_b_bitmap_validate(fs,bp, cg);
	if (error)
		goto fail;

	/*
	 * Check, that another thread did not allocate the last block in
	 * this group while we were waiting for the buffer.
	 */
	if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
		goto fail;

	bbp = (char *)bp->b_data;

	if (dtog(fs, bpref) != cg)
		bpref = 0;
	if (bpref != 0) {
		bpref = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (isclr(bbp, bpref)) {
			bno = bpref;
			goto gotit;
		}
	}
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 * first try to get 8 contiguous blocks, then fall back to a single
	 * block.
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = 0;
	end = howmany(fs->e2fs_fpg, NBBY) - start;
retry:
	runlen = 0;
	runstart = 0;
	for (loc = start; loc < end; loc++) {
		/* Fully-allocated byte: any current run ends here. */
		if (bbp[loc] == (char)0xff) {
			runlen = 0;
			continue;
		}

		/* Start of a run, find the number of high clear bits. */
		if (runlen == 0) {
			bit = fls(bbp[loc]);
			runlen = NBBY - bit;
			runstart = loc * NBBY + bit;
		} else if (bbp[loc] == 0) {
			/* Continue a run. */
			runlen += NBBY;
		} else {
			/*
			 * Finish the current run.  If it isn't long
			 * enough, start a new one.
			 */
			bit = ffs(bbp[loc]) - 1;
			runlen += bit;
			if (runlen >= 8) {
				bno = runstart;
				goto gotit;
			}

			/* Run was too short, start a new one. */
			bit = fls(bbp[loc]);
			runlen = NBBY - bit;
			runstart = loc * NBBY + bit;
		}

		/* If the current run is long enough, use it. */
		if (runlen >= 8) {
			bno = runstart;
			goto gotit;
		}
	}
	/* Second pass: rescan the bytes before the preferred start. */
	if (start != 0) {
		end = start;
		start = 0;
		goto retry;
	}
	/* No 8-block run anywhere; settle for a single free block. */
	bno = ext2_mapsearch(fs, bbp, bpref);
	if (bno < 0)
		goto fail;

gotit:
#ifdef INVARIANTS
	if (isset(bbp, bno)) {
		printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
		    cg, (intmax_t)bno, fs->e2fs_fsmnt);
		panic("ext2fs_alloccg: dup alloc");
	}
#endif
	setbit(bbp, bno);
	EXT2_LOCK(ump);
	/* Update cluster summaries and free-block accounting. */
	ext2_clusteracct(fs, bbp, cg, bno, -1);
	fs->e2fs_fbcount--;
	e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
	    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1);
	fs->e2fs_fmod = 1;
	EXT2_UNLOCK(ump);
	ext2_gd_b_bitmap_csum_set(fs, cg, bp);
	bdwrite(bp);
	return (((uint64_t)cg) * fs->e2fs_fpg +
	    le32toh(fs->e2fs->e2fs_first_dblock) + bno);

fail:
	brelse(bp);
	EXT2_LOCK(ump);
	return (0);
}

/*
 * Determine whether a cluster can be
allocated. 1154 */ 1155 static daddr_t 1156 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) 1157 { 1158 struct m_ext2fs *fs; 1159 struct ext2mount *ump; 1160 struct buf *bp; 1161 char *bbp; 1162 int bit, error, got, i, loc, run; 1163 int32_t *lp; 1164 daddr_t bno; 1165 1166 fs = ip->i_e2fs; 1167 ump = ip->i_ump; 1168 1169 if (fs->e2fs_maxcluster[cg] < len) 1170 return (0); 1171 1172 EXT2_UNLOCK(ump); 1173 error = bread(ip->i_devvp, 1174 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1175 (int)fs->e2fs_bsize, NOCRED, &bp); 1176 if (error) 1177 goto fail_lock; 1178 1179 bbp = (char *)bp->b_data; 1180 EXT2_LOCK(ump); 1181 /* 1182 * Check to see if a cluster of the needed size (or bigger) is 1183 * available in this cylinder group. 1184 */ 1185 lp = &fs->e2fs_clustersum[cg].cs_sum[len]; 1186 for (i = len; i <= fs->e2fs_contigsumsize; i++) 1187 if (*lp++ > 0) 1188 break; 1189 if (i > fs->e2fs_contigsumsize) { 1190 /* 1191 * Update the cluster summary information to reflect 1192 * the true maximum-sized cluster so that future cluster 1193 * allocation requests can avoid reading the bitmap only 1194 * to find no cluster. 1195 */ 1196 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; 1197 for (i = len - 1; i > 0; i--) 1198 if (*lp-- > 0) 1199 break; 1200 fs->e2fs_maxcluster[cg] = i; 1201 goto fail; 1202 } 1203 EXT2_UNLOCK(ump); 1204 1205 /* Search the bitmap to find a big enough cluster like in FFS. */ 1206 if (dtog(fs, bpref) != cg) 1207 bpref = 0; 1208 if (bpref != 0) 1209 bpref = dtogd(fs, bpref); 1210 loc = bpref / NBBY; 1211 bit = 1 << (bpref % NBBY); 1212 for (run = 0, got = bpref; got < fs->e2fs_fpg; got++) { 1213 if ((bbp[loc] & bit) != 0) 1214 run = 0; 1215 else { 1216 run++; 1217 if (run == len) 1218 break; 1219 } 1220 if ((got & (NBBY - 1)) != (NBBY - 1)) 1221 bit <<= 1; 1222 else { 1223 loc++; 1224 bit = 1; 1225 } 1226 } 1227 1228 if (got >= fs->e2fs_fpg) 1229 goto fail_lock; 1230 1231 /* Allocate the cluster that we found. 
*/ 1232 for (i = 1; i < len; i++) 1233 if (!isclr(bbp, got - run + i)) 1234 panic("ext2_clusteralloc: map mismatch"); 1235 1236 bno = got - run + 1; 1237 if (bno >= fs->e2fs_fpg) 1238 panic("ext2_clusteralloc: allocated out of group"); 1239 1240 EXT2_LOCK(ump); 1241 for (i = 0; i < len; i += fs->e2fs_fpb) { 1242 setbit(bbp, bno + i); 1243 ext2_clusteracct(fs, bbp, cg, bno + i, -1); 1244 fs->e2fs_fbcount--; 1245 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1246 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1247 } 1248 fs->e2fs_fmod = 1; 1249 EXT2_UNLOCK(ump); 1250 1251 bdwrite(bp); 1252 return (cg * fs->e2fs_fpg + le32toh(fs->e2fs->e2fs_first_dblock) 1253 + bno); 1254 1255 fail_lock: 1256 EXT2_LOCK(ump); 1257 fail: 1258 brelse(bp); 1259 return (0); 1260 } 1261 1262 static int 1263 ext2_zero_inode_table(struct inode *ip, int cg) 1264 { 1265 struct m_ext2fs *fs; 1266 struct buf *bp; 1267 int i, all_blks, used_blks; 1268 1269 fs = ip->i_e2fs; 1270 1271 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_INODE_ZEROED) 1272 return (0); 1273 1274 all_blks = le16toh(fs->e2fs->e2fs_inode_size) * fs->e2fs_ipg / 1275 fs->e2fs_bsize; 1276 1277 used_blks = howmany(fs->e2fs_ipg - 1278 e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]), 1279 fs->e2fs_bsize / EXT2_INODE_SIZE(fs)); 1280 1281 for (i = 0; i < all_blks - used_blks; i++) { 1282 bp = getblk(ip->i_devvp, fsbtodb(fs, 1283 e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i), 1284 fs->e2fs_bsize, 0, 0, 0); 1285 if (!bp) 1286 return (EIO); 1287 1288 vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize); 1289 bawrite(bp); 1290 } 1291 1292 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1293 fs->e2fs_gd[cg].ext4bgd_flags) | EXT2_BG_INODE_ZEROED); 1294 1295 return (0); 1296 } 1297 1298 static void 1299 ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last) 1300 { 1301 int i; 1302 1303 for (i = first; i <= last; i++) 1304 bitmap[i] = 0xff; 1305 } 1306 1307 1308 /* 1309 * Determine whether an inode can be allocated. 
1310 * 1311 * Check to see if an inode is available, and if it is, 1312 * allocate it using tode in the specified cylinder group. 1313 */ 1314 static daddr_t 1315 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 1316 { 1317 struct m_ext2fs *fs; 1318 struct buf *bp; 1319 struct ext2mount *ump; 1320 int error, start, len, ifree, ibytes; 1321 char *ibp, *loc; 1322 1323 ipref--; /* to avoid a lot of (ipref -1) */ 1324 if (ipref == -1) 1325 ipref = 0; 1326 fs = ip->i_e2fs; 1327 ump = ip->i_ump; 1328 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) 1329 return (0); 1330 EXT2_UNLOCK(ump); 1331 error = bread(ip->i_devvp, fsbtodb(fs, 1332 e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1333 (int)fs->e2fs_bsize, NOCRED, &bp); 1334 if (error) { 1335 EXT2_LOCK(ump); 1336 return (0); 1337 } 1338 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1339 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1340 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & 1341 EXT2_BG_INODE_UNINIT) { 1342 ibytes = fs->e2fs_ipg / 8; 1343 memset(bp->b_data, 0, ibytes - 1); 1344 ext2_fix_bitmap_tail(bp->b_data, ibytes, 1345 fs->e2fs_bsize - 1); 1346 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1347 fs->e2fs_gd[cg].ext4bgd_flags) & 1348 ~EXT2_BG_INODE_UNINIT); 1349 } 1350 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1351 error = ext2_zero_inode_table(ip, cg); 1352 if (error) { 1353 brelse(bp); 1354 EXT2_LOCK(ump); 1355 return (0); 1356 } 1357 } 1358 error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp); 1359 if (error) { 1360 brelse(bp); 1361 EXT2_LOCK(ump); 1362 return (0); 1363 } 1364 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) { 1365 /* 1366 * Another thread allocated the last i-node in this 1367 * group while we were waiting for the buffer. 
1368 */ 1369 brelse(bp); 1370 EXT2_LOCK(ump); 1371 return (0); 1372 } 1373 ibp = (char *)bp->b_data; 1374 if (ipref) { 1375 ipref %= fs->e2fs_ipg; 1376 if (isclr(ibp, ipref)) 1377 goto gotit; 1378 } 1379 start = ipref / NBBY; 1380 len = howmany(fs->e2fs_ipg - ipref, NBBY); 1381 loc = memcchr(&ibp[start], 0xff, len); 1382 if (loc == NULL) { 1383 len = start + 1; 1384 start = 0; 1385 loc = memcchr(&ibp[start], 0xff, len); 1386 if (loc == NULL) { 1387 SDT_PROBE3(ext2fs, , alloc, 1388 ext2_nodealloccg_bmap_corrupted, cg, ipref, 1389 fs->e2fs_fsmnt); 1390 brelse(bp); 1391 EXT2_LOCK(ump); 1392 return (0); 1393 } 1394 } 1395 ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; 1396 gotit: 1397 setbit(ibp, ipref); 1398 EXT2_LOCK(ump); 1399 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1400 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1); 1401 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1402 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1403 ifree = fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]); 1404 if (ipref + 1 > ifree) 1405 e2fs_gd_set_i_unused(&fs->e2fs_gd[cg], 1406 fs->e2fs_ipg - (ipref + 1)); 1407 } 1408 fs->e2fs_ficount--; 1409 fs->e2fs_fmod = 1; 1410 if ((mode & IFMT) == IFDIR) { 1411 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1412 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1); 1413 fs->e2fs_total_dir++; 1414 } 1415 EXT2_UNLOCK(ump); 1416 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1417 bdwrite(bp); 1418 return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1); 1419 } 1420 1421 /* 1422 * Free a block or fragment. 
1423 * 1424 */ 1425 void 1426 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 1427 { 1428 struct m_ext2fs *fs; 1429 struct buf *bp; 1430 struct ext2mount *ump; 1431 int cg, error; 1432 char *bbp; 1433 1434 fs = ip->i_e2fs; 1435 ump = ip->i_ump; 1436 cg = dtog(fs, bno); 1437 if (bno >= fs->e2fs_bcount) { 1438 SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, 1439 ip->i_number, bno); 1440 return; 1441 } 1442 error = bread(ip->i_devvp, 1443 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1444 (int)fs->e2fs_bsize, NOCRED, &bp); 1445 if (error) { 1446 return; 1447 } 1448 bbp = (char *)bp->b_data; 1449 bno = dtogd(fs, bno); 1450 if (isclr(bbp, bno)) { 1451 panic("ext2_blkfree: freeing free block %lld, fs=%s", 1452 (long long)bno, fs->e2fs_fsmnt); 1453 } 1454 clrbit(bbp, bno); 1455 EXT2_LOCK(ump); 1456 ext2_clusteracct(fs, bbp, cg, bno, 1); 1457 fs->e2fs_fbcount++; 1458 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1459 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); 1460 fs->e2fs_fmod = 1; 1461 EXT2_UNLOCK(ump); 1462 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1463 bdwrite(bp); 1464 } 1465 1466 /* 1467 * Free an inode. 
1468 * 1469 */ 1470 int 1471 ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1472 { 1473 struct m_ext2fs *fs; 1474 struct inode *pip; 1475 struct buf *bp; 1476 struct ext2mount *ump; 1477 int error, cg; 1478 char *ibp; 1479 1480 pip = VTOI(pvp); 1481 fs = pip->i_e2fs; 1482 ump = pip->i_ump; 1483 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1484 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1485 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1486 1487 cg = ino_to_cg(fs, ino); 1488 error = bread(pip->i_devvp, 1489 fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1490 (int)fs->e2fs_bsize, NOCRED, &bp); 1491 if (error) { 1492 return (0); 1493 } 1494 ibp = (char *)bp->b_data; 1495 ino = (ino - 1) % fs->e2fs_ipg; 1496 if (isclr(ibp, ino)) { 1497 SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, 1498 fs->e2fs_fsmnt, ino); 1499 if (fs->e2fs_ronly == 0) 1500 panic("ext2_vfree: freeing free inode"); 1501 } 1502 clrbit(ibp, ino); 1503 EXT2_LOCK(ump); 1504 fs->e2fs_ficount++; 1505 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1506 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); 1507 if ((mode & IFMT) == IFDIR) { 1508 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1509 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); 1510 fs->e2fs_total_dir--; 1511 } 1512 fs->e2fs_fmod = 1; 1513 EXT2_UNLOCK(ump); 1514 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1515 bdwrite(bp); 1516 return (0); 1517 } 1518 1519 /* 1520 * Find a block in the specified cylinder group. 1521 * 1522 * It is a panic if a request is made to find a block if none are 1523 * available. 
1524 */ 1525 static daddr_t 1526 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1527 { 1528 char *loc; 1529 int start, len; 1530 1531 /* 1532 * find the fragment by searching through the free block 1533 * map for an appropriate bit pattern 1534 */ 1535 if (bpref) 1536 start = dtogd(fs, bpref) / NBBY; 1537 else 1538 start = 0; 1539 len = howmany(fs->e2fs_fpg, NBBY) - start; 1540 loc = memcchr(&bbp[start], 0xff, len); 1541 if (loc == NULL) { 1542 len = start + 1; 1543 start = 0; 1544 loc = memcchr(&bbp[start], 0xff, len); 1545 if (loc == NULL) { 1546 panic("ext2_mapsearch: map corrupted: start=%d, len=%d," 1547 "fs=%s", start, len, fs->e2fs_fsmnt); 1548 /* NOTREACHED */ 1549 } 1550 } 1551 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1552 } 1553 1554 int 1555 ext2_cg_has_sb(struct m_ext2fs *fs, int cg) 1556 { 1557 int a3, a5, a7; 1558 1559 if (cg == 0) 1560 return (1); 1561 1562 if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { 1563 if (cg == le32toh(fs->e2fs->e4fs_backup_bgs[0]) || 1564 cg == le32toh(fs->e2fs->e4fs_backup_bgs[1])) 1565 return (1); 1566 return (0); 1567 } 1568 1569 if ((cg <= 1) || 1570 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) 1571 return (1); 1572 1573 if (!(cg & 1)) 1574 return (0); 1575 1576 for (a3 = 3, a5 = 5, a7 = 7; 1577 a3 <= cg || a5 <= cg || a7 <= cg; 1578 a3 *= 3, a5 *= 5, a7 *= 7) 1579 if (cg == a3 || cg == a5 || cg == a7) 1580 return (1); 1581 return (0); 1582 } 1583