1 /*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/conf.h> 41 #include <sys/vnode.h> 42 #include <sys/sdt.h> 43 #include <sys/stat.h> 44 #include <sys/mount.h> 45 #include <sys/sysctl.h> 46 #include <sys/syslog.h> 47 #include <sys/buf.h> 48 #include <sys/endian.h> 49 50 #include <fs/ext2fs/fs.h> 51 #include <fs/ext2fs/inode.h> 52 #include <fs/ext2fs/ext2_mount.h> 53 #include <fs/ext2fs/ext2fs.h> 54 #include <fs/ext2fs/ext2_extern.h> 55 56 SDT_PROVIDER_DEFINE(ext2fs); 57 /* 58 * ext2fs trace probe: 59 * arg0: verbosity. Higher numbers give more verbose messages 60 * arg1: Textual message 61 */ 62 SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*"); 63 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc, 64 "ino_t", "e2fs_lbn_t", "e2fs_lbn_t"); 65 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t"); 66 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t"); 67 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int"); 68 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 69 "int", "daddr_t", "char*"); 70 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t"); 71 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t"); 72 73 static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); 74 static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); 75 static u_long ext2_dirpref(struct inode *); 76 static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int, 77 daddr_t (*)(struct inode *, int, daddr_t, 78 int)); 79 static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); 80 static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); 81 82 /* 83 * Allocate a block in the filesystem. 84 * 85 * A preference may be optionally specified. If a preference is given 86 * the following hierarchy is used to allocate a block: 87 * 1) allocate the requested block. 88 * 2) allocate a rotationally optimal block in the same cylinder. 89 * 3) allocate a block in the same cylinder group. 90 * 4) quadratically rehash into other cylinder groups, until an 91 * available block is located. 92 * If no block preference is given the following hierarchy is used 93 * to allocate a block: 94 * 1) allocate a block in the cylinder group that contains the 95 * inode for the file. 96 * 2) quadratically rehash into other cylinder groups, until an 97 * available block is located. 98 */ 99 int 100 ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size, 101 struct ucred *cred, e4fs_daddr_t *bnp) 102 { 103 struct m_ext2fs *fs; 104 struct ext2mount *ump; 105 e4fs_daddr_t bno; 106 int cg; 107 108 *bnp = 0; 109 fs = ip->i_e2fs; 110 ump = ip->i_ump; 111 mtx_assert(EXT2_MTX(ump), MA_OWNED); 112 #ifdef INVARIANTS 113 if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { 114 vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n", 115 (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); 116 panic("ext2_alloc: bad size"); 117 } 118 if (cred == NOCRED) 119 panic("ext2_alloc: missing credential"); 120 #endif /* INVARIANTS */ 121 if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0) 122 goto nospace; 123 if (cred->cr_uid != 0 && 124 fs->e2fs_fbcount < fs->e2fs_rbcount) 125 goto nospace; 126 if (bpref >= fs->e2fs_bcount) 127 bpref = 0; 128 if (bpref == 0) 129 cg = ino_to_cg(fs, ip->i_number); 130 else 131 cg = dtog(fs, bpref); 132 bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 133 ext2_alloccg); 134 if (bno > 0) { 135 /* set next_alloc fields as done in block_getblk */ 136 ip->i_next_alloc_block = lbn; 137 ip->i_next_alloc_goal = bno; 138 139 ip->i_blocks += btodb(fs->e2fs_bsize); 140 ip->i_flag |= IN_CHANGE | IN_UPDATE; 141 *bnp = bno; 142 return (0); 143 } 144 nospace: 145 EXT2_UNLOCK(ump); 146 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block"); 147 return (ENOSPC); 148 } 149 150 /* 151 * Allocate EA's block for inode. 152 */ 153 e4fs_daddr_t 154 ext2_alloc_meta(struct inode *ip) 155 { 156 struct m_ext2fs *fs; 157 daddr_t blk; 158 159 fs = ip->i_e2fs; 160 161 EXT2_LOCK(ip->i_ump); 162 blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize, 163 ext2_alloccg); 164 if (0 == blk) { 165 EXT2_UNLOCK(ip->i_ump); 166 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block"); 167 } 168 169 return (blk); 170 } 171 172 /* 173 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 174 * 175 * The vnode and an array of buffer pointers for a range of sequential 176 * logical blocks to be made contiguous is given. The allocator attempts 177 * to find a range of sequential blocks starting as close as possible to 178 * an fs_rotdelay offset from the end of the allocation for the logical 179 * block immediately preceding the current range. If successful, the 180 * physical block numbers in the buffer pointers and in the inode are 181 * changed to reflect the new allocation. If unsuccessful, the allocation 182 * is left unchanged. The success in doing the reallocation is returned. 183 * Note that the error return is not reflected back to the user. Rather 184 * the previous block allocation will be used. 185 */ 186 187 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 188 "EXT2FS filesystem"); 189 190 static int doasyncfree = 1; 191 192 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, 193 "Use asynchronous writes to update block pointers when freeing blocks"); 194 195 static int doreallocblks = 0; 196 197 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 198 199 int 200 ext2_reallocblks(struct vop_reallocblks_args *ap) 201 { 202 struct m_ext2fs *fs; 203 struct inode *ip; 204 struct vnode *vp; 205 struct buf *sbp, *ebp; 206 uint32_t *bap, *sbap, *ebap; 207 struct ext2mount *ump; 208 struct cluster_save *buflist; 209 struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp; 210 e2fs_lbn_t start_lbn, end_lbn; 211 int soff; 212 e2fs_daddr_t newblk, blkno; 213 int i, len, start_lvl, end_lvl, pref, ssize; 214 215 if (doreallocblks == 0) 216 return (ENOSPC); 217 218 vp = ap->a_vp; 219 ip = VTOI(vp); 220 fs = ip->i_e2fs; 221 ump = ip->i_ump; 222 223 if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) 224 return (ENOSPC); 225 226 buflist = ap->a_buflist; 227 len = buflist->bs_nchildren; 228 start_lbn = buflist->bs_children[0]->b_lblkno; 229 end_lbn = start_lbn + len - 1; 230 #ifdef INVARIANTS 231 for (i = 1; i < len; i++) 232 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 233 panic("ext2_reallocblks: non-cluster"); 234 #endif 235 /* 236 * If the cluster crosses the boundary for the first indirect 237 * block, leave space for the indirect block. Indirect blocks 238 * are initially laid out in a position after the last direct 239 * block. Block reallocation would usually destroy locality by 240 * moving the indirect block out of the way to make room for 241 * data blocks if we didn't compensate here. We should also do 242 * this for other indirect block boundaries, but it is only 243 * important for the first one. 244 */ 245 if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR) 246 return (ENOSPC); 247 /* 248 * If the latest allocation is in a new cylinder group, assume that 249 * the filesystem has decided to move and do not force it back to 250 * the previous cylinder group. 251 */ 252 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 253 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 254 return (ENOSPC); 255 if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || 256 ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) 257 return (ENOSPC); 258 /* 259 * Get the starting offset and block map for the first block. 260 */ 261 if (start_lvl == 0) { 262 sbap = &ip->i_db[0]; 263 soff = start_lbn; 264 } else { 265 idp = &start_ap[start_lvl - 1]; 266 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) { 267 brelse(sbp); 268 return (ENOSPC); 269 } 270 sbap = (u_int *)sbp->b_data; 271 soff = idp->in_off; 272 } 273 /* 274 * If the block range spans two block maps, get the second map. 275 */ 276 ebap = NULL; 277 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 278 ssize = len; 279 } else { 280 #ifdef INVARIANTS 281 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) 282 panic("ext2_reallocblks: start == end"); 283 #endif 284 ssize = len - (idp->in_off + 1); 285 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)) 286 goto fail; 287 ebap = (u_int *)ebp->b_data; 288 } 289 /* 290 * Find the preferred location for the cluster. 291 */ 292 EXT2_LOCK(ump); 293 pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); 294 /* 295 * Search the block map looking for an allocation of the desired size. 296 */ 297 if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, 298 len, ext2_clusteralloc)) == 0) { 299 EXT2_UNLOCK(ump); 300 goto fail; 301 } 302 /* 303 * We have found a new contiguous block. 304 * 305 * First we have to replace the old block pointers with the new 306 * block pointers in the inode and indirect blocks associated 307 * with the file. 308 */ 309 SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc, 310 ip->i_number, start_lbn, end_lbn); 311 blkno = newblk; 312 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 313 if (i == ssize) { 314 bap = ebap; 315 soff = -i; 316 } 317 #ifdef INVARIANTS 318 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) 319 panic("ext2_reallocblks: alloc mismatch"); 320 #endif 321 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap); 322 *bap++ = blkno; 323 } 324 /* 325 * Next we must write out the modified inode and indirect blocks. 326 * For strict correctness, the writes should be synchronous since 327 * the old block values may have been written to disk. In practise 328 * they are almost never written, but if we are concerned about 329 * strict correctness, the `doasyncfree' flag should be set to zero. 330 * 331 * The test on `doasyncfree' should be changed to test a flag 332 * that shows whether the associated buffers and inodes have 333 * been written. The flag should be set when the cluster is 334 * started and cleared whenever the buffer or inode is flushed. 335 * We can then check below to see if it is set, and do the 336 * synchronous write only when it has been cleared. 337 */ 338 if (sbap != &ip->i_db[0]) { 339 if (doasyncfree) 340 bdwrite(sbp); 341 else 342 bwrite(sbp); 343 } else { 344 ip->i_flag |= IN_CHANGE | IN_UPDATE; 345 if (!doasyncfree) 346 ext2_update(vp, 1); 347 } 348 if (ssize < len) { 349 if (doasyncfree) 350 bdwrite(ebp); 351 else 352 bwrite(ebp); 353 } 354 /* 355 * Last, free the old blocks and assign the new blocks to the buffers. 356 */ 357 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 358 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), 359 fs->e2fs_bsize); 360 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 361 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno); 362 } 363 364 return (0); 365 366 fail: 367 if (ssize < len) 368 brelse(ebp); 369 if (sbap != &ip->i_db[0]) 370 brelse(sbp); 371 return (ENOSPC); 372 } 373 374 /* 375 * Allocate an inode in the filesystem. 376 * 377 */ 378 int 379 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 380 { 381 struct timespec ts; 382 struct m_ext2fs *fs; 383 struct ext2mount *ump; 384 struct inode *pip; 385 struct inode *ip; 386 struct vnode *vp; 387 struct thread *td; 388 ino_t ino, ipref; 389 int error, cg; 390 391 *vpp = NULL; 392 pip = VTOI(pvp); 393 fs = pip->i_e2fs; 394 ump = pip->i_ump; 395 396 EXT2_LOCK(ump); 397 if (fs->e2fs_ficount == 0) 398 goto noinodes; 399 /* 400 * If it is a directory then obtain a cylinder group based on 401 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is 402 * always the next inode. 403 */ 404 if ((mode & IFMT) == IFDIR) { 405 cg = ext2_dirpref(pip); 406 if (fs->e2fs_contigdirs[cg] < 255) 407 fs->e2fs_contigdirs[cg]++; 408 } else { 409 cg = ino_to_cg(fs, pip->i_number); 410 if (fs->e2fs_contigdirs[cg] > 0) 411 fs->e2fs_contigdirs[cg]--; 412 } 413 ipref = cg * fs->e2fs_ipg + 1; 414 ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); 415 if (ino == 0) 416 goto noinodes; 417 418 td = curthread; 419 error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 420 if (error || *vpp != NULL) { 421 return (error); 422 } 423 424 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 425 426 /* Allocate a new vnode/inode. */ 427 if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) { 428 free(ip, M_EXT2NODE); 429 return (error); 430 } 431 432 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 433 vp->v_data = ip; 434 ip->i_vnode = vp; 435 ip->i_e2fs = fs = ump->um_e2fs; 436 ip->i_ump = ump; 437 ip->i_number = ino; 438 ip->i_block_group = ino_to_cg(fs, ino); 439 ip->i_next_alloc_block = 0; 440 ip->i_next_alloc_goal = 0; 441 442 error = insmntque(vp, ump->um_mountp); 443 if (error) { 444 free(ip, M_EXT2NODE); 445 return (error); 446 } 447 448 error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 449 if (error || *vpp != NULL) { 450 *vpp = NULL; 451 free(ip, M_EXT2NODE); 452 return (error); 453 } 454 455 if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) { 456 vput(vp); 457 *vpp = NULL; 458 free(ip, M_EXT2NODE); 459 return (error); 460 } 461 462 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS) 463 && (S_ISREG(mode) || S_ISDIR(mode))) 464 ext4_ext_tree_init(ip); 465 else 466 memset(ip->i_data, 0, sizeof(ip->i_data)); 467 468 /* 469 * Set up a new generation number for this inode. 470 * Avoid zero values. 471 */ 472 do { 473 ip->i_gen = arc4random(); 474 } while (ip->i_gen == 0); 475 476 vfs_timestamp(&ts); 477 ip->i_birthtime = ts.tv_sec; 478 ip->i_birthnsec = ts.tv_nsec; 479 480 vn_set_state(vp, VSTATE_CONSTRUCTED); 481 *vpp = vp; 482 483 return (0); 484 485 noinodes: 486 EXT2_UNLOCK(ump); 487 SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes"); 488 return (ENOSPC); 489 } 490 491 /* 492 * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h 493 */ 494 uint64_t 495 e2fs_gd_get_b_bitmap(struct ext2_gd *gd) 496 { 497 498 return (((uint64_t)(le32toh(gd->ext4bgd_b_bitmap_hi)) << 32) | 499 le32toh(gd->ext2bgd_b_bitmap)); 500 } 501 502 uint64_t 503 e2fs_gd_get_i_bitmap(struct ext2_gd *gd) 504 { 505 506 return (((uint64_t)(le32toh(gd->ext4bgd_i_bitmap_hi)) << 32) | 507 le32toh(gd->ext2bgd_i_bitmap)); 508 } 509 510 uint64_t 511 e2fs_gd_get_i_tables(struct ext2_gd *gd) 512 { 513 514 return (((uint64_t)(le32toh(gd->ext4bgd_i_tables_hi)) << 32) | 515 le32toh(gd->ext2bgd_i_tables)); 516 } 517 518 static uint32_t 519 e2fs_gd_get_nbfree(struct ext2_gd *gd) 520 { 521 522 return (((uint32_t)(le16toh(gd->ext4bgd_nbfree_hi)) << 16) | 523 le16toh(gd->ext2bgd_nbfree)); 524 } 525 526 static void 527 e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val) 528 { 529 530 gd->ext2bgd_nbfree = htole16(val & 0xffff); 531 gd->ext4bgd_nbfree_hi = htole16(val >> 16); 532 } 533 534 static uint32_t 535 e2fs_gd_get_nifree(struct ext2_gd *gd) 536 { 537 538 return (((uint32_t)(le16toh(gd->ext4bgd_nifree_hi)) << 16) | 539 le16toh(gd->ext2bgd_nifree)); 540 } 541 542 static void 543 e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val) 544 { 545 546 gd->ext2bgd_nifree = htole16(val & 0xffff); 547 gd->ext4bgd_nifree_hi = htole16(val >> 16); 548 } 549 550 uint32_t 551 e2fs_gd_get_ndirs(struct ext2_gd *gd) 552 { 553 554 return (((uint32_t)(le16toh(gd->ext4bgd_ndirs_hi)) << 16) | 555 le16toh(gd->ext2bgd_ndirs)); 556 } 557 558 static void 559 e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val) 560 { 561 562 gd->ext2bgd_ndirs = htole16(val & 0xffff); 563 gd->ext4bgd_ndirs_hi = htole16(val >> 16); 564 } 565 566 static uint32_t 567 e2fs_gd_get_i_unused(struct ext2_gd *gd) 568 { 569 return ((uint32_t)(le16toh(gd->ext4bgd_i_unused_hi) << 16) | 570 le16toh(gd->ext4bgd_i_unused)); 571 } 572 573 static void 574 e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val) 575 { 576 577 gd->ext4bgd_i_unused = htole16(val & 0xffff); 578 gd->ext4bgd_i_unused_hi = htole16(val >> 16); 579 } 580 581 /* 582 * Find a cylinder to place a directory. 583 * 584 * The policy implemented by this algorithm is to allocate a 585 * directory inode in the same cylinder group as its parent 586 * directory, but also to reserve space for its files inodes 587 * and data. Restrict the number of directories which may be 588 * allocated one after another in the same cylinder group 589 * without intervening allocation of files. 590 * 591 * If we allocate a first level directory then force allocation 592 * in another cylinder group. 593 * 594 */ 595 static u_long 596 ext2_dirpref(struct inode *pip) 597 { 598 struct m_ext2fs *fs; 599 int cg, prefcg, cgsize; 600 uint64_t avgbfree, minbfree; 601 u_int avgifree, avgndir, curdirsize; 602 u_int minifree, maxndir; 603 u_int mincg, minndir; 604 u_int dirsize, maxcontigdirs; 605 606 mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); 607 fs = pip->i_e2fs; 608 609 avgifree = fs->e2fs_ficount / fs->e2fs_gcount; 610 avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount; 611 avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; 612 613 /* 614 * Force allocation in another cg if creating a first level dir. 615 */ 616 ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); 617 if (ITOV(pip)->v_vflag & VV_ROOT) { 618 prefcg = arc4random() % fs->e2fs_gcount; 619 mincg = prefcg; 620 minndir = fs->e2fs_ipg; 621 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 622 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 623 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 624 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 625 mincg = cg; 626 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 627 } 628 for (cg = 0; cg < prefcg; cg++) 629 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 630 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 631 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 632 mincg = cg; 633 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 634 } 635 return (mincg); 636 } 637 /* 638 * Count various limits which used for 639 * optimal allocation of a directory inode. 640 */ 641 maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); 642 minifree = avgifree - avgifree / 4; 643 if (minifree < 1) 644 minifree = 1; 645 minbfree = avgbfree - avgbfree / 4; 646 if (minbfree < 1) 647 minbfree = 1; 648 cgsize = fs->e2fs_fsize * fs->e2fs_fpg; 649 dirsize = AVGDIRSIZE; 650 curdirsize = avgndir ? 651 (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; 652 if (dirsize < curdirsize) 653 dirsize = curdirsize; 654 maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); 655 maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); 656 if (maxcontigdirs == 0) 657 maxcontigdirs = 1; 658 659 /* 660 * Limit number of dirs in one cg and reserve space for 661 * regular files, but only if we have no deficit in 662 * inodes or space. 663 */ 664 prefcg = ino_to_cg(fs, pip->i_number); 665 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 666 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 667 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 668 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 669 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 670 return (cg); 671 } 672 for (cg = 0; cg < prefcg; cg++) 673 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 674 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 675 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 676 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 677 return (cg); 678 } 679 /* 680 * This is a backstop when we have deficit in space. 681 */ 682 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 683 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 684 return (cg); 685 for (cg = 0; cg < prefcg; cg++) 686 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 687 break; 688 return (cg); 689 } 690 691 /* 692 * Select the desired position for the next block in a file. 693 * 694 * we try to mimic what Remy does in inode_getblk/block_getblk 695 * 696 * we note: blocknr == 0 means that we're about to allocate either 697 * a direct block or a pointer block at the first level of indirection 698 * (In other words, stuff that will go in i_db[] or i_ib[]) 699 * 700 * blocknr != 0 means that we're allocating a block that is none 701 * of the above. Then, blocknr tells us the number of the block 702 * that will hold the pointer 703 */ 704 e4fs_daddr_t 705 ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, 706 e2fs_daddr_t blocknr) 707 { 708 struct m_ext2fs *fs; 709 int tmp; 710 711 fs = ip->i_e2fs; 712 713 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 714 715 /* 716 * If the next block is actually what we thought it is, then set the 717 * goal to what we thought it should be. 718 */ 719 if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) 720 return ip->i_next_alloc_goal; 721 722 /* 723 * Now check whether we were provided with an array that basically 724 * tells us previous blocks to which we want to stay close. 725 */ 726 if (bap) 727 for (tmp = indx - 1; tmp >= 0; tmp--) 728 if (bap[tmp]) 729 return (le32toh(bap[tmp])); 730 731 /* 732 * Else lets fall back to the blocknr or, if there is none, follow 733 * the rule that a block should be allocated near its inode. 734 */ 735 return (blocknr ? blocknr : 736 (e2fs_daddr_t)(ip->i_block_group * 737 EXT2_BLOCKS_PER_GROUP(fs)) + le32toh(fs->e2fs->e2fs_first_dblock)); 738 } 739 740 /* 741 * Implement the cylinder overflow algorithm. 742 * 743 * The policy implemented by this algorithm is: 744 * 1) allocate the block in its requested cylinder group. 745 * 2) quadratically rehash on the cylinder group number. 746 * 3) brute force search for a free block. 747 */ 748 static e4fs_daddr_t 749 ext2_hashalloc(struct inode *ip, int cg, long pref, int size, 750 daddr_t (*allocator) (struct inode *, int, daddr_t, int)) 751 { 752 struct m_ext2fs *fs; 753 e4fs_daddr_t result; 754 int i, icg = cg; 755 756 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 757 fs = ip->i_e2fs; 758 /* 759 * 1: preferred cylinder group 760 */ 761 result = (*allocator)(ip, cg, pref, size); 762 if (result) 763 return (result); 764 /* 765 * 2: quadratic rehash 766 */ 767 for (i = 1; i < fs->e2fs_gcount; i *= 2) { 768 cg += i; 769 if (cg >= fs->e2fs_gcount) 770 cg -= fs->e2fs_gcount; 771 result = (*allocator)(ip, cg, 0, size); 772 if (result) 773 return (result); 774 } 775 /* 776 * 3: brute force search 777 * Note that we start at i == 2, since 0 was checked initially, 778 * and 1 is always checked in the quadratic rehash. 779 */ 780 cg = (icg + 2) % fs->e2fs_gcount; 781 for (i = 2; i < fs->e2fs_gcount; i++) { 782 result = (*allocator)(ip, cg, 0, size); 783 if (result) 784 return (result); 785 cg++; 786 if (cg == fs->e2fs_gcount) 787 cg = 0; 788 } 789 return (0); 790 } 791 792 static uint64_t 793 ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg) 794 { 795 796 if (!ext2_cg_has_sb(fs, cg)) 797 return (0); 798 799 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG)) 800 return (le32toh(fs->e2fs->e3fs_first_meta_bg)); 801 802 return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) / 803 EXT2_DESCS_PER_BLOCK(fs)); 804 } 805 806 static uint64_t 807 ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg) 808 { 809 unsigned long metagroup; 810 int first, last; 811 812 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 813 first = metagroup * EXT2_DESCS_PER_BLOCK(fs); 814 last = first + EXT2_DESCS_PER_BLOCK(fs) - 1; 815 816 if (cg == first || cg == first + 1 || cg == last) 817 return (1); 818 819 return (0); 820 } 821 822 uint64_t 823 ext2_cg_number_gdb(struct m_ext2fs *fs, int cg) 824 { 825 unsigned long first_meta_bg, metagroup; 826 827 first_meta_bg = le32toh(fs->e2fs->e3fs_first_meta_bg); 828 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 829 830 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 831 metagroup < first_meta_bg) 832 return (ext2_cg_number_gdb_nometa(fs, cg)); 833 834 return ext2_cg_number_gdb_meta(fs, cg); 835 } 836 837 static int 838 ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg) 839 { 840 int number; 841 842 number = ext2_cg_has_sb(fs, cg); 843 844 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 845 cg < le32toh(fs->e2fs->e3fs_first_meta_bg) * 846 EXT2_DESCS_PER_BLOCK(fs)) { 847 if (number) { 848 number += ext2_cg_number_gdb(fs, cg); 849 number += le16toh(fs->e2fs->e2fs_reserved_ngdb); 850 } 851 } else { 852 number += ext2_cg_number_gdb(fs, cg); 853 } 854 855 return (number); 856 } 857 858 static void 859 ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 860 { 861 int i; 862 863 if (start_bit >= end_bit) 864 return; 865 866 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 867 setbit(bitmap, i); 868 if (i < end_bit) 869 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 870 } 871 872 static int 873 ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block) 874 { 875 876 return ((block - le32toh(fs->e2fs->e2fs_first_dblock)) / 877 fs->e2fs_bsize); 878 } 879 880 static int 881 ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg) 882 { 883 884 return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0); 885 } 886 887 static int 888 ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp) 889 { 890 int bit, bit_max, inodes_per_block; 891 uint64_t start, tmp; 892 893 if (!(le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_BLOCK_UNINIT)) 894 return (0); 895 896 memset(bp->b_data, 0, fs->e2fs_bsize); 897 898 bit_max = ext2_number_base_meta_blocks(fs, cg); 899 if ((bit_max >> 3) >= fs->e2fs_bsize) 900 return (EINVAL); 901 902 for (bit = 0; bit < bit_max; bit++) 903 setbit(bp->b_data, bit); 904 905 start = (uint64_t)cg * fs->e2fs_bpg + 906 le32toh(fs->e2fs->e2fs_first_dblock); 907 908 /* Set bits for block and inode bitmaps, and inode table. */ 909 tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]); 910 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 911 ext2_block_in_group(fs, tmp, cg)) 912 setbit(bp->b_data, tmp - start); 913 914 tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]); 915 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 916 ext2_block_in_group(fs, tmp, cg)) 917 setbit(bp->b_data, tmp - start); 918 919 tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]); 920 inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs); 921 while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + 922 fs->e2fs_ipg / inodes_per_block ) { 923 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 924 ext2_block_in_group(fs, tmp, cg)) 925 setbit(bp->b_data, tmp - start); 926 tmp++; 927 } 928 929 /* 930 * Also if the number of blocks within the group is less than 931 * the blocksize * 8 ( which is the size of bitmap ), set rest 932 * of the block bitmap to 1 933 */ 934 ext2_mark_bitmap_end(fs->e2fs_bpg, fs->e2fs_bsize * 8, 935 bp->b_data); 936 937 /* Clean the flag */ 938 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 939 fs->e2fs_gd[cg].ext4bgd_flags) & ~EXT2_BG_BLOCK_UNINIT); 940 941 return (0); 942 } 943 944 static int 945 ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg) 946 { 947 struct ext2_gd *gd; 948 uint64_t group_first_block; 949 unsigned int offset, max_bit; 950 951 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) { 952 /* 953 * It is not possible to check block bitmap in case of this 954 * feature, because the inode and block bitmaps and inode table 955 * blocks may not be in the group at all. 956 * So, skip check in this case. 957 */ 958 return (0); 959 } 960 961 gd = &fs->e2fs_gd[cg]; 962 max_bit = fs->e2fs_fpg; 963 group_first_block = ((uint64_t)cg) * fs->e2fs_fpg + 964 le32toh(fs->e2fs->e2fs_first_dblock); 965 966 /* Check block bitmap block number */ 967 offset = e2fs_gd_get_b_bitmap(gd) - group_first_block; 968 if (offset >= max_bit || !isset(bp->b_data, offset)) { 969 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 970 "bad block bitmap, group", cg); 971 return (EINVAL); 972 } 973 974 /* Check inode bitmap block number */ 975 offset = e2fs_gd_get_i_bitmap(gd) - group_first_block; 976 if (offset >= max_bit || !isset(bp->b_data, offset)) { 977 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 978 "bad inode bitmap", cg); 979 return (EINVAL); 980 } 981 982 /* Check inode table */ 983 offset = e2fs_gd_get_i_tables(gd) - group_first_block; 984 if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) { 985 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 986 "bad inode table, group", cg); 987 return (EINVAL); 988 } 989 990 return (0); 991 } 992 993 /* 994 * Determine whether a block can be allocated. 995 * 996 * Check to see if a block of the appropriate size is available, 997 * and if it is, allocate it. 998 */ 999 static daddr_t 1000 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 1001 { 1002 struct m_ext2fs *fs; 1003 struct buf *bp; 1004 struct ext2mount *ump; 1005 daddr_t bno, runstart, runlen; 1006 int bit, loc, end, error, start; 1007 char *bbp; 1008 /* XXX ondisk32 */ 1009 fs = ip->i_e2fs; 1010 ump = ip->i_ump; 1011 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1012 return (0); 1013 1014 EXT2_UNLOCK(ump); 1015 error = bread(ip->i_devvp, fsbtodb(fs, 1016 e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1017 (int)fs->e2fs_bsize, NOCRED, &bp); 1018 if (error) 1019 goto fail; 1020 1021 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1022 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1023 error = ext2_cg_block_bitmap_init(fs, cg, bp); 1024 if (error) 1025 goto fail; 1026 1027 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1028 } 1029 error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp); 1030 if (error) 1031 goto fail; 1032 1033 error = ext2_b_bitmap_validate(fs,bp, cg); 1034 if (error) 1035 goto fail; 1036 1037 /* 1038 * Check, that another thread did not not allocate the last block in 1039 * this group while we were waiting for the buffer. 1040 */ 1041 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1042 goto fail; 1043 1044 bbp = (char *)bp->b_data; 1045 1046 if (dtog(fs, bpref) != cg) 1047 bpref = 0; 1048 if (bpref != 0) { 1049 bpref = dtogd(fs, bpref); 1050 /* 1051 * if the requested block is available, use it 1052 */ 1053 if (isclr(bbp, bpref)) { 1054 bno = bpref; 1055 goto gotit; 1056 } 1057 } 1058 /* 1059 * no blocks in the requested cylinder, so take next 1060 * available one in this cylinder group. 1061 * first try to get 8 contigous blocks, then fall back to a single 1062 * block. 1063 */ 1064 if (bpref) 1065 start = dtogd(fs, bpref) / NBBY; 1066 else 1067 start = 0; 1068 end = howmany(fs->e2fs_fpg, NBBY); 1069 retry: 1070 runlen = 0; 1071 runstart = 0; 1072 for (loc = start; loc < end; loc++) { 1073 if (bbp[loc] == (char)0xff) { 1074 runlen = 0; 1075 continue; 1076 } 1077 1078 /* Start of a run, find the number of high clear bits. */ 1079 if (runlen == 0) { 1080 bit = fls(bbp[loc]); 1081 runlen = NBBY - bit; 1082 runstart = loc * NBBY + bit; 1083 } else if (bbp[loc] == 0) { 1084 /* Continue a run. */ 1085 runlen += NBBY; 1086 } else { 1087 /* 1088 * Finish the current run. If it isn't long 1089 * enough, start a new one. 1090 */ 1091 bit = ffs(bbp[loc]) - 1; 1092 runlen += bit; 1093 if (runlen >= 8) { 1094 bno = runstart; 1095 goto gotit; 1096 } 1097 1098 /* Run was too short, start a new one. */ 1099 bit = fls(bbp[loc]); 1100 runlen = NBBY - bit; 1101 runstart = loc * NBBY + bit; 1102 } 1103 1104 /* If the current run is long enough, use it. */ 1105 if (runlen >= 8) { 1106 bno = runstart; 1107 goto gotit; 1108 } 1109 } 1110 if (start != 0) { 1111 end = start; 1112 start = 0; 1113 goto retry; 1114 } 1115 bno = ext2_mapsearch(fs, bbp, bpref); 1116 if (bno < 0) 1117 goto fail; 1118 1119 gotit: 1120 #ifdef INVARIANTS 1121 if (isset(bbp, bno)) { 1122 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n", 1123 cg, (intmax_t)bno, fs->e2fs_fsmnt); 1124 panic("ext2fs_alloccg: dup alloc"); 1125 } 1126 #endif 1127 setbit(bbp, bno); 1128 EXT2_LOCK(ump); 1129 ext2_clusteracct(fs, bbp, cg, bno, -1); 1130 fs->e2fs_fbcount--; 1131 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1132 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1133 fs->e2fs_fmod = 1; 1134 EXT2_UNLOCK(ump); 1135 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1136 bdwrite(bp); 1137 return (((uint64_t)cg) * fs->e2fs_fpg + 1138 le32toh(fs->e2fs->e2fs_first_dblock) + bno); 1139 1140 fail: 1141 brelse(bp); 1142 EXT2_LOCK(ump); 1143 return (0); 1144 } 1145 1146 /* 1147 * Determine whether a cluster can be allocated. 1148 */ 1149 static daddr_t 1150 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) 1151 { 1152 struct m_ext2fs *fs; 1153 struct ext2mount *ump; 1154 struct buf *bp; 1155 char *bbp; 1156 int bit, error, got, i, loc, run; 1157 int32_t *lp; 1158 daddr_t bno; 1159 1160 fs = ip->i_e2fs; 1161 ump = ip->i_ump; 1162 1163 if (fs->e2fs_maxcluster[cg] < len) 1164 return (0); 1165 1166 EXT2_UNLOCK(ump); 1167 error = bread(ip->i_devvp, 1168 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1169 (int)fs->e2fs_bsize, NOCRED, &bp); 1170 if (error) 1171 goto fail_lock; 1172 1173 bbp = (char *)bp->b_data; 1174 EXT2_LOCK(ump); 1175 /* 1176 * Check to see if a cluster of the needed size (or bigger) is 1177 * available in this cylinder group. 1178 */ 1179 lp = &fs->e2fs_clustersum[cg].cs_sum[len]; 1180 for (i = len; i <= fs->e2fs_contigsumsize; i++) 1181 if (*lp++ > 0) 1182 break; 1183 if (i > fs->e2fs_contigsumsize) { 1184 /* 1185 * Update the cluster summary information to reflect 1186 * the true maximum-sized cluster so that future cluster 1187 * allocation requests can avoid reading the bitmap only 1188 * to find no cluster. 1189 */ 1190 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; 1191 for (i = len - 1; i > 0; i--) 1192 if (*lp-- > 0) 1193 break; 1194 fs->e2fs_maxcluster[cg] = i; 1195 goto fail; 1196 } 1197 EXT2_UNLOCK(ump); 1198 1199 /* Search the bitmap to find a big enough cluster like in FFS. */ 1200 if (dtog(fs, bpref) != cg) 1201 bpref = 0; 1202 if (bpref != 0) 1203 bpref = dtogd(fs, bpref); 1204 loc = bpref / NBBY; 1205 bit = 1 << (bpref % NBBY); 1206 for (run = 0, got = bpref; got < fs->e2fs_fpg; got++) { 1207 if ((bbp[loc] & bit) != 0) 1208 run = 0; 1209 else { 1210 run++; 1211 if (run == len) 1212 break; 1213 } 1214 if ((got & (NBBY - 1)) != (NBBY - 1)) 1215 bit <<= 1; 1216 else { 1217 loc++; 1218 bit = 1; 1219 } 1220 } 1221 1222 if (got >= fs->e2fs_fpg) 1223 goto fail_lock; 1224 1225 /* Allocate the cluster that we found. */ 1226 for (i = 1; i < len; i++) 1227 if (!isclr(bbp, got - run + i)) 1228 panic("ext2_clusteralloc: map mismatch"); 1229 1230 bno = got - run + 1; 1231 if (bno >= fs->e2fs_fpg) 1232 panic("ext2_clusteralloc: allocated out of group"); 1233 1234 EXT2_LOCK(ump); 1235 for (i = 0; i < len; i += fs->e2fs_fpb) { 1236 setbit(bbp, bno + i); 1237 ext2_clusteracct(fs, bbp, cg, bno + i, -1); 1238 fs->e2fs_fbcount--; 1239 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1240 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1241 } 1242 fs->e2fs_fmod = 1; 1243 EXT2_UNLOCK(ump); 1244 1245 bdwrite(bp); 1246 return (cg * fs->e2fs_fpg + le32toh(fs->e2fs->e2fs_first_dblock) 1247 + bno); 1248 1249 fail_lock: 1250 EXT2_LOCK(ump); 1251 fail: 1252 brelse(bp); 1253 return (0); 1254 } 1255 1256 static int 1257 ext2_zero_inode_table(struct inode *ip, int cg) 1258 { 1259 struct m_ext2fs *fs; 1260 struct buf *bp; 1261 int i, all_blks, used_blks; 1262 1263 fs = ip->i_e2fs; 1264 1265 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_INODE_ZEROED) 1266 return (0); 1267 1268 all_blks = le16toh(fs->e2fs->e2fs_inode_size) * fs->e2fs_ipg / 1269 fs->e2fs_bsize; 1270 1271 used_blks = howmany(fs->e2fs_ipg - 1272 e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]), 1273 fs->e2fs_bsize / EXT2_INODE_SIZE(fs)); 1274 1275 for (i = 0; i < all_blks - used_blks; i++) { 1276 bp = getblk(ip->i_devvp, fsbtodb(fs, 1277 e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i), 1278 fs->e2fs_bsize, 0, 0, 0); 1279 if (!bp) 1280 return (EIO); 1281 1282 vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize); 1283 bawrite(bp); 1284 } 1285 1286 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1287 fs->e2fs_gd[cg].ext4bgd_flags) | EXT2_BG_INODE_ZEROED); 1288 1289 return (0); 1290 } 1291 1292 static void 1293 ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last) 1294 { 1295 int i; 1296 1297 for (i = first; i <= last; i++) 1298 bitmap[i] = 0xff; 1299 } 1300 1301 /* 1302 * Determine whether an inode can be allocated. 1303 * 1304 * Check to see if an inode is available, and if it is, 1305 * allocate it using tode in the specified cylinder group. 1306 */ 1307 static daddr_t 1308 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 1309 { 1310 struct m_ext2fs *fs; 1311 struct buf *bp; 1312 struct ext2mount *ump; 1313 int error, start, len, ifree, ibytes; 1314 char *ibp, *loc; 1315 1316 ipref--; /* to avoid a lot of (ipref -1) */ 1317 if (ipref == -1) 1318 ipref = 0; 1319 fs = ip->i_e2fs; 1320 ump = ip->i_ump; 1321 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) 1322 return (0); 1323 EXT2_UNLOCK(ump); 1324 error = bread(ip->i_devvp, fsbtodb(fs, 1325 e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1326 (int)fs->e2fs_bsize, NOCRED, &bp); 1327 if (error) { 1328 EXT2_LOCK(ump); 1329 return (0); 1330 } 1331 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1332 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1333 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & 1334 EXT2_BG_INODE_UNINIT) { 1335 ibytes = fs->e2fs_ipg / 8; 1336 memset(bp->b_data, 0, ibytes - 1); 1337 ext2_fix_bitmap_tail(bp->b_data, ibytes, 1338 fs->e2fs_bsize - 1); 1339 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1340 fs->e2fs_gd[cg].ext4bgd_flags) & 1341 ~EXT2_BG_INODE_UNINIT); 1342 } 1343 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1344 error = ext2_zero_inode_table(ip, cg); 1345 if (error) { 1346 brelse(bp); 1347 EXT2_LOCK(ump); 1348 return (0); 1349 } 1350 } 1351 error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp); 1352 if (error) { 1353 brelse(bp); 1354 EXT2_LOCK(ump); 1355 return (0); 1356 } 1357 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) { 1358 /* 1359 * Another thread allocated the last i-node in this 1360 * group while we were waiting for the buffer. 1361 */ 1362 brelse(bp); 1363 EXT2_LOCK(ump); 1364 return (0); 1365 } 1366 ibp = (char *)bp->b_data; 1367 if (ipref) { 1368 ipref %= fs->e2fs_ipg; 1369 if (isclr(ibp, ipref)) 1370 goto gotit; 1371 } 1372 start = ipref / NBBY; 1373 len = howmany(fs->e2fs_ipg - ipref, NBBY); 1374 loc = memcchr(&ibp[start], 0xff, len); 1375 if (loc == NULL) { 1376 len = start + 1; 1377 start = 0; 1378 loc = memcchr(&ibp[start], 0xff, len); 1379 if (loc == NULL) { 1380 SDT_PROBE3(ext2fs, , alloc, 1381 ext2_nodealloccg_bmap_corrupted, cg, ipref, 1382 fs->e2fs_fsmnt); 1383 brelse(bp); 1384 EXT2_LOCK(ump); 1385 return (0); 1386 } 1387 } 1388 ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; 1389 gotit: 1390 setbit(ibp, ipref); 1391 EXT2_LOCK(ump); 1392 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1393 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1); 1394 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1395 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1396 ifree = fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]); 1397 if (ipref + 1 > ifree) 1398 e2fs_gd_set_i_unused(&fs->e2fs_gd[cg], 1399 fs->e2fs_ipg - (ipref + 1)); 1400 } 1401 fs->e2fs_ficount--; 1402 fs->e2fs_fmod = 1; 1403 if ((mode & IFMT) == IFDIR) { 1404 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1405 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1); 1406 fs->e2fs_total_dir++; 1407 } 1408 EXT2_UNLOCK(ump); 1409 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1410 bdwrite(bp); 1411 return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1); 1412 } 1413 1414 /* 1415 * Free a block or fragment. 1416 * 1417 */ 1418 void 1419 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 1420 { 1421 struct m_ext2fs *fs; 1422 struct buf *bp; 1423 struct ext2mount *ump; 1424 int cg, error; 1425 char *bbp; 1426 1427 fs = ip->i_e2fs; 1428 ump = ip->i_ump; 1429 cg = dtog(fs, bno); 1430 if (bno >= fs->e2fs_bcount) { 1431 SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, 1432 ip->i_number, bno); 1433 return; 1434 } 1435 error = bread(ip->i_devvp, 1436 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1437 (int)fs->e2fs_bsize, NOCRED, &bp); 1438 if (error) { 1439 return; 1440 } 1441 bbp = (char *)bp->b_data; 1442 bno = dtogd(fs, bno); 1443 if (isclr(bbp, bno)) { 1444 panic("ext2_blkfree: freeing free block %lld, fs=%s", 1445 (long long)bno, fs->e2fs_fsmnt); 1446 } 1447 clrbit(bbp, bno); 1448 EXT2_LOCK(ump); 1449 ext2_clusteracct(fs, bbp, cg, bno, 1); 1450 fs->e2fs_fbcount++; 1451 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1452 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); 1453 fs->e2fs_fmod = 1; 1454 EXT2_UNLOCK(ump); 1455 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1456 bdwrite(bp); 1457 } 1458 1459 /* 1460 * Free an inode. 1461 * 1462 */ 1463 int 1464 ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1465 { 1466 struct m_ext2fs *fs; 1467 struct inode *pip; 1468 struct buf *bp; 1469 struct ext2mount *ump; 1470 int error, cg; 1471 char *ibp; 1472 1473 pip = VTOI(pvp); 1474 fs = pip->i_e2fs; 1475 ump = pip->i_ump; 1476 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1477 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1478 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1479 1480 cg = ino_to_cg(fs, ino); 1481 error = bread(pip->i_devvp, 1482 fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1483 (int)fs->e2fs_bsize, NOCRED, &bp); 1484 if (error) { 1485 return (0); 1486 } 1487 ibp = (char *)bp->b_data; 1488 ino = (ino - 1) % fs->e2fs_ipg; 1489 if (isclr(ibp, ino)) { 1490 SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, 1491 fs->e2fs_fsmnt, ino); 1492 if (fs->e2fs_ronly == 0) 1493 panic("ext2_vfree: freeing free inode"); 1494 } 1495 clrbit(ibp, ino); 1496 EXT2_LOCK(ump); 1497 fs->e2fs_ficount++; 1498 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1499 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); 1500 if ((mode & IFMT) == IFDIR) { 1501 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1502 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); 1503 fs->e2fs_total_dir--; 1504 } 1505 fs->e2fs_fmod = 1; 1506 EXT2_UNLOCK(ump); 1507 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1508 bdwrite(bp); 1509 return (0); 1510 } 1511 1512 /* 1513 * Find a block in the specified cylinder group. 1514 * 1515 * It is a panic if a request is made to find a block if none are 1516 * available. 1517 */ 1518 static daddr_t 1519 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1520 { 1521 char *loc; 1522 int start, len; 1523 1524 /* 1525 * find the fragment by searching through the free block 1526 * map for an appropriate bit pattern 1527 */ 1528 if (bpref) 1529 start = dtogd(fs, bpref) / NBBY; 1530 else 1531 start = 0; 1532 len = howmany(fs->e2fs_fpg, NBBY) - start; 1533 loc = memcchr(&bbp[start], 0xff, len); 1534 if (loc == NULL) { 1535 len = start + 1; 1536 start = 0; 1537 loc = memcchr(&bbp[start], 0xff, len); 1538 if (loc == NULL) { 1539 panic("ext2_mapsearch: map corrupted: start=%d, len=%d," 1540 "fs=%s", start, len, fs->e2fs_fsmnt); 1541 /* NOTREACHED */ 1542 } 1543 } 1544 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1545 } 1546 1547 int 1548 ext2_cg_has_sb(struct m_ext2fs *fs, int cg) 1549 { 1550 int a3, a5, a7; 1551 1552 if (cg == 0) 1553 return (1); 1554 1555 if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { 1556 if (cg == le32toh(fs->e2fs->e4fs_backup_bgs[0]) || 1557 cg == le32toh(fs->e2fs->e4fs_backup_bgs[1])) 1558 return (1); 1559 return (0); 1560 } 1561 1562 if ((cg <= 1) || 1563 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) 1564 return (1); 1565 1566 if (!(cg & 1)) 1567 return (0); 1568 1569 for (a3 = 3, a5 = 5, a7 = 7; 1570 a3 <= cg || a5 <= cg || a7 <= cg; 1571 a3 *= 3, a5 *= 5, a7 *= 7) 1572 if (cg == a3 || cg == a5 || cg == a7) 1573 return (1); 1574 return (0); 1575 } 1576