1 /*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/conf.h> 43 #include <sys/vnode.h> 44 #include <sys/sdt.h> 45 #include <sys/stat.h> 46 #include <sys/mount.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 #include <sys/buf.h> 50 #include <sys/endian.h> 51 52 #include <fs/ext2fs/fs.h> 53 #include <fs/ext2fs/inode.h> 54 #include <fs/ext2fs/ext2_mount.h> 55 #include <fs/ext2fs/ext2fs.h> 56 #include <fs/ext2fs/ext2_extern.h> 57 58 SDT_PROVIDER_DEFINE(ext2fs); 59 /* 60 * ext2fs trace probe: 61 * arg0: verbosity. Higher numbers give more verbose messages 62 * arg1: Textual message 63 */ 64 SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*"); 65 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc, 66 "ino_t", "e2fs_lbn_t", "e2fs_lbn_t"); 67 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t"); 68 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t"); 69 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int"); 70 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 71 "int", "daddr_t", "char*"); 72 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t"); 73 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t"); 74 75 static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); 76 static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); 77 static u_long ext2_dirpref(struct inode *); 78 static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int, 79 daddr_t (*)(struct inode *, int, daddr_t, 80 int)); 81 static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); 82 static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); 83 84 /* 85 * Allocate a block in the filesystem. 86 * 87 * A preference may be optionally specified. If a preference is given 88 * the following hierarchy is used to allocate a block: 89 * 1) allocate the requested block. 90 * 2) allocate a rotationally optimal block in the same cylinder. 91 * 3) allocate a block in the same cylinder group. 92 * 4) quadratically rehash into other cylinder groups, until an 93 * available block is located. 94 * If no block preference is given the following hierarchy is used 95 * to allocate a block: 96 * 1) allocate a block in the cylinder group that contains the 97 * inode for the file. 98 * 2) quadratically rehash into other cylinder groups, until an 99 * available block is located. 100 */ 101 int 102 ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size, 103 struct ucred *cred, e4fs_daddr_t *bnp) 104 { 105 struct m_ext2fs *fs; 106 struct ext2mount *ump; 107 e4fs_daddr_t bno; 108 int cg; 109 110 *bnp = 0; 111 fs = ip->i_e2fs; 112 ump = ip->i_ump; 113 mtx_assert(EXT2_MTX(ump), MA_OWNED); 114 #ifdef INVARIANTS 115 if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { 116 vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n", 117 (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); 118 panic("ext2_alloc: bad size"); 119 } 120 if (cred == NOCRED) 121 panic("ext2_alloc: missing credential"); 122 #endif /* INVARIANTS */ 123 if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0) 124 goto nospace; 125 if (cred->cr_uid != 0 && 126 fs->e2fs_fbcount < fs->e2fs_rbcount) 127 goto nospace; 128 if (bpref >= fs->e2fs_bcount) 129 bpref = 0; 130 if (bpref == 0) 131 cg = ino_to_cg(fs, ip->i_number); 132 else 133 cg = dtog(fs, bpref); 134 bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 135 ext2_alloccg); 136 if (bno > 0) { 137 /* set next_alloc fields as done in block_getblk */ 138 ip->i_next_alloc_block = lbn; 139 ip->i_next_alloc_goal = bno; 140 141 ip->i_blocks += btodb(fs->e2fs_bsize); 142 ip->i_flag |= IN_CHANGE | IN_UPDATE; 143 *bnp = bno; 144 return (0); 145 } 146 nospace: 147 EXT2_UNLOCK(ump); 148 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block"); 149 return (ENOSPC); 150 } 151 152 /* 153 * Allocate EA's block for inode. 154 */ 155 e4fs_daddr_t 156 ext2_alloc_meta(struct inode *ip) 157 { 158 struct m_ext2fs *fs; 159 daddr_t blk; 160 161 fs = ip->i_e2fs; 162 163 EXT2_LOCK(ip->i_ump); 164 blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize, 165 ext2_alloccg); 166 if (0 == blk) { 167 EXT2_UNLOCK(ip->i_ump); 168 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block"); 169 } 170 171 return (blk); 172 } 173 174 /* 175 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 176 * 177 * The vnode and an array of buffer pointers for a range of sequential 178 * logical blocks to be made contiguous is given. The allocator attempts 179 * to find a range of sequential blocks starting as close as possible to 180 * an fs_rotdelay offset from the end of the allocation for the logical 181 * block immediately preceding the current range. If successful, the 182 * physical block numbers in the buffer pointers and in the inode are 183 * changed to reflect the new allocation. If unsuccessful, the allocation 184 * is left unchanged. The success in doing the reallocation is returned. 185 * Note that the error return is not reflected back to the user. Rather 186 * the previous block allocation will be used. 187 */ 188 189 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 190 "EXT2FS filesystem"); 191 192 static int doasyncfree = 1; 193 194 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, 195 "Use asynchronous writes to update block pointers when freeing blocks"); 196 197 static int doreallocblks = 0; 198 199 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 200 201 int 202 ext2_reallocblks(struct vop_reallocblks_args *ap) 203 { 204 struct m_ext2fs *fs; 205 struct inode *ip; 206 struct vnode *vp; 207 struct buf *sbp, *ebp; 208 uint32_t *bap, *sbap, *ebap; 209 struct ext2mount *ump; 210 struct cluster_save *buflist; 211 struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp; 212 e2fs_lbn_t start_lbn, end_lbn; 213 int soff; 214 e2fs_daddr_t newblk, blkno; 215 int i, len, start_lvl, end_lvl, pref, ssize; 216 217 if (doreallocblks == 0) 218 return (ENOSPC); 219 220 vp = ap->a_vp; 221 ip = VTOI(vp); 222 fs = ip->i_e2fs; 223 ump = ip->i_ump; 224 225 if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) 226 return (ENOSPC); 227 228 buflist = ap->a_buflist; 229 len = buflist->bs_nchildren; 230 start_lbn = buflist->bs_children[0]->b_lblkno; 231 end_lbn = start_lbn + len - 1; 232 #ifdef INVARIANTS 233 for (i = 1; i < len; i++) 234 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 235 panic("ext2_reallocblks: non-cluster"); 236 #endif 237 /* 238 * If the cluster crosses the boundary for the first indirect 239 * block, leave space for the indirect block. Indirect blocks 240 * are initially laid out in a position after the last direct 241 * block. Block reallocation would usually destroy locality by 242 * moving the indirect block out of the way to make room for 243 * data blocks if we didn't compensate here. We should also do 244 * this for other indirect block boundaries, but it is only 245 * important for the first one. 246 */ 247 if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR) 248 return (ENOSPC); 249 /* 250 * If the latest allocation is in a new cylinder group, assume that 251 * the filesystem has decided to move and do not force it back to 252 * the previous cylinder group. 253 */ 254 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 255 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 256 return (ENOSPC); 257 if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || 258 ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) 259 return (ENOSPC); 260 /* 261 * Get the starting offset and block map for the first block. 262 */ 263 if (start_lvl == 0) { 264 sbap = &ip->i_db[0]; 265 soff = start_lbn; 266 } else { 267 idp = &start_ap[start_lvl - 1]; 268 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) { 269 brelse(sbp); 270 return (ENOSPC); 271 } 272 sbap = (u_int *)sbp->b_data; 273 soff = idp->in_off; 274 } 275 /* 276 * If the block range spans two block maps, get the second map. 277 */ 278 ebap = NULL; 279 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 280 ssize = len; 281 } else { 282 #ifdef INVARIANTS 283 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) 284 panic("ext2_reallocblks: start == end"); 285 #endif 286 ssize = len - (idp->in_off + 1); 287 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)) 288 goto fail; 289 ebap = (u_int *)ebp->b_data; 290 } 291 /* 292 * Find the preferred location for the cluster. 293 */ 294 EXT2_LOCK(ump); 295 pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); 296 /* 297 * Search the block map looking for an allocation of the desired size. 298 */ 299 if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, 300 len, ext2_clusteralloc)) == 0) { 301 EXT2_UNLOCK(ump); 302 goto fail; 303 } 304 /* 305 * We have found a new contiguous block. 306 * 307 * First we have to replace the old block pointers with the new 308 * block pointers in the inode and indirect blocks associated 309 * with the file. 310 */ 311 SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc, 312 ip->i_number, start_lbn, end_lbn); 313 blkno = newblk; 314 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 315 if (i == ssize) { 316 bap = ebap; 317 soff = -i; 318 } 319 #ifdef INVARIANTS 320 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) 321 panic("ext2_reallocblks: alloc mismatch"); 322 #endif 323 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap); 324 *bap++ = blkno; 325 } 326 /* 327 * Next we must write out the modified inode and indirect blocks. 328 * For strict correctness, the writes should be synchronous since 329 * the old block values may have been written to disk. In practise 330 * they are almost never written, but if we are concerned about 331 * strict correctness, the `doasyncfree' flag should be set to zero. 332 * 333 * The test on `doasyncfree' should be changed to test a flag 334 * that shows whether the associated buffers and inodes have 335 * been written. The flag should be set when the cluster is 336 * started and cleared whenever the buffer or inode is flushed. 337 * We can then check below to see if it is set, and do the 338 * synchronous write only when it has been cleared. 339 */ 340 if (sbap != &ip->i_db[0]) { 341 if (doasyncfree) 342 bdwrite(sbp); 343 else 344 bwrite(sbp); 345 } else { 346 ip->i_flag |= IN_CHANGE | IN_UPDATE; 347 if (!doasyncfree) 348 ext2_update(vp, 1); 349 } 350 if (ssize < len) { 351 if (doasyncfree) 352 bdwrite(ebp); 353 else 354 bwrite(ebp); 355 } 356 /* 357 * Last, free the old blocks and assign the new blocks to the buffers. 358 */ 359 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 360 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), 361 fs->e2fs_bsize); 362 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 363 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno); 364 } 365 366 return (0); 367 368 fail: 369 if (ssize < len) 370 brelse(ebp); 371 if (sbap != &ip->i_db[0]) 372 brelse(sbp); 373 return (ENOSPC); 374 } 375 376 /* 377 * Allocate an inode in the filesystem. 378 * 379 */ 380 int 381 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 382 { 383 struct timespec ts; 384 struct m_ext2fs *fs; 385 struct ext2mount *ump; 386 struct inode *pip; 387 struct inode *ip; 388 struct vnode *vp; 389 struct thread *td; 390 ino_t ino, ipref; 391 int error, cg; 392 393 *vpp = NULL; 394 pip = VTOI(pvp); 395 fs = pip->i_e2fs; 396 ump = pip->i_ump; 397 398 EXT2_LOCK(ump); 399 if (fs->e2fs_ficount == 0) 400 goto noinodes; 401 /* 402 * If it is a directory then obtain a cylinder group based on 403 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is 404 * always the next inode. 405 */ 406 if ((mode & IFMT) == IFDIR) { 407 cg = ext2_dirpref(pip); 408 if (fs->e2fs_contigdirs[cg] < 255) 409 fs->e2fs_contigdirs[cg]++; 410 } else { 411 cg = ino_to_cg(fs, pip->i_number); 412 if (fs->e2fs_contigdirs[cg] > 0) 413 fs->e2fs_contigdirs[cg]--; 414 } 415 ipref = cg * fs->e2fs_ipg + 1; 416 ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); 417 if (ino == 0) 418 goto noinodes; 419 420 td = curthread; 421 error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 422 if (error || *vpp != NULL) { 423 return (error); 424 } 425 426 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 427 428 /* Allocate a new vnode/inode. */ 429 if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) { 430 free(ip, M_EXT2NODE); 431 return (error); 432 } 433 434 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 435 vp->v_data = ip; 436 ip->i_vnode = vp; 437 ip->i_e2fs = fs = ump->um_e2fs; 438 ip->i_ump = ump; 439 ip->i_number = ino; 440 ip->i_block_group = ino_to_cg(fs, ino); 441 ip->i_next_alloc_block = 0; 442 ip->i_next_alloc_goal = 0; 443 444 error = insmntque(vp, ump->um_mountp); 445 if (error) { 446 free(ip, M_EXT2NODE); 447 return (error); 448 } 449 450 error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 451 if (error || *vpp != NULL) { 452 *vpp = NULL; 453 free(ip, M_EXT2NODE); 454 return (error); 455 } 456 457 if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) { 458 vput(vp); 459 *vpp = NULL; 460 free(ip, M_EXT2NODE); 461 return (error); 462 } 463 464 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS) 465 && (S_ISREG(mode) || S_ISDIR(mode))) 466 ext4_ext_tree_init(ip); 467 else 468 memset(ip->i_data, 0, sizeof(ip->i_data)); 469 470 /* 471 * Set up a new generation number for this inode. 472 * Avoid zero values. 473 */ 474 do { 475 ip->i_gen = arc4random(); 476 } while (ip->i_gen == 0); 477 478 vfs_timestamp(&ts); 479 ip->i_birthtime = ts.tv_sec; 480 ip->i_birthnsec = ts.tv_nsec; 481 482 vn_set_state(vp, VSTATE_CONSTRUCTED); 483 *vpp = vp; 484 485 return (0); 486 487 noinodes: 488 EXT2_UNLOCK(ump); 489 SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes"); 490 return (ENOSPC); 491 } 492 493 /* 494 * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h 495 */ 496 uint64_t 497 e2fs_gd_get_b_bitmap(struct ext2_gd *gd) 498 { 499 500 return (((uint64_t)(le32toh(gd->ext4bgd_b_bitmap_hi)) << 32) | 501 le32toh(gd->ext2bgd_b_bitmap)); 502 } 503 504 uint64_t 505 e2fs_gd_get_i_bitmap(struct ext2_gd *gd) 506 { 507 508 return (((uint64_t)(le32toh(gd->ext4bgd_i_bitmap_hi)) << 32) | 509 le32toh(gd->ext2bgd_i_bitmap)); 510 } 511 512 uint64_t 513 e2fs_gd_get_i_tables(struct ext2_gd *gd) 514 { 515 516 return (((uint64_t)(le32toh(gd->ext4bgd_i_tables_hi)) << 32) | 517 le32toh(gd->ext2bgd_i_tables)); 518 } 519 520 static uint32_t 521 e2fs_gd_get_nbfree(struct ext2_gd *gd) 522 { 523 524 return (((uint32_t)(le16toh(gd->ext4bgd_nbfree_hi)) << 16) | 525 le16toh(gd->ext2bgd_nbfree)); 526 } 527 528 static void 529 e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val) 530 { 531 532 gd->ext2bgd_nbfree = htole16(val & 0xffff); 533 gd->ext4bgd_nbfree_hi = htole16(val >> 16); 534 } 535 536 static uint32_t 537 e2fs_gd_get_nifree(struct ext2_gd *gd) 538 { 539 540 return (((uint32_t)(le16toh(gd->ext4bgd_nifree_hi)) << 16) | 541 le16toh(gd->ext2bgd_nifree)); 542 } 543 544 static void 545 e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val) 546 { 547 548 gd->ext2bgd_nifree = htole16(val & 0xffff); 549 gd->ext4bgd_nifree_hi = htole16(val >> 16); 550 } 551 552 uint32_t 553 e2fs_gd_get_ndirs(struct ext2_gd *gd) 554 { 555 556 return (((uint32_t)(le16toh(gd->ext4bgd_ndirs_hi)) << 16) | 557 le16toh(gd->ext2bgd_ndirs)); 558 } 559 560 static void 561 e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val) 562 { 563 564 gd->ext2bgd_ndirs = htole16(val & 0xffff); 565 gd->ext4bgd_ndirs_hi = htole16(val >> 16); 566 } 567 568 static uint32_t 569 e2fs_gd_get_i_unused(struct ext2_gd *gd) 570 { 571 return ((uint32_t)(le16toh(gd->ext4bgd_i_unused_hi) << 16) | 572 le16toh(gd->ext4bgd_i_unused)); 573 } 574 575 static void 576 e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val) 577 { 578 579 gd->ext4bgd_i_unused = htole16(val & 0xffff); 580 gd->ext4bgd_i_unused_hi = htole16(val >> 16); 581 } 582 583 /* 584 * Find a cylinder to place a directory. 585 * 586 * The policy implemented by this algorithm is to allocate a 587 * directory inode in the same cylinder group as its parent 588 * directory, but also to reserve space for its files inodes 589 * and data. Restrict the number of directories which may be 590 * allocated one after another in the same cylinder group 591 * without intervening allocation of files. 592 * 593 * If we allocate a first level directory then force allocation 594 * in another cylinder group. 595 * 596 */ 597 static u_long 598 ext2_dirpref(struct inode *pip) 599 { 600 struct m_ext2fs *fs; 601 int cg, prefcg, cgsize; 602 uint64_t avgbfree, minbfree; 603 u_int avgifree, avgndir, curdirsize; 604 u_int minifree, maxndir; 605 u_int mincg, minndir; 606 u_int dirsize, maxcontigdirs; 607 608 mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); 609 fs = pip->i_e2fs; 610 611 avgifree = fs->e2fs_ficount / fs->e2fs_gcount; 612 avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount; 613 avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; 614 615 /* 616 * Force allocation in another cg if creating a first level dir. 617 */ 618 ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); 619 if (ITOV(pip)->v_vflag & VV_ROOT) { 620 prefcg = arc4random() % fs->e2fs_gcount; 621 mincg = prefcg; 622 minndir = fs->e2fs_ipg; 623 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 624 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 625 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 626 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 627 mincg = cg; 628 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 629 } 630 for (cg = 0; cg < prefcg; cg++) 631 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 632 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 633 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 634 mincg = cg; 635 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 636 } 637 return (mincg); 638 } 639 /* 640 * Count various limits which used for 641 * optimal allocation of a directory inode. 642 */ 643 maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); 644 minifree = avgifree - avgifree / 4; 645 if (minifree < 1) 646 minifree = 1; 647 minbfree = avgbfree - avgbfree / 4; 648 if (minbfree < 1) 649 minbfree = 1; 650 cgsize = fs->e2fs_fsize * fs->e2fs_fpg; 651 dirsize = AVGDIRSIZE; 652 curdirsize = avgndir ? 653 (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; 654 if (dirsize < curdirsize) 655 dirsize = curdirsize; 656 maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); 657 maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); 658 if (maxcontigdirs == 0) 659 maxcontigdirs = 1; 660 661 /* 662 * Limit number of dirs in one cg and reserve space for 663 * regular files, but only if we have no deficit in 664 * inodes or space. 665 */ 666 prefcg = ino_to_cg(fs, pip->i_number); 667 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 668 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 669 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 670 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 671 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 672 return (cg); 673 } 674 for (cg = 0; cg < prefcg; cg++) 675 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 676 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 677 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 678 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 679 return (cg); 680 } 681 /* 682 * This is a backstop when we have deficit in space. 683 */ 684 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 685 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 686 return (cg); 687 for (cg = 0; cg < prefcg; cg++) 688 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 689 break; 690 return (cg); 691 } 692 693 /* 694 * Select the desired position for the next block in a file. 695 * 696 * we try to mimic what Remy does in inode_getblk/block_getblk 697 * 698 * we note: blocknr == 0 means that we're about to allocate either 699 * a direct block or a pointer block at the first level of indirection 700 * (In other words, stuff that will go in i_db[] or i_ib[]) 701 * 702 * blocknr != 0 means that we're allocating a block that is none 703 * of the above. Then, blocknr tells us the number of the block 704 * that will hold the pointer 705 */ 706 e4fs_daddr_t 707 ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, 708 e2fs_daddr_t blocknr) 709 { 710 struct m_ext2fs *fs; 711 int tmp; 712 713 fs = ip->i_e2fs; 714 715 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 716 717 /* 718 * If the next block is actually what we thought it is, then set the 719 * goal to what we thought it should be. 720 */ 721 if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) 722 return ip->i_next_alloc_goal; 723 724 /* 725 * Now check whether we were provided with an array that basically 726 * tells us previous blocks to which we want to stay close. 727 */ 728 if (bap) 729 for (tmp = indx - 1; tmp >= 0; tmp--) 730 if (bap[tmp]) 731 return (le32toh(bap[tmp])); 732 733 /* 734 * Else lets fall back to the blocknr or, if there is none, follow 735 * the rule that a block should be allocated near its inode. 736 */ 737 return (blocknr ? blocknr : 738 (e2fs_daddr_t)(ip->i_block_group * 739 EXT2_BLOCKS_PER_GROUP(fs)) + le32toh(fs->e2fs->e2fs_first_dblock)); 740 } 741 742 /* 743 * Implement the cylinder overflow algorithm. 744 * 745 * The policy implemented by this algorithm is: 746 * 1) allocate the block in its requested cylinder group. 747 * 2) quadratically rehash on the cylinder group number. 748 * 3) brute force search for a free block. 749 */ 750 static e4fs_daddr_t 751 ext2_hashalloc(struct inode *ip, int cg, long pref, int size, 752 daddr_t (*allocator) (struct inode *, int, daddr_t, int)) 753 { 754 struct m_ext2fs *fs; 755 e4fs_daddr_t result; 756 int i, icg = cg; 757 758 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 759 fs = ip->i_e2fs; 760 /* 761 * 1: preferred cylinder group 762 */ 763 result = (*allocator)(ip, cg, pref, size); 764 if (result) 765 return (result); 766 /* 767 * 2: quadratic rehash 768 */ 769 for (i = 1; i < fs->e2fs_gcount; i *= 2) { 770 cg += i; 771 if (cg >= fs->e2fs_gcount) 772 cg -= fs->e2fs_gcount; 773 result = (*allocator)(ip, cg, 0, size); 774 if (result) 775 return (result); 776 } 777 /* 778 * 3: brute force search 779 * Note that we start at i == 2, since 0 was checked initially, 780 * and 1 is always checked in the quadratic rehash. 781 */ 782 cg = (icg + 2) % fs->e2fs_gcount; 783 for (i = 2; i < fs->e2fs_gcount; i++) { 784 result = (*allocator)(ip, cg, 0, size); 785 if (result) 786 return (result); 787 cg++; 788 if (cg == fs->e2fs_gcount) 789 cg = 0; 790 } 791 return (0); 792 } 793 794 static uint64_t 795 ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg) 796 { 797 798 if (!ext2_cg_has_sb(fs, cg)) 799 return (0); 800 801 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG)) 802 return (le32toh(fs->e2fs->e3fs_first_meta_bg)); 803 804 return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) / 805 EXT2_DESCS_PER_BLOCK(fs)); 806 } 807 808 static uint64_t 809 ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg) 810 { 811 unsigned long metagroup; 812 int first, last; 813 814 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 815 first = metagroup * EXT2_DESCS_PER_BLOCK(fs); 816 last = first + EXT2_DESCS_PER_BLOCK(fs) - 1; 817 818 if (cg == first || cg == first + 1 || cg == last) 819 return (1); 820 821 return (0); 822 } 823 824 uint64_t 825 ext2_cg_number_gdb(struct m_ext2fs *fs, int cg) 826 { 827 unsigned long first_meta_bg, metagroup; 828 829 first_meta_bg = le32toh(fs->e2fs->e3fs_first_meta_bg); 830 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 831 832 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 833 metagroup < first_meta_bg) 834 return (ext2_cg_number_gdb_nometa(fs, cg)); 835 836 return ext2_cg_number_gdb_meta(fs, cg); 837 } 838 839 static int 840 ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg) 841 { 842 int number; 843 844 number = ext2_cg_has_sb(fs, cg); 845 846 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 847 cg < le32toh(fs->e2fs->e3fs_first_meta_bg) * 848 EXT2_DESCS_PER_BLOCK(fs)) { 849 if (number) { 850 number += ext2_cg_number_gdb(fs, cg); 851 number += le16toh(fs->e2fs->e2fs_reserved_ngdb); 852 } 853 } else { 854 number += ext2_cg_number_gdb(fs, cg); 855 } 856 857 return (number); 858 } 859 860 static void 861 ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 862 { 863 int i; 864 865 if (start_bit >= end_bit) 866 return; 867 868 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 869 setbit(bitmap, i); 870 if (i < end_bit) 871 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 872 } 873 874 static int 875 ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block) 876 { 877 878 return ((block - le32toh(fs->e2fs->e2fs_first_dblock)) / 879 fs->e2fs_bsize); 880 } 881 882 static int 883 ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg) 884 { 885 886 return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0); 887 } 888 889 static int 890 ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp) 891 { 892 int bit, bit_max, inodes_per_block; 893 uint64_t start, tmp; 894 895 if (!(le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_BLOCK_UNINIT)) 896 return (0); 897 898 memset(bp->b_data, 0, fs->e2fs_bsize); 899 900 bit_max = ext2_number_base_meta_blocks(fs, cg); 901 if ((bit_max >> 3) >= fs->e2fs_bsize) 902 return (EINVAL); 903 904 for (bit = 0; bit < bit_max; bit++) 905 setbit(bp->b_data, bit); 906 907 start = (uint64_t)cg * fs->e2fs_bpg + 908 le32toh(fs->e2fs->e2fs_first_dblock); 909 910 /* Set bits for block and inode bitmaps, and inode table. */ 911 tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]); 912 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 913 ext2_block_in_group(fs, tmp, cg)) 914 setbit(bp->b_data, tmp - start); 915 916 tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]); 917 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 918 ext2_block_in_group(fs, tmp, cg)) 919 setbit(bp->b_data, tmp - start); 920 921 tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]); 922 inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs); 923 while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + 924 fs->e2fs_ipg / inodes_per_block ) { 925 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 926 ext2_block_in_group(fs, tmp, cg)) 927 setbit(bp->b_data, tmp - start); 928 tmp++; 929 } 930 931 /* 932 * Also if the number of blocks within the group is less than 933 * the blocksize * 8 ( which is the size of bitmap ), set rest 934 * of the block bitmap to 1 935 */ 936 ext2_mark_bitmap_end(fs->e2fs_bpg, fs->e2fs_bsize * 8, 937 bp->b_data); 938 939 /* Clean the flag */ 940 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 941 fs->e2fs_gd[cg].ext4bgd_flags) & ~EXT2_BG_BLOCK_UNINIT); 942 943 return (0); 944 } 945 946 static int 947 ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg) 948 { 949 struct ext2_gd *gd; 950 uint64_t group_first_block; 951 unsigned int offset, max_bit; 952 953 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) { 954 /* 955 * It is not possible to check block bitmap in case of this 956 * feature, because the inode and block bitmaps and inode table 957 * blocks may not be in the group at all. 958 * So, skip check in this case. 959 */ 960 return (0); 961 } 962 963 gd = &fs->e2fs_gd[cg]; 964 max_bit = fs->e2fs_fpg; 965 group_first_block = ((uint64_t)cg) * fs->e2fs_fpg + 966 le32toh(fs->e2fs->e2fs_first_dblock); 967 968 /* Check block bitmap block number */ 969 offset = e2fs_gd_get_b_bitmap(gd) - group_first_block; 970 if (offset >= max_bit || !isset(bp->b_data, offset)) { 971 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 972 "bad block bitmap, group", cg); 973 return (EINVAL); 974 } 975 976 /* Check inode bitmap block number */ 977 offset = e2fs_gd_get_i_bitmap(gd) - group_first_block; 978 if (offset >= max_bit || !isset(bp->b_data, offset)) { 979 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 980 "bad inode bitmap", cg); 981 return (EINVAL); 982 } 983 984 /* Check inode table */ 985 offset = e2fs_gd_get_i_tables(gd) - group_first_block; 986 if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) { 987 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 988 "bad inode table, group", cg); 989 return (EINVAL); 990 } 991 992 return (0); 993 } 994 995 /* 996 * Determine whether a block can be allocated. 997 * 998 * Check to see if a block of the appropriate size is available, 999 * and if it is, allocate it. 1000 */ 1001 static daddr_t 1002 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 1003 { 1004 struct m_ext2fs *fs; 1005 struct buf *bp; 1006 struct ext2mount *ump; 1007 daddr_t bno, runstart, runlen; 1008 int bit, loc, end, error, start; 1009 char *bbp; 1010 /* XXX ondisk32 */ 1011 fs = ip->i_e2fs; 1012 ump = ip->i_ump; 1013 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1014 return (0); 1015 1016 EXT2_UNLOCK(ump); 1017 error = bread(ip->i_devvp, fsbtodb(fs, 1018 e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1019 (int)fs->e2fs_bsize, NOCRED, &bp); 1020 if (error) 1021 goto fail; 1022 1023 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1024 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1025 error = ext2_cg_block_bitmap_init(fs, cg, bp); 1026 if (error) 1027 goto fail; 1028 1029 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1030 } 1031 error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp); 1032 if (error) 1033 goto fail; 1034 1035 error = ext2_b_bitmap_validate(fs,bp, cg); 1036 if (error) 1037 goto fail; 1038 1039 /* 1040 * Check, that another thread did not not allocate the last block in 1041 * this group while we were waiting for the buffer. 1042 */ 1043 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1044 goto fail; 1045 1046 bbp = (char *)bp->b_data; 1047 1048 if (dtog(fs, bpref) != cg) 1049 bpref = 0; 1050 if (bpref != 0) { 1051 bpref = dtogd(fs, bpref); 1052 /* 1053 * if the requested block is available, use it 1054 */ 1055 if (isclr(bbp, bpref)) { 1056 bno = bpref; 1057 goto gotit; 1058 } 1059 } 1060 /* 1061 * no blocks in the requested cylinder, so take next 1062 * available one in this cylinder group. 1063 * first try to get 8 contigous blocks, then fall back to a single 1064 * block. 1065 */ 1066 if (bpref) 1067 start = dtogd(fs, bpref) / NBBY; 1068 else 1069 start = 0; 1070 end = howmany(fs->e2fs_fpg, NBBY); 1071 retry: 1072 runlen = 0; 1073 runstart = 0; 1074 for (loc = start; loc < end; loc++) { 1075 if (bbp[loc] == (char)0xff) { 1076 runlen = 0; 1077 continue; 1078 } 1079 1080 /* Start of a run, find the number of high clear bits. */ 1081 if (runlen == 0) { 1082 bit = fls(bbp[loc]); 1083 runlen = NBBY - bit; 1084 runstart = loc * NBBY + bit; 1085 } else if (bbp[loc] == 0) { 1086 /* Continue a run. */ 1087 runlen += NBBY; 1088 } else { 1089 /* 1090 * Finish the current run. If it isn't long 1091 * enough, start a new one. 1092 */ 1093 bit = ffs(bbp[loc]) - 1; 1094 runlen += bit; 1095 if (runlen >= 8) { 1096 bno = runstart; 1097 goto gotit; 1098 } 1099 1100 /* Run was too short, start a new one. */ 1101 bit = fls(bbp[loc]); 1102 runlen = NBBY - bit; 1103 runstart = loc * NBBY + bit; 1104 } 1105 1106 /* If the current run is long enough, use it. */ 1107 if (runlen >= 8) { 1108 bno = runstart; 1109 goto gotit; 1110 } 1111 } 1112 if (start != 0) { 1113 end = start; 1114 start = 0; 1115 goto retry; 1116 } 1117 bno = ext2_mapsearch(fs, bbp, bpref); 1118 if (bno < 0) 1119 goto fail; 1120 1121 gotit: 1122 #ifdef INVARIANTS 1123 if (isset(bbp, bno)) { 1124 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n", 1125 cg, (intmax_t)bno, fs->e2fs_fsmnt); 1126 panic("ext2fs_alloccg: dup alloc"); 1127 } 1128 #endif 1129 setbit(bbp, bno); 1130 EXT2_LOCK(ump); 1131 ext2_clusteracct(fs, bbp, cg, bno, -1); 1132 fs->e2fs_fbcount--; 1133 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1134 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1135 fs->e2fs_fmod = 1; 1136 EXT2_UNLOCK(ump); 1137 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1138 bdwrite(bp); 1139 return (((uint64_t)cg) * fs->e2fs_fpg + 1140 le32toh(fs->e2fs->e2fs_first_dblock) + bno); 1141 1142 fail: 1143 brelse(bp); 1144 EXT2_LOCK(ump); 1145 return (0); 1146 } 1147 1148 /* 1149 * Determine whether a cluster can be allocated. 1150 */ 1151 static daddr_t 1152 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) 1153 { 1154 struct m_ext2fs *fs; 1155 struct ext2mount *ump; 1156 struct buf *bp; 1157 char *bbp; 1158 int bit, error, got, i, loc, run; 1159 int32_t *lp; 1160 daddr_t bno; 1161 1162 fs = ip->i_e2fs; 1163 ump = ip->i_ump; 1164 1165 if (fs->e2fs_maxcluster[cg] < len) 1166 return (0); 1167 1168 EXT2_UNLOCK(ump); 1169 error = bread(ip->i_devvp, 1170 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1171 (int)fs->e2fs_bsize, NOCRED, &bp); 1172 if (error) 1173 goto fail_lock; 1174 1175 bbp = (char *)bp->b_data; 1176 EXT2_LOCK(ump); 1177 /* 1178 * Check to see if a cluster of the needed size (or bigger) is 1179 * available in this cylinder group. 1180 */ 1181 lp = &fs->e2fs_clustersum[cg].cs_sum[len]; 1182 for (i = len; i <= fs->e2fs_contigsumsize; i++) 1183 if (*lp++ > 0) 1184 break; 1185 if (i > fs->e2fs_contigsumsize) { 1186 /* 1187 * Update the cluster summary information to reflect 1188 * the true maximum-sized cluster so that future cluster 1189 * allocation requests can avoid reading the bitmap only 1190 * to find no cluster. 1191 */ 1192 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; 1193 for (i = len - 1; i > 0; i--) 1194 if (*lp-- > 0) 1195 break; 1196 fs->e2fs_maxcluster[cg] = i; 1197 goto fail; 1198 } 1199 EXT2_UNLOCK(ump); 1200 1201 /* Search the bitmap to find a big enough cluster like in FFS. */ 1202 if (dtog(fs, bpref) != cg) 1203 bpref = 0; 1204 if (bpref != 0) 1205 bpref = dtogd(fs, bpref); 1206 loc = bpref / NBBY; 1207 bit = 1 << (bpref % NBBY); 1208 for (run = 0, got = bpref; got < fs->e2fs_fpg; got++) { 1209 if ((bbp[loc] & bit) != 0) 1210 run = 0; 1211 else { 1212 run++; 1213 if (run == len) 1214 break; 1215 } 1216 if ((got & (NBBY - 1)) != (NBBY - 1)) 1217 bit <<= 1; 1218 else { 1219 loc++; 1220 bit = 1; 1221 } 1222 } 1223 1224 if (got >= fs->e2fs_fpg) 1225 goto fail_lock; 1226 1227 /* Allocate the cluster that we found. */ 1228 for (i = 1; i < len; i++) 1229 if (!isclr(bbp, got - run + i)) 1230 panic("ext2_clusteralloc: map mismatch"); 1231 1232 bno = got - run + 1; 1233 if (bno >= fs->e2fs_fpg) 1234 panic("ext2_clusteralloc: allocated out of group"); 1235 1236 EXT2_LOCK(ump); 1237 for (i = 0; i < len; i += fs->e2fs_fpb) { 1238 setbit(bbp, bno + i); 1239 ext2_clusteracct(fs, bbp, cg, bno + i, -1); 1240 fs->e2fs_fbcount--; 1241 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1242 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1243 } 1244 fs->e2fs_fmod = 1; 1245 EXT2_UNLOCK(ump); 1246 1247 bdwrite(bp); 1248 return (cg * fs->e2fs_fpg + le32toh(fs->e2fs->e2fs_first_dblock) 1249 + bno); 1250 1251 fail_lock: 1252 EXT2_LOCK(ump); 1253 fail: 1254 brelse(bp); 1255 return (0); 1256 } 1257 1258 static int 1259 ext2_zero_inode_table(struct inode *ip, int cg) 1260 { 1261 struct m_ext2fs *fs; 1262 struct buf *bp; 1263 int i, all_blks, used_blks; 1264 1265 fs = ip->i_e2fs; 1266 1267 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_INODE_ZEROED) 1268 return (0); 1269 1270 all_blks = le16toh(fs->e2fs->e2fs_inode_size) * fs->e2fs_ipg / 1271 fs->e2fs_bsize; 1272 1273 used_blks = howmany(fs->e2fs_ipg - 1274 e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]), 1275 fs->e2fs_bsize / EXT2_INODE_SIZE(fs)); 1276 1277 for (i = 0; i < all_blks - used_blks; i++) { 1278 bp = getblk(ip->i_devvp, fsbtodb(fs, 1279 e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i), 1280 fs->e2fs_bsize, 0, 0, 0); 1281 if (!bp) 1282 return (EIO); 1283 1284 vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize); 1285 bawrite(bp); 1286 } 1287 1288 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1289 fs->e2fs_gd[cg].ext4bgd_flags) | EXT2_BG_INODE_ZEROED); 1290 1291 return (0); 1292 } 1293 1294 static void 1295 ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last) 1296 { 1297 int i; 1298 1299 for (i = first; i <= last; i++) 1300 bitmap[i] = 0xff; 1301 } 1302 1303 /* 1304 * Determine whether an inode can be allocated. 1305 * 1306 * Check to see if an inode is available, and if it is, 1307 * allocate it using tode in the specified cylinder group. 1308 */ 1309 static daddr_t 1310 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 1311 { 1312 struct m_ext2fs *fs; 1313 struct buf *bp; 1314 struct ext2mount *ump; 1315 int error, start, len, ifree, ibytes; 1316 char *ibp, *loc; 1317 1318 ipref--; /* to avoid a lot of (ipref -1) */ 1319 if (ipref == -1) 1320 ipref = 0; 1321 fs = ip->i_e2fs; 1322 ump = ip->i_ump; 1323 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) 1324 return (0); 1325 EXT2_UNLOCK(ump); 1326 error = bread(ip->i_devvp, fsbtodb(fs, 1327 e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1328 (int)fs->e2fs_bsize, NOCRED, &bp); 1329 if (error) { 1330 EXT2_LOCK(ump); 1331 return (0); 1332 } 1333 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1334 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1335 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & 1336 EXT2_BG_INODE_UNINIT) { 1337 ibytes = fs->e2fs_ipg / 8; 1338 memset(bp->b_data, 0, ibytes - 1); 1339 ext2_fix_bitmap_tail(bp->b_data, ibytes, 1340 fs->e2fs_bsize - 1); 1341 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1342 fs->e2fs_gd[cg].ext4bgd_flags) & 1343 ~EXT2_BG_INODE_UNINIT); 1344 } 1345 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1346 error = ext2_zero_inode_table(ip, cg); 1347 if (error) { 1348 brelse(bp); 1349 EXT2_LOCK(ump); 1350 return (0); 1351 } 1352 } 1353 error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp); 1354 if (error) { 1355 brelse(bp); 1356 EXT2_LOCK(ump); 1357 return (0); 1358 } 1359 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) { 1360 /* 1361 * Another thread allocated the last i-node in this 1362 * group while we were waiting for the buffer. 1363 */ 1364 brelse(bp); 1365 EXT2_LOCK(ump); 1366 return (0); 1367 } 1368 ibp = (char *)bp->b_data; 1369 if (ipref) { 1370 ipref %= fs->e2fs_ipg; 1371 if (isclr(ibp, ipref)) 1372 goto gotit; 1373 } 1374 start = ipref / NBBY; 1375 len = howmany(fs->e2fs_ipg - ipref, NBBY); 1376 loc = memcchr(&ibp[start], 0xff, len); 1377 if (loc == NULL) { 1378 len = start + 1; 1379 start = 0; 1380 loc = memcchr(&ibp[start], 0xff, len); 1381 if (loc == NULL) { 1382 SDT_PROBE3(ext2fs, , alloc, 1383 ext2_nodealloccg_bmap_corrupted, cg, ipref, 1384 fs->e2fs_fsmnt); 1385 brelse(bp); 1386 EXT2_LOCK(ump); 1387 return (0); 1388 } 1389 } 1390 ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; 1391 gotit: 1392 setbit(ibp, ipref); 1393 EXT2_LOCK(ump); 1394 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1395 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1); 1396 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1397 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1398 ifree = fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]); 1399 if (ipref + 1 > ifree) 1400 e2fs_gd_set_i_unused(&fs->e2fs_gd[cg], 1401 fs->e2fs_ipg - (ipref + 1)); 1402 } 1403 fs->e2fs_ficount--; 1404 fs->e2fs_fmod = 1; 1405 if ((mode & IFMT) == IFDIR) { 1406 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1407 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1); 1408 fs->e2fs_total_dir++; 1409 } 1410 EXT2_UNLOCK(ump); 1411 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1412 bdwrite(bp); 1413 return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1); 1414 } 1415 1416 /* 1417 * Free a block or fragment. 1418 * 1419 */ 1420 void 1421 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 1422 { 1423 struct m_ext2fs *fs; 1424 struct buf *bp; 1425 struct ext2mount *ump; 1426 int cg, error; 1427 char *bbp; 1428 1429 fs = ip->i_e2fs; 1430 ump = ip->i_ump; 1431 cg = dtog(fs, bno); 1432 if (bno >= fs->e2fs_bcount) { 1433 SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, 1434 ip->i_number, bno); 1435 return; 1436 } 1437 error = bread(ip->i_devvp, 1438 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1439 (int)fs->e2fs_bsize, NOCRED, &bp); 1440 if (error) { 1441 return; 1442 } 1443 bbp = (char *)bp->b_data; 1444 bno = dtogd(fs, bno); 1445 if (isclr(bbp, bno)) { 1446 panic("ext2_blkfree: freeing free block %lld, fs=%s", 1447 (long long)bno, fs->e2fs_fsmnt); 1448 } 1449 clrbit(bbp, bno); 1450 EXT2_LOCK(ump); 1451 ext2_clusteracct(fs, bbp, cg, bno, 1); 1452 fs->e2fs_fbcount++; 1453 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1454 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); 1455 fs->e2fs_fmod = 1; 1456 EXT2_UNLOCK(ump); 1457 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1458 bdwrite(bp); 1459 } 1460 1461 /* 1462 * Free an inode. 1463 * 1464 */ 1465 int 1466 ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1467 { 1468 struct m_ext2fs *fs; 1469 struct inode *pip; 1470 struct buf *bp; 1471 struct ext2mount *ump; 1472 int error, cg; 1473 char *ibp; 1474 1475 pip = VTOI(pvp); 1476 fs = pip->i_e2fs; 1477 ump = pip->i_ump; 1478 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1479 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1480 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1481 1482 cg = ino_to_cg(fs, ino); 1483 error = bread(pip->i_devvp, 1484 fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1485 (int)fs->e2fs_bsize, NOCRED, &bp); 1486 if (error) { 1487 return (0); 1488 } 1489 ibp = (char *)bp->b_data; 1490 ino = (ino - 1) % fs->e2fs_ipg; 1491 if (isclr(ibp, ino)) { 1492 SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, 1493 fs->e2fs_fsmnt, ino); 1494 if (fs->e2fs_ronly == 0) 1495 panic("ext2_vfree: freeing free inode"); 1496 } 1497 clrbit(ibp, ino); 1498 EXT2_LOCK(ump); 1499 fs->e2fs_ficount++; 1500 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1501 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); 1502 if ((mode & IFMT) == IFDIR) { 1503 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1504 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); 1505 fs->e2fs_total_dir--; 1506 } 1507 fs->e2fs_fmod = 1; 1508 EXT2_UNLOCK(ump); 1509 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1510 bdwrite(bp); 1511 return (0); 1512 } 1513 1514 /* 1515 * Find a block in the specified cylinder group. 1516 * 1517 * It is a panic if a request is made to find a block if none are 1518 * available. 1519 */ 1520 static daddr_t 1521 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1522 { 1523 char *loc; 1524 int start, len; 1525 1526 /* 1527 * find the fragment by searching through the free block 1528 * map for an appropriate bit pattern 1529 */ 1530 if (bpref) 1531 start = dtogd(fs, bpref) / NBBY; 1532 else 1533 start = 0; 1534 len = howmany(fs->e2fs_fpg, NBBY) - start; 1535 loc = memcchr(&bbp[start], 0xff, len); 1536 if (loc == NULL) { 1537 len = start + 1; 1538 start = 0; 1539 loc = memcchr(&bbp[start], 0xff, len); 1540 if (loc == NULL) { 1541 panic("ext2_mapsearch: map corrupted: start=%d, len=%d," 1542 "fs=%s", start, len, fs->e2fs_fsmnt); 1543 /* NOTREACHED */ 1544 } 1545 } 1546 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1547 } 1548 1549 int 1550 ext2_cg_has_sb(struct m_ext2fs *fs, int cg) 1551 { 1552 int a3, a5, a7; 1553 1554 if (cg == 0) 1555 return (1); 1556 1557 if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { 1558 if (cg == le32toh(fs->e2fs->e4fs_backup_bgs[0]) || 1559 cg == le32toh(fs->e2fs->e4fs_backup_bgs[1])) 1560 return (1); 1561 return (0); 1562 } 1563 1564 if ((cg <= 1) || 1565 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) 1566 return (1); 1567 1568 if (!(cg & 1)) 1569 return (0); 1570 1571 for (a3 = 3, a5 = 5, a7 = 7; 1572 a3 <= cg || a5 <= cg || a7 <= cg; 1573 a3 *= 3, a5 *= 5, a7 *= 7) 1574 if (cg == a3 || cg == a5 || cg == a7) 1575 return (1); 1576 return (0); 1577 } 1578