1 /*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/conf.h> 44 #include <sys/vnode.h> 45 #include <sys/sdt.h> 46 #include <sys/stat.h> 47 #include <sys/mount.h> 48 #include <sys/sysctl.h> 49 #include <sys/syslog.h> 50 #include <sys/buf.h> 51 #include <sys/endian.h> 52 53 #include <fs/ext2fs/fs.h> 54 #include <fs/ext2fs/inode.h> 55 #include <fs/ext2fs/ext2_mount.h> 56 #include <fs/ext2fs/ext2fs.h> 57 #include <fs/ext2fs/ext2_extern.h> 58 59 SDT_PROVIDER_DEFINE(ext2fs); 60 /* 61 * ext2fs trace probe: 62 * arg0: verbosity. Higher numbers give more verbose messages 63 * arg1: Textual message 64 */ 65 SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*"); 66 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc, 67 "ino_t", "e2fs_lbn_t", "e2fs_lbn_t"); 68 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t"); 69 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t"); 70 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int"); 71 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 72 "int", "daddr_t", "char*"); 73 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t"); 74 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t"); 75 76 static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); 77 static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); 78 static u_long ext2_dirpref(struct inode *); 79 static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int, 80 daddr_t (*)(struct inode *, int, daddr_t, 81 int)); 82 static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); 83 static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); 84 85 /* 86 * Allocate a block in the filesystem. 87 * 88 * A preference may be optionally specified. If a preference is given 89 * the following hierarchy is used to allocate a block: 90 * 1) allocate the requested block. 91 * 2) allocate a rotationally optimal block in the same cylinder. 92 * 3) allocate a block in the same cylinder group. 93 * 4) quadratically rehash into other cylinder groups, until an 94 * available block is located. 95 * If no block preference is given the following hierarchy is used 96 * to allocate a block: 97 * 1) allocate a block in the cylinder group that contains the 98 * inode for the file. 99 * 2) quadratically rehash into other cylinder groups, until an 100 * available block is located. 101 */ 102 int 103 ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size, 104 struct ucred *cred, e4fs_daddr_t *bnp) 105 { 106 struct m_ext2fs *fs; 107 struct ext2mount *ump; 108 e4fs_daddr_t bno; 109 int cg; 110 111 *bnp = 0; 112 fs = ip->i_e2fs; 113 ump = ip->i_ump; 114 mtx_assert(EXT2_MTX(ump), MA_OWNED); 115 #ifdef INVARIANTS 116 if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { 117 vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n", 118 (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); 119 panic("ext2_alloc: bad size"); 120 } 121 if (cred == NOCRED) 122 panic("ext2_alloc: missing credential"); 123 #endif /* INVARIANTS */ 124 if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0) 125 goto nospace; 126 if (cred->cr_uid != 0 && 127 fs->e2fs_fbcount < fs->e2fs_rbcount) 128 goto nospace; 129 if (bpref >= fs->e2fs_bcount) 130 bpref = 0; 131 if (bpref == 0) 132 cg = ino_to_cg(fs, ip->i_number); 133 else 134 cg = dtog(fs, bpref); 135 bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 136 ext2_alloccg); 137 if (bno > 0) { 138 /* set next_alloc fields as done in block_getblk */ 139 ip->i_next_alloc_block = lbn; 140 ip->i_next_alloc_goal = bno; 141 142 ip->i_blocks += btodb(fs->e2fs_bsize); 143 ip->i_flag |= IN_CHANGE | IN_UPDATE; 144 *bnp = bno; 145 return (0); 146 } 147 nospace: 148 EXT2_UNLOCK(ump); 149 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block"); 150 return (ENOSPC); 151 } 152 153 /* 154 * Allocate EA's block for inode. 155 */ 156 e4fs_daddr_t 157 ext2_alloc_meta(struct inode *ip) 158 { 159 struct m_ext2fs *fs; 160 daddr_t blk; 161 162 fs = ip->i_e2fs; 163 164 EXT2_LOCK(ip->i_ump); 165 blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize, 166 ext2_alloccg); 167 if (0 == blk) { 168 EXT2_UNLOCK(ip->i_ump); 169 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block"); 170 } 171 172 return (blk); 173 } 174 175 /* 176 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 177 * 178 * The vnode and an array of buffer pointers for a range of sequential 179 * logical blocks to be made contiguous is given. The allocator attempts 180 * to find a range of sequential blocks starting as close as possible to 181 * an fs_rotdelay offset from the end of the allocation for the logical 182 * block immediately preceding the current range. If successful, the 183 * physical block numbers in the buffer pointers and in the inode are 184 * changed to reflect the new allocation. If unsuccessful, the allocation 185 * is left unchanged. The success in doing the reallocation is returned. 186 * Note that the error return is not reflected back to the user. Rather 187 * the previous block allocation will be used. 188 */ 189 190 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 191 "EXT2FS filesystem"); 192 193 static int doasyncfree = 1; 194 195 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, 196 "Use asynchronous writes to update block pointers when freeing blocks"); 197 198 static int doreallocblks = 0; 199 200 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 201 202 int 203 ext2_reallocblks(struct vop_reallocblks_args *ap) 204 { 205 struct m_ext2fs *fs; 206 struct inode *ip; 207 struct vnode *vp; 208 struct buf *sbp, *ebp; 209 uint32_t *bap, *sbap, *ebap; 210 struct ext2mount *ump; 211 struct cluster_save *buflist; 212 struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp; 213 e2fs_lbn_t start_lbn, end_lbn; 214 int soff; 215 e2fs_daddr_t newblk, blkno; 216 int i, len, start_lvl, end_lvl, pref, ssize; 217 218 if (doreallocblks == 0) 219 return (ENOSPC); 220 221 vp = ap->a_vp; 222 ip = VTOI(vp); 223 fs = ip->i_e2fs; 224 ump = ip->i_ump; 225 226 if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) 227 return (ENOSPC); 228 229 buflist = ap->a_buflist; 230 len = buflist->bs_nchildren; 231 start_lbn = buflist->bs_children[0]->b_lblkno; 232 end_lbn = start_lbn + len - 1; 233 #ifdef INVARIANTS 234 for (i = 1; i < len; i++) 235 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 236 panic("ext2_reallocblks: non-cluster"); 237 #endif 238 /* 239 * If the cluster crosses the boundary for the first indirect 240 * block, leave space for the indirect block. Indirect blocks 241 * are initially laid out in a position after the last direct 242 * block. Block reallocation would usually destroy locality by 243 * moving the indirect block out of the way to make room for 244 * data blocks if we didn't compensate here. We should also do 245 * this for other indirect block boundaries, but it is only 246 * important for the first one. 247 */ 248 if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR) 249 return (ENOSPC); 250 /* 251 * If the latest allocation is in a new cylinder group, assume that 252 * the filesystem has decided to move and do not force it back to 253 * the previous cylinder group. 254 */ 255 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 256 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 257 return (ENOSPC); 258 if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || 259 ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) 260 return (ENOSPC); 261 /* 262 * Get the starting offset and block map for the first block. 263 */ 264 if (start_lvl == 0) { 265 sbap = &ip->i_db[0]; 266 soff = start_lbn; 267 } else { 268 idp = &start_ap[start_lvl - 1]; 269 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) { 270 brelse(sbp); 271 return (ENOSPC); 272 } 273 sbap = (u_int *)sbp->b_data; 274 soff = idp->in_off; 275 } 276 /* 277 * If the block range spans two block maps, get the second map. 278 */ 279 ebap = NULL; 280 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 281 ssize = len; 282 } else { 283 #ifdef INVARIANTS 284 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) 285 panic("ext2_reallocblks: start == end"); 286 #endif 287 ssize = len - (idp->in_off + 1); 288 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)) 289 goto fail; 290 ebap = (u_int *)ebp->b_data; 291 } 292 /* 293 * Find the preferred location for the cluster. 294 */ 295 EXT2_LOCK(ump); 296 pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); 297 /* 298 * Search the block map looking for an allocation of the desired size. 299 */ 300 if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, 301 len, ext2_clusteralloc)) == 0) { 302 EXT2_UNLOCK(ump); 303 goto fail; 304 } 305 /* 306 * We have found a new contiguous block. 307 * 308 * First we have to replace the old block pointers with the new 309 * block pointers in the inode and indirect blocks associated 310 * with the file. 311 */ 312 SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc, 313 ip->i_number, start_lbn, end_lbn); 314 blkno = newblk; 315 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 316 if (i == ssize) { 317 bap = ebap; 318 soff = -i; 319 } 320 #ifdef INVARIANTS 321 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) 322 panic("ext2_reallocblks: alloc mismatch"); 323 #endif 324 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap); 325 *bap++ = blkno; 326 } 327 /* 328 * Next we must write out the modified inode and indirect blocks. 329 * For strict correctness, the writes should be synchronous since 330 * the old block values may have been written to disk. In practise 331 * they are almost never written, but if we are concerned about 332 * strict correctness, the `doasyncfree' flag should be set to zero. 333 * 334 * The test on `doasyncfree' should be changed to test a flag 335 * that shows whether the associated buffers and inodes have 336 * been written. The flag should be set when the cluster is 337 * started and cleared whenever the buffer or inode is flushed. 338 * We can then check below to see if it is set, and do the 339 * synchronous write only when it has been cleared. 340 */ 341 if (sbap != &ip->i_db[0]) { 342 if (doasyncfree) 343 bdwrite(sbp); 344 else 345 bwrite(sbp); 346 } else { 347 ip->i_flag |= IN_CHANGE | IN_UPDATE; 348 if (!doasyncfree) 349 ext2_update(vp, 1); 350 } 351 if (ssize < len) { 352 if (doasyncfree) 353 bdwrite(ebp); 354 else 355 bwrite(ebp); 356 } 357 /* 358 * Last, free the old blocks and assign the new blocks to the buffers. 359 */ 360 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 361 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), 362 fs->e2fs_bsize); 363 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 364 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno); 365 } 366 367 return (0); 368 369 fail: 370 if (ssize < len) 371 brelse(ebp); 372 if (sbap != &ip->i_db[0]) 373 brelse(sbp); 374 return (ENOSPC); 375 } 376 377 /* 378 * Allocate an inode in the filesystem. 379 * 380 */ 381 int 382 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 383 { 384 struct timespec ts; 385 struct m_ext2fs *fs; 386 struct ext2mount *ump; 387 struct inode *pip; 388 struct inode *ip; 389 struct vnode *vp; 390 struct thread *td; 391 ino_t ino, ipref; 392 int error, cg; 393 394 *vpp = NULL; 395 pip = VTOI(pvp); 396 fs = pip->i_e2fs; 397 ump = pip->i_ump; 398 399 EXT2_LOCK(ump); 400 if (fs->e2fs_ficount == 0) 401 goto noinodes; 402 /* 403 * If it is a directory then obtain a cylinder group based on 404 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is 405 * always the next inode. 406 */ 407 if ((mode & IFMT) == IFDIR) { 408 cg = ext2_dirpref(pip); 409 if (fs->e2fs_contigdirs[cg] < 255) 410 fs->e2fs_contigdirs[cg]++; 411 } else { 412 cg = ino_to_cg(fs, pip->i_number); 413 if (fs->e2fs_contigdirs[cg] > 0) 414 fs->e2fs_contigdirs[cg]--; 415 } 416 ipref = cg * fs->e2fs_ipg + 1; 417 ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); 418 if (ino == 0) 419 goto noinodes; 420 421 td = curthread; 422 error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 423 if (error || *vpp != NULL) { 424 return (error); 425 } 426 427 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 428 429 /* Allocate a new vnode/inode. */ 430 if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) { 431 free(ip, M_EXT2NODE); 432 return (error); 433 } 434 435 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 436 vp->v_data = ip; 437 ip->i_vnode = vp; 438 ip->i_e2fs = fs = ump->um_e2fs; 439 ip->i_ump = ump; 440 ip->i_number = ino; 441 ip->i_block_group = ino_to_cg(fs, ino); 442 ip->i_next_alloc_block = 0; 443 ip->i_next_alloc_goal = 0; 444 445 error = insmntque(vp, ump->um_mountp); 446 if (error) { 447 free(ip, M_EXT2NODE); 448 return (error); 449 } 450 451 error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); 452 if (error || *vpp != NULL) { 453 *vpp = NULL; 454 free(ip, M_EXT2NODE); 455 return (error); 456 } 457 458 if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) { 459 vput(vp); 460 *vpp = NULL; 461 free(ip, M_EXT2NODE); 462 return (error); 463 } 464 465 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS) 466 && (S_ISREG(mode) || S_ISDIR(mode))) 467 ext4_ext_tree_init(ip); 468 else 469 memset(ip->i_data, 0, sizeof(ip->i_data)); 470 471 /* 472 * Set up a new generation number for this inode. 473 * Avoid zero values. 474 */ 475 do { 476 ip->i_gen = arc4random(); 477 } while (ip->i_gen == 0); 478 479 vfs_timestamp(&ts); 480 ip->i_birthtime = ts.tv_sec; 481 ip->i_birthnsec = ts.tv_nsec; 482 483 vn_set_state(vp, VSTATE_CONSTRUCTED); 484 *vpp = vp; 485 486 return (0); 487 488 noinodes: 489 EXT2_UNLOCK(ump); 490 SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes"); 491 return (ENOSPC); 492 } 493 494 /* 495 * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h 496 */ 497 uint64_t 498 e2fs_gd_get_b_bitmap(struct ext2_gd *gd) 499 { 500 501 return (((uint64_t)(le32toh(gd->ext4bgd_b_bitmap_hi)) << 32) | 502 le32toh(gd->ext2bgd_b_bitmap)); 503 } 504 505 uint64_t 506 e2fs_gd_get_i_bitmap(struct ext2_gd *gd) 507 { 508 509 return (((uint64_t)(le32toh(gd->ext4bgd_i_bitmap_hi)) << 32) | 510 le32toh(gd->ext2bgd_i_bitmap)); 511 } 512 513 uint64_t 514 e2fs_gd_get_i_tables(struct ext2_gd *gd) 515 { 516 517 return (((uint64_t)(le32toh(gd->ext4bgd_i_tables_hi)) << 32) | 518 le32toh(gd->ext2bgd_i_tables)); 519 } 520 521 static uint32_t 522 e2fs_gd_get_nbfree(struct ext2_gd *gd) 523 { 524 525 return (((uint32_t)(le16toh(gd->ext4bgd_nbfree_hi)) << 16) | 526 le16toh(gd->ext2bgd_nbfree)); 527 } 528 529 static void 530 e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val) 531 { 532 533 gd->ext2bgd_nbfree = htole16(val & 0xffff); 534 gd->ext4bgd_nbfree_hi = htole16(val >> 16); 535 } 536 537 static uint32_t 538 e2fs_gd_get_nifree(struct ext2_gd *gd) 539 { 540 541 return (((uint32_t)(le16toh(gd->ext4bgd_nifree_hi)) << 16) | 542 le16toh(gd->ext2bgd_nifree)); 543 } 544 545 static void 546 e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val) 547 { 548 549 gd->ext2bgd_nifree = htole16(val & 0xffff); 550 gd->ext4bgd_nifree_hi = htole16(val >> 16); 551 } 552 553 uint32_t 554 e2fs_gd_get_ndirs(struct ext2_gd *gd) 555 { 556 557 return (((uint32_t)(le16toh(gd->ext4bgd_ndirs_hi)) << 16) | 558 le16toh(gd->ext2bgd_ndirs)); 559 } 560 561 static void 562 e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val) 563 { 564 565 gd->ext2bgd_ndirs = htole16(val & 0xffff); 566 gd->ext4bgd_ndirs_hi = htole16(val >> 16); 567 } 568 569 static uint32_t 570 e2fs_gd_get_i_unused(struct ext2_gd *gd) 571 { 572 return ((uint32_t)(le16toh(gd->ext4bgd_i_unused_hi) << 16) | 573 le16toh(gd->ext4bgd_i_unused)); 574 } 575 576 static void 577 e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val) 578 { 579 580 gd->ext4bgd_i_unused = htole16(val & 0xffff); 581 gd->ext4bgd_i_unused_hi = htole16(val >> 16); 582 } 583 584 /* 585 * Find a cylinder to place a directory. 586 * 587 * The policy implemented by this algorithm is to allocate a 588 * directory inode in the same cylinder group as its parent 589 * directory, but also to reserve space for its files inodes 590 * and data. Restrict the number of directories which may be 591 * allocated one after another in the same cylinder group 592 * without intervening allocation of files. 593 * 594 * If we allocate a first level directory then force allocation 595 * in another cylinder group. 596 * 597 */ 598 static u_long 599 ext2_dirpref(struct inode *pip) 600 { 601 struct m_ext2fs *fs; 602 int cg, prefcg, cgsize; 603 uint64_t avgbfree, minbfree; 604 u_int avgifree, avgndir, curdirsize; 605 u_int minifree, maxndir; 606 u_int mincg, minndir; 607 u_int dirsize, maxcontigdirs; 608 609 mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); 610 fs = pip->i_e2fs; 611 612 avgifree = fs->e2fs_ficount / fs->e2fs_gcount; 613 avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount; 614 avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; 615 616 /* 617 * Force allocation in another cg if creating a first level dir. 618 */ 619 ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); 620 if (ITOV(pip)->v_vflag & VV_ROOT) { 621 prefcg = arc4random() % fs->e2fs_gcount; 622 mincg = prefcg; 623 minndir = fs->e2fs_ipg; 624 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 625 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 626 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 627 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 628 mincg = cg; 629 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 630 } 631 for (cg = 0; cg < prefcg; cg++) 632 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 633 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 634 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 635 mincg = cg; 636 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 637 } 638 return (mincg); 639 } 640 /* 641 * Count various limits which used for 642 * optimal allocation of a directory inode. 643 */ 644 maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); 645 minifree = avgifree - avgifree / 4; 646 if (minifree < 1) 647 minifree = 1; 648 minbfree = avgbfree - avgbfree / 4; 649 if (minbfree < 1) 650 minbfree = 1; 651 cgsize = fs->e2fs_fsize * fs->e2fs_fpg; 652 dirsize = AVGDIRSIZE; 653 curdirsize = avgndir ? 654 (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; 655 if (dirsize < curdirsize) 656 dirsize = curdirsize; 657 maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); 658 maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); 659 if (maxcontigdirs == 0) 660 maxcontigdirs = 1; 661 662 /* 663 * Limit number of dirs in one cg and reserve space for 664 * regular files, but only if we have no deficit in 665 * inodes or space. 666 */ 667 prefcg = ino_to_cg(fs, pip->i_number); 668 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 669 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 670 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 671 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 672 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 673 return (cg); 674 } 675 for (cg = 0; cg < prefcg; cg++) 676 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 677 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 678 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 679 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 680 return (cg); 681 } 682 /* 683 * This is a backstop when we have deficit in space. 684 */ 685 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 686 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 687 return (cg); 688 for (cg = 0; cg < prefcg; cg++) 689 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 690 break; 691 return (cg); 692 } 693 694 /* 695 * Select the desired position for the next block in a file. 696 * 697 * we try to mimic what Remy does in inode_getblk/block_getblk 698 * 699 * we note: blocknr == 0 means that we're about to allocate either 700 * a direct block or a pointer block at the first level of indirection 701 * (In other words, stuff that will go in i_db[] or i_ib[]) 702 * 703 * blocknr != 0 means that we're allocating a block that is none 704 * of the above. Then, blocknr tells us the number of the block 705 * that will hold the pointer 706 */ 707 e4fs_daddr_t 708 ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, 709 e2fs_daddr_t blocknr) 710 { 711 struct m_ext2fs *fs; 712 int tmp; 713 714 fs = ip->i_e2fs; 715 716 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 717 718 /* 719 * If the next block is actually what we thought it is, then set the 720 * goal to what we thought it should be. 721 */ 722 if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) 723 return ip->i_next_alloc_goal; 724 725 /* 726 * Now check whether we were provided with an array that basically 727 * tells us previous blocks to which we want to stay close. 728 */ 729 if (bap) 730 for (tmp = indx - 1; tmp >= 0; tmp--) 731 if (bap[tmp]) 732 return (le32toh(bap[tmp])); 733 734 /* 735 * Else lets fall back to the blocknr or, if there is none, follow 736 * the rule that a block should be allocated near its inode. 737 */ 738 return (blocknr ? blocknr : 739 (e2fs_daddr_t)(ip->i_block_group * 740 EXT2_BLOCKS_PER_GROUP(fs)) + le32toh(fs->e2fs->e2fs_first_dblock)); 741 } 742 743 /* 744 * Implement the cylinder overflow algorithm. 745 * 746 * The policy implemented by this algorithm is: 747 * 1) allocate the block in its requested cylinder group. 748 * 2) quadratically rehash on the cylinder group number. 749 * 3) brute force search for a free block. 750 */ 751 static e4fs_daddr_t 752 ext2_hashalloc(struct inode *ip, int cg, long pref, int size, 753 daddr_t (*allocator) (struct inode *, int, daddr_t, int)) 754 { 755 struct m_ext2fs *fs; 756 e4fs_daddr_t result; 757 int i, icg = cg; 758 759 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 760 fs = ip->i_e2fs; 761 /* 762 * 1: preferred cylinder group 763 */ 764 result = (*allocator)(ip, cg, pref, size); 765 if (result) 766 return (result); 767 /* 768 * 2: quadratic rehash 769 */ 770 for (i = 1; i < fs->e2fs_gcount; i *= 2) { 771 cg += i; 772 if (cg >= fs->e2fs_gcount) 773 cg -= fs->e2fs_gcount; 774 result = (*allocator)(ip, cg, 0, size); 775 if (result) 776 return (result); 777 } 778 /* 779 * 3: brute force search 780 * Note that we start at i == 2, since 0 was checked initially, 781 * and 1 is always checked in the quadratic rehash. 782 */ 783 cg = (icg + 2) % fs->e2fs_gcount; 784 for (i = 2; i < fs->e2fs_gcount; i++) { 785 result = (*allocator)(ip, cg, 0, size); 786 if (result) 787 return (result); 788 cg++; 789 if (cg == fs->e2fs_gcount) 790 cg = 0; 791 } 792 return (0); 793 } 794 795 static uint64_t 796 ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg) 797 { 798 799 if (!ext2_cg_has_sb(fs, cg)) 800 return (0); 801 802 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG)) 803 return (le32toh(fs->e2fs->e3fs_first_meta_bg)); 804 805 return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) / 806 EXT2_DESCS_PER_BLOCK(fs)); 807 } 808 809 static uint64_t 810 ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg) 811 { 812 unsigned long metagroup; 813 int first, last; 814 815 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 816 first = metagroup * EXT2_DESCS_PER_BLOCK(fs); 817 last = first + EXT2_DESCS_PER_BLOCK(fs) - 1; 818 819 if (cg == first || cg == first + 1 || cg == last) 820 return (1); 821 822 return (0); 823 } 824 825 uint64_t 826 ext2_cg_number_gdb(struct m_ext2fs *fs, int cg) 827 { 828 unsigned long first_meta_bg, metagroup; 829 830 first_meta_bg = le32toh(fs->e2fs->e3fs_first_meta_bg); 831 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 832 833 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 834 metagroup < first_meta_bg) 835 return (ext2_cg_number_gdb_nometa(fs, cg)); 836 837 return ext2_cg_number_gdb_meta(fs, cg); 838 } 839 840 static int 841 ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg) 842 { 843 int number; 844 845 number = ext2_cg_has_sb(fs, cg); 846 847 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 848 cg < le32toh(fs->e2fs->e3fs_first_meta_bg) * 849 EXT2_DESCS_PER_BLOCK(fs)) { 850 if (number) { 851 number += ext2_cg_number_gdb(fs, cg); 852 number += le16toh(fs->e2fs->e2fs_reserved_ngdb); 853 } 854 } else { 855 number += ext2_cg_number_gdb(fs, cg); 856 } 857 858 return (number); 859 } 860 861 static void 862 ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 863 { 864 int i; 865 866 if (start_bit >= end_bit) 867 return; 868 869 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 870 setbit(bitmap, i); 871 if (i < end_bit) 872 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 873 } 874 875 static int 876 ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block) 877 { 878 879 return ((block - le32toh(fs->e2fs->e2fs_first_dblock)) / 880 fs->e2fs_bsize); 881 } 882 883 static int 884 ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg) 885 { 886 887 return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0); 888 } 889 890 static int 891 ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp) 892 { 893 int bit, bit_max, inodes_per_block; 894 uint64_t start, tmp; 895 896 if (!(le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_BLOCK_UNINIT)) 897 return (0); 898 899 memset(bp->b_data, 0, fs->e2fs_bsize); 900 901 bit_max = ext2_number_base_meta_blocks(fs, cg); 902 if ((bit_max >> 3) >= fs->e2fs_bsize) 903 return (EINVAL); 904 905 for (bit = 0; bit < bit_max; bit++) 906 setbit(bp->b_data, bit); 907 908 start = (uint64_t)cg * fs->e2fs_bpg + 909 le32toh(fs->e2fs->e2fs_first_dblock); 910 911 /* Set bits for block and inode bitmaps, and inode table. */ 912 tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]); 913 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 914 ext2_block_in_group(fs, tmp, cg)) 915 setbit(bp->b_data, tmp - start); 916 917 tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]); 918 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 919 ext2_block_in_group(fs, tmp, cg)) 920 setbit(bp->b_data, tmp - start); 921 922 tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]); 923 inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs); 924 while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + 925 fs->e2fs_ipg / inodes_per_block ) { 926 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 927 ext2_block_in_group(fs, tmp, cg)) 928 setbit(bp->b_data, tmp - start); 929 tmp++; 930 } 931 932 /* 933 * Also if the number of blocks within the group is less than 934 * the blocksize * 8 ( which is the size of bitmap ), set rest 935 * of the block bitmap to 1 936 */ 937 ext2_mark_bitmap_end(fs->e2fs_bpg, fs->e2fs_bsize * 8, 938 bp->b_data); 939 940 /* Clean the flag */ 941 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 942 fs->e2fs_gd[cg].ext4bgd_flags) & ~EXT2_BG_BLOCK_UNINIT); 943 944 return (0); 945 } 946 947 static int 948 ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg) 949 { 950 struct ext2_gd *gd; 951 uint64_t group_first_block; 952 unsigned int offset, max_bit; 953 954 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) { 955 /* 956 * It is not possible to check block bitmap in case of this 957 * feature, because the inode and block bitmaps and inode table 958 * blocks may not be in the group at all. 959 * So, skip check in this case. 960 */ 961 return (0); 962 } 963 964 gd = &fs->e2fs_gd[cg]; 965 max_bit = fs->e2fs_fpg; 966 group_first_block = ((uint64_t)cg) * fs->e2fs_fpg + 967 le32toh(fs->e2fs->e2fs_first_dblock); 968 969 /* Check block bitmap block number */ 970 offset = e2fs_gd_get_b_bitmap(gd) - group_first_block; 971 if (offset >= max_bit || !isset(bp->b_data, offset)) { 972 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 973 "bad block bitmap, group", cg); 974 return (EINVAL); 975 } 976 977 /* Check inode bitmap block number */ 978 offset = e2fs_gd_get_i_bitmap(gd) - group_first_block; 979 if (offset >= max_bit || !isset(bp->b_data, offset)) { 980 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 981 "bad inode bitmap", cg); 982 return (EINVAL); 983 } 984 985 /* Check inode table */ 986 offset = e2fs_gd_get_i_tables(gd) - group_first_block; 987 if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) { 988 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 989 "bad inode table, group", cg); 990 return (EINVAL); 991 } 992 993 return (0); 994 } 995 996 /* 997 * Determine whether a block can be allocated. 998 * 999 * Check to see if a block of the appropriate size is available, 1000 * and if it is, allocate it. 1001 */ 1002 static daddr_t 1003 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 1004 { 1005 struct m_ext2fs *fs; 1006 struct buf *bp; 1007 struct ext2mount *ump; 1008 daddr_t bno, runstart, runlen; 1009 int bit, loc, end, error, start; 1010 char *bbp; 1011 /* XXX ondisk32 */ 1012 fs = ip->i_e2fs; 1013 ump = ip->i_ump; 1014 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1015 return (0); 1016 1017 EXT2_UNLOCK(ump); 1018 error = bread(ip->i_devvp, fsbtodb(fs, 1019 e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1020 (int)fs->e2fs_bsize, NOCRED, &bp); 1021 if (error) 1022 goto fail; 1023 1024 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1025 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1026 error = ext2_cg_block_bitmap_init(fs, cg, bp); 1027 if (error) 1028 goto fail; 1029 1030 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1031 } 1032 error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp); 1033 if (error) 1034 goto fail; 1035 1036 error = ext2_b_bitmap_validate(fs,bp, cg); 1037 if (error) 1038 goto fail; 1039 1040 /* 1041 * Check, that another thread did not not allocate the last block in 1042 * this group while we were waiting for the buffer. 1043 */ 1044 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1045 goto fail; 1046 1047 bbp = (char *)bp->b_data; 1048 1049 if (dtog(fs, bpref) != cg) 1050 bpref = 0; 1051 if (bpref != 0) { 1052 bpref = dtogd(fs, bpref); 1053 /* 1054 * if the requested block is available, use it 1055 */ 1056 if (isclr(bbp, bpref)) { 1057 bno = bpref; 1058 goto gotit; 1059 } 1060 } 1061 /* 1062 * no blocks in the requested cylinder, so take next 1063 * available one in this cylinder group. 1064 * first try to get 8 contigous blocks, then fall back to a single 1065 * block. 1066 */ 1067 if (bpref) 1068 start = dtogd(fs, bpref) / NBBY; 1069 else 1070 start = 0; 1071 end = howmany(fs->e2fs_fpg, NBBY); 1072 retry: 1073 runlen = 0; 1074 runstart = 0; 1075 for (loc = start; loc < end; loc++) { 1076 if (bbp[loc] == (char)0xff) { 1077 runlen = 0; 1078 continue; 1079 } 1080 1081 /* Start of a run, find the number of high clear bits. */ 1082 if (runlen == 0) { 1083 bit = fls(bbp[loc]); 1084 runlen = NBBY - bit; 1085 runstart = loc * NBBY + bit; 1086 } else if (bbp[loc] == 0) { 1087 /* Continue a run. */ 1088 runlen += NBBY; 1089 } else { 1090 /* 1091 * Finish the current run. If it isn't long 1092 * enough, start a new one. 1093 */ 1094 bit = ffs(bbp[loc]) - 1; 1095 runlen += bit; 1096 if (runlen >= 8) { 1097 bno = runstart; 1098 goto gotit; 1099 } 1100 1101 /* Run was too short, start a new one. */ 1102 bit = fls(bbp[loc]); 1103 runlen = NBBY - bit; 1104 runstart = loc * NBBY + bit; 1105 } 1106 1107 /* If the current run is long enough, use it. */ 1108 if (runlen >= 8) { 1109 bno = runstart; 1110 goto gotit; 1111 } 1112 } 1113 if (start != 0) { 1114 end = start; 1115 start = 0; 1116 goto retry; 1117 } 1118 bno = ext2_mapsearch(fs, bbp, bpref); 1119 if (bno < 0) 1120 goto fail; 1121 1122 gotit: 1123 #ifdef INVARIANTS 1124 if (isset(bbp, bno)) { 1125 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n", 1126 cg, (intmax_t)bno, fs->e2fs_fsmnt); 1127 panic("ext2fs_alloccg: dup alloc"); 1128 } 1129 #endif 1130 setbit(bbp, bno); 1131 EXT2_LOCK(ump); 1132 ext2_clusteracct(fs, bbp, cg, bno, -1); 1133 fs->e2fs_fbcount--; 1134 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1135 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1136 fs->e2fs_fmod = 1; 1137 EXT2_UNLOCK(ump); 1138 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1139 bdwrite(bp); 1140 return (((uint64_t)cg) * fs->e2fs_fpg + 1141 le32toh(fs->e2fs->e2fs_first_dblock) + bno); 1142 1143 fail: 1144 brelse(bp); 1145 EXT2_LOCK(ump); 1146 return (0); 1147 } 1148 1149 /* 1150 * Determine whether a cluster can be allocated. 1151 */ 1152 static daddr_t 1153 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) 1154 { 1155 struct m_ext2fs *fs; 1156 struct ext2mount *ump; 1157 struct buf *bp; 1158 char *bbp; 1159 int bit, error, got, i, loc, run; 1160 int32_t *lp; 1161 daddr_t bno; 1162 1163 fs = ip->i_e2fs; 1164 ump = ip->i_ump; 1165 1166 if (fs->e2fs_maxcluster[cg] < len) 1167 return (0); 1168 1169 EXT2_UNLOCK(ump); 1170 error = bread(ip->i_devvp, 1171 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1172 (int)fs->e2fs_bsize, NOCRED, &bp); 1173 if (error) 1174 goto fail_lock; 1175 1176 bbp = (char *)bp->b_data; 1177 EXT2_LOCK(ump); 1178 /* 1179 * Check to see if a cluster of the needed size (or bigger) is 1180 * available in this cylinder group. 1181 */ 1182 lp = &fs->e2fs_clustersum[cg].cs_sum[len]; 1183 for (i = len; i <= fs->e2fs_contigsumsize; i++) 1184 if (*lp++ > 0) 1185 break; 1186 if (i > fs->e2fs_contigsumsize) { 1187 /* 1188 * Update the cluster summary information to reflect 1189 * the true maximum-sized cluster so that future cluster 1190 * allocation requests can avoid reading the bitmap only 1191 * to find no cluster. 1192 */ 1193 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; 1194 for (i = len - 1; i > 0; i--) 1195 if (*lp-- > 0) 1196 break; 1197 fs->e2fs_maxcluster[cg] = i; 1198 goto fail; 1199 } 1200 EXT2_UNLOCK(ump); 1201 1202 /* Search the bitmap to find a big enough cluster like in FFS. */ 1203 if (dtog(fs, bpref) != cg) 1204 bpref = 0; 1205 if (bpref != 0) 1206 bpref = dtogd(fs, bpref); 1207 loc = bpref / NBBY; 1208 bit = 1 << (bpref % NBBY); 1209 for (run = 0, got = bpref; got < fs->e2fs_fpg; got++) { 1210 if ((bbp[loc] & bit) != 0) 1211 run = 0; 1212 else { 1213 run++; 1214 if (run == len) 1215 break; 1216 } 1217 if ((got & (NBBY - 1)) != (NBBY - 1)) 1218 bit <<= 1; 1219 else { 1220 loc++; 1221 bit = 1; 1222 } 1223 } 1224 1225 if (got >= fs->e2fs_fpg) 1226 goto fail_lock; 1227 1228 /* Allocate the cluster that we found. */ 1229 for (i = 1; i < len; i++) 1230 if (!isclr(bbp, got - run + i)) 1231 panic("ext2_clusteralloc: map mismatch"); 1232 1233 bno = got - run + 1; 1234 if (bno >= fs->e2fs_fpg) 1235 panic("ext2_clusteralloc: allocated out of group"); 1236 1237 EXT2_LOCK(ump); 1238 for (i = 0; i < len; i += fs->e2fs_fpb) { 1239 setbit(bbp, bno + i); 1240 ext2_clusteracct(fs, bbp, cg, bno + i, -1); 1241 fs->e2fs_fbcount--; 1242 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1243 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1244 } 1245 fs->e2fs_fmod = 1; 1246 EXT2_UNLOCK(ump); 1247 1248 bdwrite(bp); 1249 return (cg * fs->e2fs_fpg + le32toh(fs->e2fs->e2fs_first_dblock) 1250 + bno); 1251 1252 fail_lock: 1253 EXT2_LOCK(ump); 1254 fail: 1255 brelse(bp); 1256 return (0); 1257 } 1258 1259 static int 1260 ext2_zero_inode_table(struct inode *ip, int cg) 1261 { 1262 struct m_ext2fs *fs; 1263 struct buf *bp; 1264 int i, all_blks, used_blks; 1265 1266 fs = ip->i_e2fs; 1267 1268 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_INODE_ZEROED) 1269 return (0); 1270 1271 all_blks = le16toh(fs->e2fs->e2fs_inode_size) * fs->e2fs_ipg / 1272 fs->e2fs_bsize; 1273 1274 used_blks = howmany(fs->e2fs_ipg - 1275 e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]), 1276 fs->e2fs_bsize / EXT2_INODE_SIZE(fs)); 1277 1278 for (i = 0; i < all_blks - used_blks; i++) { 1279 bp = getblk(ip->i_devvp, fsbtodb(fs, 1280 e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i), 1281 fs->e2fs_bsize, 0, 0, 0); 1282 if (!bp) 1283 return (EIO); 1284 1285 vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize); 1286 bawrite(bp); 1287 } 1288 1289 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1290 fs->e2fs_gd[cg].ext4bgd_flags) | EXT2_BG_INODE_ZEROED); 1291 1292 return (0); 1293 } 1294 1295 static void 1296 ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last) 1297 { 1298 int i; 1299 1300 for (i = first; i <= last; i++) 1301 bitmap[i] = 0xff; 1302 } 1303 1304 /* 1305 * Determine whether an inode can be allocated. 1306 * 1307 * Check to see if an inode is available, and if it is, 1308 * allocate it using tode in the specified cylinder group. 1309 */ 1310 static daddr_t 1311 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 1312 { 1313 struct m_ext2fs *fs; 1314 struct buf *bp; 1315 struct ext2mount *ump; 1316 int error, start, len, ifree, ibytes; 1317 char *ibp, *loc; 1318 1319 ipref--; /* to avoid a lot of (ipref -1) */ 1320 if (ipref == -1) 1321 ipref = 0; 1322 fs = ip->i_e2fs; 1323 ump = ip->i_ump; 1324 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) 1325 return (0); 1326 EXT2_UNLOCK(ump); 1327 error = bread(ip->i_devvp, fsbtodb(fs, 1328 e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1329 (int)fs->e2fs_bsize, NOCRED, &bp); 1330 if (error) { 1331 EXT2_LOCK(ump); 1332 return (0); 1333 } 1334 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1335 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1336 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & 1337 EXT2_BG_INODE_UNINIT) { 1338 ibytes = fs->e2fs_ipg / 8; 1339 memset(bp->b_data, 0, ibytes - 1); 1340 ext2_fix_bitmap_tail(bp->b_data, ibytes, 1341 fs->e2fs_bsize - 1); 1342 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1343 fs->e2fs_gd[cg].ext4bgd_flags) & 1344 ~EXT2_BG_INODE_UNINIT); 1345 } 1346 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1347 error = ext2_zero_inode_table(ip, cg); 1348 if (error) { 1349 brelse(bp); 1350 EXT2_LOCK(ump); 1351 return (0); 1352 } 1353 } 1354 error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp); 1355 if (error) { 1356 brelse(bp); 1357 EXT2_LOCK(ump); 1358 return (0); 1359 } 1360 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) { 1361 /* 1362 * Another thread allocated the last i-node in this 1363 * group while we were waiting for the buffer. 1364 */ 1365 brelse(bp); 1366 EXT2_LOCK(ump); 1367 return (0); 1368 } 1369 ibp = (char *)bp->b_data; 1370 if (ipref) { 1371 ipref %= fs->e2fs_ipg; 1372 if (isclr(ibp, ipref)) 1373 goto gotit; 1374 } 1375 start = ipref / NBBY; 1376 len = howmany(fs->e2fs_ipg - ipref, NBBY); 1377 loc = memcchr(&ibp[start], 0xff, len); 1378 if (loc == NULL) { 1379 len = start + 1; 1380 start = 0; 1381 loc = memcchr(&ibp[start], 0xff, len); 1382 if (loc == NULL) { 1383 SDT_PROBE3(ext2fs, , alloc, 1384 ext2_nodealloccg_bmap_corrupted, cg, ipref, 1385 fs->e2fs_fsmnt); 1386 brelse(bp); 1387 EXT2_LOCK(ump); 1388 return (0); 1389 } 1390 } 1391 ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; 1392 gotit: 1393 setbit(ibp, ipref); 1394 EXT2_LOCK(ump); 1395 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1396 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1); 1397 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1398 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1399 ifree = fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]); 1400 if (ipref + 1 > ifree) 1401 e2fs_gd_set_i_unused(&fs->e2fs_gd[cg], 1402 fs->e2fs_ipg - (ipref + 1)); 1403 } 1404 fs->e2fs_ficount--; 1405 fs->e2fs_fmod = 1; 1406 if ((mode & IFMT) == IFDIR) { 1407 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1408 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1); 1409 fs->e2fs_total_dir++; 1410 } 1411 EXT2_UNLOCK(ump); 1412 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1413 bdwrite(bp); 1414 return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1); 1415 } 1416 1417 /* 1418 * Free a block or fragment. 1419 * 1420 */ 1421 void 1422 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 1423 { 1424 struct m_ext2fs *fs; 1425 struct buf *bp; 1426 struct ext2mount *ump; 1427 int cg, error; 1428 char *bbp; 1429 1430 fs = ip->i_e2fs; 1431 ump = ip->i_ump; 1432 cg = dtog(fs, bno); 1433 if (bno >= fs->e2fs_bcount) { 1434 SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, 1435 ip->i_number, bno); 1436 return; 1437 } 1438 error = bread(ip->i_devvp, 1439 fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1440 (int)fs->e2fs_bsize, NOCRED, &bp); 1441 if (error) { 1442 return; 1443 } 1444 bbp = (char *)bp->b_data; 1445 bno = dtogd(fs, bno); 1446 if (isclr(bbp, bno)) { 1447 panic("ext2_blkfree: freeing free block %lld, fs=%s", 1448 (long long)bno, fs->e2fs_fsmnt); 1449 } 1450 clrbit(bbp, bno); 1451 EXT2_LOCK(ump); 1452 ext2_clusteracct(fs, bbp, cg, bno, 1); 1453 fs->e2fs_fbcount++; 1454 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1455 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); 1456 fs->e2fs_fmod = 1; 1457 EXT2_UNLOCK(ump); 1458 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1459 bdwrite(bp); 1460 } 1461 1462 /* 1463 * Free an inode. 1464 * 1465 */ 1466 int 1467 ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1468 { 1469 struct m_ext2fs *fs; 1470 struct inode *pip; 1471 struct buf *bp; 1472 struct ext2mount *ump; 1473 int error, cg; 1474 char *ibp; 1475 1476 pip = VTOI(pvp); 1477 fs = pip->i_e2fs; 1478 ump = pip->i_ump; 1479 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1480 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1481 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1482 1483 cg = ino_to_cg(fs, ino); 1484 error = bread(pip->i_devvp, 1485 fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1486 (int)fs->e2fs_bsize, NOCRED, &bp); 1487 if (error) { 1488 return (0); 1489 } 1490 ibp = (char *)bp->b_data; 1491 ino = (ino - 1) % fs->e2fs_ipg; 1492 if (isclr(ibp, ino)) { 1493 SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, 1494 fs->e2fs_fsmnt, ino); 1495 if (fs->e2fs_ronly == 0) 1496 panic("ext2_vfree: freeing free inode"); 1497 } 1498 clrbit(ibp, ino); 1499 EXT2_LOCK(ump); 1500 fs->e2fs_ficount++; 1501 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1502 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); 1503 if ((mode & IFMT) == IFDIR) { 1504 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1505 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); 1506 fs->e2fs_total_dir--; 1507 } 1508 fs->e2fs_fmod = 1; 1509 EXT2_UNLOCK(ump); 1510 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1511 bdwrite(bp); 1512 return (0); 1513 } 1514 1515 /* 1516 * Find a block in the specified cylinder group. 1517 * 1518 * It is a panic if a request is made to find a block if none are 1519 * available. 1520 */ 1521 static daddr_t 1522 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1523 { 1524 char *loc; 1525 int start, len; 1526 1527 /* 1528 * find the fragment by searching through the free block 1529 * map for an appropriate bit pattern 1530 */ 1531 if (bpref) 1532 start = dtogd(fs, bpref) / NBBY; 1533 else 1534 start = 0; 1535 len = howmany(fs->e2fs_fpg, NBBY) - start; 1536 loc = memcchr(&bbp[start], 0xff, len); 1537 if (loc == NULL) { 1538 len = start + 1; 1539 start = 0; 1540 loc = memcchr(&bbp[start], 0xff, len); 1541 if (loc == NULL) { 1542 panic("ext2_mapsearch: map corrupted: start=%d, len=%d," 1543 "fs=%s", start, len, fs->e2fs_fsmnt); 1544 /* NOTREACHED */ 1545 } 1546 } 1547 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1548 } 1549 1550 int 1551 ext2_cg_has_sb(struct m_ext2fs *fs, int cg) 1552 { 1553 int a3, a5, a7; 1554 1555 if (cg == 0) 1556 return (1); 1557 1558 if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { 1559 if (cg == le32toh(fs->e2fs->e4fs_backup_bgs[0]) || 1560 cg == le32toh(fs->e2fs->e4fs_backup_bgs[1])) 1561 return (1); 1562 return (0); 1563 } 1564 1565 if ((cg <= 1) || 1566 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) 1567 return (1); 1568 1569 if (!(cg & 1)) 1570 return (0); 1571 1572 for (a3 = 3, a5 = 5, a7 = 7; 1573 a3 <= cg || a5 <= cg || a7 <= cg; 1574 a3 *= 3, a5 *= 5, a7 *= 7) 1575 if (cg == a3 || cg == a5 || cg == a7) 1576 return (1); 1577 return (0); 1578 } 1579