1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/kernel.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/bio.h> 50 #include <sys/buf.h> 51 #include <sys/conf.h> 52 #include <sys/endian.h> 53 #include <sys/fcntl.h> 54 #include <sys/malloc.h> 55 #include <sys/sdt.h> 56 #include <sys/stat.h> 57 #include <sys/mutex.h> 58 59 #include <geom/geom.h> 60 #include <geom/geom_vfs.h> 61 62 #include <fs/ext2fs/fs.h> 63 #include <fs/ext2fs/ext2_mount.h> 64 #include <fs/ext2fs/inode.h> 65 66 #include <fs/ext2fs/ext2fs.h> 67 #include <fs/ext2fs/ext2_dinode.h> 68 #include <fs/ext2fs/ext2_extern.h> 69 #include <fs/ext2fs/ext2_extents.h> 70 71 SDT_PROVIDER_DECLARE(ext2fs); 72 /* 73 * ext2fs trace probe: 74 * arg0: verbosity. Higher numbers give more verbose messages 75 * arg1: Textual message 76 */ 77 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 78 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 79 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 80 81 static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 82 static int ext2_mountfs(struct vnode *, struct mount *); 83 static int ext2_reload(struct mount *mp, struct thread *td); 84 static int ext2_sbupdate(struct ext2mount *, int); 85 static int ext2_cgupdate(struct ext2mount *, int); 86 static vfs_unmount_t ext2_unmount; 87 static vfs_root_t ext2_root; 88 static vfs_statfs_t ext2_statfs; 89 static vfs_sync_t ext2_sync; 90 static vfs_vget_t ext2_vget; 91 static vfs_fhtovp_t ext2_fhtovp; 92 static vfs_mount_t ext2_mount; 93 94 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 95 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 96 97 static struct vfsops ext2fs_vfsops = { 98 .vfs_fhtovp = ext2_fhtovp, 99 .vfs_mount = ext2_mount, 100 .vfs_root = ext2_root, /* root inode via vget */ 101 .vfs_statfs = ext2_statfs, 102 .vfs_sync = ext2_sync, 103 .vfs_unmount = ext2_unmount, 104 .vfs_vget = ext2_vget, 105 }; 106 107 VFS_SET(ext2fs_vfsops, ext2fs, 0); 108 109 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 110 int ronly); 111 static int ext2_compute_sb_data(struct vnode * devvp, 112 struct ext2fs * es, struct m_ext2fs * fs); 113 114 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", 115 "noclusterw", "noexec", "export", "force", "from", "multilabel", 116 "suiddir", "nosymfollow", "sync", "union", NULL }; 117 118 /* 119 * VFS Operations. 120 * 121 * mount system call 122 */ 123 static int 124 ext2_mount(struct mount *mp) 125 { 126 struct vfsoptlist *opts; 127 struct vnode *devvp; 128 struct thread *td; 129 struct ext2mount *ump = NULL; 130 struct m_ext2fs *fs; 131 struct nameidata nd, *ndp = &nd; 132 accmode_t accmode; 133 char *path, *fspec; 134 int error, flags, len; 135 136 td = curthread; 137 opts = mp->mnt_optnew; 138 139 if (vfs_filteropt(opts, ext2_opts)) 140 return (EINVAL); 141 142 vfs_getopt(opts, "fspath", (void **)&path, NULL); 143 /* Double-check the length of path.. */ 144 if (strlen(path) >= MAXMNTLEN) 145 return (ENAMETOOLONG); 146 147 fspec = NULL; 148 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 149 if (!error && fspec[len - 1] != '\0') 150 return (EINVAL); 151 152 /* 153 * If updating, check whether changing from read-only to 154 * read/write; if there is no device name, that's all we do. 155 */ 156 if (mp->mnt_flag & MNT_UPDATE) { 157 ump = VFSTOEXT2(mp); 158 fs = ump->um_e2fs; 159 error = 0; 160 if (fs->e2fs_ronly == 0 && 161 vfs_flagopt(opts, "ro", NULL, 0)) { 162 error = VFS_SYNC(mp, MNT_WAIT); 163 if (error) 164 return (error); 165 flags = WRITECLOSE; 166 if (mp->mnt_flag & MNT_FORCE) 167 flags |= FORCECLOSE; 168 error = ext2_flushfiles(mp, flags, td); 169 if (error == 0 && fs->e2fs_wasvalid && 170 ext2_cgupdate(ump, MNT_WAIT) == 0) { 171 fs->e2fs->e2fs_state = 172 htole16((le16toh(fs->e2fs->e2fs_state) | 173 E2FS_ISCLEAN)); 174 ext2_sbupdate(ump, MNT_WAIT); 175 } 176 fs->e2fs_ronly = 1; 177 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 178 g_topology_lock(); 179 g_access(ump->um_cp, 0, -1, 0); 180 g_topology_unlock(); 181 } 182 if (!error && (mp->mnt_flag & MNT_RELOAD)) 183 error = ext2_reload(mp, td); 184 if (error) 185 return (error); 186 devvp = ump->um_devvp; 187 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) { 188 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 189 return (EPERM); 190 191 /* 192 * If upgrade to read-write by non-root, then verify 193 * that user has necessary permissions on the device. 194 */ 195 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 196 error = VOP_ACCESS(devvp, VREAD | VWRITE, 197 td->td_ucred, td); 198 if (error) 199 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 200 if (error) { 201 VOP_UNLOCK(devvp); 202 return (error); 203 } 204 VOP_UNLOCK(devvp); 205 g_topology_lock(); 206 error = g_access(ump->um_cp, 0, 1, 0); 207 g_topology_unlock(); 208 if (error) 209 return (error); 210 211 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 212 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 213 if (mp->mnt_flag & MNT_FORCE) { 214 printf( 215 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 216 } else { 217 printf( 218 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 219 fs->e2fs_fsmnt); 220 return (EPERM); 221 } 222 } 223 fs->e2fs->e2fs_state = 224 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 225 (void)ext2_cgupdate(ump, MNT_WAIT); 226 fs->e2fs_ronly = 0; 227 MNT_ILOCK(mp); 228 mp->mnt_flag &= ~MNT_RDONLY; 229 MNT_IUNLOCK(mp); 230 } 231 if (vfs_flagopt(opts, "export", NULL, 0)) { 232 /* Process export requests in vfs_mount.c. */ 233 return (error); 234 } 235 } 236 237 /* 238 * Not an update, or updating the name: look up the name 239 * and verify that it refers to a sensible disk device. 240 */ 241 if (fspec == NULL) 242 return (EINVAL); 243 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec); 244 if ((error = namei(ndp)) != 0) 245 return (error); 246 NDFREE_PNBUF(ndp); 247 devvp = ndp->ni_vp; 248 249 if (!vn_isdisk_error(devvp, &error)) { 250 vput(devvp); 251 return (error); 252 } 253 254 /* 255 * If mount by non-root, then verify that user has necessary 256 * permissions on the device. 257 * 258 * XXXRW: VOP_ACCESS() enough? 259 */ 260 accmode = VREAD; 261 if ((mp->mnt_flag & MNT_RDONLY) == 0) 262 accmode |= VWRITE; 263 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); 264 if (error) 265 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 266 if (error) { 267 vput(devvp); 268 return (error); 269 } 270 271 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 272 error = ext2_mountfs(devvp, mp); 273 } else { 274 if (devvp != ump->um_devvp) { 275 vput(devvp); 276 return (EINVAL); /* needs translation */ 277 } else 278 vput(devvp); 279 } 280 if (error) { 281 vrele(devvp); 282 return (error); 283 } 284 ump = VFSTOEXT2(mp); 285 fs = ump->um_e2fs; 286 287 /* 288 * Note that this strncpy() is ok because of a check at the start 289 * of ext2_mount(). 290 */ 291 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN); 292 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0'; 293 vfs_mountedfrom(mp, fspec); 294 return (0); 295 } 296 297 static int 298 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 299 { 300 uint32_t i, mask; 301 302 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 303 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 304 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 305 return (1); 306 } 307 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 308 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 309 if (mask) { 310 printf("WARNING: mount of %s denied due to " 311 "unsupported optional features:\n", devtoname(dev)); 312 for (i = 0; 313 i < sizeof(incompat)/sizeof(struct ext2_feature); 314 i++) 315 if (mask & incompat[i].mask) 316 printf("%s ", incompat[i].name); 317 printf("\n"); 318 return (1); 319 } 320 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 321 if (!ronly && mask) { 322 printf("WARNING: R/W mount of %s denied due to " 323 "unsupported optional features:\n", devtoname(dev)); 324 for (i = 0; 325 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 326 i++) 327 if (mask & ro_compat[i].mask) 328 printf("%s ", ro_compat[i].name); 329 printf("\n"); 330 return (1); 331 } 332 } 333 return (0); 334 } 335 336 static e4fs_daddr_t 337 ext2_cg_location(struct m_ext2fs *fs, int number) 338 { 339 int cg, descpb, logical_sb, has_super = 0; 340 341 /* 342 * Adjust logical superblock block number. 343 * Godmar thinks: if the blocksize is greater than 1024, then 344 * the superblock is logically part of block zero. 345 */ 346 logical_sb = fs->e2fs_bsize > SBLOCKSIZE ? 0 : 1; 347 348 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 349 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 350 return (logical_sb + number + 1); 351 352 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 353 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 354 else 355 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 356 357 cg = descpb * number; 358 359 if (ext2_cg_has_sb(fs, cg)) 360 has_super = 1; 361 362 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 363 le32toh(fs->e2fs->e2fs_first_dblock)); 364 } 365 366 static int 367 ext2_cg_validate(struct m_ext2fs *fs) 368 { 369 uint64_t b_bitmap; 370 uint64_t i_bitmap; 371 uint64_t i_tables; 372 uint64_t first_block, last_block, last_cg_block; 373 struct ext2_gd *gd; 374 unsigned int i, cg_count; 375 376 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 377 last_cg_block = ext2_cg_number_gdb(fs, 0); 378 cg_count = fs->e2fs_gcount; 379 380 for (i = 0; i < fs->e2fs_gcount; i++) { 381 gd = &fs->e2fs_gd[i]; 382 383 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 384 i == fs->e2fs_gcount - 1) { 385 last_block = fs->e2fs_bcount - 1; 386 } else { 387 last_block = first_block + 388 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 389 } 390 391 if ((cg_count == fs->e2fs_gcount) && 392 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 393 cg_count = i; 394 395 b_bitmap = e2fs_gd_get_b_bitmap(gd); 396 if (b_bitmap == 0) { 397 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 398 "block bitmap is zero", i); 399 return (EINVAL); 400 } 401 if (b_bitmap <= last_cg_block) { 402 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 403 "block bitmap overlaps gds", i); 404 return (EINVAL); 405 } 406 if (b_bitmap < first_block || b_bitmap > last_block) { 407 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 408 "block bitmap not in group", i); 409 return (EINVAL); 410 } 411 412 i_bitmap = e2fs_gd_get_i_bitmap(gd); 413 if (i_bitmap == 0) { 414 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 415 "inode bitmap is zero", i); 416 return (EINVAL); 417 } 418 if (i_bitmap <= last_cg_block) { 419 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 420 "inode bitmap overlaps gds", i); 421 return (EINVAL); 422 } 423 if (i_bitmap < first_block || i_bitmap > last_block) { 424 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 425 "inode bitmap not in group blk", i); 426 return (EINVAL); 427 } 428 429 i_tables = e2fs_gd_get_i_tables(gd); 430 if (i_tables == 0) { 431 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 432 "inode table is zero", i); 433 return (EINVAL); 434 } 435 if (i_tables <= last_cg_block) { 436 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 437 "inode tables overlaps gds", i); 438 return (EINVAL); 439 } 440 if (i_tables < first_block || 441 i_tables + fs->e2fs_itpg - 1 > last_block) { 442 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 443 "inode tables not in group blk", i); 444 return (EINVAL); 445 } 446 447 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 448 first_block += EXT2_BLOCKS_PER_GROUP(fs); 449 } 450 451 return (0); 452 } 453 454 /* 455 * This computes the fields of the m_ext2fs structure from the 456 * data in the ext2fs structure read in. 457 */ 458 static int 459 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 460 struct m_ext2fs *fs) 461 { 462 struct buf *bp; 463 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 464 int i, j; 465 int g_count = 0; 466 int error; 467 468 /* Check checksum features */ 469 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 470 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 471 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 472 "incorrect checksum features combination"); 473 return (EINVAL); 474 } 475 476 /* Precompute checksum seed for all metadata */ 477 ext2_sb_csum_set_seed(fs); 478 479 /* Verify sb csum if possible */ 480 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 481 error = ext2_sb_csum_verify(fs); 482 if (error) { 483 return (error); 484 } 485 } 486 487 /* Check for block size = 1K|2K|4K */ 488 if (le32toh(es->e2fs_log_bsize) > 2) { 489 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 490 "bad block size"); 491 return (EINVAL); 492 } 493 494 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 495 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 496 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 497 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 498 499 /* Check for fragment size */ 500 if (le32toh(es->e2fs_log_fsize) > 501 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 502 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 503 "invalid log cluster size"); 504 return (EINVAL); 505 } 506 507 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 508 if (fs->e2fs_fsize != fs->e2fs_bsize) { 509 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 510 "fragment size != block size"); 511 return (EINVAL); 512 } 513 514 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 515 516 /* Check reserved gdt blocks for future filesystem expansion */ 517 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 518 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 519 "number of reserved GDT blocks too large"); 520 return (EINVAL); 521 } 522 523 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 524 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 525 } else { 526 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 527 528 /* 529 * Check first ino. 530 */ 531 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 532 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 533 "invalid first ino"); 534 return (EINVAL); 535 } 536 537 /* 538 * Simple sanity check for superblock inode size value. 539 */ 540 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 541 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 542 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 543 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 544 "invalid inode size"); 545 return (EINVAL); 546 } 547 } 548 549 /* Check group descriptors */ 550 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 551 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 552 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 553 "unsupported 64bit descriptor size"); 554 return (EINVAL); 555 } 556 557 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 558 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 559 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 560 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 561 "zero blocks/fragments per group"); 562 return (EINVAL); 563 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 564 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 565 "blocks per group not equal fragments per group"); 566 return (EINVAL); 567 } 568 569 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 570 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 571 "non-standard group size unsupported"); 572 return (EINVAL); 573 } 574 575 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 576 if (fs->e2fs_ipb == 0 || 577 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 578 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 579 "bad inodes per block size"); 580 return (EINVAL); 581 } 582 583 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 584 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 585 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 586 "invalid inodes per group"); 587 return (EINVAL); 588 } 589 590 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 591 592 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 593 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 594 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 595 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 596 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 597 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 598 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 599 } 600 if (fs->e2fs_rbcount > fs->e2fs_bcount || 601 fs->e2fs_fbcount > fs->e2fs_bcount) { 602 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 603 "invalid block count"); 604 return (EINVAL); 605 } 606 607 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 608 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 609 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 610 "invalid number of free inodes"); 611 return (EINVAL); 612 } 613 614 if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) || 615 le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 616 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 617 "first data block out of range"); 618 return (EINVAL); 619 } 620 621 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 622 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 623 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 624 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 625 "groups count too large"); 626 return (EINVAL); 627 } 628 629 /* Check for extra isize in big inodes. */ 630 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 631 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 632 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 633 "no space for extra inode timestamps"); 634 return (EINVAL); 635 } 636 637 /* s_resuid / s_resgid ? */ 638 639 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 640 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 641 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 642 } else { 643 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 644 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 645 fs->e2fs_bsize / sizeof(struct ext2_gd)); 646 } 647 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 648 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 649 M_EXT2MNT, M_WAITOK | M_ZERO); 650 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 651 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 652 653 for (i = 0; i < fs->e2fs_gdbcount; i++) { 654 error = bread(devvp, 655 fsbtodb(fs, ext2_cg_location(fs, i)), 656 fs->e2fs_bsize, NOCRED, &bp); 657 if (error) { 658 /* 659 * fs->e2fs_gd and fs->e2fs_contigdirs 660 * will be freed later by the caller, 661 * because this function could be called from 662 * MNT_UPDATE path. 663 */ 664 return (error); 665 } 666 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 667 memcpy(&fs->e2fs_gd[ 668 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 669 bp->b_data, fs->e2fs_bsize); 670 } else { 671 for (j = 0; j < e2fs_descpb && 672 g_count < fs->e2fs_gcount; j++, g_count++) 673 memcpy(&fs->e2fs_gd[g_count], 674 bp->b_data + j * E2FS_REV0_GD_SIZE, 675 E2FS_REV0_GD_SIZE); 676 } 677 brelse(bp); 678 bp = NULL; 679 } 680 681 /* Validate cgs consistency */ 682 error = ext2_cg_validate(fs); 683 if (error) 684 return (error); 685 686 /* Verfy cgs csum */ 687 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 688 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 689 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 690 if (error) 691 return (error); 692 } 693 /* Initialization for the ext2 Orlov allocator variant. */ 694 fs->e2fs_total_dir = 0; 695 for (i = 0; i < fs->e2fs_gcount; i++) 696 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 697 698 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 699 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 700 fs->e2fs_maxfilesize = 0x7fffffff; 701 else { 702 fs->e2fs_maxfilesize = 0xffffffffffff; 703 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 704 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 705 } 706 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 707 fs->e2fs_uhash = 3; 708 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 709 #ifdef __CHAR_UNSIGNED__ 710 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 711 fs->e2fs_uhash = 3; 712 #else 713 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 714 #endif 715 } 716 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 717 error = ext2_sb_csum_verify(fs); 718 719 return (error); 720 } 721 722 /* 723 * Reload all incore data for a filesystem (used after running fsck on 724 * the root filesystem and finding things to fix). The filesystem must 725 * be mounted read-only. 726 * 727 * Things to do to update the mount: 728 * 1) invalidate all cached meta-data. 729 * 2) re-read superblock from disk. 730 * 3) invalidate all cluster summary information. 731 * 4) invalidate all inactive vnodes. 732 * 5) invalidate all cached file data. 733 * 6) re-read inode data for all active vnodes. 734 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 735 */ 736 static int 737 ext2_reload(struct mount *mp, struct thread *td) 738 { 739 struct vnode *vp, *mvp, *devvp; 740 struct inode *ip; 741 struct buf *bp; 742 struct ext2fs *es; 743 struct m_ext2fs *fs; 744 struct csum *sump; 745 int error, i; 746 int32_t *lp; 747 748 if ((mp->mnt_flag & MNT_RDONLY) == 0) 749 return (EINVAL); 750 /* 751 * Step 1: invalidate all cached meta-data. 752 */ 753 devvp = VFSTOEXT2(mp)->um_devvp; 754 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 755 if (vinvalbuf(devvp, 0, 0, 0) != 0) 756 panic("ext2_reload: dirty1"); 757 VOP_UNLOCK(devvp); 758 759 /* 760 * Step 2: re-read superblock from disk. 761 * constants have been adjusted for ext2 762 */ 763 if ((error = bread(devvp, SBLOCK, SBLOCKBLKSIZE, NOCRED, &bp)) != 0) 764 return (error); 765 es = (struct ext2fs *)((char *)bp->b_data + SBLOCKOFFSET); 766 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 767 brelse(bp); 768 return (EIO); /* XXX needs translation */ 769 } 770 fs = VFSTOEXT2(mp)->um_e2fs; 771 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 772 773 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 774 brelse(bp); 775 return (error); 776 } 777 778 brelse(bp); 779 780 /* 781 * Step 3: invalidate all cluster summary information. 782 */ 783 if (fs->e2fs_contigsumsize > 0) { 784 lp = fs->e2fs_maxcluster; 785 sump = fs->e2fs_clustersum; 786 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 787 *lp++ = fs->e2fs_contigsumsize; 788 sump->cs_init = 0; 789 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 790 } 791 } 792 793 loop: 794 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 795 /* 796 * Step 4: invalidate all cached file data. 797 */ 798 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { 799 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 800 goto loop; 801 } 802 if (vinvalbuf(vp, 0, 0, 0)) 803 panic("ext2_reload: dirty2"); 804 805 /* 806 * Step 5: re-read inode data for all active vnodes. 807 */ 808 ip = VTOI(vp); 809 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 810 (int)fs->e2fs_bsize, NOCRED, &bp); 811 if (error) { 812 vput(vp); 813 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 814 return (error); 815 } 816 817 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 818 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); 819 820 brelse(bp); 821 vput(vp); 822 823 if (error) { 824 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 825 return (error); 826 } 827 } 828 return (0); 829 } 830 831 /* 832 * Common code for mount and mountroot. 833 */ 834 static int 835 ext2_mountfs(struct vnode *devvp, struct mount *mp) 836 { 837 struct ext2mount *ump; 838 struct buf *bp; 839 struct m_ext2fs *fs; 840 struct ext2fs *es; 841 struct cdev *dev = devvp->v_rdev; 842 struct g_consumer *cp; 843 struct bufobj *bo; 844 struct csum *sump; 845 int error; 846 int ronly; 847 int i; 848 u_long size; 849 int32_t *lp; 850 int32_t e2fs_maxcontig; 851 852 bp = NULL; 853 ump = NULL; 854 855 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 856 /* XXX: use VOP_ACESS to check FS perms */ 857 g_topology_lock(); 858 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 859 g_topology_unlock(); 860 VOP_UNLOCK(devvp); 861 if (error) 862 return (error); 863 864 if (PAGE_SIZE != SBLOCKBLKSIZE) { 865 printf("WARNING: Unsupported page size %d\n", PAGE_SIZE); 866 error = EINVAL; 867 goto out; 868 } 869 if (cp->provider->sectorsize > PAGE_SIZE) { 870 printf("WARNING: Device sectorsize(%d) is more than %d\n", 871 cp->provider->sectorsize, PAGE_SIZE); 872 error = EINVAL; 873 goto out; 874 } 875 876 bo = &devvp->v_bufobj; 877 bo->bo_private = cp; 878 bo->bo_ops = g_vfs_bufops; 879 if (devvp->v_rdev->si_iosize_max != 0) 880 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 881 if (mp->mnt_iosize_max > maxphys) 882 mp->mnt_iosize_max = maxphys; 883 if ((error = bread(devvp, SBLOCK, SBLOCKBLKSIZE, NOCRED, &bp)) != 0) 884 goto out; 885 es = (struct ext2fs *)((char *)bp->b_data + SBLOCKOFFSET); 886 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 887 error = EINVAL; /* XXX needs translation */ 888 goto out; 889 } 890 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 891 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 892 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 893 printf( 894 "WARNING: Filesystem was not properly dismounted\n"); 895 } else { 896 printf( 897 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 898 error = EPERM; 899 goto out; 900 } 901 } 902 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 903 904 /* 905 * I don't know whether this is the right strategy. Note that 906 * we dynamically allocate both an m_ext2fs and an ext2fs 907 * while Linux keeps the super block in a locked buffer. 908 */ 909 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 910 M_EXT2MNT, M_WAITOK | M_ZERO); 911 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 912 M_EXT2MNT, M_WAITOK); 913 mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF); 914 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 915 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 916 goto out; 917 918 /* 919 * Calculate the maximum contiguous blocks and size of cluster summary 920 * array. In FFS this is done by newfs; however, the superblock 921 * in ext2fs doesn't have these variables, so we can calculate 922 * them here. 923 */ 924 e2fs_maxcontig = MAX(1, maxphys / ump->um_e2fs->e2fs_bsize); 925 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 926 ump->um_e2fs->e2fs_maxsymlinklen = EXT2_MAXSYMLINKLEN; 927 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 928 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 929 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 930 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 931 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 932 lp = ump->um_e2fs->e2fs_maxcluster; 933 sump = ump->um_e2fs->e2fs_clustersum; 934 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 935 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 936 sump->cs_init = 0; 937 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 938 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 939 } 940 } 941 942 brelse(bp); 943 bp = NULL; 944 fs = ump->um_e2fs; 945 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 946 947 /* 948 * If the fs is not mounted read-only, make sure the super block is 949 * always written back on a sync(). 950 */ 951 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 952 if (ronly == 0) { 953 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 954 fs->e2fs->e2fs_state = 955 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 956 } 957 mp->mnt_data = ump; 958 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 959 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 960 MNT_ILOCK(mp); 961 mp->mnt_flag |= MNT_LOCAL; 962 MNT_IUNLOCK(mp); 963 ump->um_mountp = mp; 964 ump->um_dev = dev; 965 ump->um_devvp = devvp; 966 ump->um_bo = &devvp->v_bufobj; 967 ump->um_cp = cp; 968 969 /* 970 * Setting those two parameters allowed us to use 971 * ufs_bmap w/o changse! 972 */ 973 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 974 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 975 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 976 if (ronly == 0) 977 ext2_sbupdate(ump, MNT_WAIT); 978 /* 979 * Initialize filesystem stat information in mount struct. 980 */ 981 MNT_ILOCK(mp); 982 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | 983 MNTK_USES_BCACHE; 984 MNT_IUNLOCK(mp); 985 return (0); 986 out: 987 if (bp) 988 brelse(bp); 989 if (cp != NULL) { 990 g_topology_lock(); 991 g_vfs_close(cp); 992 g_topology_unlock(); 993 } 994 if (ump) { 995 mtx_destroy(EXT2_MTX(ump)); 996 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 997 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 998 free(ump->um_e2fs->e2fs, M_EXT2MNT); 999 free(ump->um_e2fs, M_EXT2MNT); 1000 free(ump, M_EXT2MNT); 1001 mp->mnt_data = NULL; 1002 } 1003 return (error); 1004 } 1005 1006 /* 1007 * Unmount system call. 1008 */ 1009 static int 1010 ext2_unmount(struct mount *mp, int mntflags) 1011 { 1012 struct ext2mount *ump; 1013 struct m_ext2fs *fs; 1014 struct csum *sump; 1015 int error, flags, i, ronly; 1016 1017 flags = 0; 1018 if (mntflags & MNT_FORCE) { 1019 if (mp->mnt_flag & MNT_ROOTFS) 1020 return (EINVAL); 1021 flags |= FORCECLOSE; 1022 } 1023 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0) 1024 return (error); 1025 ump = VFSTOEXT2(mp); 1026 fs = ump->um_e2fs; 1027 ronly = fs->e2fs_ronly; 1028 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1029 if (fs->e2fs_wasvalid) 1030 fs->e2fs->e2fs_state = 1031 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1032 ext2_sbupdate(ump, MNT_WAIT); 1033 } 1034 1035 g_topology_lock(); 1036 g_vfs_close(ump->um_cp); 1037 g_topology_unlock(); 1038 vrele(ump->um_devvp); 1039 sump = fs->e2fs_clustersum; 1040 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1041 free(sump->cs_sum, M_EXT2MNT); 1042 free(fs->e2fs_clustersum, M_EXT2MNT); 1043 free(fs->e2fs_maxcluster, M_EXT2MNT); 1044 free(fs->e2fs_gd, M_EXT2MNT); 1045 free(fs->e2fs_contigdirs, M_EXT2MNT); 1046 free(fs->e2fs, M_EXT2MNT); 1047 free(fs, M_EXT2MNT); 1048 free(ump, M_EXT2MNT); 1049 mp->mnt_data = NULL; 1050 return (error); 1051 } 1052 1053 /* 1054 * Flush out all the files in a filesystem. 1055 */ 1056 static int 1057 ext2_flushfiles(struct mount *mp, int flags, struct thread *td) 1058 { 1059 int error; 1060 1061 error = vflush(mp, 0, flags, td); 1062 return (error); 1063 } 1064 1065 /* 1066 * Get filesystem statistics. 1067 */ 1068 int 1069 ext2_statfs(struct mount *mp, struct statfs *sbp) 1070 { 1071 struct ext2mount *ump; 1072 struct m_ext2fs *fs; 1073 uint32_t overhead, overhead_per_group, ngdb; 1074 int i, ngroups; 1075 1076 ump = VFSTOEXT2(mp); 1077 fs = ump->um_e2fs; 1078 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1079 panic("ext2_statfs"); 1080 1081 /* 1082 * Compute the overhead (FS structures) 1083 */ 1084 overhead_per_group = 1085 1 /* block bitmap */ + 1086 1 /* inode bitmap */ + 1087 fs->e2fs_itpg; 1088 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1089 fs->e2fs_gcount * overhead_per_group; 1090 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1091 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1092 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1093 if (ext2_cg_has_sb(fs, i)) 1094 ngroups++; 1095 } 1096 } else { 1097 ngroups = fs->e2fs_gcount; 1098 } 1099 ngdb = fs->e2fs_gdbcount; 1100 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1101 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1102 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1103 overhead += ngroups * (1 /* superblock */ + ngdb); 1104 1105 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1106 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1107 sbp->f_blocks = fs->e2fs_bcount - overhead; 1108 sbp->f_bfree = fs->e2fs_fbcount; 1109 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1110 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1111 sbp->f_ffree = fs->e2fs_ficount; 1112 return (0); 1113 } 1114 1115 /* 1116 * Go through the disk queues to initiate sandbagged IO; 1117 * go through the inodes to write those that have been modified; 1118 * initiate the writing of the super block if it has been modified. 1119 * 1120 * Note: we are always called with the filesystem marked `MPBUSY'. 1121 */ 1122 static int 1123 ext2_sync(struct mount *mp, int waitfor) 1124 { 1125 struct vnode *mvp, *vp; 1126 struct thread *td; 1127 struct inode *ip; 1128 struct ext2mount *ump = VFSTOEXT2(mp); 1129 struct m_ext2fs *fs; 1130 int error, allerror = 0; 1131 1132 td = curthread; 1133 fs = ump->um_e2fs; 1134 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1135 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1136 } 1137 1138 /* 1139 * Write back each (modified) inode. 1140 */ 1141 loop: 1142 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1143 if (vp->v_type == VNON) { 1144 VI_UNLOCK(vp); 1145 continue; 1146 } 1147 ip = VTOI(vp); 1148 if ((ip->i_flag & 1149 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1150 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 1151 waitfor == MNT_LAZY)) { 1152 VI_UNLOCK(vp); 1153 continue; 1154 } 1155 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); 1156 if (error) { 1157 if (error == ENOENT) { 1158 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1159 goto loop; 1160 } 1161 continue; 1162 } 1163 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 1164 allerror = error; 1165 vput(vp); 1166 } 1167 1168 /* 1169 * Force stale filesystem control information to be flushed. 1170 */ 1171 if (waitfor != MNT_LAZY) { 1172 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1173 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 1174 allerror = error; 1175 VOP_UNLOCK(ump->um_devvp); 1176 } 1177 1178 /* 1179 * Write back modified superblock. 1180 */ 1181 if (fs->e2fs_fmod != 0) { 1182 fs->e2fs_fmod = 0; 1183 fs->e2fs->e2fs_wtime = htole32(time_second); 1184 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1185 allerror = error; 1186 } 1187 return (allerror); 1188 } 1189 1190 /* 1191 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1192 * in from disk. If it is in core, wait for the lock bit to clear, then 1193 * return the inode locked. Detection and handling of mount points must be 1194 * done by the calling routine. 1195 */ 1196 static int 1197 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) 1198 { 1199 struct m_ext2fs *fs; 1200 struct inode *ip; 1201 struct ext2mount *ump; 1202 struct buf *bp; 1203 struct vnode *vp; 1204 struct thread *td; 1205 unsigned int i, used_blocks; 1206 int error; 1207 1208 td = curthread; 1209 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); 1210 if (error || *vpp != NULL) 1211 return (error); 1212 1213 ump = VFSTOEXT2(mp); 1214 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1215 1216 /* Allocate a new vnode/inode. */ 1217 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 1218 *vpp = NULL; 1219 free(ip, M_EXT2NODE); 1220 return (error); 1221 } 1222 vp->v_data = ip; 1223 ip->i_vnode = vp; 1224 ip->i_e2fs = fs = ump->um_e2fs; 1225 ip->i_ump = ump; 1226 ip->i_number = ino; 1227 cluster_init_vn(&ip->i_clusterw); 1228 1229 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1230 error = insmntque(vp, mp); 1231 if (error != 0) { 1232 free(ip, M_EXT2NODE); 1233 *vpp = NULL; 1234 return (error); 1235 } 1236 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); 1237 if (error || *vpp != NULL) 1238 return (error); 1239 1240 /* Read in the disk contents for the inode, copy into the inode. */ 1241 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1242 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { 1243 /* 1244 * The inode does not contain anything useful, so it would 1245 * be misleading to leave it on its hash chain. With mode 1246 * still zero, it will be unlinked and returned to the free 1247 * list by vput(). 1248 */ 1249 brelse(bp); 1250 vput(vp); 1251 *vpp = NULL; 1252 return (error); 1253 } 1254 /* convert ext2 inode to dinode */ 1255 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1256 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1257 if (error) { 1258 brelse(bp); 1259 vput(vp); 1260 *vpp = NULL; 1261 return (error); 1262 } 1263 ip->i_block_group = ino_to_cg(fs, ino); 1264 ip->i_next_alloc_block = 0; 1265 ip->i_next_alloc_goal = 0; 1266 1267 /* 1268 * Now we want to make sure that block pointers for unused 1269 * blocks are zeroed out - ext2_balloc depends on this 1270 * although for regular files and directories only 1271 * 1272 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1273 * out because we could corrupt the extent tree. 1274 */ 1275 if (!(ip->i_flag & IN_E4EXTENTS) && 1276 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1277 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1278 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1279 ip->i_db[i] = 0; 1280 } 1281 1282 bqrelse(bp); 1283 1284 #ifdef EXT2FS_PRINT_EXTENTS 1285 ext2_print_inode(ip); 1286 error = ext4_ext_walk(ip); 1287 if (error) { 1288 vput(vp); 1289 *vpp = NULL; 1290 return (error); 1291 } 1292 #endif 1293 1294 /* 1295 * Initialize the vnode from the inode, check for aliases. 1296 * Note that the underlying vnode may have changed. 1297 */ 1298 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1299 vput(vp); 1300 *vpp = NULL; 1301 return (error); 1302 } 1303 1304 /* 1305 * Finish inode initialization. 1306 */ 1307 1308 vn_set_state(vp, VSTATE_CONSTRUCTED); 1309 *vpp = vp; 1310 return (0); 1311 } 1312 1313 /* 1314 * File handle to vnode 1315 * 1316 * Have to be really careful about stale file handles: 1317 * - check that the inode number is valid 1318 * - call ext2_vget() to get the locked inode 1319 * - check for an unallocated inode (i_mode == 0) 1320 * - check that the given client host has export rights and return 1321 * those rights via. exflagsp and credanonp 1322 */ 1323 static int 1324 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) 1325 { 1326 struct inode *ip; 1327 struct ufid *ufhp; 1328 struct vnode *nvp; 1329 struct m_ext2fs *fs; 1330 int error; 1331 1332 ufhp = (struct ufid *)fhp; 1333 fs = VFSTOEXT2(mp)->um_e2fs; 1334 if (ufhp->ufid_ino < EXT2_ROOTINO || 1335 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1336 return (ESTALE); 1337 1338 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1339 if (error) { 1340 *vpp = NULLVP; 1341 return (error); 1342 } 1343 ip = VTOI(nvp); 1344 if (ip->i_mode == 0 || 1345 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1346 vput(nvp); 1347 *vpp = NULLVP; 1348 return (ESTALE); 1349 } 1350 *vpp = nvp; 1351 vnode_create_vobject(*vpp, 0, curthread); 1352 return (0); 1353 } 1354 1355 /* 1356 * Write a superblock and associated information back to disk. 1357 */ 1358 static int 1359 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1360 { 1361 struct m_ext2fs *fs = mp->um_e2fs; 1362 struct ext2fs *es = fs->e2fs; 1363 struct buf *bp; 1364 int error = 0; 1365 1366 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1367 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1368 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1369 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1370 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1371 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1372 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1373 } 1374 1375 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1376 1377 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1378 ext2_sb_csum_set(fs); 1379 1380 error = bread(mp->um_devvp, SBLOCK, SBLOCKBLKSIZE, NOCRED, &bp); 1381 if (error != 0) 1382 return (error); 1383 1384 memcpy((char *)bp->b_data + SBLOCKOFFSET, (caddr_t)es, 1385 (u_int)sizeof(struct ext2fs)); 1386 if (waitfor == MNT_WAIT) 1387 error = bwrite(bp); 1388 else 1389 bawrite(bp); 1390 1391 /* 1392 * The buffers for group descriptors, inode bitmaps and block bitmaps 1393 * are not busy at this point and are (hopefully) written by the 1394 * usual sync mechanism. No need to write them here. 1395 */ 1396 return (error); 1397 } 1398 int 1399 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1400 { 1401 struct m_ext2fs *fs = mp->um_e2fs; 1402 struct buf *bp; 1403 int i, j, g_count = 0, error = 0, allerror = 0; 1404 1405 allerror = ext2_sbupdate(mp, waitfor); 1406 1407 /* Update gd csums */ 1408 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1409 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1410 ext2_gd_csum_set(fs); 1411 1412 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1413 bp = getblk(mp->um_devvp, fsbtodb(fs, 1414 ext2_cg_location(fs, i)), 1415 fs->e2fs_bsize, 0, 0, 0); 1416 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1417 memcpy(bp->b_data, &fs->e2fs_gd[ 1418 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1419 fs->e2fs_bsize); 1420 } else { 1421 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1422 g_count < fs->e2fs_gcount; j++, g_count++) 1423 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1424 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1425 } 1426 if (waitfor == MNT_WAIT) 1427 error = bwrite(bp); 1428 else 1429 bawrite(bp); 1430 } 1431 1432 if (!allerror && error) 1433 allerror = error; 1434 return (allerror); 1435 } 1436 1437 /* 1438 * Return the root of a filesystem. 1439 */ 1440 static int 1441 ext2_root(struct mount *mp, int flags, struct vnode **vpp) 1442 { 1443 struct vnode *nvp; 1444 int error; 1445 1446 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp); 1447 if (error) 1448 return (error); 1449 *vpp = nvp; 1450 return (0); 1451 } 1452