1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/kernel.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/bio.h> 50 #include <sys/buf.h> 51 #include <sys/conf.h> 52 #include <sys/endian.h> 53 #include <sys/fcntl.h> 54 #include <sys/malloc.h> 55 #include <sys/sdt.h> 56 #include <sys/stat.h> 57 #include <sys/mutex.h> 58 59 #include <geom/geom.h> 60 #include <geom/geom_vfs.h> 61 62 #include <fs/ext2fs/fs.h> 63 #include <fs/ext2fs/ext2_mount.h> 64 #include <fs/ext2fs/inode.h> 65 66 #include <fs/ext2fs/ext2fs.h> 67 #include <fs/ext2fs/ext2_dinode.h> 68 #include <fs/ext2fs/ext2_extern.h> 69 #include <fs/ext2fs/ext2_extents.h> 70 71 SDT_PROVIDER_DECLARE(ext2fs); 72 /* 73 * ext2fs trace probe: 74 * arg0: verbosity. Higher numbers give more verbose messages 75 * arg1: Textual message 76 */ 77 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 78 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 79 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 80 81 82 static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 83 static int ext2_mountfs(struct vnode *, struct mount *); 84 static int ext2_reload(struct mount *mp, struct thread *td); 85 static int ext2_sbupdate(struct ext2mount *, int); 86 static int ext2_cgupdate(struct ext2mount *, int); 87 static vfs_unmount_t ext2_unmount; 88 static vfs_root_t ext2_root; 89 static vfs_statfs_t ext2_statfs; 90 static vfs_sync_t ext2_sync; 91 static vfs_vget_t ext2_vget; 92 static vfs_fhtovp_t ext2_fhtovp; 93 static vfs_mount_t ext2_mount; 94 95 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 96 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 97 98 static struct vfsops ext2fs_vfsops = { 99 .vfs_fhtovp = ext2_fhtovp, 100 .vfs_mount = ext2_mount, 101 .vfs_root = ext2_root, /* root inode via vget */ 102 .vfs_statfs = ext2_statfs, 103 .vfs_sync = ext2_sync, 104 .vfs_unmount = ext2_unmount, 105 .vfs_vget = ext2_vget, 106 }; 107 108 VFS_SET(ext2fs_vfsops, ext2fs, 0); 109 110 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 111 int ronly); 112 static int ext2_compute_sb_data(struct vnode * devvp, 113 struct ext2fs * es, struct m_ext2fs * fs); 114 115 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", 116 "noclusterw", "noexec", "export", "force", "from", "multilabel", 117 "suiddir", "nosymfollow", "sync", "union", NULL }; 118 119 /* 120 * VFS Operations. 121 * 122 * mount system call 123 */ 124 static int 125 ext2_mount(struct mount *mp) 126 { 127 struct vfsoptlist *opts; 128 struct vnode *devvp; 129 struct thread *td; 130 struct ext2mount *ump = NULL; 131 struct m_ext2fs *fs; 132 struct nameidata nd, *ndp = &nd; 133 accmode_t accmode; 134 char *path, *fspec; 135 int error, flags, len; 136 137 td = curthread; 138 opts = mp->mnt_optnew; 139 140 if (vfs_filteropt(opts, ext2_opts)) 141 return (EINVAL); 142 143 vfs_getopt(opts, "fspath", (void **)&path, NULL); 144 /* Double-check the length of path.. */ 145 if (strlen(path) >= MAXMNTLEN) 146 return (ENAMETOOLONG); 147 148 fspec = NULL; 149 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 150 if (!error && fspec[len - 1] != '\0') 151 return (EINVAL); 152 153 /* 154 * If updating, check whether changing from read-only to 155 * read/write; if there is no device name, that's all we do. 156 */ 157 if (mp->mnt_flag & MNT_UPDATE) { 158 ump = VFSTOEXT2(mp); 159 fs = ump->um_e2fs; 160 error = 0; 161 if (fs->e2fs_ronly == 0 && 162 vfs_flagopt(opts, "ro", NULL, 0)) { 163 error = VFS_SYNC(mp, MNT_WAIT); 164 if (error) 165 return (error); 166 flags = WRITECLOSE; 167 if (mp->mnt_flag & MNT_FORCE) 168 flags |= FORCECLOSE; 169 error = ext2_flushfiles(mp, flags, td); 170 if (error == 0 && fs->e2fs_wasvalid && 171 ext2_cgupdate(ump, MNT_WAIT) == 0) { 172 fs->e2fs->e2fs_state = 173 htole16((le16toh(fs->e2fs->e2fs_state) | 174 E2FS_ISCLEAN)); 175 ext2_sbupdate(ump, MNT_WAIT); 176 } 177 fs->e2fs_ronly = 1; 178 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 179 g_topology_lock(); 180 g_access(ump->um_cp, 0, -1, 0); 181 g_topology_unlock(); 182 } 183 if (!error && (mp->mnt_flag & MNT_RELOAD)) 184 error = ext2_reload(mp, td); 185 if (error) 186 return (error); 187 devvp = ump->um_devvp; 188 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) { 189 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 190 return (EPERM); 191 192 /* 193 * If upgrade to read-write by non-root, then verify 194 * that user has necessary permissions on the device. 195 */ 196 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 197 error = VOP_ACCESS(devvp, VREAD | VWRITE, 198 td->td_ucred, td); 199 if (error) 200 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 201 if (error) { 202 VOP_UNLOCK(devvp); 203 return (error); 204 } 205 VOP_UNLOCK(devvp); 206 g_topology_lock(); 207 error = g_access(ump->um_cp, 0, 1, 0); 208 g_topology_unlock(); 209 if (error) 210 return (error); 211 212 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 213 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 214 if (mp->mnt_flag & MNT_FORCE) { 215 printf( 216 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 217 } else { 218 printf( 219 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 220 fs->e2fs_fsmnt); 221 return (EPERM); 222 } 223 } 224 fs->e2fs->e2fs_state = 225 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 226 (void)ext2_cgupdate(ump, MNT_WAIT); 227 fs->e2fs_ronly = 0; 228 MNT_ILOCK(mp); 229 mp->mnt_flag &= ~MNT_RDONLY; 230 MNT_IUNLOCK(mp); 231 } 232 if (vfs_flagopt(opts, "export", NULL, 0)) { 233 /* Process export requests in vfs_mount.c. */ 234 return (error); 235 } 236 } 237 238 /* 239 * Not an update, or updating the name: look up the name 240 * and verify that it refers to a sensible disk device. 241 */ 242 if (fspec == NULL) 243 return (EINVAL); 244 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); 245 if ((error = namei(ndp)) != 0) 246 return (error); 247 NDFREE(ndp, NDF_ONLY_PNBUF); 248 devvp = ndp->ni_vp; 249 250 if (!vn_isdisk(devvp, &error)) { 251 vput(devvp); 252 return (error); 253 } 254 255 /* 256 * If mount by non-root, then verify that user has necessary 257 * permissions on the device. 258 * 259 * XXXRW: VOP_ACCESS() enough? 260 */ 261 accmode = VREAD; 262 if ((mp->mnt_flag & MNT_RDONLY) == 0) 263 accmode |= VWRITE; 264 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); 265 if (error) 266 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 267 if (error) { 268 vput(devvp); 269 return (error); 270 } 271 272 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 273 error = ext2_mountfs(devvp, mp); 274 } else { 275 if (devvp != ump->um_devvp) { 276 vput(devvp); 277 return (EINVAL); /* needs translation */ 278 } else 279 vput(devvp); 280 } 281 if (error) { 282 vrele(devvp); 283 return (error); 284 } 285 ump = VFSTOEXT2(mp); 286 fs = ump->um_e2fs; 287 288 /* 289 * Note that this strncpy() is ok because of a check at the start 290 * of ext2_mount(). 291 */ 292 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN); 293 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0'; 294 vfs_mountedfrom(mp, fspec); 295 return (0); 296 } 297 298 static int 299 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 300 { 301 uint32_t i, mask; 302 303 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 304 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 305 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 306 return (1); 307 } 308 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 309 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 310 if (mask) { 311 printf("WARNING: mount of %s denied due to " 312 "unsupported optional features:\n", devtoname(dev)); 313 for (i = 0; 314 i < sizeof(incompat)/sizeof(struct ext2_feature); 315 i++) 316 if (mask & incompat[i].mask) 317 printf("%s ", incompat[i].name); 318 printf("\n"); 319 return (1); 320 } 321 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 322 if (!ronly && mask) { 323 printf("WARNING: R/W mount of %s denied due to " 324 "unsupported optional features:\n", devtoname(dev)); 325 for (i = 0; 326 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 327 i++) 328 if (mask & ro_compat[i].mask) 329 printf("%s ", ro_compat[i].name); 330 printf("\n"); 331 return (1); 332 } 333 } 334 return (0); 335 } 336 337 static e4fs_daddr_t 338 ext2_cg_location(struct m_ext2fs *fs, int number) 339 { 340 int cg, descpb, logical_sb, has_super = 0; 341 342 /* 343 * Adjust logical superblock block number. 344 * Godmar thinks: if the blocksize is greater than 1024, then 345 * the superblock is logically part of block zero. 346 */ 347 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1; 348 349 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 350 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 351 return (logical_sb + number + 1); 352 353 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 354 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 355 else 356 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 357 358 cg = descpb * number; 359 360 if (ext2_cg_has_sb(fs, cg)) 361 has_super = 1; 362 363 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 364 le32toh(fs->e2fs->e2fs_first_dblock)); 365 } 366 367 static int 368 ext2_cg_validate(struct m_ext2fs *fs) 369 { 370 uint64_t b_bitmap; 371 uint64_t i_bitmap; 372 uint64_t i_tables; 373 uint64_t first_block, last_block, last_cg_block; 374 struct ext2_gd *gd; 375 unsigned int i, cg_count; 376 377 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 378 last_cg_block = ext2_cg_number_gdb(fs, 0); 379 cg_count = fs->e2fs_gcount; 380 381 for (i = 0; i < fs->e2fs_gcount; i++) { 382 gd = &fs->e2fs_gd[i]; 383 384 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 385 i == fs->e2fs_gcount - 1) { 386 last_block = fs->e2fs_bcount - 1; 387 } else { 388 last_block = first_block + 389 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 390 } 391 392 if ((cg_count == fs->e2fs_gcount) && 393 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 394 cg_count = i; 395 396 b_bitmap = e2fs_gd_get_b_bitmap(gd); 397 if (b_bitmap == 0) { 398 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 399 "block bitmap is zero", i); 400 return (EINVAL); 401 402 } 403 if (b_bitmap <= last_cg_block) { 404 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 405 "block bitmap overlaps gds", i); 406 return (EINVAL); 407 } 408 if (b_bitmap < first_block || b_bitmap > last_block) { 409 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 410 "block bitmap not in group", i); 411 return (EINVAL); 412 } 413 414 i_bitmap = e2fs_gd_get_i_bitmap(gd); 415 if (i_bitmap == 0) { 416 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 417 "inode bitmap is zero", i); 418 return (EINVAL); 419 } 420 if (i_bitmap <= last_cg_block) { 421 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 422 "inode bitmap overlaps gds", i); 423 return (EINVAL); 424 } 425 if (i_bitmap < first_block || i_bitmap > last_block) { 426 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 427 "inode bitmap not in group blk", i); 428 return (EINVAL); 429 } 430 431 i_tables = e2fs_gd_get_i_tables(gd); 432 if (i_tables == 0) { 433 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 434 "inode table is zero", i); 435 return (EINVAL); 436 } 437 if (i_tables <= last_cg_block) { 438 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 439 "inode talbes overlaps gds", i); 440 return (EINVAL); 441 } 442 if (i_tables < first_block || 443 i_tables + fs->e2fs_itpg - 1 > last_block) { 444 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 445 "inode tables not in group blk", i); 446 return (EINVAL); 447 } 448 449 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 450 first_block += EXT2_BLOCKS_PER_GROUP(fs); 451 } 452 453 return (0); 454 } 455 456 /* 457 * This computes the fields of the m_ext2fs structure from the 458 * data in the ext2fs structure read in. 459 */ 460 static int 461 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 462 struct m_ext2fs *fs) 463 { 464 struct buf *bp; 465 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 466 int i, j; 467 int g_count = 0; 468 int error; 469 470 /* Check checksum features */ 471 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 472 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 473 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 474 "incorrect checksum features combination"); 475 return (EINVAL); 476 } 477 478 /* Precompute checksum seed for all metadata */ 479 ext2_sb_csum_set_seed(fs); 480 481 /* Verify sb csum if possible */ 482 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 483 error = ext2_sb_csum_verify(fs); 484 if (error) { 485 return (error); 486 } 487 } 488 489 /* Check for block size = 1K|2K|4K */ 490 if (le32toh(es->e2fs_log_bsize) > 2) { 491 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 492 "bad block size"); 493 return (EINVAL); 494 } 495 496 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 497 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 498 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 499 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 500 501 /* Check for fragment size */ 502 if (le32toh(es->e2fs_log_fsize) > 503 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 504 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 505 "invalid log cluster size"); 506 return (EINVAL); 507 } 508 509 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 510 if (fs->e2fs_fsize != fs->e2fs_bsize) { 511 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 512 "fragment size != block size"); 513 return (EINVAL); 514 } 515 516 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 517 518 /* Check reserved gdt blocks for future filesystem expansion */ 519 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 520 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 521 "number of reserved GDT blocks too large"); 522 return (EINVAL); 523 } 524 525 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 526 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 527 } else { 528 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 529 530 /* 531 * Check first ino. 532 */ 533 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 534 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 535 "invalid first ino"); 536 return (EINVAL); 537 } 538 539 /* 540 * Simple sanity check for superblock inode size value. 541 */ 542 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 543 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 544 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 545 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 546 "invalid inode size"); 547 return (EINVAL); 548 } 549 } 550 551 /* Check group descriptors */ 552 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 553 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 554 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 555 "unsupported 64bit descriptor size"); 556 return (EINVAL); 557 } 558 559 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 560 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 561 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 562 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 563 "zero blocks/fragments per group"); 564 return (EINVAL); 565 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 566 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 567 "blocks per group not equal fragments per group"); 568 return (EINVAL); 569 } 570 571 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 572 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 573 "non-standard group size unsupported"); 574 return (EINVAL); 575 } 576 577 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 578 if (fs->e2fs_ipb == 0 || 579 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 580 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 581 "bad inodes per block size"); 582 return (EINVAL); 583 } 584 585 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 586 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 587 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 588 "invalid inodes per group"); 589 return (EINVAL); 590 } 591 592 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 593 594 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 595 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 596 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 597 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 598 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 599 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 600 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 601 } 602 if (fs->e2fs_rbcount > fs->e2fs_bcount || 603 fs->e2fs_fbcount > fs->e2fs_bcount) { 604 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 605 "invalid block count"); 606 return (EINVAL); 607 } 608 609 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 610 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 611 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 612 "invalid number of free inodes"); 613 return (EINVAL); 614 } 615 616 if (le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 617 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 618 "first data block out of range"); 619 return (EINVAL); 620 } 621 622 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 623 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 624 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 625 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 626 "groups count too large"); 627 return (EINVAL); 628 } 629 630 /* Check for extra isize in big inodes. */ 631 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 632 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 633 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 634 "no space for extra inode timestamps"); 635 return (EINVAL); 636 } 637 638 /* s_resuid / s_resgid ? */ 639 640 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 641 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 642 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 643 } else { 644 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 645 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 646 fs->e2fs_bsize / sizeof(struct ext2_gd)); 647 } 648 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 649 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 650 M_EXT2MNT, M_WAITOK | M_ZERO); 651 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 652 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 653 654 for (i = 0; i < fs->e2fs_gdbcount; i++) { 655 error = bread(devvp, 656 fsbtodb(fs, ext2_cg_location(fs, i)), 657 fs->e2fs_bsize, NOCRED, &bp); 658 if (error) { 659 /* 660 * fs->e2fs_gd and fs->e2fs_contigdirs 661 * will be freed later by the caller, 662 * because this function could be called from 663 * MNT_UPDATE path. 664 */ 665 return (error); 666 } 667 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 668 memcpy(&fs->e2fs_gd[ 669 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 670 bp->b_data, fs->e2fs_bsize); 671 } else { 672 for (j = 0; j < e2fs_descpb && 673 g_count < fs->e2fs_gcount; j++, g_count++) 674 memcpy(&fs->e2fs_gd[g_count], 675 bp->b_data + j * E2FS_REV0_GD_SIZE, 676 E2FS_REV0_GD_SIZE); 677 } 678 brelse(bp); 679 bp = NULL; 680 } 681 682 /* Validate cgs consistency */ 683 error = ext2_cg_validate(fs); 684 if (error) 685 return (error); 686 687 /* Verfy cgs csum */ 688 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 689 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 690 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 691 if (error) 692 return (error); 693 } 694 /* Initialization for the ext2 Orlov allocator variant. */ 695 fs->e2fs_total_dir = 0; 696 for (i = 0; i < fs->e2fs_gcount; i++) 697 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 698 699 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 700 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 701 fs->e2fs_maxfilesize = 0x7fffffff; 702 else { 703 fs->e2fs_maxfilesize = 0xffffffffffff; 704 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 705 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 706 } 707 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 708 fs->e2fs_uhash = 3; 709 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 710 #ifdef __CHAR_UNSIGNED__ 711 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 712 fs->e2fs_uhash = 3; 713 #else 714 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 715 #endif 716 } 717 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 718 error = ext2_sb_csum_verify(fs); 719 720 return (error); 721 } 722 723 /* 724 * Reload all incore data for a filesystem (used after running fsck on 725 * the root filesystem and finding things to fix). The filesystem must 726 * be mounted read-only. 727 * 728 * Things to do to update the mount: 729 * 1) invalidate all cached meta-data. 730 * 2) re-read superblock from disk. 731 * 3) invalidate all cluster summary information. 732 * 4) invalidate all inactive vnodes. 733 * 5) invalidate all cached file data. 734 * 6) re-read inode data for all active vnodes. 735 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 736 */ 737 static int 738 ext2_reload(struct mount *mp, struct thread *td) 739 { 740 struct vnode *vp, *mvp, *devvp; 741 struct inode *ip; 742 struct buf *bp; 743 struct ext2fs *es; 744 struct m_ext2fs *fs; 745 struct csum *sump; 746 int error, i; 747 int32_t *lp; 748 749 if ((mp->mnt_flag & MNT_RDONLY) == 0) 750 return (EINVAL); 751 /* 752 * Step 1: invalidate all cached meta-data. 753 */ 754 devvp = VFSTOEXT2(mp)->um_devvp; 755 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 756 if (vinvalbuf(devvp, 0, 0, 0) != 0) 757 panic("ext2_reload: dirty1"); 758 VOP_UNLOCK(devvp); 759 760 /* 761 * Step 2: re-read superblock from disk. 762 * constants have been adjusted for ext2 763 */ 764 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 765 return (error); 766 es = (struct ext2fs *)bp->b_data; 767 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 768 brelse(bp); 769 return (EIO); /* XXX needs translation */ 770 } 771 fs = VFSTOEXT2(mp)->um_e2fs; 772 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 773 774 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 775 brelse(bp); 776 return (error); 777 } 778 #ifdef UNKLAR 779 if (fs->fs_sbsize < SBSIZE) 780 bp->b_flags |= B_INVAL; 781 #endif 782 brelse(bp); 783 784 /* 785 * Step 3: invalidate all cluster summary information. 786 */ 787 if (fs->e2fs_contigsumsize > 0) { 788 lp = fs->e2fs_maxcluster; 789 sump = fs->e2fs_clustersum; 790 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 791 *lp++ = fs->e2fs_contigsumsize; 792 sump->cs_init = 0; 793 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 794 } 795 } 796 797 loop: 798 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 799 /* 800 * Step 4: invalidate all cached file data. 801 */ 802 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 803 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 804 goto loop; 805 } 806 if (vinvalbuf(vp, 0, 0, 0)) 807 panic("ext2_reload: dirty2"); 808 809 /* 810 * Step 5: re-read inode data for all active vnodes. 811 */ 812 ip = VTOI(vp); 813 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 814 (int)fs->e2fs_bsize, NOCRED, &bp); 815 if (error) { 816 VOP_UNLOCK(vp); 817 vrele(vp); 818 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 819 return (error); 820 } 821 822 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 823 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); 824 825 brelse(bp); 826 VOP_UNLOCK(vp); 827 vrele(vp); 828 829 if (error) { 830 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 831 return (error); 832 } 833 } 834 return (0); 835 } 836 837 /* 838 * Common code for mount and mountroot. 839 */ 840 static int 841 ext2_mountfs(struct vnode *devvp, struct mount *mp) 842 { 843 struct ext2mount *ump; 844 struct buf *bp; 845 struct m_ext2fs *fs; 846 struct ext2fs *es; 847 struct cdev *dev = devvp->v_rdev; 848 struct g_consumer *cp; 849 struct bufobj *bo; 850 struct csum *sump; 851 int error; 852 int ronly; 853 int i; 854 u_long size; 855 int32_t *lp; 856 int32_t e2fs_maxcontig; 857 858 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 859 /* XXX: use VOP_ACESS to check FS perms */ 860 g_topology_lock(); 861 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 862 g_topology_unlock(); 863 VOP_UNLOCK(devvp); 864 if (error) 865 return (error); 866 867 /* XXX: should we check for some sectorsize or 512 instead? */ 868 if (((SBSIZE % cp->provider->sectorsize) != 0) || 869 (SBSIZE < cp->provider->sectorsize)) { 870 g_topology_lock(); 871 g_vfs_close(cp); 872 g_topology_unlock(); 873 return (EINVAL); 874 } 875 876 bo = &devvp->v_bufobj; 877 bo->bo_private = cp; 878 bo->bo_ops = g_vfs_bufops; 879 if (devvp->v_rdev->si_iosize_max != 0) 880 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 881 if (mp->mnt_iosize_max > MAXPHYS) 882 mp->mnt_iosize_max = MAXPHYS; 883 884 bp = NULL; 885 ump = NULL; 886 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 887 goto out; 888 es = (struct ext2fs *)bp->b_data; 889 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 890 error = EINVAL; /* XXX needs translation */ 891 goto out; 892 } 893 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 894 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 895 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 896 printf( 897 "WARNING: Filesystem was not properly dismounted\n"); 898 } else { 899 printf( 900 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 901 error = EPERM; 902 goto out; 903 } 904 } 905 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 906 907 /* 908 * I don't know whether this is the right strategy. Note that 909 * we dynamically allocate both an m_ext2fs and an ext2fs 910 * while Linux keeps the super block in a locked buffer. 911 */ 912 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 913 M_EXT2MNT, M_WAITOK | M_ZERO); 914 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 915 M_EXT2MNT, M_WAITOK); 916 mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF); 917 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 918 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 919 goto out; 920 921 /* 922 * Calculate the maximum contiguous blocks and size of cluster summary 923 * array. In FFS this is done by newfs; however, the superblock 924 * in ext2fs doesn't have these variables, so we can calculate 925 * them here. 926 */ 927 e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize); 928 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 929 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 930 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 931 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 932 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 933 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 934 lp = ump->um_e2fs->e2fs_maxcluster; 935 sump = ump->um_e2fs->e2fs_clustersum; 936 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 937 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 938 sump->cs_init = 0; 939 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 940 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 941 } 942 } 943 944 brelse(bp); 945 bp = NULL; 946 fs = ump->um_e2fs; 947 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 948 949 /* 950 * If the fs is not mounted read-only, make sure the super block is 951 * always written back on a sync(). 952 */ 953 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 954 if (ronly == 0) { 955 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 956 fs->e2fs->e2fs_state = 957 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 958 } 959 mp->mnt_data = ump; 960 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 961 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 962 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 963 MNT_ILOCK(mp); 964 mp->mnt_flag |= MNT_LOCAL; 965 MNT_IUNLOCK(mp); 966 ump->um_mountp = mp; 967 ump->um_dev = dev; 968 ump->um_devvp = devvp; 969 ump->um_bo = &devvp->v_bufobj; 970 ump->um_cp = cp; 971 972 /* 973 * Setting those two parameters allowed us to use 974 * ufs_bmap w/o changse! 975 */ 976 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 977 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 978 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 979 if (ronly == 0) 980 ext2_sbupdate(ump, MNT_WAIT); 981 /* 982 * Initialize filesystem stat information in mount struct. 983 */ 984 MNT_ILOCK(mp); 985 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | 986 MNTK_USES_BCACHE; 987 MNT_IUNLOCK(mp); 988 return (0); 989 out: 990 if (bp) 991 brelse(bp); 992 if (cp != NULL) { 993 g_topology_lock(); 994 g_vfs_close(cp); 995 g_topology_unlock(); 996 } 997 if (ump) { 998 mtx_destroy(EXT2_MTX(ump)); 999 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 1000 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 1001 free(ump->um_e2fs->e2fs, M_EXT2MNT); 1002 free(ump->um_e2fs, M_EXT2MNT); 1003 free(ump, M_EXT2MNT); 1004 mp->mnt_data = NULL; 1005 } 1006 return (error); 1007 } 1008 1009 /* 1010 * Unmount system call. 1011 */ 1012 static int 1013 ext2_unmount(struct mount *mp, int mntflags) 1014 { 1015 struct ext2mount *ump; 1016 struct m_ext2fs *fs; 1017 struct csum *sump; 1018 int error, flags, i, ronly; 1019 1020 flags = 0; 1021 if (mntflags & MNT_FORCE) { 1022 if (mp->mnt_flag & MNT_ROOTFS) 1023 return (EINVAL); 1024 flags |= FORCECLOSE; 1025 } 1026 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0) 1027 return (error); 1028 ump = VFSTOEXT2(mp); 1029 fs = ump->um_e2fs; 1030 ronly = fs->e2fs_ronly; 1031 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1032 if (fs->e2fs_wasvalid) 1033 fs->e2fs->e2fs_state = 1034 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1035 ext2_sbupdate(ump, MNT_WAIT); 1036 } 1037 1038 g_topology_lock(); 1039 g_vfs_close(ump->um_cp); 1040 g_topology_unlock(); 1041 vrele(ump->um_devvp); 1042 sump = fs->e2fs_clustersum; 1043 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1044 free(sump->cs_sum, M_EXT2MNT); 1045 free(fs->e2fs_clustersum, M_EXT2MNT); 1046 free(fs->e2fs_maxcluster, M_EXT2MNT); 1047 free(fs->e2fs_gd, M_EXT2MNT); 1048 free(fs->e2fs_contigdirs, M_EXT2MNT); 1049 free(fs->e2fs, M_EXT2MNT); 1050 free(fs, M_EXT2MNT); 1051 free(ump, M_EXT2MNT); 1052 mp->mnt_data = NULL; 1053 MNT_ILOCK(mp); 1054 mp->mnt_flag &= ~MNT_LOCAL; 1055 MNT_IUNLOCK(mp); 1056 return (error); 1057 } 1058 1059 /* 1060 * Flush out all the files in a filesystem. 1061 */ 1062 static int 1063 ext2_flushfiles(struct mount *mp, int flags, struct thread *td) 1064 { 1065 int error; 1066 1067 error = vflush(mp, 0, flags, td); 1068 return (error); 1069 } 1070 1071 /* 1072 * Get filesystem statistics. 1073 */ 1074 int 1075 ext2_statfs(struct mount *mp, struct statfs *sbp) 1076 { 1077 struct ext2mount *ump; 1078 struct m_ext2fs *fs; 1079 uint32_t overhead, overhead_per_group, ngdb; 1080 int i, ngroups; 1081 1082 ump = VFSTOEXT2(mp); 1083 fs = ump->um_e2fs; 1084 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1085 panic("ext2_statfs"); 1086 1087 /* 1088 * Compute the overhead (FS structures) 1089 */ 1090 overhead_per_group = 1091 1 /* block bitmap */ + 1092 1 /* inode bitmap */ + 1093 fs->e2fs_itpg; 1094 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1095 fs->e2fs_gcount * overhead_per_group; 1096 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1097 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1098 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1099 if (ext2_cg_has_sb(fs, i)) 1100 ngroups++; 1101 } 1102 } else { 1103 ngroups = fs->e2fs_gcount; 1104 } 1105 ngdb = fs->e2fs_gdbcount; 1106 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1107 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1108 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1109 overhead += ngroups * (1 /* superblock */ + ngdb); 1110 1111 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1112 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1113 sbp->f_blocks = fs->e2fs_bcount - overhead; 1114 sbp->f_bfree = fs->e2fs_fbcount; 1115 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1116 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1117 sbp->f_ffree = fs->e2fs_ficount; 1118 return (0); 1119 } 1120 1121 /* 1122 * Go through the disk queues to initiate sandbagged IO; 1123 * go through the inodes to write those that have been modified; 1124 * initiate the writing of the super block if it has been modified. 1125 * 1126 * Note: we are always called with the filesystem marked `MPBUSY'. 1127 */ 1128 static int 1129 ext2_sync(struct mount *mp, int waitfor) 1130 { 1131 struct vnode *mvp, *vp; 1132 struct thread *td; 1133 struct inode *ip; 1134 struct ext2mount *ump = VFSTOEXT2(mp); 1135 struct m_ext2fs *fs; 1136 int error, allerror = 0; 1137 1138 td = curthread; 1139 fs = ump->um_e2fs; 1140 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1141 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1142 } 1143 1144 /* 1145 * Write back each (modified) inode. 1146 */ 1147 loop: 1148 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1149 if (vp->v_type == VNON) { 1150 VI_UNLOCK(vp); 1151 continue; 1152 } 1153 ip = VTOI(vp); 1154 if ((ip->i_flag & 1155 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1156 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 1157 waitfor == MNT_LAZY)) { 1158 VI_UNLOCK(vp); 1159 continue; 1160 } 1161 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 1162 if (error) { 1163 if (error == ENOENT) { 1164 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1165 goto loop; 1166 } 1167 continue; 1168 } 1169 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 1170 allerror = error; 1171 VOP_UNLOCK(vp); 1172 vrele(vp); 1173 } 1174 1175 /* 1176 * Force stale filesystem control information to be flushed. 1177 */ 1178 if (waitfor != MNT_LAZY) { 1179 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1180 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 1181 allerror = error; 1182 VOP_UNLOCK(ump->um_devvp); 1183 } 1184 1185 /* 1186 * Write back modified superblock. 1187 */ 1188 if (fs->e2fs_fmod != 0) { 1189 fs->e2fs_fmod = 0; 1190 fs->e2fs->e2fs_wtime = htole32(time_second); 1191 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1192 allerror = error; 1193 } 1194 return (allerror); 1195 } 1196 1197 /* 1198 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1199 * in from disk. If it is in core, wait for the lock bit to clear, then 1200 * return the inode locked. Detection and handling of mount points must be 1201 * done by the calling routine. 1202 */ 1203 static int 1204 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) 1205 { 1206 struct m_ext2fs *fs; 1207 struct inode *ip; 1208 struct ext2mount *ump; 1209 struct buf *bp; 1210 struct vnode *vp; 1211 struct thread *td; 1212 unsigned int i, used_blocks; 1213 int error; 1214 1215 td = curthread; 1216 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); 1217 if (error || *vpp != NULL) 1218 return (error); 1219 1220 ump = VFSTOEXT2(mp); 1221 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1222 1223 /* Allocate a new vnode/inode. */ 1224 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 1225 *vpp = NULL; 1226 free(ip, M_EXT2NODE); 1227 return (error); 1228 } 1229 vp->v_data = ip; 1230 ip->i_vnode = vp; 1231 ip->i_e2fs = fs = ump->um_e2fs; 1232 ip->i_ump = ump; 1233 ip->i_number = ino; 1234 1235 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1236 error = insmntque(vp, mp); 1237 if (error != 0) { 1238 free(ip, M_EXT2NODE); 1239 *vpp = NULL; 1240 return (error); 1241 } 1242 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); 1243 if (error || *vpp != NULL) 1244 return (error); 1245 1246 /* Read in the disk contents for the inode, copy into the inode. */ 1247 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1248 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { 1249 /* 1250 * The inode does not contain anything useful, so it would 1251 * be misleading to leave it on its hash chain. With mode 1252 * still zero, it will be unlinked and returned to the free 1253 * list by vput(). 1254 */ 1255 brelse(bp); 1256 vput(vp); 1257 *vpp = NULL; 1258 return (error); 1259 } 1260 /* convert ext2 inode to dinode */ 1261 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1262 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1263 if (error) { 1264 brelse(bp); 1265 vput(vp); 1266 *vpp = NULL; 1267 return (error); 1268 } 1269 ip->i_block_group = ino_to_cg(fs, ino); 1270 ip->i_next_alloc_block = 0; 1271 ip->i_next_alloc_goal = 0; 1272 1273 /* 1274 * Now we want to make sure that block pointers for unused 1275 * blocks are zeroed out - ext2_balloc depends on this 1276 * although for regular files and directories only 1277 * 1278 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1279 * out because we could corrupt the extent tree. 1280 */ 1281 if (!(ip->i_flag & IN_E4EXTENTS) && 1282 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1283 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1284 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1285 ip->i_db[i] = 0; 1286 } 1287 #ifdef EXT2FS_PRINT_EXTENTS 1288 ext2_print_inode(ip); 1289 ext4_ext_print_extent_tree_status(ip); 1290 #endif 1291 bqrelse(bp); 1292 1293 /* 1294 * Initialize the vnode from the inode, check for aliases. 1295 * Note that the underlying vnode may have changed. 1296 */ 1297 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1298 vput(vp); 1299 *vpp = NULL; 1300 return (error); 1301 } 1302 1303 /* 1304 * Finish inode initialization. 1305 */ 1306 1307 *vpp = vp; 1308 return (0); 1309 } 1310 1311 /* 1312 * File handle to vnode 1313 * 1314 * Have to be really careful about stale file handles: 1315 * - check that the inode number is valid 1316 * - call ext2_vget() to get the locked inode 1317 * - check for an unallocated inode (i_mode == 0) 1318 * - check that the given client host has export rights and return 1319 * those rights via. exflagsp and credanonp 1320 */ 1321 static int 1322 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) 1323 { 1324 struct inode *ip; 1325 struct ufid *ufhp; 1326 struct vnode *nvp; 1327 struct m_ext2fs *fs; 1328 int error; 1329 1330 ufhp = (struct ufid *)fhp; 1331 fs = VFSTOEXT2(mp)->um_e2fs; 1332 if (ufhp->ufid_ino < EXT2_ROOTINO || 1333 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1334 return (ESTALE); 1335 1336 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1337 if (error) { 1338 *vpp = NULLVP; 1339 return (error); 1340 } 1341 ip = VTOI(nvp); 1342 if (ip->i_mode == 0 || 1343 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1344 vput(nvp); 1345 *vpp = NULLVP; 1346 return (ESTALE); 1347 } 1348 *vpp = nvp; 1349 vnode_create_vobject(*vpp, 0, curthread); 1350 return (0); 1351 } 1352 1353 /* 1354 * Write a superblock and associated information back to disk. 1355 */ 1356 static int 1357 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1358 { 1359 struct m_ext2fs *fs = mp->um_e2fs; 1360 struct ext2fs *es = fs->e2fs; 1361 struct buf *bp; 1362 int error = 0; 1363 1364 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1365 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1366 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1367 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1368 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1369 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1370 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1371 } 1372 1373 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1374 1375 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1376 ext2_sb_csum_set(fs); 1377 1378 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1379 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); 1380 if (waitfor == MNT_WAIT) 1381 error = bwrite(bp); 1382 else 1383 bawrite(bp); 1384 1385 /* 1386 * The buffers for group descriptors, inode bitmaps and block bitmaps 1387 * are not busy at this point and are (hopefully) written by the 1388 * usual sync mechanism. No need to write them here. 1389 */ 1390 return (error); 1391 } 1392 int 1393 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1394 { 1395 struct m_ext2fs *fs = mp->um_e2fs; 1396 struct buf *bp; 1397 int i, j, g_count = 0, error = 0, allerror = 0; 1398 1399 allerror = ext2_sbupdate(mp, waitfor); 1400 1401 /* Update gd csums */ 1402 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1403 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1404 ext2_gd_csum_set(fs); 1405 1406 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1407 bp = getblk(mp->um_devvp, fsbtodb(fs, 1408 ext2_cg_location(fs, i)), 1409 fs->e2fs_bsize, 0, 0, 0); 1410 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1411 memcpy(bp->b_data, &fs->e2fs_gd[ 1412 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1413 fs->e2fs_bsize); 1414 } else { 1415 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1416 g_count < fs->e2fs_gcount; j++, g_count++) 1417 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1418 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1419 } 1420 if (waitfor == MNT_WAIT) 1421 error = bwrite(bp); 1422 else 1423 bawrite(bp); 1424 } 1425 1426 if (!allerror && error) 1427 allerror = error; 1428 return (allerror); 1429 } 1430 1431 /* 1432 * Return the root of a filesystem. 1433 */ 1434 static int 1435 ext2_root(struct mount *mp, int flags, struct vnode **vpp) 1436 { 1437 struct vnode *nvp; 1438 int error; 1439 1440 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp); 1441 if (error) 1442 return (error); 1443 *vpp = nvp; 1444 return (0); 1445 } 1446