1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/namei.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/mount.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/conf.h> 49 #include <sys/endian.h> 50 #include <sys/fcntl.h> 51 #include <sys/malloc.h> 52 #include <sys/sdt.h> 53 #include <sys/stat.h> 54 #include <sys/mutex.h> 55 56 #include <geom/geom.h> 57 #include <geom/geom_vfs.h> 58 59 #include <fs/ext2fs/fs.h> 60 #include <fs/ext2fs/ext2_mount.h> 61 #include <fs/ext2fs/inode.h> 62 63 #include <fs/ext2fs/ext2fs.h> 64 #include <fs/ext2fs/ext2_dinode.h> 65 #include <fs/ext2fs/ext2_extern.h> 66 #include <fs/ext2fs/ext2_extents.h> 67 68 SDT_PROVIDER_DECLARE(ext2fs); 69 /* 70 * ext2fs trace probe: 71 * arg0: verbosity. Higher numbers give more verbose messages 72 * arg1: Textual message 73 */ 74 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 75 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 76 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 77 78 static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 79 static int ext2_mountfs(struct vnode *, struct mount *); 80 static int ext2_reload(struct mount *mp, struct thread *td); 81 static int ext2_sbupdate(struct ext2mount *, int); 82 static int ext2_cgupdate(struct ext2mount *, int); 83 static vfs_unmount_t ext2_unmount; 84 static vfs_root_t ext2_root; 85 static vfs_statfs_t ext2_statfs; 86 static vfs_sync_t ext2_sync; 87 static vfs_vget_t ext2_vget; 88 static vfs_fhtovp_t ext2_fhtovp; 89 static vfs_mount_t ext2_mount; 90 91 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 92 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 93 94 static struct vfsops ext2fs_vfsops = { 95 .vfs_fhtovp = ext2_fhtovp, 96 .vfs_mount = ext2_mount, 97 .vfs_root = ext2_root, /* root inode via vget */ 98 .vfs_statfs = ext2_statfs, 99 .vfs_sync = ext2_sync, 100 .vfs_unmount = ext2_unmount, 101 .vfs_vget = ext2_vget, 102 }; 103 104 VFS_SET(ext2fs_vfsops, ext2fs, 0); 105 106 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 107 int ronly); 108 static int ext2_compute_sb_data(struct vnode * devvp, 109 struct ext2fs * es, struct m_ext2fs * fs); 110 111 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", 112 "noclusterw", "noexec", "export", "force", "from", "multilabel", 113 "suiddir", "nosymfollow", "sync", "union", NULL }; 114 115 /* 116 * VFS Operations. 117 * 118 * mount system call 119 */ 120 static int 121 ext2_mount(struct mount *mp) 122 { 123 struct vfsoptlist *opts; 124 struct vnode *devvp; 125 struct thread *td; 126 struct ext2mount *ump = NULL; 127 struct m_ext2fs *fs; 128 struct nameidata nd, *ndp = &nd; 129 accmode_t accmode; 130 char *path, *fspec; 131 int error, flags, len; 132 133 td = curthread; 134 opts = mp->mnt_optnew; 135 136 if (vfs_filteropt(opts, ext2_opts)) 137 return (EINVAL); 138 139 vfs_getopt(opts, "fspath", (void **)&path, NULL); 140 /* Double-check the length of path.. */ 141 if (strlen(path) >= MAXMNTLEN) 142 return (ENAMETOOLONG); 143 144 fspec = NULL; 145 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 146 if (!error && fspec[len - 1] != '\0') 147 return (EINVAL); 148 149 /* 150 * If updating, check whether changing from read-only to 151 * read/write; if there is no device name, that's all we do. 152 */ 153 if (mp->mnt_flag & MNT_UPDATE) { 154 ump = VFSTOEXT2(mp); 155 fs = ump->um_e2fs; 156 error = 0; 157 if (fs->e2fs_ronly == 0 && 158 vfs_flagopt(opts, "ro", NULL, 0)) { 159 error = VFS_SYNC(mp, MNT_WAIT); 160 if (error) 161 return (error); 162 flags = WRITECLOSE; 163 if (mp->mnt_flag & MNT_FORCE) 164 flags |= FORCECLOSE; 165 error = ext2_flushfiles(mp, flags, td); 166 if (error == 0 && fs->e2fs_wasvalid && 167 ext2_cgupdate(ump, MNT_WAIT) == 0) { 168 fs->e2fs->e2fs_state = 169 htole16((le16toh(fs->e2fs->e2fs_state) | 170 E2FS_ISCLEAN)); 171 ext2_sbupdate(ump, MNT_WAIT); 172 } 173 fs->e2fs_ronly = 1; 174 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 175 g_topology_lock(); 176 g_access(ump->um_cp, 0, -1, 0); 177 g_topology_unlock(); 178 } 179 if (!error && (mp->mnt_flag & MNT_RELOAD)) 180 error = ext2_reload(mp, td); 181 if (error) 182 return (error); 183 devvp = ump->um_devvp; 184 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) { 185 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 186 return (EPERM); 187 188 /* 189 * If upgrade to read-write by non-root, then verify 190 * that user has necessary permissions on the device. 191 */ 192 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 193 error = VOP_ACCESS(devvp, VREAD | VWRITE, 194 td->td_ucred, td); 195 if (error) 196 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 197 if (error) { 198 VOP_UNLOCK(devvp); 199 return (error); 200 } 201 VOP_UNLOCK(devvp); 202 g_topology_lock(); 203 error = g_access(ump->um_cp, 0, 1, 0); 204 g_topology_unlock(); 205 if (error) 206 return (error); 207 208 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 209 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 210 if (mp->mnt_flag & MNT_FORCE) { 211 printf( 212 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 213 } else { 214 printf( 215 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 216 fs->e2fs_fsmnt); 217 return (EPERM); 218 } 219 } 220 fs->e2fs->e2fs_state = 221 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 222 (void)ext2_cgupdate(ump, MNT_WAIT); 223 fs->e2fs_ronly = 0; 224 MNT_ILOCK(mp); 225 mp->mnt_flag &= ~MNT_RDONLY; 226 MNT_IUNLOCK(mp); 227 } 228 if (vfs_flagopt(opts, "export", NULL, 0)) { 229 /* Process export requests in vfs_mount.c. */ 230 return (error); 231 } 232 } 233 234 /* 235 * Not an update, or updating the name: look up the name 236 * and verify that it refers to a sensible disk device. 237 */ 238 if (fspec == NULL) 239 return (EINVAL); 240 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec); 241 if ((error = namei(ndp)) != 0) 242 return (error); 243 NDFREE_PNBUF(ndp); 244 devvp = ndp->ni_vp; 245 246 if (!vn_isdisk_error(devvp, &error)) { 247 vput(devvp); 248 return (error); 249 } 250 251 /* 252 * If mount by non-root, then verify that user has necessary 253 * permissions on the device. 254 * 255 * XXXRW: VOP_ACCESS() enough? 256 */ 257 accmode = VREAD; 258 if ((mp->mnt_flag & MNT_RDONLY) == 0) 259 accmode |= VWRITE; 260 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); 261 if (error) 262 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 263 if (error) { 264 vput(devvp); 265 return (error); 266 } 267 268 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 269 error = ext2_mountfs(devvp, mp); 270 } else { 271 if (devvp != ump->um_devvp) { 272 vput(devvp); 273 return (EINVAL); /* needs translation */ 274 } else 275 vput(devvp); 276 } 277 if (error) { 278 vrele(devvp); 279 return (error); 280 } 281 ump = VFSTOEXT2(mp); 282 fs = ump->um_e2fs; 283 284 /* 285 * Note that this strncpy() is ok because of a check at the start 286 * of ext2_mount(). 287 */ 288 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN); 289 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0'; 290 vfs_mountedfrom(mp, fspec); 291 return (0); 292 } 293 294 static int 295 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 296 { 297 uint32_t i, mask; 298 299 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 300 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 301 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 302 return (1); 303 } 304 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 305 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 306 if (mask) { 307 printf("WARNING: mount of %s denied due to " 308 "unsupported optional features:\n", devtoname(dev)); 309 for (i = 0; 310 i < sizeof(incompat)/sizeof(struct ext2_feature); 311 i++) 312 if (mask & incompat[i].mask) 313 printf("%s ", incompat[i].name); 314 printf("\n"); 315 return (1); 316 } 317 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 318 if (!ronly && mask) { 319 printf("WARNING: R/W mount of %s denied due to " 320 "unsupported optional features:\n", devtoname(dev)); 321 for (i = 0; 322 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 323 i++) 324 if (mask & ro_compat[i].mask) 325 printf("%s ", ro_compat[i].name); 326 printf("\n"); 327 return (1); 328 } 329 } 330 return (0); 331 } 332 333 static e4fs_daddr_t 334 ext2_cg_location(struct m_ext2fs *fs, int number) 335 { 336 int cg, descpb, logical_sb, has_super = 0; 337 338 /* 339 * Adjust logical superblock block number. 340 * Godmar thinks: if the blocksize is greater than 1024, then 341 * the superblock is logically part of block zero. 342 */ 343 logical_sb = fs->e2fs_bsize > SBLOCKSIZE ? 0 : 1; 344 345 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 346 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 347 return (logical_sb + number + 1); 348 349 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 350 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 351 else 352 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 353 354 cg = descpb * number; 355 356 if (ext2_cg_has_sb(fs, cg)) 357 has_super = 1; 358 359 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 360 le32toh(fs->e2fs->e2fs_first_dblock)); 361 } 362 363 static int 364 ext2_cg_validate(struct m_ext2fs *fs) 365 { 366 uint64_t b_bitmap; 367 uint64_t i_bitmap; 368 uint64_t i_tables; 369 uint64_t first_block, last_block, last_cg_block; 370 struct ext2_gd *gd; 371 unsigned int i, cg_count; 372 373 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 374 last_cg_block = ext2_cg_number_gdb(fs, 0); 375 cg_count = fs->e2fs_gcount; 376 377 for (i = 0; i < fs->e2fs_gcount; i++) { 378 gd = &fs->e2fs_gd[i]; 379 380 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 381 i == fs->e2fs_gcount - 1) { 382 last_block = fs->e2fs_bcount - 1; 383 } else { 384 last_block = first_block + 385 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 386 } 387 388 if ((cg_count == fs->e2fs_gcount) && 389 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 390 cg_count = i; 391 392 b_bitmap = e2fs_gd_get_b_bitmap(gd); 393 if (b_bitmap == 0) { 394 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 395 "block bitmap is zero", i); 396 return (EINVAL); 397 } 398 if (b_bitmap <= last_cg_block) { 399 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 400 "block bitmap overlaps gds", i); 401 return (EINVAL); 402 } 403 if (b_bitmap < first_block || b_bitmap > last_block) { 404 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 405 "block bitmap not in group", i); 406 return (EINVAL); 407 } 408 409 i_bitmap = e2fs_gd_get_i_bitmap(gd); 410 if (i_bitmap == 0) { 411 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 412 "inode bitmap is zero", i); 413 return (EINVAL); 414 } 415 if (i_bitmap <= last_cg_block) { 416 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 417 "inode bitmap overlaps gds", i); 418 return (EINVAL); 419 } 420 if (i_bitmap < first_block || i_bitmap > last_block) { 421 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 422 "inode bitmap not in group blk", i); 423 return (EINVAL); 424 } 425 426 i_tables = e2fs_gd_get_i_tables(gd); 427 if (i_tables == 0) { 428 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 429 "inode table is zero", i); 430 return (EINVAL); 431 } 432 if (i_tables <= last_cg_block) { 433 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 434 "inode tables overlaps gds", i); 435 return (EINVAL); 436 } 437 if (i_tables < first_block || 438 i_tables + fs->e2fs_itpg - 1 > last_block) { 439 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 440 "inode tables not in group blk", i); 441 return (EINVAL); 442 } 443 444 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 445 first_block += EXT2_BLOCKS_PER_GROUP(fs); 446 } 447 448 return (0); 449 } 450 451 /* 452 * This computes the fields of the m_ext2fs structure from the 453 * data in the ext2fs structure read in. 454 */ 455 static int 456 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 457 struct m_ext2fs *fs) 458 { 459 struct buf *bp; 460 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 461 int i, j; 462 int g_count = 0; 463 int error; 464 465 /* Check checksum features */ 466 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 467 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 468 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 469 "incorrect checksum features combination"); 470 return (EINVAL); 471 } 472 473 /* Precompute checksum seed for all metadata */ 474 ext2_sb_csum_set_seed(fs); 475 476 /* Verify sb csum if possible */ 477 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 478 error = ext2_sb_csum_verify(fs); 479 if (error) { 480 return (error); 481 } 482 } 483 484 /* Check for block size = 1K|2K|4K */ 485 if (le32toh(es->e2fs_log_bsize) > 2) { 486 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 487 "bad block size"); 488 return (EINVAL); 489 } 490 491 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 492 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 493 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 494 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 495 496 /* Check for fragment size */ 497 if (le32toh(es->e2fs_log_fsize) > 498 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 499 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 500 "invalid log cluster size"); 501 return (EINVAL); 502 } 503 504 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 505 if (fs->e2fs_fsize != fs->e2fs_bsize) { 506 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 507 "fragment size != block size"); 508 return (EINVAL); 509 } 510 511 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 512 513 /* Check reserved gdt blocks for future filesystem expansion */ 514 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 515 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 516 "number of reserved GDT blocks too large"); 517 return (EINVAL); 518 } 519 520 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 521 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 522 } else { 523 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 524 525 /* 526 * Check first ino. 527 */ 528 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 529 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 530 "invalid first ino"); 531 return (EINVAL); 532 } 533 534 /* 535 * Simple sanity check for superblock inode size value. 536 */ 537 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 538 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 539 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 540 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 541 "invalid inode size"); 542 return (EINVAL); 543 } 544 } 545 546 /* Check group descriptors */ 547 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 548 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 549 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 550 "unsupported 64bit descriptor size"); 551 return (EINVAL); 552 } 553 554 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 555 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 556 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 557 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 558 "zero blocks/fragments per group"); 559 return (EINVAL); 560 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 561 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 562 "blocks per group not equal fragments per group"); 563 return (EINVAL); 564 } 565 566 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 567 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 568 "non-standard group size unsupported"); 569 return (EINVAL); 570 } 571 572 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 573 if (fs->e2fs_ipb == 0 || 574 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 575 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 576 "bad inodes per block size"); 577 return (EINVAL); 578 } 579 580 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 581 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 582 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 583 "invalid inodes per group"); 584 return (EINVAL); 585 } 586 587 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 588 589 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 590 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 591 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 592 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 593 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 594 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 595 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 596 } 597 if (fs->e2fs_rbcount > fs->e2fs_bcount || 598 fs->e2fs_fbcount > fs->e2fs_bcount) { 599 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 600 "invalid block count"); 601 return (EINVAL); 602 } 603 604 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 605 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 606 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 607 "invalid number of free inodes"); 608 return (EINVAL); 609 } 610 611 if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) || 612 le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 613 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 614 "first data block out of range"); 615 return (EINVAL); 616 } 617 618 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 619 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 620 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 621 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 622 "groups count too large"); 623 return (EINVAL); 624 } 625 626 /* Check for extra isize in big inodes. */ 627 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 628 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 629 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 630 "no space for extra inode timestamps"); 631 return (EINVAL); 632 } 633 634 /* s_resuid / s_resgid ? */ 635 636 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 637 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 638 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 639 } else { 640 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 641 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 642 fs->e2fs_bsize / sizeof(struct ext2_gd)); 643 } 644 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 645 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 646 M_EXT2MNT, M_WAITOK | M_ZERO); 647 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 648 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 649 650 for (i = 0; i < fs->e2fs_gdbcount; i++) { 651 error = bread(devvp, 652 fsbtodb(fs, ext2_cg_location(fs, i)), 653 fs->e2fs_bsize, NOCRED, &bp); 654 if (error) { 655 /* 656 * fs->e2fs_gd and fs->e2fs_contigdirs 657 * will be freed later by the caller, 658 * because this function could be called from 659 * MNT_UPDATE path. 660 */ 661 return (error); 662 } 663 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 664 memcpy(&fs->e2fs_gd[ 665 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 666 bp->b_data, fs->e2fs_bsize); 667 } else { 668 for (j = 0; j < e2fs_descpb && 669 g_count < fs->e2fs_gcount; j++, g_count++) 670 memcpy(&fs->e2fs_gd[g_count], 671 bp->b_data + j * E2FS_REV0_GD_SIZE, 672 E2FS_REV0_GD_SIZE); 673 } 674 brelse(bp); 675 bp = NULL; 676 } 677 678 /* Validate cgs consistency */ 679 error = ext2_cg_validate(fs); 680 if (error) 681 return (error); 682 683 /* Verfy cgs csum */ 684 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 685 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 686 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 687 if (error) 688 return (error); 689 } 690 /* Initialization for the ext2 Orlov allocator variant. */ 691 fs->e2fs_total_dir = 0; 692 for (i = 0; i < fs->e2fs_gcount; i++) 693 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 694 695 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 696 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 697 fs->e2fs_maxfilesize = 0x7fffffff; 698 else { 699 fs->e2fs_maxfilesize = 0xffffffffffff; 700 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 701 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 702 } 703 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 704 fs->e2fs_uhash = 3; 705 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 706 #ifdef __CHAR_UNSIGNED__ 707 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 708 fs->e2fs_uhash = 3; 709 #else 710 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 711 #endif 712 } 713 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 714 error = ext2_sb_csum_verify(fs); 715 716 return (error); 717 } 718 719 /* 720 * Reload all incore data for a filesystem (used after running fsck on 721 * the root filesystem and finding things to fix). The filesystem must 722 * be mounted read-only. 723 * 724 * Things to do to update the mount: 725 * 1) invalidate all cached meta-data. 726 * 2) re-read superblock from disk. 727 * 3) invalidate all cluster summary information. 728 * 4) invalidate all inactive vnodes. 729 * 5) invalidate all cached file data. 730 * 6) re-read inode data for all active vnodes. 731 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 732 */ 733 static int 734 ext2_reload(struct mount *mp, struct thread *td) 735 { 736 struct vnode *vp, *mvp, *devvp; 737 struct inode *ip; 738 struct buf *bp; 739 struct ext2fs *es; 740 struct m_ext2fs *fs; 741 struct csum *sump; 742 int error, i; 743 int32_t *lp; 744 745 if ((mp->mnt_flag & MNT_RDONLY) == 0) 746 return (EINVAL); 747 /* 748 * Step 1: invalidate all cached meta-data. 749 */ 750 devvp = VFSTOEXT2(mp)->um_devvp; 751 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 752 if (vinvalbuf(devvp, 0, 0, 0) != 0) 753 panic("ext2_reload: dirty1"); 754 VOP_UNLOCK(devvp); 755 756 /* 757 * Step 2: re-read superblock from disk. 758 * constants have been adjusted for ext2 759 */ 760 if ((error = bread(devvp, SBLOCK, SBLOCKBLKSIZE, NOCRED, &bp)) != 0) 761 return (error); 762 es = (struct ext2fs *)((char *)bp->b_data + SBLOCKOFFSET); 763 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 764 brelse(bp); 765 return (EIO); /* XXX needs translation */ 766 } 767 fs = VFSTOEXT2(mp)->um_e2fs; 768 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 769 770 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 771 brelse(bp); 772 return (error); 773 } 774 775 brelse(bp); 776 777 /* 778 * Step 3: invalidate all cluster summary information. 779 */ 780 if (fs->e2fs_contigsumsize > 0) { 781 lp = fs->e2fs_maxcluster; 782 sump = fs->e2fs_clustersum; 783 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 784 *lp++ = fs->e2fs_contigsumsize; 785 sump->cs_init = 0; 786 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 787 } 788 } 789 790 loop: 791 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 792 /* 793 * Step 4: invalidate all cached file data. 794 */ 795 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { 796 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 797 goto loop; 798 } 799 if (vinvalbuf(vp, 0, 0, 0)) 800 panic("ext2_reload: dirty2"); 801 802 /* 803 * Step 5: re-read inode data for all active vnodes. 804 */ 805 ip = VTOI(vp); 806 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 807 (int)fs->e2fs_bsize, NOCRED, &bp); 808 if (error) { 809 vput(vp); 810 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 811 return (error); 812 } 813 814 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 815 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); 816 817 brelse(bp); 818 vput(vp); 819 820 if (error) { 821 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 822 return (error); 823 } 824 } 825 return (0); 826 } 827 828 /* 829 * Common code for mount and mountroot. 830 */ 831 static int 832 ext2_mountfs(struct vnode *devvp, struct mount *mp) 833 { 834 struct ext2mount *ump; 835 struct buf *bp; 836 struct m_ext2fs *fs; 837 struct ext2fs *es; 838 struct cdev *dev = devvp->v_rdev; 839 struct g_consumer *cp; 840 struct bufobj *bo; 841 struct csum *sump; 842 int error; 843 int ronly; 844 int i; 845 u_long size; 846 int32_t *lp; 847 int32_t e2fs_maxcontig; 848 849 bp = NULL; 850 ump = NULL; 851 852 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 853 /* XXX: use VOP_ACESS to check FS perms */ 854 g_topology_lock(); 855 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 856 g_topology_unlock(); 857 VOP_UNLOCK(devvp); 858 if (error) 859 return (error); 860 861 if (PAGE_SIZE != SBLOCKBLKSIZE) { 862 printf("WARNING: Unsupported page size %d\n", PAGE_SIZE); 863 error = EINVAL; 864 goto out; 865 } 866 if (cp->provider->sectorsize > PAGE_SIZE) { 867 printf("WARNING: Device sectorsize(%d) is more than %d\n", 868 cp->provider->sectorsize, PAGE_SIZE); 869 error = EINVAL; 870 goto out; 871 } 872 873 bo = &devvp->v_bufobj; 874 bo->bo_private = cp; 875 bo->bo_ops = g_vfs_bufops; 876 if (devvp->v_rdev->si_iosize_max != 0) 877 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 878 if (mp->mnt_iosize_max > maxphys) 879 mp->mnt_iosize_max = maxphys; 880 if ((error = bread(devvp, SBLOCK, SBLOCKBLKSIZE, NOCRED, &bp)) != 0) 881 goto out; 882 es = (struct ext2fs *)((char *)bp->b_data + SBLOCKOFFSET); 883 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 884 error = EINVAL; /* XXX needs translation */ 885 goto out; 886 } 887 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 888 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 889 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 890 printf( 891 "WARNING: Filesystem was not properly dismounted\n"); 892 } else { 893 printf( 894 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 895 error = EPERM; 896 goto out; 897 } 898 } 899 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 900 901 /* 902 * I don't know whether this is the right strategy. Note that 903 * we dynamically allocate both an m_ext2fs and an ext2fs 904 * while Linux keeps the super block in a locked buffer. 905 */ 906 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 907 M_EXT2MNT, M_WAITOK | M_ZERO); 908 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 909 M_EXT2MNT, M_WAITOK); 910 mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF); 911 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 912 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 913 goto out; 914 915 /* 916 * Calculate the maximum contiguous blocks and size of cluster summary 917 * array. In FFS this is done by newfs; however, the superblock 918 * in ext2fs doesn't have these variables, so we can calculate 919 * them here. 920 */ 921 e2fs_maxcontig = MAX(1, maxphys / ump->um_e2fs->e2fs_bsize); 922 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 923 ump->um_e2fs->e2fs_maxsymlinklen = EXT2_MAXSYMLINKLEN; 924 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 925 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 926 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 927 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 928 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 929 lp = ump->um_e2fs->e2fs_maxcluster; 930 sump = ump->um_e2fs->e2fs_clustersum; 931 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 932 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 933 sump->cs_init = 0; 934 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 935 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 936 } 937 } 938 939 brelse(bp); 940 bp = NULL; 941 fs = ump->um_e2fs; 942 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 943 944 /* 945 * If the fs is not mounted read-only, make sure the super block is 946 * always written back on a sync(). 947 */ 948 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 949 if (ronly == 0) { 950 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 951 fs->e2fs->e2fs_state = 952 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 953 } 954 mp->mnt_data = ump; 955 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 956 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 957 MNT_ILOCK(mp); 958 mp->mnt_flag |= MNT_LOCAL; 959 MNT_IUNLOCK(mp); 960 ump->um_mountp = mp; 961 ump->um_dev = dev; 962 ump->um_devvp = devvp; 963 ump->um_bo = &devvp->v_bufobj; 964 ump->um_cp = cp; 965 966 /* 967 * Setting those two parameters allowed us to use 968 * ufs_bmap w/o changse! 969 */ 970 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 971 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 972 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 973 if (ronly == 0) 974 ext2_sbupdate(ump, MNT_WAIT); 975 /* 976 * Initialize filesystem stat information in mount struct. 977 */ 978 MNT_ILOCK(mp); 979 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | 980 MNTK_USES_BCACHE; 981 MNT_IUNLOCK(mp); 982 return (0); 983 out: 984 if (bp) 985 brelse(bp); 986 if (cp != NULL) { 987 g_topology_lock(); 988 g_vfs_close(cp); 989 g_topology_unlock(); 990 } 991 if (ump) { 992 mtx_destroy(EXT2_MTX(ump)); 993 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 994 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 995 free(ump->um_e2fs->e2fs, M_EXT2MNT); 996 free(ump->um_e2fs, M_EXT2MNT); 997 free(ump, M_EXT2MNT); 998 mp->mnt_data = NULL; 999 } 1000 return (error); 1001 } 1002 1003 /* 1004 * Unmount system call. 1005 */ 1006 static int 1007 ext2_unmount(struct mount *mp, int mntflags) 1008 { 1009 struct ext2mount *ump; 1010 struct m_ext2fs *fs; 1011 struct csum *sump; 1012 int error, flags, i, ronly; 1013 1014 flags = 0; 1015 if (mntflags & MNT_FORCE) { 1016 if (mp->mnt_flag & MNT_ROOTFS) 1017 return (EINVAL); 1018 flags |= FORCECLOSE; 1019 } 1020 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0) 1021 return (error); 1022 ump = VFSTOEXT2(mp); 1023 fs = ump->um_e2fs; 1024 ronly = fs->e2fs_ronly; 1025 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1026 if (fs->e2fs_wasvalid) 1027 fs->e2fs->e2fs_state = 1028 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1029 ext2_sbupdate(ump, MNT_WAIT); 1030 } 1031 1032 g_topology_lock(); 1033 g_vfs_close(ump->um_cp); 1034 g_topology_unlock(); 1035 vrele(ump->um_devvp); 1036 sump = fs->e2fs_clustersum; 1037 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1038 free(sump->cs_sum, M_EXT2MNT); 1039 free(fs->e2fs_clustersum, M_EXT2MNT); 1040 free(fs->e2fs_maxcluster, M_EXT2MNT); 1041 free(fs->e2fs_gd, M_EXT2MNT); 1042 free(fs->e2fs_contigdirs, M_EXT2MNT); 1043 free(fs->e2fs, M_EXT2MNT); 1044 free(fs, M_EXT2MNT); 1045 free(ump, M_EXT2MNT); 1046 mp->mnt_data = NULL; 1047 return (error); 1048 } 1049 1050 /* 1051 * Flush out all the files in a filesystem. 1052 */ 1053 static int 1054 ext2_flushfiles(struct mount *mp, int flags, struct thread *td) 1055 { 1056 int error; 1057 1058 error = vflush(mp, 0, flags, td); 1059 return (error); 1060 } 1061 1062 /* 1063 * Get filesystem statistics. 1064 */ 1065 int 1066 ext2_statfs(struct mount *mp, struct statfs *sbp) 1067 { 1068 struct ext2mount *ump; 1069 struct m_ext2fs *fs; 1070 uint32_t overhead, overhead_per_group, ngdb; 1071 int i, ngroups; 1072 1073 ump = VFSTOEXT2(mp); 1074 fs = ump->um_e2fs; 1075 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1076 panic("ext2_statfs"); 1077 1078 /* 1079 * Compute the overhead (FS structures) 1080 */ 1081 overhead_per_group = 1082 1 /* block bitmap */ + 1083 1 /* inode bitmap */ + 1084 fs->e2fs_itpg; 1085 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1086 fs->e2fs_gcount * overhead_per_group; 1087 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1088 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1089 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1090 if (ext2_cg_has_sb(fs, i)) 1091 ngroups++; 1092 } 1093 } else { 1094 ngroups = fs->e2fs_gcount; 1095 } 1096 ngdb = fs->e2fs_gdbcount; 1097 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1098 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1099 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1100 overhead += ngroups * (1 /* superblock */ + ngdb); 1101 1102 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1103 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1104 sbp->f_blocks = fs->e2fs_bcount - overhead; 1105 sbp->f_bfree = fs->e2fs_fbcount; 1106 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1107 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1108 sbp->f_ffree = fs->e2fs_ficount; 1109 return (0); 1110 } 1111 1112 /* 1113 * Go through the disk queues to initiate sandbagged IO; 1114 * go through the inodes to write those that have been modified; 1115 * initiate the writing of the super block if it has been modified. 1116 * 1117 * Note: we are always called with the filesystem marked `MPBUSY'. 1118 */ 1119 static int 1120 ext2_sync(struct mount *mp, int waitfor) 1121 { 1122 struct vnode *mvp, *vp; 1123 struct thread *td; 1124 struct inode *ip; 1125 struct ext2mount *ump = VFSTOEXT2(mp); 1126 struct m_ext2fs *fs; 1127 int error, allerror = 0; 1128 1129 td = curthread; 1130 fs = ump->um_e2fs; 1131 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1132 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1133 } 1134 1135 /* 1136 * Write back each (modified) inode. 1137 */ 1138 loop: 1139 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1140 if (vp->v_type == VNON) { 1141 VI_UNLOCK(vp); 1142 continue; 1143 } 1144 ip = VTOI(vp); 1145 if ((ip->i_flag & 1146 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1147 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 1148 waitfor == MNT_LAZY)) { 1149 VI_UNLOCK(vp); 1150 continue; 1151 } 1152 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); 1153 if (error) { 1154 if (error == ENOENT) { 1155 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1156 goto loop; 1157 } 1158 continue; 1159 } 1160 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 1161 allerror = error; 1162 vput(vp); 1163 } 1164 1165 /* 1166 * Force stale filesystem control information to be flushed. 1167 */ 1168 if (waitfor != MNT_LAZY) { 1169 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1170 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 1171 allerror = error; 1172 VOP_UNLOCK(ump->um_devvp); 1173 } 1174 1175 /* 1176 * Write back modified superblock. 1177 */ 1178 if (fs->e2fs_fmod != 0) { 1179 fs->e2fs_fmod = 0; 1180 fs->e2fs->e2fs_wtime = htole32(time_second); 1181 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1182 allerror = error; 1183 } 1184 return (allerror); 1185 } 1186 1187 /* 1188 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1189 * in from disk. If it is in core, wait for the lock bit to clear, then 1190 * return the inode locked. Detection and handling of mount points must be 1191 * done by the calling routine. 1192 */ 1193 static int 1194 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) 1195 { 1196 struct m_ext2fs *fs; 1197 struct inode *ip; 1198 struct ext2mount *ump; 1199 struct buf *bp; 1200 struct vnode *vp; 1201 struct thread *td; 1202 unsigned int i, used_blocks; 1203 int error; 1204 1205 td = curthread; 1206 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); 1207 if (error || *vpp != NULL) 1208 return (error); 1209 1210 ump = VFSTOEXT2(mp); 1211 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1212 1213 /* Allocate a new vnode/inode. */ 1214 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 1215 *vpp = NULL; 1216 free(ip, M_EXT2NODE); 1217 return (error); 1218 } 1219 vp->v_data = ip; 1220 ip->i_vnode = vp; 1221 ip->i_e2fs = fs = ump->um_e2fs; 1222 ip->i_ump = ump; 1223 ip->i_number = ino; 1224 cluster_init_vn(&ip->i_clusterw); 1225 1226 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1227 error = insmntque(vp, mp); 1228 if (error != 0) { 1229 free(ip, M_EXT2NODE); 1230 *vpp = NULL; 1231 return (error); 1232 } 1233 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); 1234 if (error || *vpp != NULL) 1235 return (error); 1236 1237 /* Read in the disk contents for the inode, copy into the inode. */ 1238 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1239 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { 1240 /* 1241 * The inode does not contain anything useful, so it would 1242 * be misleading to leave it on its hash chain. With mode 1243 * still zero, it will be unlinked and returned to the free 1244 * list by vput(). 1245 */ 1246 brelse(bp); 1247 vput(vp); 1248 *vpp = NULL; 1249 return (error); 1250 } 1251 /* convert ext2 inode to dinode */ 1252 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1253 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1254 if (error) { 1255 brelse(bp); 1256 vput(vp); 1257 *vpp = NULL; 1258 return (error); 1259 } 1260 ip->i_block_group = ino_to_cg(fs, ino); 1261 ip->i_next_alloc_block = 0; 1262 ip->i_next_alloc_goal = 0; 1263 1264 /* 1265 * Now we want to make sure that block pointers for unused 1266 * blocks are zeroed out - ext2_balloc depends on this 1267 * although for regular files and directories only 1268 * 1269 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1270 * out because we could corrupt the extent tree. 1271 */ 1272 if (!(ip->i_flag & IN_E4EXTENTS) && 1273 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1274 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1275 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1276 ip->i_db[i] = 0; 1277 } 1278 1279 bqrelse(bp); 1280 1281 #ifdef EXT2FS_PRINT_EXTENTS 1282 ext2_print_inode(ip); 1283 error = ext4_ext_walk(ip); 1284 if (error) { 1285 vput(vp); 1286 *vpp = NULL; 1287 return (error); 1288 } 1289 #endif 1290 1291 /* 1292 * Initialize the vnode from the inode, check for aliases. 1293 * Note that the underlying vnode may have changed. 1294 */ 1295 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1296 vput(vp); 1297 *vpp = NULL; 1298 return (error); 1299 } 1300 1301 /* 1302 * Finish inode initialization. 1303 */ 1304 1305 vn_set_state(vp, VSTATE_CONSTRUCTED); 1306 *vpp = vp; 1307 return (0); 1308 } 1309 1310 /* 1311 * File handle to vnode 1312 * 1313 * Have to be really careful about stale file handles: 1314 * - check that the inode number is valid 1315 * - call ext2_vget() to get the locked inode 1316 * - check for an unallocated inode (i_mode == 0) 1317 * - check that the given client host has export rights and return 1318 * those rights via. exflagsp and credanonp 1319 */ 1320 static int 1321 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) 1322 { 1323 struct inode *ip; 1324 struct ufid *ufhp; 1325 struct vnode *nvp; 1326 struct m_ext2fs *fs; 1327 int error; 1328 1329 ufhp = (struct ufid *)fhp; 1330 fs = VFSTOEXT2(mp)->um_e2fs; 1331 if (ufhp->ufid_ino < EXT2_ROOTINO || 1332 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1333 return (ESTALE); 1334 1335 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1336 if (error) { 1337 *vpp = NULLVP; 1338 return (error); 1339 } 1340 ip = VTOI(nvp); 1341 if (ip->i_mode == 0 || 1342 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1343 vput(nvp); 1344 *vpp = NULLVP; 1345 return (ESTALE); 1346 } 1347 *vpp = nvp; 1348 vnode_create_vobject(*vpp, 0, curthread); 1349 return (0); 1350 } 1351 1352 /* 1353 * Write a superblock and associated information back to disk. 1354 */ 1355 static int 1356 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1357 { 1358 struct m_ext2fs *fs = mp->um_e2fs; 1359 struct ext2fs *es = fs->e2fs; 1360 struct buf *bp; 1361 int error = 0; 1362 1363 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1364 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1365 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1366 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1367 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1368 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1369 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1370 } 1371 1372 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1373 1374 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1375 ext2_sb_csum_set(fs); 1376 1377 error = bread(mp->um_devvp, SBLOCK, SBLOCKBLKSIZE, NOCRED, &bp); 1378 if (error != 0) 1379 return (error); 1380 1381 memcpy((char *)bp->b_data + SBLOCKOFFSET, (caddr_t)es, 1382 (u_int)sizeof(struct ext2fs)); 1383 if (waitfor == MNT_WAIT) 1384 error = bwrite(bp); 1385 else 1386 bawrite(bp); 1387 1388 /* 1389 * The buffers for group descriptors, inode bitmaps and block bitmaps 1390 * are not busy at this point and are (hopefully) written by the 1391 * usual sync mechanism. No need to write them here. 1392 */ 1393 return (error); 1394 } 1395 int 1396 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1397 { 1398 struct m_ext2fs *fs = mp->um_e2fs; 1399 struct buf *bp; 1400 int i, j, g_count = 0, error = 0, allerror = 0; 1401 1402 allerror = ext2_sbupdate(mp, waitfor); 1403 1404 /* Update gd csums */ 1405 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1406 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1407 ext2_gd_csum_set(fs); 1408 1409 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1410 bp = getblk(mp->um_devvp, fsbtodb(fs, 1411 ext2_cg_location(fs, i)), 1412 fs->e2fs_bsize, 0, 0, 0); 1413 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1414 memcpy(bp->b_data, &fs->e2fs_gd[ 1415 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1416 fs->e2fs_bsize); 1417 } else { 1418 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1419 g_count < fs->e2fs_gcount; j++, g_count++) 1420 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1421 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1422 } 1423 if (waitfor == MNT_WAIT) 1424 error = bwrite(bp); 1425 else 1426 bawrite(bp); 1427 } 1428 1429 if (!allerror && error) 1430 allerror = error; 1431 return (allerror); 1432 } 1433 1434 /* 1435 * Return the root of a filesystem. 1436 */ 1437 static int 1438 ext2_root(struct mount *mp, int flags, struct vnode **vpp) 1439 { 1440 struct vnode *nvp; 1441 int error; 1442 1443 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp); 1444 if (error) 1445 return (error); 1446 *vpp = nvp; 1447 return (0); 1448 } 1449