1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/kernel.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/bio.h> 50 #include <sys/buf.h> 51 #include <sys/conf.h> 52 #include <sys/endian.h> 53 #include <sys/fcntl.h> 54 #include <sys/malloc.h> 55 #include <sys/sdt.h> 56 #include <sys/stat.h> 57 #include <sys/mutex.h> 58 59 #include <geom/geom.h> 60 #include <geom/geom_vfs.h> 61 62 #include <fs/ext2fs/fs.h> 63 #include <fs/ext2fs/ext2_mount.h> 64 #include <fs/ext2fs/inode.h> 65 66 #include <fs/ext2fs/ext2fs.h> 67 #include <fs/ext2fs/ext2_dinode.h> 68 #include <fs/ext2fs/ext2_extern.h> 69 #include <fs/ext2fs/ext2_extents.h> 70 71 SDT_PROVIDER_DECLARE(ext2fs); 72 /* 73 * ext2fs trace probe: 74 * arg0: verbosity. Higher numbers give more verbose messages 75 * arg1: Textual message 76 */ 77 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 78 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 79 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 80 81 static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 82 static int ext2_mountfs(struct vnode *, struct mount *); 83 static int ext2_reload(struct mount *mp, struct thread *td); 84 static int ext2_sbupdate(struct ext2mount *, int); 85 static int ext2_cgupdate(struct ext2mount *, int); 86 static vfs_unmount_t ext2_unmount; 87 static vfs_root_t ext2_root; 88 static vfs_statfs_t ext2_statfs; 89 static vfs_sync_t ext2_sync; 90 static vfs_vget_t ext2_vget; 91 static vfs_fhtovp_t ext2_fhtovp; 92 static vfs_mount_t ext2_mount; 93 94 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 95 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 96 97 static struct vfsops ext2fs_vfsops = { 98 .vfs_fhtovp = ext2_fhtovp, 99 .vfs_mount = ext2_mount, 100 .vfs_root = ext2_root, /* root inode via vget */ 101 .vfs_statfs = ext2_statfs, 102 .vfs_sync = ext2_sync, 103 .vfs_unmount = ext2_unmount, 104 .vfs_vget = ext2_vget, 105 }; 106 107 VFS_SET(ext2fs_vfsops, ext2fs, 0); 108 109 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 110 int ronly); 111 static int ext2_compute_sb_data(struct vnode * devvp, 112 struct ext2fs * es, struct m_ext2fs * fs); 113 114 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", 115 "noclusterw", "noexec", "export", "force", "from", "multilabel", 116 "suiddir", "nosymfollow", "sync", "union", NULL }; 117 118 /* 119 * VFS Operations. 120 * 121 * mount system call 122 */ 123 static int 124 ext2_mount(struct mount *mp) 125 { 126 struct vfsoptlist *opts; 127 struct vnode *devvp; 128 struct thread *td; 129 struct ext2mount *ump = NULL; 130 struct m_ext2fs *fs; 131 struct nameidata nd, *ndp = &nd; 132 accmode_t accmode; 133 char *path, *fspec; 134 int error, flags, len; 135 136 td = curthread; 137 opts = mp->mnt_optnew; 138 139 if (vfs_filteropt(opts, ext2_opts)) 140 return (EINVAL); 141 142 vfs_getopt(opts, "fspath", (void **)&path, NULL); 143 /* Double-check the length of path.. */ 144 if (strlen(path) >= MAXMNTLEN) 145 return (ENAMETOOLONG); 146 147 fspec = NULL; 148 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 149 if (!error && fspec[len - 1] != '\0') 150 return (EINVAL); 151 152 /* 153 * If updating, check whether changing from read-only to 154 * read/write; if there is no device name, that's all we do. 155 */ 156 if (mp->mnt_flag & MNT_UPDATE) { 157 ump = VFSTOEXT2(mp); 158 fs = ump->um_e2fs; 159 error = 0; 160 if (fs->e2fs_ronly == 0 && 161 vfs_flagopt(opts, "ro", NULL, 0)) { 162 error = VFS_SYNC(mp, MNT_WAIT); 163 if (error) 164 return (error); 165 flags = WRITECLOSE; 166 if (mp->mnt_flag & MNT_FORCE) 167 flags |= FORCECLOSE; 168 error = ext2_flushfiles(mp, flags, td); 169 if (error == 0 && fs->e2fs_wasvalid && 170 ext2_cgupdate(ump, MNT_WAIT) == 0) { 171 fs->e2fs->e2fs_state = 172 htole16((le16toh(fs->e2fs->e2fs_state) | 173 E2FS_ISCLEAN)); 174 ext2_sbupdate(ump, MNT_WAIT); 175 } 176 fs->e2fs_ronly = 1; 177 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 178 g_topology_lock(); 179 g_access(ump->um_cp, 0, -1, 0); 180 g_topology_unlock(); 181 } 182 if (!error && (mp->mnt_flag & MNT_RELOAD)) 183 error = ext2_reload(mp, td); 184 if (error) 185 return (error); 186 devvp = ump->um_devvp; 187 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) { 188 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 189 return (EPERM); 190 191 /* 192 * If upgrade to read-write by non-root, then verify 193 * that user has necessary permissions on the device. 194 */ 195 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 196 error = VOP_ACCESS(devvp, VREAD | VWRITE, 197 td->td_ucred, td); 198 if (error) 199 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 200 if (error) { 201 VOP_UNLOCK(devvp); 202 return (error); 203 } 204 VOP_UNLOCK(devvp); 205 g_topology_lock(); 206 error = g_access(ump->um_cp, 0, 1, 0); 207 g_topology_unlock(); 208 if (error) 209 return (error); 210 211 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 212 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 213 if (mp->mnt_flag & MNT_FORCE) { 214 printf( 215 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 216 } else { 217 printf( 218 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 219 fs->e2fs_fsmnt); 220 return (EPERM); 221 } 222 } 223 fs->e2fs->e2fs_state = 224 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 225 (void)ext2_cgupdate(ump, MNT_WAIT); 226 fs->e2fs_ronly = 0; 227 MNT_ILOCK(mp); 228 mp->mnt_flag &= ~MNT_RDONLY; 229 MNT_IUNLOCK(mp); 230 } 231 if (vfs_flagopt(opts, "export", NULL, 0)) { 232 /* Process export requests in vfs_mount.c. */ 233 return (error); 234 } 235 } 236 237 /* 238 * Not an update, or updating the name: look up the name 239 * and verify that it refers to a sensible disk device. 240 */ 241 if (fspec == NULL) 242 return (EINVAL); 243 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); 244 if ((error = namei(ndp)) != 0) 245 return (error); 246 NDFREE(ndp, NDF_ONLY_PNBUF); 247 devvp = ndp->ni_vp; 248 249 if (!vn_isdisk_error(devvp, &error)) { 250 vput(devvp); 251 return (error); 252 } 253 254 /* 255 * If mount by non-root, then verify that user has necessary 256 * permissions on the device. 257 * 258 * XXXRW: VOP_ACCESS() enough? 259 */ 260 accmode = VREAD; 261 if ((mp->mnt_flag & MNT_RDONLY) == 0) 262 accmode |= VWRITE; 263 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); 264 if (error) 265 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 266 if (error) { 267 vput(devvp); 268 return (error); 269 } 270 271 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 272 error = ext2_mountfs(devvp, mp); 273 } else { 274 if (devvp != ump->um_devvp) { 275 vput(devvp); 276 return (EINVAL); /* needs translation */ 277 } else 278 vput(devvp); 279 } 280 if (error) { 281 vrele(devvp); 282 return (error); 283 } 284 ump = VFSTOEXT2(mp); 285 fs = ump->um_e2fs; 286 287 /* 288 * Note that this strncpy() is ok because of a check at the start 289 * of ext2_mount(). 290 */ 291 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN); 292 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0'; 293 vfs_mountedfrom(mp, fspec); 294 return (0); 295 } 296 297 static int 298 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 299 { 300 uint32_t i, mask; 301 302 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 303 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 304 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 305 return (1); 306 } 307 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 308 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 309 if (mask) { 310 printf("WARNING: mount of %s denied due to " 311 "unsupported optional features:\n", devtoname(dev)); 312 for (i = 0; 313 i < sizeof(incompat)/sizeof(struct ext2_feature); 314 i++) 315 if (mask & incompat[i].mask) 316 printf("%s ", incompat[i].name); 317 printf("\n"); 318 return (1); 319 } 320 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 321 if (!ronly && mask) { 322 printf("WARNING: R/W mount of %s denied due to " 323 "unsupported optional features:\n", devtoname(dev)); 324 for (i = 0; 325 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 326 i++) 327 if (mask & ro_compat[i].mask) 328 printf("%s ", ro_compat[i].name); 329 printf("\n"); 330 return (1); 331 } 332 } 333 return (0); 334 } 335 336 static e4fs_daddr_t 337 ext2_cg_location(struct m_ext2fs *fs, int number) 338 { 339 int cg, descpb, logical_sb, has_super = 0; 340 341 /* 342 * Adjust logical superblock block number. 343 * Godmar thinks: if the blocksize is greater than 1024, then 344 * the superblock is logically part of block zero. 345 */ 346 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1; 347 348 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 349 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 350 return (logical_sb + number + 1); 351 352 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 353 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 354 else 355 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 356 357 cg = descpb * number; 358 359 if (ext2_cg_has_sb(fs, cg)) 360 has_super = 1; 361 362 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 363 le32toh(fs->e2fs->e2fs_first_dblock)); 364 } 365 366 static int 367 ext2_cg_validate(struct m_ext2fs *fs) 368 { 369 uint64_t b_bitmap; 370 uint64_t i_bitmap; 371 uint64_t i_tables; 372 uint64_t first_block, last_block, last_cg_block; 373 struct ext2_gd *gd; 374 unsigned int i, cg_count; 375 376 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 377 last_cg_block = ext2_cg_number_gdb(fs, 0); 378 cg_count = fs->e2fs_gcount; 379 380 for (i = 0; i < fs->e2fs_gcount; i++) { 381 gd = &fs->e2fs_gd[i]; 382 383 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 384 i == fs->e2fs_gcount - 1) { 385 last_block = fs->e2fs_bcount - 1; 386 } else { 387 last_block = first_block + 388 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 389 } 390 391 if ((cg_count == fs->e2fs_gcount) && 392 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 393 cg_count = i; 394 395 b_bitmap = e2fs_gd_get_b_bitmap(gd); 396 if (b_bitmap == 0) { 397 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 398 "block bitmap is zero", i); 399 return (EINVAL); 400 } 401 if (b_bitmap <= last_cg_block) { 402 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 403 "block bitmap overlaps gds", i); 404 return (EINVAL); 405 } 406 if (b_bitmap < first_block || b_bitmap > last_block) { 407 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 408 "block bitmap not in group", i); 409 return (EINVAL); 410 } 411 412 i_bitmap = e2fs_gd_get_i_bitmap(gd); 413 if (i_bitmap == 0) { 414 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 415 "inode bitmap is zero", i); 416 return (EINVAL); 417 } 418 if (i_bitmap <= last_cg_block) { 419 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 420 "inode bitmap overlaps gds", i); 421 return (EINVAL); 422 } 423 if (i_bitmap < first_block || i_bitmap > last_block) { 424 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 425 "inode bitmap not in group blk", i); 426 return (EINVAL); 427 } 428 429 i_tables = e2fs_gd_get_i_tables(gd); 430 if (i_tables == 0) { 431 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 432 "inode table is zero", i); 433 return (EINVAL); 434 } 435 if (i_tables <= last_cg_block) { 436 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 437 "inode tables overlaps gds", i); 438 return (EINVAL); 439 } 440 if (i_tables < first_block || 441 i_tables + fs->e2fs_itpg - 1 > last_block) { 442 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 443 "inode tables not in group blk", i); 444 return (EINVAL); 445 } 446 447 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 448 first_block += EXT2_BLOCKS_PER_GROUP(fs); 449 } 450 451 return (0); 452 } 453 454 /* 455 * This computes the fields of the m_ext2fs structure from the 456 * data in the ext2fs structure read in. 457 */ 458 static int 459 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 460 struct m_ext2fs *fs) 461 { 462 struct buf *bp; 463 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 464 int i, j; 465 int g_count = 0; 466 int error; 467 468 /* Check checksum features */ 469 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 470 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 471 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 472 "incorrect checksum features combination"); 473 return (EINVAL); 474 } 475 476 /* Precompute checksum seed for all metadata */ 477 ext2_sb_csum_set_seed(fs); 478 479 /* Verify sb csum if possible */ 480 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 481 error = ext2_sb_csum_verify(fs); 482 if (error) { 483 return (error); 484 } 485 } 486 487 /* Check for block size = 1K|2K|4K */ 488 if (le32toh(es->e2fs_log_bsize) > 2) { 489 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 490 "bad block size"); 491 return (EINVAL); 492 } 493 494 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 495 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 496 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 497 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 498 499 /* Check for fragment size */ 500 if (le32toh(es->e2fs_log_fsize) > 501 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 502 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 503 "invalid log cluster size"); 504 return (EINVAL); 505 } 506 507 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 508 if (fs->e2fs_fsize != fs->e2fs_bsize) { 509 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 510 "fragment size != block size"); 511 return (EINVAL); 512 } 513 514 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 515 516 /* Check reserved gdt blocks for future filesystem expansion */ 517 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 518 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 519 "number of reserved GDT blocks too large"); 520 return (EINVAL); 521 } 522 523 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 524 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 525 } else { 526 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 527 528 /* 529 * Check first ino. 530 */ 531 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 532 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 533 "invalid first ino"); 534 return (EINVAL); 535 } 536 537 /* 538 * Simple sanity check for superblock inode size value. 539 */ 540 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 541 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 542 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 543 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 544 "invalid inode size"); 545 return (EINVAL); 546 } 547 } 548 549 /* Check group descriptors */ 550 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 551 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 552 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 553 "unsupported 64bit descriptor size"); 554 return (EINVAL); 555 } 556 557 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 558 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 559 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 560 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 561 "zero blocks/fragments per group"); 562 return (EINVAL); 563 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 564 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 565 "blocks per group not equal fragments per group"); 566 return (EINVAL); 567 } 568 569 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 570 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 571 "non-standard group size unsupported"); 572 return (EINVAL); 573 } 574 575 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 576 if (fs->e2fs_ipb == 0 || 577 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 578 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 579 "bad inodes per block size"); 580 return (EINVAL); 581 } 582 583 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 584 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 585 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 586 "invalid inodes per group"); 587 return (EINVAL); 588 } 589 590 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 591 592 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 593 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 594 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 595 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 596 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 597 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 598 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 599 } 600 if (fs->e2fs_rbcount > fs->e2fs_bcount || 601 fs->e2fs_fbcount > fs->e2fs_bcount) { 602 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 603 "invalid block count"); 604 return (EINVAL); 605 } 606 607 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 608 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 609 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 610 "invalid number of free inodes"); 611 return (EINVAL); 612 } 613 614 if (le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 615 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 616 "first data block out of range"); 617 return (EINVAL); 618 } 619 620 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 621 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 622 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 623 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 624 "groups count too large"); 625 return (EINVAL); 626 } 627 628 /* Check for extra isize in big inodes. */ 629 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 630 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 631 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 632 "no space for extra inode timestamps"); 633 return (EINVAL); 634 } 635 636 /* s_resuid / s_resgid ? */ 637 638 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 639 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 640 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 641 } else { 642 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 643 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 644 fs->e2fs_bsize / sizeof(struct ext2_gd)); 645 } 646 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 647 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 648 M_EXT2MNT, M_WAITOK | M_ZERO); 649 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 650 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 651 652 for (i = 0; i < fs->e2fs_gdbcount; i++) { 653 error = bread(devvp, 654 fsbtodb(fs, ext2_cg_location(fs, i)), 655 fs->e2fs_bsize, NOCRED, &bp); 656 if (error) { 657 /* 658 * fs->e2fs_gd and fs->e2fs_contigdirs 659 * will be freed later by the caller, 660 * because this function could be called from 661 * MNT_UPDATE path. 662 */ 663 return (error); 664 } 665 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 666 memcpy(&fs->e2fs_gd[ 667 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 668 bp->b_data, fs->e2fs_bsize); 669 } else { 670 for (j = 0; j < e2fs_descpb && 671 g_count < fs->e2fs_gcount; j++, g_count++) 672 memcpy(&fs->e2fs_gd[g_count], 673 bp->b_data + j * E2FS_REV0_GD_SIZE, 674 E2FS_REV0_GD_SIZE); 675 } 676 brelse(bp); 677 bp = NULL; 678 } 679 680 /* Validate cgs consistency */ 681 error = ext2_cg_validate(fs); 682 if (error) 683 return (error); 684 685 /* Verfy cgs csum */ 686 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 687 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 688 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 689 if (error) 690 return (error); 691 } 692 /* Initialization for the ext2 Orlov allocator variant. */ 693 fs->e2fs_total_dir = 0; 694 for (i = 0; i < fs->e2fs_gcount; i++) 695 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 696 697 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 698 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 699 fs->e2fs_maxfilesize = 0x7fffffff; 700 else { 701 fs->e2fs_maxfilesize = 0xffffffffffff; 702 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 703 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 704 } 705 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 706 fs->e2fs_uhash = 3; 707 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 708 #ifdef __CHAR_UNSIGNED__ 709 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 710 fs->e2fs_uhash = 3; 711 #else 712 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 713 #endif 714 } 715 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 716 error = ext2_sb_csum_verify(fs); 717 718 return (error); 719 } 720 721 /* 722 * Reload all incore data for a filesystem (used after running fsck on 723 * the root filesystem and finding things to fix). The filesystem must 724 * be mounted read-only. 725 * 726 * Things to do to update the mount: 727 * 1) invalidate all cached meta-data. 728 * 2) re-read superblock from disk. 729 * 3) invalidate all cluster summary information. 730 * 4) invalidate all inactive vnodes. 731 * 5) invalidate all cached file data. 732 * 6) re-read inode data for all active vnodes. 733 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 734 */ 735 static int 736 ext2_reload(struct mount *mp, struct thread *td) 737 { 738 struct vnode *vp, *mvp, *devvp; 739 struct inode *ip; 740 struct buf *bp; 741 struct ext2fs *es; 742 struct m_ext2fs *fs; 743 struct csum *sump; 744 int error, i; 745 int32_t *lp; 746 747 if ((mp->mnt_flag & MNT_RDONLY) == 0) 748 return (EINVAL); 749 /* 750 * Step 1: invalidate all cached meta-data. 751 */ 752 devvp = VFSTOEXT2(mp)->um_devvp; 753 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 754 if (vinvalbuf(devvp, 0, 0, 0) != 0) 755 panic("ext2_reload: dirty1"); 756 VOP_UNLOCK(devvp); 757 758 /* 759 * Step 2: re-read superblock from disk. 760 * constants have been adjusted for ext2 761 */ 762 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 763 return (error); 764 es = (struct ext2fs *)bp->b_data; 765 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 766 brelse(bp); 767 return (EIO); /* XXX needs translation */ 768 } 769 fs = VFSTOEXT2(mp)->um_e2fs; 770 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 771 772 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 773 brelse(bp); 774 return (error); 775 } 776 #ifdef UNKLAR 777 if (fs->fs_sbsize < SBSIZE) 778 bp->b_flags |= B_INVAL; 779 #endif 780 brelse(bp); 781 782 /* 783 * Step 3: invalidate all cluster summary information. 784 */ 785 if (fs->e2fs_contigsumsize > 0) { 786 lp = fs->e2fs_maxcluster; 787 sump = fs->e2fs_clustersum; 788 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 789 *lp++ = fs->e2fs_contigsumsize; 790 sump->cs_init = 0; 791 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 792 } 793 } 794 795 loop: 796 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 797 /* 798 * Step 4: invalidate all cached file data. 799 */ 800 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { 801 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 802 goto loop; 803 } 804 if (vinvalbuf(vp, 0, 0, 0)) 805 panic("ext2_reload: dirty2"); 806 807 /* 808 * Step 5: re-read inode data for all active vnodes. 809 */ 810 ip = VTOI(vp); 811 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 812 (int)fs->e2fs_bsize, NOCRED, &bp); 813 if (error) { 814 VOP_UNLOCK(vp); 815 vrele(vp); 816 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 817 return (error); 818 } 819 820 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 821 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); 822 823 brelse(bp); 824 VOP_UNLOCK(vp); 825 vrele(vp); 826 827 if (error) { 828 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 829 return (error); 830 } 831 } 832 return (0); 833 } 834 835 /* 836 * Common code for mount and mountroot. 837 */ 838 static int 839 ext2_mountfs(struct vnode *devvp, struct mount *mp) 840 { 841 struct ext2mount *ump; 842 struct buf *bp; 843 struct m_ext2fs *fs; 844 struct ext2fs *es; 845 struct cdev *dev = devvp->v_rdev; 846 struct g_consumer *cp; 847 struct bufobj *bo; 848 struct csum *sump; 849 int error; 850 int ronly; 851 int i; 852 u_long size; 853 int32_t *lp; 854 int32_t e2fs_maxcontig; 855 856 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 857 /* XXX: use VOP_ACESS to check FS perms */ 858 g_topology_lock(); 859 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 860 g_topology_unlock(); 861 VOP_UNLOCK(devvp); 862 if (error) 863 return (error); 864 865 /* XXX: should we check for some sectorsize or 512 instead? */ 866 if (((SBSIZE % cp->provider->sectorsize) != 0) || 867 (SBSIZE < cp->provider->sectorsize)) { 868 g_topology_lock(); 869 g_vfs_close(cp); 870 g_topology_unlock(); 871 return (EINVAL); 872 } 873 874 bo = &devvp->v_bufobj; 875 bo->bo_private = cp; 876 bo->bo_ops = g_vfs_bufops; 877 if (devvp->v_rdev->si_iosize_max != 0) 878 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 879 if (mp->mnt_iosize_max > maxphys) 880 mp->mnt_iosize_max = maxphys; 881 882 bp = NULL; 883 ump = NULL; 884 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 885 goto out; 886 es = (struct ext2fs *)bp->b_data; 887 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 888 error = EINVAL; /* XXX needs translation */ 889 goto out; 890 } 891 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 892 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 893 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 894 printf( 895 "WARNING: Filesystem was not properly dismounted\n"); 896 } else { 897 printf( 898 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 899 error = EPERM; 900 goto out; 901 } 902 } 903 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 904 905 /* 906 * I don't know whether this is the right strategy. Note that 907 * we dynamically allocate both an m_ext2fs and an ext2fs 908 * while Linux keeps the super block in a locked buffer. 909 */ 910 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 911 M_EXT2MNT, M_WAITOK | M_ZERO); 912 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 913 M_EXT2MNT, M_WAITOK); 914 mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF); 915 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 916 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 917 goto out; 918 919 /* 920 * Calculate the maximum contiguous blocks and size of cluster summary 921 * array. In FFS this is done by newfs; however, the superblock 922 * in ext2fs doesn't have these variables, so we can calculate 923 * them here. 924 */ 925 e2fs_maxcontig = MAX(1, maxphys / ump->um_e2fs->e2fs_bsize); 926 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 927 ump->um_e2fs->e2fs_maxsymlinklen = EXT2_MAXSYMLINKLEN; 928 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 929 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 930 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 931 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 932 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 933 lp = ump->um_e2fs->e2fs_maxcluster; 934 sump = ump->um_e2fs->e2fs_clustersum; 935 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 936 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 937 sump->cs_init = 0; 938 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 939 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 940 } 941 } 942 943 brelse(bp); 944 bp = NULL; 945 fs = ump->um_e2fs; 946 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 947 948 /* 949 * If the fs is not mounted read-only, make sure the super block is 950 * always written back on a sync(). 951 */ 952 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 953 if (ronly == 0) { 954 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 955 fs->e2fs->e2fs_state = 956 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 957 } 958 mp->mnt_data = ump; 959 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 960 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 961 MNT_ILOCK(mp); 962 mp->mnt_flag |= MNT_LOCAL; 963 MNT_IUNLOCK(mp); 964 ump->um_mountp = mp; 965 ump->um_dev = dev; 966 ump->um_devvp = devvp; 967 ump->um_bo = &devvp->v_bufobj; 968 ump->um_cp = cp; 969 970 /* 971 * Setting those two parameters allowed us to use 972 * ufs_bmap w/o changse! 973 */ 974 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 975 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 976 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 977 if (ronly == 0) 978 ext2_sbupdate(ump, MNT_WAIT); 979 /* 980 * Initialize filesystem stat information in mount struct. 981 */ 982 MNT_ILOCK(mp); 983 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | 984 MNTK_USES_BCACHE; 985 MNT_IUNLOCK(mp); 986 return (0); 987 out: 988 if (bp) 989 brelse(bp); 990 if (cp != NULL) { 991 g_topology_lock(); 992 g_vfs_close(cp); 993 g_topology_unlock(); 994 } 995 if (ump) { 996 mtx_destroy(EXT2_MTX(ump)); 997 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 998 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 999 free(ump->um_e2fs->e2fs, M_EXT2MNT); 1000 free(ump->um_e2fs, M_EXT2MNT); 1001 free(ump, M_EXT2MNT); 1002 mp->mnt_data = NULL; 1003 } 1004 return (error); 1005 } 1006 1007 /* 1008 * Unmount system call. 1009 */ 1010 static int 1011 ext2_unmount(struct mount *mp, int mntflags) 1012 { 1013 struct ext2mount *ump; 1014 struct m_ext2fs *fs; 1015 struct csum *sump; 1016 int error, flags, i, ronly; 1017 1018 flags = 0; 1019 if (mntflags & MNT_FORCE) { 1020 if (mp->mnt_flag & MNT_ROOTFS) 1021 return (EINVAL); 1022 flags |= FORCECLOSE; 1023 } 1024 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0) 1025 return (error); 1026 ump = VFSTOEXT2(mp); 1027 fs = ump->um_e2fs; 1028 ronly = fs->e2fs_ronly; 1029 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1030 if (fs->e2fs_wasvalid) 1031 fs->e2fs->e2fs_state = 1032 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1033 ext2_sbupdate(ump, MNT_WAIT); 1034 } 1035 1036 g_topology_lock(); 1037 g_vfs_close(ump->um_cp); 1038 g_topology_unlock(); 1039 vrele(ump->um_devvp); 1040 sump = fs->e2fs_clustersum; 1041 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1042 free(sump->cs_sum, M_EXT2MNT); 1043 free(fs->e2fs_clustersum, M_EXT2MNT); 1044 free(fs->e2fs_maxcluster, M_EXT2MNT); 1045 free(fs->e2fs_gd, M_EXT2MNT); 1046 free(fs->e2fs_contigdirs, M_EXT2MNT); 1047 free(fs->e2fs, M_EXT2MNT); 1048 free(fs, M_EXT2MNT); 1049 free(ump, M_EXT2MNT); 1050 mp->mnt_data = NULL; 1051 MNT_ILOCK(mp); 1052 mp->mnt_flag &= ~MNT_LOCAL; 1053 MNT_IUNLOCK(mp); 1054 return (error); 1055 } 1056 1057 /* 1058 * Flush out all the files in a filesystem. 1059 */ 1060 static int 1061 ext2_flushfiles(struct mount *mp, int flags, struct thread *td) 1062 { 1063 int error; 1064 1065 error = vflush(mp, 0, flags, td); 1066 return (error); 1067 } 1068 1069 /* 1070 * Get filesystem statistics. 1071 */ 1072 int 1073 ext2_statfs(struct mount *mp, struct statfs *sbp) 1074 { 1075 struct ext2mount *ump; 1076 struct m_ext2fs *fs; 1077 uint32_t overhead, overhead_per_group, ngdb; 1078 int i, ngroups; 1079 1080 ump = VFSTOEXT2(mp); 1081 fs = ump->um_e2fs; 1082 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1083 panic("ext2_statfs"); 1084 1085 /* 1086 * Compute the overhead (FS structures) 1087 */ 1088 overhead_per_group = 1089 1 /* block bitmap */ + 1090 1 /* inode bitmap */ + 1091 fs->e2fs_itpg; 1092 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1093 fs->e2fs_gcount * overhead_per_group; 1094 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1095 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1096 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1097 if (ext2_cg_has_sb(fs, i)) 1098 ngroups++; 1099 } 1100 } else { 1101 ngroups = fs->e2fs_gcount; 1102 } 1103 ngdb = fs->e2fs_gdbcount; 1104 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1105 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1106 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1107 overhead += ngroups * (1 /* superblock */ + ngdb); 1108 1109 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1110 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1111 sbp->f_blocks = fs->e2fs_bcount - overhead; 1112 sbp->f_bfree = fs->e2fs_fbcount; 1113 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1114 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1115 sbp->f_ffree = fs->e2fs_ficount; 1116 return (0); 1117 } 1118 1119 /* 1120 * Go through the disk queues to initiate sandbagged IO; 1121 * go through the inodes to write those that have been modified; 1122 * initiate the writing of the super block if it has been modified. 1123 * 1124 * Note: we are always called with the filesystem marked `MPBUSY'. 1125 */ 1126 static int 1127 ext2_sync(struct mount *mp, int waitfor) 1128 { 1129 struct vnode *mvp, *vp; 1130 struct thread *td; 1131 struct inode *ip; 1132 struct ext2mount *ump = VFSTOEXT2(mp); 1133 struct m_ext2fs *fs; 1134 int error, allerror = 0; 1135 1136 td = curthread; 1137 fs = ump->um_e2fs; 1138 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1139 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1140 } 1141 1142 /* 1143 * Write back each (modified) inode. 1144 */ 1145 loop: 1146 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1147 if (vp->v_type == VNON) { 1148 VI_UNLOCK(vp); 1149 continue; 1150 } 1151 ip = VTOI(vp); 1152 if ((ip->i_flag & 1153 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1154 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 1155 waitfor == MNT_LAZY)) { 1156 VI_UNLOCK(vp); 1157 continue; 1158 } 1159 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); 1160 if (error) { 1161 if (error == ENOENT) { 1162 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1163 goto loop; 1164 } 1165 continue; 1166 } 1167 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 1168 allerror = error; 1169 VOP_UNLOCK(vp); 1170 vrele(vp); 1171 } 1172 1173 /* 1174 * Force stale filesystem control information to be flushed. 1175 */ 1176 if (waitfor != MNT_LAZY) { 1177 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1178 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 1179 allerror = error; 1180 VOP_UNLOCK(ump->um_devvp); 1181 } 1182 1183 /* 1184 * Write back modified superblock. 1185 */ 1186 if (fs->e2fs_fmod != 0) { 1187 fs->e2fs_fmod = 0; 1188 fs->e2fs->e2fs_wtime = htole32(time_second); 1189 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1190 allerror = error; 1191 } 1192 return (allerror); 1193 } 1194 1195 /* 1196 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1197 * in from disk. If it is in core, wait for the lock bit to clear, then 1198 * return the inode locked. Detection and handling of mount points must be 1199 * done by the calling routine. 1200 */ 1201 static int 1202 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) 1203 { 1204 struct m_ext2fs *fs; 1205 struct inode *ip; 1206 struct ext2mount *ump; 1207 struct buf *bp; 1208 struct vnode *vp; 1209 struct thread *td; 1210 unsigned int i, used_blocks; 1211 int error; 1212 1213 td = curthread; 1214 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); 1215 if (error || *vpp != NULL) 1216 return (error); 1217 1218 ump = VFSTOEXT2(mp); 1219 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1220 1221 /* Allocate a new vnode/inode. */ 1222 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 1223 *vpp = NULL; 1224 free(ip, M_EXT2NODE); 1225 return (error); 1226 } 1227 vp->v_data = ip; 1228 ip->i_vnode = vp; 1229 ip->i_e2fs = fs = ump->um_e2fs; 1230 ip->i_ump = ump; 1231 ip->i_number = ino; 1232 cluster_init_vn(&ip->i_clusterw); 1233 1234 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1235 error = insmntque(vp, mp); 1236 if (error != 0) { 1237 free(ip, M_EXT2NODE); 1238 *vpp = NULL; 1239 return (error); 1240 } 1241 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); 1242 if (error || *vpp != NULL) 1243 return (error); 1244 1245 /* Read in the disk contents for the inode, copy into the inode. */ 1246 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1247 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { 1248 /* 1249 * The inode does not contain anything useful, so it would 1250 * be misleading to leave it on its hash chain. With mode 1251 * still zero, it will be unlinked and returned to the free 1252 * list by vput(). 1253 */ 1254 brelse(bp); 1255 vput(vp); 1256 *vpp = NULL; 1257 return (error); 1258 } 1259 /* convert ext2 inode to dinode */ 1260 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1261 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1262 if (error) { 1263 brelse(bp); 1264 vput(vp); 1265 *vpp = NULL; 1266 return (error); 1267 } 1268 ip->i_block_group = ino_to_cg(fs, ino); 1269 ip->i_next_alloc_block = 0; 1270 ip->i_next_alloc_goal = 0; 1271 1272 /* 1273 * Now we want to make sure that block pointers for unused 1274 * blocks are zeroed out - ext2_balloc depends on this 1275 * although for regular files and directories only 1276 * 1277 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1278 * out because we could corrupt the extent tree. 1279 */ 1280 if (!(ip->i_flag & IN_E4EXTENTS) && 1281 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1282 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1283 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1284 ip->i_db[i] = 0; 1285 } 1286 1287 bqrelse(bp); 1288 1289 #ifdef EXT2FS_PRINT_EXTENTS 1290 ext2_print_inode(ip); 1291 error = ext4_ext_walk(ip); 1292 if (error) { 1293 vput(vp); 1294 *vpp = NULL; 1295 return (error); 1296 } 1297 #endif 1298 1299 /* 1300 * Initialize the vnode from the inode, check for aliases. 1301 * Note that the underlying vnode may have changed. 1302 */ 1303 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1304 vput(vp); 1305 *vpp = NULL; 1306 return (error); 1307 } 1308 1309 /* 1310 * Finish inode initialization. 1311 */ 1312 1313 *vpp = vp; 1314 return (0); 1315 } 1316 1317 /* 1318 * File handle to vnode 1319 * 1320 * Have to be really careful about stale file handles: 1321 * - check that the inode number is valid 1322 * - call ext2_vget() to get the locked inode 1323 * - check for an unallocated inode (i_mode == 0) 1324 * - check that the given client host has export rights and return 1325 * those rights via. exflagsp and credanonp 1326 */ 1327 static int 1328 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) 1329 { 1330 struct inode *ip; 1331 struct ufid *ufhp; 1332 struct vnode *nvp; 1333 struct m_ext2fs *fs; 1334 int error; 1335 1336 ufhp = (struct ufid *)fhp; 1337 fs = VFSTOEXT2(mp)->um_e2fs; 1338 if (ufhp->ufid_ino < EXT2_ROOTINO || 1339 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1340 return (ESTALE); 1341 1342 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1343 if (error) { 1344 *vpp = NULLVP; 1345 return (error); 1346 } 1347 ip = VTOI(nvp); 1348 if (ip->i_mode == 0 || 1349 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1350 vput(nvp); 1351 *vpp = NULLVP; 1352 return (ESTALE); 1353 } 1354 *vpp = nvp; 1355 vnode_create_vobject(*vpp, 0, curthread); 1356 return (0); 1357 } 1358 1359 /* 1360 * Write a superblock and associated information back to disk. 1361 */ 1362 static int 1363 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1364 { 1365 struct m_ext2fs *fs = mp->um_e2fs; 1366 struct ext2fs *es = fs->e2fs; 1367 struct buf *bp; 1368 int error = 0; 1369 1370 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1371 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1372 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1373 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1374 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1375 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1376 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1377 } 1378 1379 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1380 1381 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1382 ext2_sb_csum_set(fs); 1383 1384 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1385 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); 1386 if (waitfor == MNT_WAIT) 1387 error = bwrite(bp); 1388 else 1389 bawrite(bp); 1390 1391 /* 1392 * The buffers for group descriptors, inode bitmaps and block bitmaps 1393 * are not busy at this point and are (hopefully) written by the 1394 * usual sync mechanism. No need to write them here. 1395 */ 1396 return (error); 1397 } 1398 int 1399 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1400 { 1401 struct m_ext2fs *fs = mp->um_e2fs; 1402 struct buf *bp; 1403 int i, j, g_count = 0, error = 0, allerror = 0; 1404 1405 allerror = ext2_sbupdate(mp, waitfor); 1406 1407 /* Update gd csums */ 1408 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1409 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1410 ext2_gd_csum_set(fs); 1411 1412 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1413 bp = getblk(mp->um_devvp, fsbtodb(fs, 1414 ext2_cg_location(fs, i)), 1415 fs->e2fs_bsize, 0, 0, 0); 1416 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1417 memcpy(bp->b_data, &fs->e2fs_gd[ 1418 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1419 fs->e2fs_bsize); 1420 } else { 1421 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1422 g_count < fs->e2fs_gcount; j++, g_count++) 1423 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1424 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1425 } 1426 if (waitfor == MNT_WAIT) 1427 error = bwrite(bp); 1428 else 1429 bawrite(bp); 1430 } 1431 1432 if (!allerror && error) 1433 allerror = error; 1434 return (allerror); 1435 } 1436 1437 /* 1438 * Return the root of a filesystem. 1439 */ 1440 static int 1441 ext2_root(struct mount *mp, int flags, struct vnode **vpp) 1442 { 1443 struct vnode *nvp; 1444 int error; 1445 1446 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp); 1447 if (error) 1448 return (error); 1449 *vpp = nvp; 1450 return (0); 1451 } 1452