1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/kernel.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/bio.h> 50 #include <sys/buf.h> 51 #include <sys/conf.h> 52 #include <sys/endian.h> 53 #include <sys/fcntl.h> 54 #include <sys/malloc.h> 55 #include <sys/sdt.h> 56 #include <sys/stat.h> 57 #include <sys/mutex.h> 58 59 #include <geom/geom.h> 60 #include <geom/geom_vfs.h> 61 62 #include <fs/ext2fs/fs.h> 63 #include <fs/ext2fs/ext2_mount.h> 64 #include <fs/ext2fs/inode.h> 65 66 #include <fs/ext2fs/ext2fs.h> 67 #include <fs/ext2fs/ext2_dinode.h> 68 #include <fs/ext2fs/ext2_extern.h> 69 #include <fs/ext2fs/ext2_extents.h> 70 71 SDT_PROVIDER_DECLARE(ext2fs); 72 /* 73 * ext2fs trace probe: 74 * arg0: verbosity. Higher numbers give more verbose messages 75 * arg1: Textual message 76 */ 77 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 78 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 79 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 80 81 static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 82 static int ext2_mountfs(struct vnode *, struct mount *); 83 static int ext2_reload(struct mount *mp, struct thread *td); 84 static int ext2_sbupdate(struct ext2mount *, int); 85 static int ext2_cgupdate(struct ext2mount *, int); 86 static vfs_unmount_t ext2_unmount; 87 static vfs_root_t ext2_root; 88 static vfs_statfs_t ext2_statfs; 89 static vfs_sync_t ext2_sync; 90 static vfs_vget_t ext2_vget; 91 static vfs_fhtovp_t ext2_fhtovp; 92 static vfs_mount_t ext2_mount; 93 94 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 95 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 96 97 static struct vfsops ext2fs_vfsops = { 98 .vfs_fhtovp = ext2_fhtovp, 99 .vfs_mount = ext2_mount, 100 .vfs_root = ext2_root, /* root inode via vget */ 101 .vfs_statfs = ext2_statfs, 102 .vfs_sync = ext2_sync, 103 .vfs_unmount = ext2_unmount, 104 .vfs_vget = ext2_vget, 105 }; 106 107 VFS_SET(ext2fs_vfsops, ext2fs, 0); 108 109 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 110 int ronly); 111 static int ext2_compute_sb_data(struct vnode * devvp, 112 struct ext2fs * es, struct m_ext2fs * fs); 113 114 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", 115 "noclusterw", "noexec", "export", "force", "from", "multilabel", 116 "suiddir", "nosymfollow", "sync", "union", NULL }; 117 118 /* 119 * VFS Operations. 120 * 121 * mount system call 122 */ 123 static int 124 ext2_mount(struct mount *mp) 125 { 126 struct vfsoptlist *opts; 127 struct vnode *devvp; 128 struct thread *td; 129 struct ext2mount *ump = NULL; 130 struct m_ext2fs *fs; 131 struct nameidata nd, *ndp = &nd; 132 accmode_t accmode; 133 char *path, *fspec; 134 int error, flags, len; 135 136 td = curthread; 137 opts = mp->mnt_optnew; 138 139 if (vfs_filteropt(opts, ext2_opts)) 140 return (EINVAL); 141 142 vfs_getopt(opts, "fspath", (void **)&path, NULL); 143 /* Double-check the length of path.. */ 144 if (strlen(path) >= MAXMNTLEN) 145 return (ENAMETOOLONG); 146 147 fspec = NULL; 148 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 149 if (!error && fspec[len - 1] != '\0') 150 return (EINVAL); 151 152 /* 153 * If updating, check whether changing from read-only to 154 * read/write; if there is no device name, that's all we do. 155 */ 156 if (mp->mnt_flag & MNT_UPDATE) { 157 ump = VFSTOEXT2(mp); 158 fs = ump->um_e2fs; 159 error = 0; 160 if (fs->e2fs_ronly == 0 && 161 vfs_flagopt(opts, "ro", NULL, 0)) { 162 error = VFS_SYNC(mp, MNT_WAIT); 163 if (error) 164 return (error); 165 flags = WRITECLOSE; 166 if (mp->mnt_flag & MNT_FORCE) 167 flags |= FORCECLOSE; 168 error = ext2_flushfiles(mp, flags, td); 169 if (error == 0 && fs->e2fs_wasvalid && 170 ext2_cgupdate(ump, MNT_WAIT) == 0) { 171 fs->e2fs->e2fs_state = 172 htole16((le16toh(fs->e2fs->e2fs_state) | 173 E2FS_ISCLEAN)); 174 ext2_sbupdate(ump, MNT_WAIT); 175 } 176 fs->e2fs_ronly = 1; 177 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 178 g_topology_lock(); 179 g_access(ump->um_cp, 0, -1, 0); 180 g_topology_unlock(); 181 } 182 if (!error && (mp->mnt_flag & MNT_RELOAD)) 183 error = ext2_reload(mp, td); 184 if (error) 185 return (error); 186 devvp = ump->um_devvp; 187 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) { 188 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 189 return (EPERM); 190 191 /* 192 * If upgrade to read-write by non-root, then verify 193 * that user has necessary permissions on the device. 194 */ 195 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 196 error = VOP_ACCESS(devvp, VREAD | VWRITE, 197 td->td_ucred, td); 198 if (error) 199 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 200 if (error) { 201 VOP_UNLOCK(devvp); 202 return (error); 203 } 204 VOP_UNLOCK(devvp); 205 g_topology_lock(); 206 error = g_access(ump->um_cp, 0, 1, 0); 207 g_topology_unlock(); 208 if (error) 209 return (error); 210 211 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 212 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 213 if (mp->mnt_flag & MNT_FORCE) { 214 printf( 215 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 216 } else { 217 printf( 218 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 219 fs->e2fs_fsmnt); 220 return (EPERM); 221 } 222 } 223 fs->e2fs->e2fs_state = 224 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 225 (void)ext2_cgupdate(ump, MNT_WAIT); 226 fs->e2fs_ronly = 0; 227 MNT_ILOCK(mp); 228 mp->mnt_flag &= ~MNT_RDONLY; 229 MNT_IUNLOCK(mp); 230 } 231 if (vfs_flagopt(opts, "export", NULL, 0)) { 232 /* Process export requests in vfs_mount.c. */ 233 return (error); 234 } 235 } 236 237 /* 238 * Not an update, or updating the name: look up the name 239 * and verify that it refers to a sensible disk device. 240 */ 241 if (fspec == NULL) 242 return (EINVAL); 243 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec); 244 if ((error = namei(ndp)) != 0) 245 return (error); 246 NDFREE(ndp, NDF_ONLY_PNBUF); 247 devvp = ndp->ni_vp; 248 249 if (!vn_isdisk_error(devvp, &error)) { 250 vput(devvp); 251 return (error); 252 } 253 254 /* 255 * If mount by non-root, then verify that user has necessary 256 * permissions on the device. 257 * 258 * XXXRW: VOP_ACCESS() enough? 259 */ 260 accmode = VREAD; 261 if ((mp->mnt_flag & MNT_RDONLY) == 0) 262 accmode |= VWRITE; 263 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); 264 if (error) 265 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 266 if (error) { 267 vput(devvp); 268 return (error); 269 } 270 271 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 272 error = ext2_mountfs(devvp, mp); 273 } else { 274 if (devvp != ump->um_devvp) { 275 vput(devvp); 276 return (EINVAL); /* needs translation */ 277 } else 278 vput(devvp); 279 } 280 if (error) { 281 vrele(devvp); 282 return (error); 283 } 284 ump = VFSTOEXT2(mp); 285 fs = ump->um_e2fs; 286 287 /* 288 * Note that this strncpy() is ok because of a check at the start 289 * of ext2_mount(). 290 */ 291 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN); 292 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0'; 293 vfs_mountedfrom(mp, fspec); 294 return (0); 295 } 296 297 static int 298 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 299 { 300 uint32_t i, mask; 301 302 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 303 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 304 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 305 return (1); 306 } 307 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 308 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 309 if (mask) { 310 printf("WARNING: mount of %s denied due to " 311 "unsupported optional features:\n", devtoname(dev)); 312 for (i = 0; 313 i < sizeof(incompat)/sizeof(struct ext2_feature); 314 i++) 315 if (mask & incompat[i].mask) 316 printf("%s ", incompat[i].name); 317 printf("\n"); 318 return (1); 319 } 320 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 321 if (!ronly && mask) { 322 printf("WARNING: R/W mount of %s denied due to " 323 "unsupported optional features:\n", devtoname(dev)); 324 for (i = 0; 325 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 326 i++) 327 if (mask & ro_compat[i].mask) 328 printf("%s ", ro_compat[i].name); 329 printf("\n"); 330 return (1); 331 } 332 } 333 return (0); 334 } 335 336 static e4fs_daddr_t 337 ext2_cg_location(struct m_ext2fs *fs, int number) 338 { 339 int cg, descpb, logical_sb, has_super = 0; 340 341 /* 342 * Adjust logical superblock block number. 343 * Godmar thinks: if the blocksize is greater than 1024, then 344 * the superblock is logically part of block zero. 345 */ 346 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1; 347 348 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 349 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 350 return (logical_sb + number + 1); 351 352 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 353 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 354 else 355 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 356 357 cg = descpb * number; 358 359 if (ext2_cg_has_sb(fs, cg)) 360 has_super = 1; 361 362 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 363 le32toh(fs->e2fs->e2fs_first_dblock)); 364 } 365 366 static int 367 ext2_cg_validate(struct m_ext2fs *fs) 368 { 369 uint64_t b_bitmap; 370 uint64_t i_bitmap; 371 uint64_t i_tables; 372 uint64_t first_block, last_block, last_cg_block; 373 struct ext2_gd *gd; 374 unsigned int i, cg_count; 375 376 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 377 last_cg_block = ext2_cg_number_gdb(fs, 0); 378 cg_count = fs->e2fs_gcount; 379 380 for (i = 0; i < fs->e2fs_gcount; i++) { 381 gd = &fs->e2fs_gd[i]; 382 383 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 384 i == fs->e2fs_gcount - 1) { 385 last_block = fs->e2fs_bcount - 1; 386 } else { 387 last_block = first_block + 388 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 389 } 390 391 if ((cg_count == fs->e2fs_gcount) && 392 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 393 cg_count = i; 394 395 b_bitmap = e2fs_gd_get_b_bitmap(gd); 396 if (b_bitmap == 0) { 397 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 398 "block bitmap is zero", i); 399 return (EINVAL); 400 } 401 if (b_bitmap <= last_cg_block) { 402 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 403 "block bitmap overlaps gds", i); 404 return (EINVAL); 405 } 406 if (b_bitmap < first_block || b_bitmap > last_block) { 407 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 408 "block bitmap not in group", i); 409 return (EINVAL); 410 } 411 412 i_bitmap = e2fs_gd_get_i_bitmap(gd); 413 if (i_bitmap == 0) { 414 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 415 "inode bitmap is zero", i); 416 return (EINVAL); 417 } 418 if (i_bitmap <= last_cg_block) { 419 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 420 "inode bitmap overlaps gds", i); 421 return (EINVAL); 422 } 423 if (i_bitmap < first_block || i_bitmap > last_block) { 424 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 425 "inode bitmap not in group blk", i); 426 return (EINVAL); 427 } 428 429 i_tables = e2fs_gd_get_i_tables(gd); 430 if (i_tables == 0) { 431 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 432 "inode table is zero", i); 433 return (EINVAL); 434 } 435 if (i_tables <= last_cg_block) { 436 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 437 "inode tables overlaps gds", i); 438 return (EINVAL); 439 } 440 if (i_tables < first_block || 441 i_tables + fs->e2fs_itpg - 1 > last_block) { 442 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 443 "inode tables not in group blk", i); 444 return (EINVAL); 445 } 446 447 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 448 first_block += EXT2_BLOCKS_PER_GROUP(fs); 449 } 450 451 return (0); 452 } 453 454 /* 455 * This computes the fields of the m_ext2fs structure from the 456 * data in the ext2fs structure read in. 457 */ 458 static int 459 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 460 struct m_ext2fs *fs) 461 { 462 struct buf *bp; 463 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 464 int i, j; 465 int g_count = 0; 466 int error; 467 468 /* Check checksum features */ 469 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 470 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 471 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 472 "incorrect checksum features combination"); 473 return (EINVAL); 474 } 475 476 /* Precompute checksum seed for all metadata */ 477 ext2_sb_csum_set_seed(fs); 478 479 /* Verify sb csum if possible */ 480 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 481 error = ext2_sb_csum_verify(fs); 482 if (error) { 483 return (error); 484 } 485 } 486 487 /* Check for block size = 1K|2K|4K */ 488 if (le32toh(es->e2fs_log_bsize) > 2) { 489 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 490 "bad block size"); 491 return (EINVAL); 492 } 493 494 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 495 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 496 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 497 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 498 499 /* Check for fragment size */ 500 if (le32toh(es->e2fs_log_fsize) > 501 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 502 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 503 "invalid log cluster size"); 504 return (EINVAL); 505 } 506 507 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 508 if (fs->e2fs_fsize != fs->e2fs_bsize) { 509 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 510 "fragment size != block size"); 511 return (EINVAL); 512 } 513 514 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 515 516 /* Check reserved gdt blocks for future filesystem expansion */ 517 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 518 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 519 "number of reserved GDT blocks too large"); 520 return (EINVAL); 521 } 522 523 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 524 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 525 } else { 526 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 527 528 /* 529 * Check first ino. 530 */ 531 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 532 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 533 "invalid first ino"); 534 return (EINVAL); 535 } 536 537 /* 538 * Simple sanity check for superblock inode size value. 539 */ 540 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 541 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 542 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 543 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 544 "invalid inode size"); 545 return (EINVAL); 546 } 547 } 548 549 /* Check group descriptors */ 550 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 551 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 552 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 553 "unsupported 64bit descriptor size"); 554 return (EINVAL); 555 } 556 557 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 558 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 559 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 560 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 561 "zero blocks/fragments per group"); 562 return (EINVAL); 563 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 564 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 565 "blocks per group not equal fragments per group"); 566 return (EINVAL); 567 } 568 569 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 570 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 571 "non-standard group size unsupported"); 572 return (EINVAL); 573 } 574 575 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 576 if (fs->e2fs_ipb == 0 || 577 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 578 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 579 "bad inodes per block size"); 580 return (EINVAL); 581 } 582 583 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 584 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 585 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 586 "invalid inodes per group"); 587 return (EINVAL); 588 } 589 590 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 591 592 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 593 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 594 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 595 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 596 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 597 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 598 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 599 } 600 if (fs->e2fs_rbcount > fs->e2fs_bcount || 601 fs->e2fs_fbcount > fs->e2fs_bcount) { 602 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 603 "invalid block count"); 604 return (EINVAL); 605 } 606 607 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 608 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 609 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 610 "invalid number of free inodes"); 611 return (EINVAL); 612 } 613 614 if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) || 615 le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 616 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 617 "first data block out of range"); 618 return (EINVAL); 619 } 620 621 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 622 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 623 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 624 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 625 "groups count too large"); 626 return (EINVAL); 627 } 628 629 /* Check for extra isize in big inodes. */ 630 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 631 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 632 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 633 "no space for extra inode timestamps"); 634 return (EINVAL); 635 } 636 637 /* s_resuid / s_resgid ? */ 638 639 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 640 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 641 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 642 } else { 643 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 644 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 645 fs->e2fs_bsize / sizeof(struct ext2_gd)); 646 } 647 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 648 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 649 M_EXT2MNT, M_WAITOK | M_ZERO); 650 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 651 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 652 653 for (i = 0; i < fs->e2fs_gdbcount; i++) { 654 error = bread(devvp, 655 fsbtodb(fs, ext2_cg_location(fs, i)), 656 fs->e2fs_bsize, NOCRED, &bp); 657 if (error) { 658 /* 659 * fs->e2fs_gd and fs->e2fs_contigdirs 660 * will be freed later by the caller, 661 * because this function could be called from 662 * MNT_UPDATE path. 663 */ 664 return (error); 665 } 666 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 667 memcpy(&fs->e2fs_gd[ 668 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 669 bp->b_data, fs->e2fs_bsize); 670 } else { 671 for (j = 0; j < e2fs_descpb && 672 g_count < fs->e2fs_gcount; j++, g_count++) 673 memcpy(&fs->e2fs_gd[g_count], 674 bp->b_data + j * E2FS_REV0_GD_SIZE, 675 E2FS_REV0_GD_SIZE); 676 } 677 brelse(bp); 678 bp = NULL; 679 } 680 681 /* Validate cgs consistency */ 682 error = ext2_cg_validate(fs); 683 if (error) 684 return (error); 685 686 /* Verfy cgs csum */ 687 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 688 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 689 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 690 if (error) 691 return (error); 692 } 693 /* Initialization for the ext2 Orlov allocator variant. */ 694 fs->e2fs_total_dir = 0; 695 for (i = 0; i < fs->e2fs_gcount; i++) 696 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 697 698 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 699 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 700 fs->e2fs_maxfilesize = 0x7fffffff; 701 else { 702 fs->e2fs_maxfilesize = 0xffffffffffff; 703 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 704 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 705 } 706 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 707 fs->e2fs_uhash = 3; 708 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 709 #ifdef __CHAR_UNSIGNED__ 710 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 711 fs->e2fs_uhash = 3; 712 #else 713 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 714 #endif 715 } 716 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 717 error = ext2_sb_csum_verify(fs); 718 719 return (error); 720 } 721 722 /* 723 * Reload all incore data for a filesystem (used after running fsck on 724 * the root filesystem and finding things to fix). The filesystem must 725 * be mounted read-only. 726 * 727 * Things to do to update the mount: 728 * 1) invalidate all cached meta-data. 729 * 2) re-read superblock from disk. 730 * 3) invalidate all cluster summary information. 731 * 4) invalidate all inactive vnodes. 732 * 5) invalidate all cached file data. 733 * 6) re-read inode data for all active vnodes. 734 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 735 */ 736 static int 737 ext2_reload(struct mount *mp, struct thread *td) 738 { 739 struct vnode *vp, *mvp, *devvp; 740 struct inode *ip; 741 struct buf *bp; 742 struct ext2fs *es; 743 struct m_ext2fs *fs; 744 struct csum *sump; 745 int error, i; 746 int32_t *lp; 747 748 if ((mp->mnt_flag & MNT_RDONLY) == 0) 749 return (EINVAL); 750 /* 751 * Step 1: invalidate all cached meta-data. 752 */ 753 devvp = VFSTOEXT2(mp)->um_devvp; 754 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 755 if (vinvalbuf(devvp, 0, 0, 0) != 0) 756 panic("ext2_reload: dirty1"); 757 VOP_UNLOCK(devvp); 758 759 /* 760 * Step 2: re-read superblock from disk. 761 * constants have been adjusted for ext2 762 */ 763 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 764 return (error); 765 es = (struct ext2fs *)bp->b_data; 766 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 767 brelse(bp); 768 return (EIO); /* XXX needs translation */ 769 } 770 fs = VFSTOEXT2(mp)->um_e2fs; 771 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 772 773 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 774 brelse(bp); 775 return (error); 776 } 777 #ifdef UNKLAR 778 if (fs->fs_sbsize < SBSIZE) 779 bp->b_flags |= B_INVAL; 780 #endif 781 brelse(bp); 782 783 /* 784 * Step 3: invalidate all cluster summary information. 785 */ 786 if (fs->e2fs_contigsumsize > 0) { 787 lp = fs->e2fs_maxcluster; 788 sump = fs->e2fs_clustersum; 789 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 790 *lp++ = fs->e2fs_contigsumsize; 791 sump->cs_init = 0; 792 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 793 } 794 } 795 796 loop: 797 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 798 /* 799 * Step 4: invalidate all cached file data. 800 */ 801 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { 802 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 803 goto loop; 804 } 805 if (vinvalbuf(vp, 0, 0, 0)) 806 panic("ext2_reload: dirty2"); 807 808 /* 809 * Step 5: re-read inode data for all active vnodes. 810 */ 811 ip = VTOI(vp); 812 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 813 (int)fs->e2fs_bsize, NOCRED, &bp); 814 if (error) { 815 VOP_UNLOCK(vp); 816 vrele(vp); 817 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 818 return (error); 819 } 820 821 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 822 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); 823 824 brelse(bp); 825 VOP_UNLOCK(vp); 826 vrele(vp); 827 828 if (error) { 829 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 830 return (error); 831 } 832 } 833 return (0); 834 } 835 836 /* 837 * Common code for mount and mountroot. 838 */ 839 static int 840 ext2_mountfs(struct vnode *devvp, struct mount *mp) 841 { 842 struct ext2mount *ump; 843 struct buf *bp; 844 struct m_ext2fs *fs; 845 struct ext2fs *es; 846 struct cdev *dev = devvp->v_rdev; 847 struct g_consumer *cp; 848 struct bufobj *bo; 849 struct csum *sump; 850 int error; 851 int ronly; 852 int i; 853 u_long size; 854 int32_t *lp; 855 int32_t e2fs_maxcontig; 856 857 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 858 /* XXX: use VOP_ACESS to check FS perms */ 859 g_topology_lock(); 860 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 861 g_topology_unlock(); 862 VOP_UNLOCK(devvp); 863 if (error) 864 return (error); 865 866 /* XXX: should we check for some sectorsize or 512 instead? */ 867 if (((SBSIZE % cp->provider->sectorsize) != 0) || 868 (SBSIZE < cp->provider->sectorsize)) { 869 g_topology_lock(); 870 g_vfs_close(cp); 871 g_topology_unlock(); 872 return (EINVAL); 873 } 874 875 bo = &devvp->v_bufobj; 876 bo->bo_private = cp; 877 bo->bo_ops = g_vfs_bufops; 878 if (devvp->v_rdev->si_iosize_max != 0) 879 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 880 if (mp->mnt_iosize_max > maxphys) 881 mp->mnt_iosize_max = maxphys; 882 883 bp = NULL; 884 ump = NULL; 885 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 886 goto out; 887 es = (struct ext2fs *)bp->b_data; 888 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 889 error = EINVAL; /* XXX needs translation */ 890 goto out; 891 } 892 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 893 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 894 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 895 printf( 896 "WARNING: Filesystem was not properly dismounted\n"); 897 } else { 898 printf( 899 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 900 error = EPERM; 901 goto out; 902 } 903 } 904 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 905 906 /* 907 * I don't know whether this is the right strategy. Note that 908 * we dynamically allocate both an m_ext2fs and an ext2fs 909 * while Linux keeps the super block in a locked buffer. 910 */ 911 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 912 M_EXT2MNT, M_WAITOK | M_ZERO); 913 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 914 M_EXT2MNT, M_WAITOK); 915 mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF); 916 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 917 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 918 goto out; 919 920 /* 921 * Calculate the maximum contiguous blocks and size of cluster summary 922 * array. In FFS this is done by newfs; however, the superblock 923 * in ext2fs doesn't have these variables, so we can calculate 924 * them here. 925 */ 926 e2fs_maxcontig = MAX(1, maxphys / ump->um_e2fs->e2fs_bsize); 927 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 928 ump->um_e2fs->e2fs_maxsymlinklen = EXT2_MAXSYMLINKLEN; 929 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 930 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 931 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 932 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 933 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 934 lp = ump->um_e2fs->e2fs_maxcluster; 935 sump = ump->um_e2fs->e2fs_clustersum; 936 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 937 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 938 sump->cs_init = 0; 939 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 940 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 941 } 942 } 943 944 brelse(bp); 945 bp = NULL; 946 fs = ump->um_e2fs; 947 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 948 949 /* 950 * If the fs is not mounted read-only, make sure the super block is 951 * always written back on a sync(). 952 */ 953 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 954 if (ronly == 0) { 955 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 956 fs->e2fs->e2fs_state = 957 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 958 } 959 mp->mnt_data = ump; 960 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 961 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 962 MNT_ILOCK(mp); 963 mp->mnt_flag |= MNT_LOCAL; 964 MNT_IUNLOCK(mp); 965 ump->um_mountp = mp; 966 ump->um_dev = dev; 967 ump->um_devvp = devvp; 968 ump->um_bo = &devvp->v_bufobj; 969 ump->um_cp = cp; 970 971 /* 972 * Setting those two parameters allowed us to use 973 * ufs_bmap w/o changse! 974 */ 975 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 976 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 977 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 978 if (ronly == 0) 979 ext2_sbupdate(ump, MNT_WAIT); 980 /* 981 * Initialize filesystem stat information in mount struct. 982 */ 983 MNT_ILOCK(mp); 984 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | 985 MNTK_USES_BCACHE; 986 MNT_IUNLOCK(mp); 987 return (0); 988 out: 989 if (bp) 990 brelse(bp); 991 if (cp != NULL) { 992 g_topology_lock(); 993 g_vfs_close(cp); 994 g_topology_unlock(); 995 } 996 if (ump) { 997 mtx_destroy(EXT2_MTX(ump)); 998 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 999 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 1000 free(ump->um_e2fs->e2fs, M_EXT2MNT); 1001 free(ump->um_e2fs, M_EXT2MNT); 1002 free(ump, M_EXT2MNT); 1003 mp->mnt_data = NULL; 1004 } 1005 return (error); 1006 } 1007 1008 /* 1009 * Unmount system call. 1010 */ 1011 static int 1012 ext2_unmount(struct mount *mp, int mntflags) 1013 { 1014 struct ext2mount *ump; 1015 struct m_ext2fs *fs; 1016 struct csum *sump; 1017 int error, flags, i, ronly; 1018 1019 flags = 0; 1020 if (mntflags & MNT_FORCE) { 1021 if (mp->mnt_flag & MNT_ROOTFS) 1022 return (EINVAL); 1023 flags |= FORCECLOSE; 1024 } 1025 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0) 1026 return (error); 1027 ump = VFSTOEXT2(mp); 1028 fs = ump->um_e2fs; 1029 ronly = fs->e2fs_ronly; 1030 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1031 if (fs->e2fs_wasvalid) 1032 fs->e2fs->e2fs_state = 1033 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1034 ext2_sbupdate(ump, MNT_WAIT); 1035 } 1036 1037 g_topology_lock(); 1038 g_vfs_close(ump->um_cp); 1039 g_topology_unlock(); 1040 vrele(ump->um_devvp); 1041 sump = fs->e2fs_clustersum; 1042 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1043 free(sump->cs_sum, M_EXT2MNT); 1044 free(fs->e2fs_clustersum, M_EXT2MNT); 1045 free(fs->e2fs_maxcluster, M_EXT2MNT); 1046 free(fs->e2fs_gd, M_EXT2MNT); 1047 free(fs->e2fs_contigdirs, M_EXT2MNT); 1048 free(fs->e2fs, M_EXT2MNT); 1049 free(fs, M_EXT2MNT); 1050 free(ump, M_EXT2MNT); 1051 mp->mnt_data = NULL; 1052 MNT_ILOCK(mp); 1053 mp->mnt_flag &= ~MNT_LOCAL; 1054 MNT_IUNLOCK(mp); 1055 return (error); 1056 } 1057 1058 /* 1059 * Flush out all the files in a filesystem. 1060 */ 1061 static int 1062 ext2_flushfiles(struct mount *mp, int flags, struct thread *td) 1063 { 1064 int error; 1065 1066 error = vflush(mp, 0, flags, td); 1067 return (error); 1068 } 1069 1070 /* 1071 * Get filesystem statistics. 1072 */ 1073 int 1074 ext2_statfs(struct mount *mp, struct statfs *sbp) 1075 { 1076 struct ext2mount *ump; 1077 struct m_ext2fs *fs; 1078 uint32_t overhead, overhead_per_group, ngdb; 1079 int i, ngroups; 1080 1081 ump = VFSTOEXT2(mp); 1082 fs = ump->um_e2fs; 1083 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1084 panic("ext2_statfs"); 1085 1086 /* 1087 * Compute the overhead (FS structures) 1088 */ 1089 overhead_per_group = 1090 1 /* block bitmap */ + 1091 1 /* inode bitmap */ + 1092 fs->e2fs_itpg; 1093 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1094 fs->e2fs_gcount * overhead_per_group; 1095 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1096 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1097 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1098 if (ext2_cg_has_sb(fs, i)) 1099 ngroups++; 1100 } 1101 } else { 1102 ngroups = fs->e2fs_gcount; 1103 } 1104 ngdb = fs->e2fs_gdbcount; 1105 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1106 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1107 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1108 overhead += ngroups * (1 /* superblock */ + ngdb); 1109 1110 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1111 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1112 sbp->f_blocks = fs->e2fs_bcount - overhead; 1113 sbp->f_bfree = fs->e2fs_fbcount; 1114 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1115 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1116 sbp->f_ffree = fs->e2fs_ficount; 1117 return (0); 1118 } 1119 1120 /* 1121 * Go through the disk queues to initiate sandbagged IO; 1122 * go through the inodes to write those that have been modified; 1123 * initiate the writing of the super block if it has been modified. 1124 * 1125 * Note: we are always called with the filesystem marked `MPBUSY'. 1126 */ 1127 static int 1128 ext2_sync(struct mount *mp, int waitfor) 1129 { 1130 struct vnode *mvp, *vp; 1131 struct thread *td; 1132 struct inode *ip; 1133 struct ext2mount *ump = VFSTOEXT2(mp); 1134 struct m_ext2fs *fs; 1135 int error, allerror = 0; 1136 1137 td = curthread; 1138 fs = ump->um_e2fs; 1139 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1140 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1141 } 1142 1143 /* 1144 * Write back each (modified) inode. 1145 */ 1146 loop: 1147 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1148 if (vp->v_type == VNON) { 1149 VI_UNLOCK(vp); 1150 continue; 1151 } 1152 ip = VTOI(vp); 1153 if ((ip->i_flag & 1154 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1155 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 1156 waitfor == MNT_LAZY)) { 1157 VI_UNLOCK(vp); 1158 continue; 1159 } 1160 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); 1161 if (error) { 1162 if (error == ENOENT) { 1163 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1164 goto loop; 1165 } 1166 continue; 1167 } 1168 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 1169 allerror = error; 1170 VOP_UNLOCK(vp); 1171 vrele(vp); 1172 } 1173 1174 /* 1175 * Force stale filesystem control information to be flushed. 1176 */ 1177 if (waitfor != MNT_LAZY) { 1178 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1179 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 1180 allerror = error; 1181 VOP_UNLOCK(ump->um_devvp); 1182 } 1183 1184 /* 1185 * Write back modified superblock. 1186 */ 1187 if (fs->e2fs_fmod != 0) { 1188 fs->e2fs_fmod = 0; 1189 fs->e2fs->e2fs_wtime = htole32(time_second); 1190 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1191 allerror = error; 1192 } 1193 return (allerror); 1194 } 1195 1196 /* 1197 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1198 * in from disk. If it is in core, wait for the lock bit to clear, then 1199 * return the inode locked. Detection and handling of mount points must be 1200 * done by the calling routine. 1201 */ 1202 static int 1203 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) 1204 { 1205 struct m_ext2fs *fs; 1206 struct inode *ip; 1207 struct ext2mount *ump; 1208 struct buf *bp; 1209 struct vnode *vp; 1210 struct thread *td; 1211 unsigned int i, used_blocks; 1212 int error; 1213 1214 td = curthread; 1215 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); 1216 if (error || *vpp != NULL) 1217 return (error); 1218 1219 ump = VFSTOEXT2(mp); 1220 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1221 1222 /* Allocate a new vnode/inode. */ 1223 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 1224 *vpp = NULL; 1225 free(ip, M_EXT2NODE); 1226 return (error); 1227 } 1228 vp->v_data = ip; 1229 ip->i_vnode = vp; 1230 ip->i_e2fs = fs = ump->um_e2fs; 1231 ip->i_ump = ump; 1232 ip->i_number = ino; 1233 cluster_init_vn(&ip->i_clusterw); 1234 1235 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1236 error = insmntque(vp, mp); 1237 if (error != 0) { 1238 free(ip, M_EXT2NODE); 1239 *vpp = NULL; 1240 return (error); 1241 } 1242 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); 1243 if (error || *vpp != NULL) 1244 return (error); 1245 1246 /* Read in the disk contents for the inode, copy into the inode. */ 1247 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1248 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { 1249 /* 1250 * The inode does not contain anything useful, so it would 1251 * be misleading to leave it on its hash chain. With mode 1252 * still zero, it will be unlinked and returned to the free 1253 * list by vput(). 1254 */ 1255 brelse(bp); 1256 vput(vp); 1257 *vpp = NULL; 1258 return (error); 1259 } 1260 /* convert ext2 inode to dinode */ 1261 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1262 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1263 if (error) { 1264 brelse(bp); 1265 vput(vp); 1266 *vpp = NULL; 1267 return (error); 1268 } 1269 ip->i_block_group = ino_to_cg(fs, ino); 1270 ip->i_next_alloc_block = 0; 1271 ip->i_next_alloc_goal = 0; 1272 1273 /* 1274 * Now we want to make sure that block pointers for unused 1275 * blocks are zeroed out - ext2_balloc depends on this 1276 * although for regular files and directories only 1277 * 1278 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1279 * out because we could corrupt the extent tree. 1280 */ 1281 if (!(ip->i_flag & IN_E4EXTENTS) && 1282 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1283 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1284 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1285 ip->i_db[i] = 0; 1286 } 1287 1288 bqrelse(bp); 1289 1290 #ifdef EXT2FS_PRINT_EXTENTS 1291 ext2_print_inode(ip); 1292 error = ext4_ext_walk(ip); 1293 if (error) { 1294 vput(vp); 1295 *vpp = NULL; 1296 return (error); 1297 } 1298 #endif 1299 1300 /* 1301 * Initialize the vnode from the inode, check for aliases. 1302 * Note that the underlying vnode may have changed. 1303 */ 1304 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1305 vput(vp); 1306 *vpp = NULL; 1307 return (error); 1308 } 1309 1310 /* 1311 * Finish inode initialization. 1312 */ 1313 1314 *vpp = vp; 1315 return (0); 1316 } 1317 1318 /* 1319 * File handle to vnode 1320 * 1321 * Have to be really careful about stale file handles: 1322 * - check that the inode number is valid 1323 * - call ext2_vget() to get the locked inode 1324 * - check for an unallocated inode (i_mode == 0) 1325 * - check that the given client host has export rights and return 1326 * those rights via. exflagsp and credanonp 1327 */ 1328 static int 1329 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) 1330 { 1331 struct inode *ip; 1332 struct ufid *ufhp; 1333 struct vnode *nvp; 1334 struct m_ext2fs *fs; 1335 int error; 1336 1337 ufhp = (struct ufid *)fhp; 1338 fs = VFSTOEXT2(mp)->um_e2fs; 1339 if (ufhp->ufid_ino < EXT2_ROOTINO || 1340 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1341 return (ESTALE); 1342 1343 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1344 if (error) { 1345 *vpp = NULLVP; 1346 return (error); 1347 } 1348 ip = VTOI(nvp); 1349 if (ip->i_mode == 0 || 1350 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1351 vput(nvp); 1352 *vpp = NULLVP; 1353 return (ESTALE); 1354 } 1355 *vpp = nvp; 1356 vnode_create_vobject(*vpp, 0, curthread); 1357 return (0); 1358 } 1359 1360 /* 1361 * Write a superblock and associated information back to disk. 1362 */ 1363 static int 1364 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1365 { 1366 struct m_ext2fs *fs = mp->um_e2fs; 1367 struct ext2fs *es = fs->e2fs; 1368 struct buf *bp; 1369 int error = 0; 1370 1371 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1372 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1373 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1374 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1375 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1376 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1377 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1378 } 1379 1380 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1381 1382 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1383 ext2_sb_csum_set(fs); 1384 1385 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1386 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); 1387 if (waitfor == MNT_WAIT) 1388 error = bwrite(bp); 1389 else 1390 bawrite(bp); 1391 1392 /* 1393 * The buffers for group descriptors, inode bitmaps and block bitmaps 1394 * are not busy at this point and are (hopefully) written by the 1395 * usual sync mechanism. No need to write them here. 1396 */ 1397 return (error); 1398 } 1399 int 1400 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1401 { 1402 struct m_ext2fs *fs = mp->um_e2fs; 1403 struct buf *bp; 1404 int i, j, g_count = 0, error = 0, allerror = 0; 1405 1406 allerror = ext2_sbupdate(mp, waitfor); 1407 1408 /* Update gd csums */ 1409 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1410 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1411 ext2_gd_csum_set(fs); 1412 1413 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1414 bp = getblk(mp->um_devvp, fsbtodb(fs, 1415 ext2_cg_location(fs, i)), 1416 fs->e2fs_bsize, 0, 0, 0); 1417 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1418 memcpy(bp->b_data, &fs->e2fs_gd[ 1419 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1420 fs->e2fs_bsize); 1421 } else { 1422 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1423 g_count < fs->e2fs_gcount; j++, g_count++) 1424 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1425 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1426 } 1427 if (waitfor == MNT_WAIT) 1428 error = bwrite(bp); 1429 else 1430 bawrite(bp); 1431 } 1432 1433 if (!allerror && error) 1434 allerror = error; 1435 return (allerror); 1436 } 1437 1438 /* 1439 * Return the root of a filesystem. 1440 */ 1441 static int 1442 ext2_root(struct mount *mp, int flags, struct vnode **vpp) 1443 { 1444 struct vnode *nvp; 1445 int error; 1446 1447 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp); 1448 if (error) 1449 return (error); 1450 *vpp = nvp; 1451 return (0); 1452 } 1453