1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/kernel.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/bio.h> 50 #include <sys/buf.h> 51 #include <sys/conf.h> 52 #include <sys/endian.h> 53 #include <sys/fcntl.h> 54 #include <sys/malloc.h> 55 #include <sys/sdt.h> 56 #include <sys/stat.h> 57 #include <sys/mutex.h> 58 59 #include <geom/geom.h> 60 #include <geom/geom_vfs.h> 61 62 #include <fs/ext2fs/fs.h> 63 #include <fs/ext2fs/ext2_mount.h> 64 #include <fs/ext2fs/inode.h> 65 66 #include <fs/ext2fs/ext2fs.h> 67 #include <fs/ext2fs/ext2_dinode.h> 68 #include <fs/ext2fs/ext2_extern.h> 69 #include <fs/ext2fs/ext2_extents.h> 70 71 SDT_PROVIDER_DECLARE(ext2fs); 72 /* 73 * ext2fs trace probe: 74 * arg0: verbosity. Higher numbers give more verbose messages 75 * arg1: Textual message 76 */ 77 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 78 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 79 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 80 81 static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 82 static int ext2_mountfs(struct vnode *, struct mount *); 83 static int ext2_reload(struct mount *mp, struct thread *td); 84 static int ext2_sbupdate(struct ext2mount *, int); 85 static int ext2_cgupdate(struct ext2mount *, int); 86 static vfs_unmount_t ext2_unmount; 87 static vfs_root_t ext2_root; 88 static vfs_statfs_t ext2_statfs; 89 static vfs_sync_t ext2_sync; 90 static vfs_vget_t ext2_vget; 91 static vfs_fhtovp_t ext2_fhtovp; 92 static vfs_mount_t ext2_mount; 93 94 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 95 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 96 97 static struct vfsops ext2fs_vfsops = { 98 .vfs_fhtovp = ext2_fhtovp, 99 .vfs_mount = ext2_mount, 100 .vfs_root = ext2_root, /* root inode via vget */ 101 .vfs_statfs = ext2_statfs, 102 .vfs_sync = ext2_sync, 103 .vfs_unmount = ext2_unmount, 104 .vfs_vget = ext2_vget, 105 }; 106 107 VFS_SET(ext2fs_vfsops, ext2fs, 0); 108 109 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 110 int ronly); 111 static int ext2_compute_sb_data(struct vnode * devvp, 112 struct ext2fs * es, struct m_ext2fs * fs); 113 114 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", 115 "noclusterw", "noexec", "export", "force", "from", "multilabel", 116 "suiddir", "nosymfollow", "sync", "union", NULL }; 117 118 /* 119 * VFS Operations. 120 * 121 * mount system call 122 */ 123 static int 124 ext2_mount(struct mount *mp) 125 { 126 struct vfsoptlist *opts; 127 struct vnode *devvp; 128 struct thread *td; 129 struct ext2mount *ump = NULL; 130 struct m_ext2fs *fs; 131 struct nameidata nd, *ndp = &nd; 132 accmode_t accmode; 133 char *path, *fspec; 134 int error, flags, len; 135 136 td = curthread; 137 opts = mp->mnt_optnew; 138 139 if (vfs_filteropt(opts, ext2_opts)) 140 return (EINVAL); 141 142 vfs_getopt(opts, "fspath", (void **)&path, NULL); 143 /* Double-check the length of path.. */ 144 if (strlen(path) >= MAXMNTLEN) 145 return (ENAMETOOLONG); 146 147 fspec = NULL; 148 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 149 if (!error && fspec[len - 1] != '\0') 150 return (EINVAL); 151 152 /* 153 * If updating, check whether changing from read-only to 154 * read/write; if there is no device name, that's all we do. 155 */ 156 if (mp->mnt_flag & MNT_UPDATE) { 157 ump = VFSTOEXT2(mp); 158 fs = ump->um_e2fs; 159 error = 0; 160 if (fs->e2fs_ronly == 0 && 161 vfs_flagopt(opts, "ro", NULL, 0)) { 162 error = VFS_SYNC(mp, MNT_WAIT); 163 if (error) 164 return (error); 165 flags = WRITECLOSE; 166 if (mp->mnt_flag & MNT_FORCE) 167 flags |= FORCECLOSE; 168 error = ext2_flushfiles(mp, flags, td); 169 if (error == 0 && fs->e2fs_wasvalid && 170 ext2_cgupdate(ump, MNT_WAIT) == 0) { 171 fs->e2fs->e2fs_state = 172 htole16((le16toh(fs->e2fs->e2fs_state) | 173 E2FS_ISCLEAN)); 174 ext2_sbupdate(ump, MNT_WAIT); 175 } 176 fs->e2fs_ronly = 1; 177 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 178 g_topology_lock(); 179 g_access(ump->um_cp, 0, -1, 0); 180 g_topology_unlock(); 181 } 182 if (!error && (mp->mnt_flag & MNT_RELOAD)) 183 error = ext2_reload(mp, td); 184 if (error) 185 return (error); 186 devvp = ump->um_devvp; 187 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) { 188 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 189 return (EPERM); 190 191 /* 192 * If upgrade to read-write by non-root, then verify 193 * that user has necessary permissions on the device. 194 */ 195 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 196 error = VOP_ACCESS(devvp, VREAD | VWRITE, 197 td->td_ucred, td); 198 if (error) 199 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 200 if (error) { 201 VOP_UNLOCK(devvp); 202 return (error); 203 } 204 VOP_UNLOCK(devvp); 205 g_topology_lock(); 206 error = g_access(ump->um_cp, 0, 1, 0); 207 g_topology_unlock(); 208 if (error) 209 return (error); 210 211 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 212 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 213 if (mp->mnt_flag & MNT_FORCE) { 214 printf( 215 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 216 } else { 217 printf( 218 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 219 fs->e2fs_fsmnt); 220 return (EPERM); 221 } 222 } 223 fs->e2fs->e2fs_state = 224 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 225 (void)ext2_cgupdate(ump, MNT_WAIT); 226 fs->e2fs_ronly = 0; 227 MNT_ILOCK(mp); 228 mp->mnt_flag &= ~MNT_RDONLY; 229 MNT_IUNLOCK(mp); 230 } 231 if (vfs_flagopt(opts, "export", NULL, 0)) { 232 /* Process export requests in vfs_mount.c. */ 233 return (error); 234 } 235 } 236 237 /* 238 * Not an update, or updating the name: look up the name 239 * and verify that it refers to a sensible disk device. 240 */ 241 if (fspec == NULL) 242 return (EINVAL); 243 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec); 244 if ((error = namei(ndp)) != 0) 245 return (error); 246 NDFREE(ndp, NDF_ONLY_PNBUF); 247 devvp = ndp->ni_vp; 248 249 if (!vn_isdisk_error(devvp, &error)) { 250 vput(devvp); 251 return (error); 252 } 253 254 /* 255 * If mount by non-root, then verify that user has necessary 256 * permissions on the device. 257 * 258 * XXXRW: VOP_ACCESS() enough? 259 */ 260 accmode = VREAD; 261 if ((mp->mnt_flag & MNT_RDONLY) == 0) 262 accmode |= VWRITE; 263 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); 264 if (error) 265 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 266 if (error) { 267 vput(devvp); 268 return (error); 269 } 270 271 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 272 error = ext2_mountfs(devvp, mp); 273 } else { 274 if (devvp != ump->um_devvp) { 275 vput(devvp); 276 return (EINVAL); /* needs translation */ 277 } else 278 vput(devvp); 279 } 280 if (error) { 281 vrele(devvp); 282 return (error); 283 } 284 ump = VFSTOEXT2(mp); 285 fs = ump->um_e2fs; 286 287 /* 288 * Note that this strncpy() is ok because of a check at the start 289 * of ext2_mount(). 290 */ 291 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN); 292 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0'; 293 vfs_mountedfrom(mp, fspec); 294 return (0); 295 } 296 297 static int 298 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 299 { 300 uint32_t i, mask; 301 302 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 303 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 304 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 305 return (1); 306 } 307 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 308 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 309 if (mask) { 310 printf("WARNING: mount of %s denied due to " 311 "unsupported optional features:\n", devtoname(dev)); 312 for (i = 0; 313 i < sizeof(incompat)/sizeof(struct ext2_feature); 314 i++) 315 if (mask & incompat[i].mask) 316 printf("%s ", incompat[i].name); 317 printf("\n"); 318 return (1); 319 } 320 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 321 if (!ronly && mask) { 322 printf("WARNING: R/W mount of %s denied due to " 323 "unsupported optional features:\n", devtoname(dev)); 324 for (i = 0; 325 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 326 i++) 327 if (mask & ro_compat[i].mask) 328 printf("%s ", ro_compat[i].name); 329 printf("\n"); 330 return (1); 331 } 332 } 333 return (0); 334 } 335 336 static e4fs_daddr_t 337 ext2_cg_location(struct m_ext2fs *fs, int number) 338 { 339 int cg, descpb, logical_sb, has_super = 0; 340 341 /* 342 * Adjust logical superblock block number. 343 * Godmar thinks: if the blocksize is greater than 1024, then 344 * the superblock is logically part of block zero. 345 */ 346 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1; 347 348 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 349 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 350 return (logical_sb + number + 1); 351 352 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 353 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 354 else 355 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 356 357 cg = descpb * number; 358 359 if (ext2_cg_has_sb(fs, cg)) 360 has_super = 1; 361 362 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 363 le32toh(fs->e2fs->e2fs_first_dblock)); 364 } 365 366 static int 367 ext2_cg_validate(struct m_ext2fs *fs) 368 { 369 uint64_t b_bitmap; 370 uint64_t i_bitmap; 371 uint64_t i_tables; 372 uint64_t first_block, last_block, last_cg_block; 373 struct ext2_gd *gd; 374 unsigned int i, cg_count; 375 376 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 377 last_cg_block = ext2_cg_number_gdb(fs, 0); 378 cg_count = fs->e2fs_gcount; 379 380 for (i = 0; i < fs->e2fs_gcount; i++) { 381 gd = &fs->e2fs_gd[i]; 382 383 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 384 i == fs->e2fs_gcount - 1) { 385 last_block = fs->e2fs_bcount - 1; 386 } else { 387 last_block = first_block + 388 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 389 } 390 391 if ((cg_count == fs->e2fs_gcount) && 392 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 393 cg_count = i; 394 395 b_bitmap = e2fs_gd_get_b_bitmap(gd); 396 if (b_bitmap == 0) { 397 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 398 "block bitmap is zero", i); 399 return (EINVAL); 400 } 401 if (b_bitmap <= last_cg_block) { 402 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 403 "block bitmap overlaps gds", i); 404 return (EINVAL); 405 } 406 if (b_bitmap < first_block || b_bitmap > last_block) { 407 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 408 "block bitmap not in group", i); 409 return (EINVAL); 410 } 411 412 i_bitmap = e2fs_gd_get_i_bitmap(gd); 413 if (i_bitmap == 0) { 414 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 415 "inode bitmap is zero", i); 416 return (EINVAL); 417 } 418 if (i_bitmap <= last_cg_block) { 419 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 420 "inode bitmap overlaps gds", i); 421 return (EINVAL); 422 } 423 if (i_bitmap < first_block || i_bitmap > last_block) { 424 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 425 "inode bitmap not in group blk", i); 426 return (EINVAL); 427 } 428 429 i_tables = e2fs_gd_get_i_tables(gd); 430 if (i_tables == 0) { 431 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 432 "inode table is zero", i); 433 return (EINVAL); 434 } 435 if (i_tables <= last_cg_block) { 436 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 437 "inode tables overlaps gds", i); 438 return (EINVAL); 439 } 440 if (i_tables < first_block || 441 i_tables + fs->e2fs_itpg - 1 > last_block) { 442 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 443 "inode tables not in group blk", i); 444 return (EINVAL); 445 } 446 447 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 448 first_block += EXT2_BLOCKS_PER_GROUP(fs); 449 } 450 451 return (0); 452 } 453 454 /* 455 * This computes the fields of the m_ext2fs structure from the 456 * data in the ext2fs structure read in. 457 */ 458 static int 459 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 460 struct m_ext2fs *fs) 461 { 462 struct buf *bp; 463 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 464 int i, j; 465 int g_count = 0; 466 int error; 467 468 /* Check if first dblock is valid */ 469 if (fs->e2fs->e2fs_bcount >= 1024 && fs->e2fs->e2fs_first_dblock) { 470 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 471 "first dblock is invalid"); 472 return (EINVAL); 473 } 474 475 /* Check checksum features */ 476 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 477 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 478 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 479 "incorrect checksum features combination"); 480 return (EINVAL); 481 } 482 483 /* Precompute checksum seed for all metadata */ 484 ext2_sb_csum_set_seed(fs); 485 486 /* Verify sb csum if possible */ 487 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 488 error = ext2_sb_csum_verify(fs); 489 if (error) { 490 return (error); 491 } 492 } 493 494 /* Check for block size = 1K|2K|4K */ 495 if (le32toh(es->e2fs_log_bsize) > 2) { 496 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 497 "bad block size"); 498 return (EINVAL); 499 } 500 501 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 502 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 503 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 504 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 505 506 /* Check for fragment size */ 507 if (le32toh(es->e2fs_log_fsize) > 508 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 509 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 510 "invalid log cluster size"); 511 return (EINVAL); 512 } 513 514 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 515 if (fs->e2fs_fsize != fs->e2fs_bsize) { 516 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 517 "fragment size != block size"); 518 return (EINVAL); 519 } 520 521 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 522 523 /* Check reserved gdt blocks for future filesystem expansion */ 524 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 525 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 526 "number of reserved GDT blocks too large"); 527 return (EINVAL); 528 } 529 530 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 531 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 532 } else { 533 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 534 535 /* 536 * Check first ino. 537 */ 538 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 539 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 540 "invalid first ino"); 541 return (EINVAL); 542 } 543 544 /* 545 * Simple sanity check for superblock inode size value. 546 */ 547 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 548 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 549 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 550 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 551 "invalid inode size"); 552 return (EINVAL); 553 } 554 } 555 556 /* Check group descriptors */ 557 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 558 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 559 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 560 "unsupported 64bit descriptor size"); 561 return (EINVAL); 562 } 563 564 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 565 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 566 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 567 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 568 "zero blocks/fragments per group"); 569 return (EINVAL); 570 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 571 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 572 "blocks per group not equal fragments per group"); 573 return (EINVAL); 574 } 575 576 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 577 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 578 "non-standard group size unsupported"); 579 return (EINVAL); 580 } 581 582 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 583 if (fs->e2fs_ipb == 0 || 584 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 585 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 586 "bad inodes per block size"); 587 return (EINVAL); 588 } 589 590 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 591 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 592 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 593 "invalid inodes per group"); 594 return (EINVAL); 595 } 596 597 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 598 599 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 600 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 601 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 602 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 603 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 604 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 605 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 606 } 607 if (fs->e2fs_rbcount > fs->e2fs_bcount || 608 fs->e2fs_fbcount > fs->e2fs_bcount) { 609 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 610 "invalid block count"); 611 return (EINVAL); 612 } 613 614 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 615 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 616 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 617 "invalid number of free inodes"); 618 return (EINVAL); 619 } 620 621 if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) || 622 le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 623 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 624 "first data block out of range"); 625 return (EINVAL); 626 } 627 628 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 629 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 630 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 631 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 632 "groups count too large"); 633 return (EINVAL); 634 } 635 636 /* Check for extra isize in big inodes. */ 637 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 638 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 639 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 640 "no space for extra inode timestamps"); 641 return (EINVAL); 642 } 643 644 /* s_resuid / s_resgid ? */ 645 646 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 647 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 648 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 649 } else { 650 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 651 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 652 fs->e2fs_bsize / sizeof(struct ext2_gd)); 653 } 654 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 655 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 656 M_EXT2MNT, M_WAITOK | M_ZERO); 657 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 658 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 659 660 for (i = 0; i < fs->e2fs_gdbcount; i++) { 661 error = bread(devvp, 662 fsbtodb(fs, ext2_cg_location(fs, i)), 663 fs->e2fs_bsize, NOCRED, &bp); 664 if (error) { 665 /* 666 * fs->e2fs_gd and fs->e2fs_contigdirs 667 * will be freed later by the caller, 668 * because this function could be called from 669 * MNT_UPDATE path. 670 */ 671 return (error); 672 } 673 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 674 memcpy(&fs->e2fs_gd[ 675 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 676 bp->b_data, fs->e2fs_bsize); 677 } else { 678 for (j = 0; j < e2fs_descpb && 679 g_count < fs->e2fs_gcount; j++, g_count++) 680 memcpy(&fs->e2fs_gd[g_count], 681 bp->b_data + j * E2FS_REV0_GD_SIZE, 682 E2FS_REV0_GD_SIZE); 683 } 684 brelse(bp); 685 bp = NULL; 686 } 687 688 /* Validate cgs consistency */ 689 error = ext2_cg_validate(fs); 690 if (error) 691 return (error); 692 693 /* Verfy cgs csum */ 694 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 695 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 696 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 697 if (error) 698 return (error); 699 } 700 /* Initialization for the ext2 Orlov allocator variant. */ 701 fs->e2fs_total_dir = 0; 702 for (i = 0; i < fs->e2fs_gcount; i++) 703 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 704 705 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 706 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 707 fs->e2fs_maxfilesize = 0x7fffffff; 708 else { 709 fs->e2fs_maxfilesize = 0xffffffffffff; 710 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 711 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 712 } 713 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 714 fs->e2fs_uhash = 3; 715 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 716 #ifdef __CHAR_UNSIGNED__ 717 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 718 fs->e2fs_uhash = 3; 719 #else 720 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 721 #endif 722 } 723 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 724 error = ext2_sb_csum_verify(fs); 725 726 return (error); 727 } 728 729 /* 730 * Reload all incore data for a filesystem (used after running fsck on 731 * the root filesystem and finding things to fix). The filesystem must 732 * be mounted read-only. 733 * 734 * Things to do to update the mount: 735 * 1) invalidate all cached meta-data. 736 * 2) re-read superblock from disk. 737 * 3) invalidate all cluster summary information. 738 * 4) invalidate all inactive vnodes. 739 * 5) invalidate all cached file data. 740 * 6) re-read inode data for all active vnodes. 741 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 742 */ 743 static int 744 ext2_reload(struct mount *mp, struct thread *td) 745 { 746 struct vnode *vp, *mvp, *devvp; 747 struct inode *ip; 748 struct buf *bp; 749 struct ext2fs *es; 750 struct m_ext2fs *fs; 751 struct csum *sump; 752 int error, i; 753 int32_t *lp; 754 755 if ((mp->mnt_flag & MNT_RDONLY) == 0) 756 return (EINVAL); 757 /* 758 * Step 1: invalidate all cached meta-data. 759 */ 760 devvp = VFSTOEXT2(mp)->um_devvp; 761 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 762 if (vinvalbuf(devvp, 0, 0, 0) != 0) 763 panic("ext2_reload: dirty1"); 764 VOP_UNLOCK(devvp); 765 766 /* 767 * Step 2: re-read superblock from disk. 768 * constants have been adjusted for ext2 769 */ 770 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 771 return (error); 772 es = (struct ext2fs *)bp->b_data; 773 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 774 brelse(bp); 775 return (EIO); /* XXX needs translation */ 776 } 777 fs = VFSTOEXT2(mp)->um_e2fs; 778 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 779 780 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 781 brelse(bp); 782 return (error); 783 } 784 #ifdef UNKLAR 785 if (fs->fs_sbsize < SBSIZE) 786 bp->b_flags |= B_INVAL; 787 #endif 788 brelse(bp); 789 790 /* 791 * Step 3: invalidate all cluster summary information. 792 */ 793 if (fs->e2fs_contigsumsize > 0) { 794 lp = fs->e2fs_maxcluster; 795 sump = fs->e2fs_clustersum; 796 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 797 *lp++ = fs->e2fs_contigsumsize; 798 sump->cs_init = 0; 799 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 800 } 801 } 802 803 loop: 804 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 805 /* 806 * Step 4: invalidate all cached file data. 807 */ 808 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { 809 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 810 goto loop; 811 } 812 if (vinvalbuf(vp, 0, 0, 0)) 813 panic("ext2_reload: dirty2"); 814 815 /* 816 * Step 5: re-read inode data for all active vnodes. 817 */ 818 ip = VTOI(vp); 819 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 820 (int)fs->e2fs_bsize, NOCRED, &bp); 821 if (error) { 822 VOP_UNLOCK(vp); 823 vrele(vp); 824 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 825 return (error); 826 } 827 828 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 829 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); 830 831 brelse(bp); 832 VOP_UNLOCK(vp); 833 vrele(vp); 834 835 if (error) { 836 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 837 return (error); 838 } 839 } 840 return (0); 841 } 842 843 /* 844 * Common code for mount and mountroot. 845 */ 846 static int 847 ext2_mountfs(struct vnode *devvp, struct mount *mp) 848 { 849 struct ext2mount *ump; 850 struct buf *bp; 851 struct m_ext2fs *fs; 852 struct ext2fs *es; 853 struct cdev *dev = devvp->v_rdev; 854 struct g_consumer *cp; 855 struct bufobj *bo; 856 struct csum *sump; 857 int error; 858 int ronly; 859 int i; 860 u_long size; 861 int32_t *lp; 862 int32_t e2fs_maxcontig; 863 864 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 865 /* XXX: use VOP_ACESS to check FS perms */ 866 g_topology_lock(); 867 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 868 g_topology_unlock(); 869 VOP_UNLOCK(devvp); 870 if (error) 871 return (error); 872 873 /* XXX: should we check for some sectorsize or 512 instead? */ 874 if (((SBSIZE % cp->provider->sectorsize) != 0) || 875 (SBSIZE < cp->provider->sectorsize)) { 876 g_topology_lock(); 877 g_vfs_close(cp); 878 g_topology_unlock(); 879 return (EINVAL); 880 } 881 882 bo = &devvp->v_bufobj; 883 bo->bo_private = cp; 884 bo->bo_ops = g_vfs_bufops; 885 if (devvp->v_rdev->si_iosize_max != 0) 886 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 887 if (mp->mnt_iosize_max > maxphys) 888 mp->mnt_iosize_max = maxphys; 889 890 bp = NULL; 891 ump = NULL; 892 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 893 goto out; 894 es = (struct ext2fs *)bp->b_data; 895 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 896 error = EINVAL; /* XXX needs translation */ 897 goto out; 898 } 899 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 900 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 901 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 902 printf( 903 "WARNING: Filesystem was not properly dismounted\n"); 904 } else { 905 printf( 906 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 907 error = EPERM; 908 goto out; 909 } 910 } 911 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 912 913 /* 914 * I don't know whether this is the right strategy. Note that 915 * we dynamically allocate both an m_ext2fs and an ext2fs 916 * while Linux keeps the super block in a locked buffer. 917 */ 918 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 919 M_EXT2MNT, M_WAITOK | M_ZERO); 920 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 921 M_EXT2MNT, M_WAITOK); 922 mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF); 923 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 924 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 925 goto out; 926 927 /* 928 * Calculate the maximum contiguous blocks and size of cluster summary 929 * array. In FFS this is done by newfs; however, the superblock 930 * in ext2fs doesn't have these variables, so we can calculate 931 * them here. 932 */ 933 e2fs_maxcontig = MAX(1, maxphys / ump->um_e2fs->e2fs_bsize); 934 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 935 ump->um_e2fs->e2fs_maxsymlinklen = EXT2_MAXSYMLINKLEN; 936 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 937 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 938 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 939 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 940 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 941 lp = ump->um_e2fs->e2fs_maxcluster; 942 sump = ump->um_e2fs->e2fs_clustersum; 943 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 944 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 945 sump->cs_init = 0; 946 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 947 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 948 } 949 } 950 951 brelse(bp); 952 bp = NULL; 953 fs = ump->um_e2fs; 954 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 955 956 /* 957 * If the fs is not mounted read-only, make sure the super block is 958 * always written back on a sync(). 959 */ 960 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 961 if (ronly == 0) { 962 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 963 fs->e2fs->e2fs_state = 964 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 965 } 966 mp->mnt_data = ump; 967 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 968 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 969 MNT_ILOCK(mp); 970 mp->mnt_flag |= MNT_LOCAL; 971 MNT_IUNLOCK(mp); 972 ump->um_mountp = mp; 973 ump->um_dev = dev; 974 ump->um_devvp = devvp; 975 ump->um_bo = &devvp->v_bufobj; 976 ump->um_cp = cp; 977 978 /* 979 * Setting those two parameters allowed us to use 980 * ufs_bmap w/o changse! 981 */ 982 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 983 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 984 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 985 if (ronly == 0) 986 ext2_sbupdate(ump, MNT_WAIT); 987 /* 988 * Initialize filesystem stat information in mount struct. 989 */ 990 MNT_ILOCK(mp); 991 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | 992 MNTK_USES_BCACHE; 993 MNT_IUNLOCK(mp); 994 return (0); 995 out: 996 if (bp) 997 brelse(bp); 998 if (cp != NULL) { 999 g_topology_lock(); 1000 g_vfs_close(cp); 1001 g_topology_unlock(); 1002 } 1003 if (ump) { 1004 mtx_destroy(EXT2_MTX(ump)); 1005 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 1006 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 1007 free(ump->um_e2fs->e2fs, M_EXT2MNT); 1008 free(ump->um_e2fs, M_EXT2MNT); 1009 free(ump, M_EXT2MNT); 1010 mp->mnt_data = NULL; 1011 } 1012 return (error); 1013 } 1014 1015 /* 1016 * Unmount system call. 1017 */ 1018 static int 1019 ext2_unmount(struct mount *mp, int mntflags) 1020 { 1021 struct ext2mount *ump; 1022 struct m_ext2fs *fs; 1023 struct csum *sump; 1024 int error, flags, i, ronly; 1025 1026 flags = 0; 1027 if (mntflags & MNT_FORCE) { 1028 if (mp->mnt_flag & MNT_ROOTFS) 1029 return (EINVAL); 1030 flags |= FORCECLOSE; 1031 } 1032 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0) 1033 return (error); 1034 ump = VFSTOEXT2(mp); 1035 fs = ump->um_e2fs; 1036 ronly = fs->e2fs_ronly; 1037 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1038 if (fs->e2fs_wasvalid) 1039 fs->e2fs->e2fs_state = 1040 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1041 ext2_sbupdate(ump, MNT_WAIT); 1042 } 1043 1044 g_topology_lock(); 1045 g_vfs_close(ump->um_cp); 1046 g_topology_unlock(); 1047 vrele(ump->um_devvp); 1048 sump = fs->e2fs_clustersum; 1049 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1050 free(sump->cs_sum, M_EXT2MNT); 1051 free(fs->e2fs_clustersum, M_EXT2MNT); 1052 free(fs->e2fs_maxcluster, M_EXT2MNT); 1053 free(fs->e2fs_gd, M_EXT2MNT); 1054 free(fs->e2fs_contigdirs, M_EXT2MNT); 1055 free(fs->e2fs, M_EXT2MNT); 1056 free(fs, M_EXT2MNT); 1057 free(ump, M_EXT2MNT); 1058 mp->mnt_data = NULL; 1059 MNT_ILOCK(mp); 1060 mp->mnt_flag &= ~MNT_LOCAL; 1061 MNT_IUNLOCK(mp); 1062 return (error); 1063 } 1064 1065 /* 1066 * Flush out all the files in a filesystem. 1067 */ 1068 static int 1069 ext2_flushfiles(struct mount *mp, int flags, struct thread *td) 1070 { 1071 int error; 1072 1073 error = vflush(mp, 0, flags, td); 1074 return (error); 1075 } 1076 1077 /* 1078 * Get filesystem statistics. 1079 */ 1080 int 1081 ext2_statfs(struct mount *mp, struct statfs *sbp) 1082 { 1083 struct ext2mount *ump; 1084 struct m_ext2fs *fs; 1085 uint32_t overhead, overhead_per_group, ngdb; 1086 int i, ngroups; 1087 1088 ump = VFSTOEXT2(mp); 1089 fs = ump->um_e2fs; 1090 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1091 panic("ext2_statfs"); 1092 1093 /* 1094 * Compute the overhead (FS structures) 1095 */ 1096 overhead_per_group = 1097 1 /* block bitmap */ + 1098 1 /* inode bitmap */ + 1099 fs->e2fs_itpg; 1100 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1101 fs->e2fs_gcount * overhead_per_group; 1102 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1103 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1104 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1105 if (ext2_cg_has_sb(fs, i)) 1106 ngroups++; 1107 } 1108 } else { 1109 ngroups = fs->e2fs_gcount; 1110 } 1111 ngdb = fs->e2fs_gdbcount; 1112 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1113 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1114 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1115 overhead += ngroups * (1 /* superblock */ + ngdb); 1116 1117 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1118 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1119 sbp->f_blocks = fs->e2fs_bcount - overhead; 1120 sbp->f_bfree = fs->e2fs_fbcount; 1121 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1122 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1123 sbp->f_ffree = fs->e2fs_ficount; 1124 return (0); 1125 } 1126 1127 /* 1128 * Go through the disk queues to initiate sandbagged IO; 1129 * go through the inodes to write those that have been modified; 1130 * initiate the writing of the super block if it has been modified. 1131 * 1132 * Note: we are always called with the filesystem marked `MPBUSY'. 1133 */ 1134 static int 1135 ext2_sync(struct mount *mp, int waitfor) 1136 { 1137 struct vnode *mvp, *vp; 1138 struct thread *td; 1139 struct inode *ip; 1140 struct ext2mount *ump = VFSTOEXT2(mp); 1141 struct m_ext2fs *fs; 1142 int error, allerror = 0; 1143 1144 td = curthread; 1145 fs = ump->um_e2fs; 1146 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1147 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1148 } 1149 1150 /* 1151 * Write back each (modified) inode. 1152 */ 1153 loop: 1154 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1155 if (vp->v_type == VNON) { 1156 VI_UNLOCK(vp); 1157 continue; 1158 } 1159 ip = VTOI(vp); 1160 if ((ip->i_flag & 1161 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1162 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 1163 waitfor == MNT_LAZY)) { 1164 VI_UNLOCK(vp); 1165 continue; 1166 } 1167 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); 1168 if (error) { 1169 if (error == ENOENT) { 1170 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1171 goto loop; 1172 } 1173 continue; 1174 } 1175 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 1176 allerror = error; 1177 VOP_UNLOCK(vp); 1178 vrele(vp); 1179 } 1180 1181 /* 1182 * Force stale filesystem control information to be flushed. 1183 */ 1184 if (waitfor != MNT_LAZY) { 1185 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1186 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 1187 allerror = error; 1188 VOP_UNLOCK(ump->um_devvp); 1189 } 1190 1191 /* 1192 * Write back modified superblock. 1193 */ 1194 if (fs->e2fs_fmod != 0) { 1195 fs->e2fs_fmod = 0; 1196 fs->e2fs->e2fs_wtime = htole32(time_second); 1197 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1198 allerror = error; 1199 } 1200 return (allerror); 1201 } 1202 1203 /* 1204 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1205 * in from disk. If it is in core, wait for the lock bit to clear, then 1206 * return the inode locked. Detection and handling of mount points must be 1207 * done by the calling routine. 1208 */ 1209 static int 1210 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) 1211 { 1212 struct m_ext2fs *fs; 1213 struct inode *ip; 1214 struct ext2mount *ump; 1215 struct buf *bp; 1216 struct vnode *vp; 1217 struct thread *td; 1218 unsigned int i, used_blocks; 1219 int error; 1220 1221 td = curthread; 1222 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); 1223 if (error || *vpp != NULL) 1224 return (error); 1225 1226 ump = VFSTOEXT2(mp); 1227 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1228 1229 /* Allocate a new vnode/inode. */ 1230 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 1231 *vpp = NULL; 1232 free(ip, M_EXT2NODE); 1233 return (error); 1234 } 1235 vp->v_data = ip; 1236 ip->i_vnode = vp; 1237 ip->i_e2fs = fs = ump->um_e2fs; 1238 ip->i_ump = ump; 1239 ip->i_number = ino; 1240 cluster_init_vn(&ip->i_clusterw); 1241 1242 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1243 error = insmntque(vp, mp); 1244 if (error != 0) { 1245 free(ip, M_EXT2NODE); 1246 *vpp = NULL; 1247 return (error); 1248 } 1249 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); 1250 if (error || *vpp != NULL) 1251 return (error); 1252 1253 /* Read in the disk contents for the inode, copy into the inode. */ 1254 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1255 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { 1256 /* 1257 * The inode does not contain anything useful, so it would 1258 * be misleading to leave it on its hash chain. With mode 1259 * still zero, it will be unlinked and returned to the free 1260 * list by vput(). 1261 */ 1262 brelse(bp); 1263 vput(vp); 1264 *vpp = NULL; 1265 return (error); 1266 } 1267 /* convert ext2 inode to dinode */ 1268 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1269 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1270 if (error) { 1271 brelse(bp); 1272 vput(vp); 1273 *vpp = NULL; 1274 return (error); 1275 } 1276 ip->i_block_group = ino_to_cg(fs, ino); 1277 ip->i_next_alloc_block = 0; 1278 ip->i_next_alloc_goal = 0; 1279 1280 /* 1281 * Now we want to make sure that block pointers for unused 1282 * blocks are zeroed out - ext2_balloc depends on this 1283 * although for regular files and directories only 1284 * 1285 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1286 * out because we could corrupt the extent tree. 1287 */ 1288 if (!(ip->i_flag & IN_E4EXTENTS) && 1289 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1290 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1291 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1292 ip->i_db[i] = 0; 1293 } 1294 1295 bqrelse(bp); 1296 1297 #ifdef EXT2FS_PRINT_EXTENTS 1298 ext2_print_inode(ip); 1299 error = ext4_ext_walk(ip); 1300 if (error) { 1301 vput(vp); 1302 *vpp = NULL; 1303 return (error); 1304 } 1305 #endif 1306 1307 /* 1308 * Initialize the vnode from the inode, check for aliases. 1309 * Note that the underlying vnode may have changed. 1310 */ 1311 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1312 vput(vp); 1313 *vpp = NULL; 1314 return (error); 1315 } 1316 1317 /* 1318 * Finish inode initialization. 1319 */ 1320 1321 *vpp = vp; 1322 return (0); 1323 } 1324 1325 /* 1326 * File handle to vnode 1327 * 1328 * Have to be really careful about stale file handles: 1329 * - check that the inode number is valid 1330 * - call ext2_vget() to get the locked inode 1331 * - check for an unallocated inode (i_mode == 0) 1332 * - check that the given client host has export rights and return 1333 * those rights via. exflagsp and credanonp 1334 */ 1335 static int 1336 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) 1337 { 1338 struct inode *ip; 1339 struct ufid *ufhp; 1340 struct vnode *nvp; 1341 struct m_ext2fs *fs; 1342 int error; 1343 1344 ufhp = (struct ufid *)fhp; 1345 fs = VFSTOEXT2(mp)->um_e2fs; 1346 if (ufhp->ufid_ino < EXT2_ROOTINO || 1347 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1348 return (ESTALE); 1349 1350 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1351 if (error) { 1352 *vpp = NULLVP; 1353 return (error); 1354 } 1355 ip = VTOI(nvp); 1356 if (ip->i_mode == 0 || 1357 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1358 vput(nvp); 1359 *vpp = NULLVP; 1360 return (ESTALE); 1361 } 1362 *vpp = nvp; 1363 vnode_create_vobject(*vpp, 0, curthread); 1364 return (0); 1365 } 1366 1367 /* 1368 * Write a superblock and associated information back to disk. 1369 */ 1370 static int 1371 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1372 { 1373 struct m_ext2fs *fs = mp->um_e2fs; 1374 struct ext2fs *es = fs->e2fs; 1375 struct buf *bp; 1376 int error = 0; 1377 1378 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1379 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1380 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1381 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1382 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1383 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1384 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1385 } 1386 1387 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1388 1389 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1390 ext2_sb_csum_set(fs); 1391 1392 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1393 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); 1394 if (waitfor == MNT_WAIT) 1395 error = bwrite(bp); 1396 else 1397 bawrite(bp); 1398 1399 /* 1400 * The buffers for group descriptors, inode bitmaps and block bitmaps 1401 * are not busy at this point and are (hopefully) written by the 1402 * usual sync mechanism. No need to write them here. 1403 */ 1404 return (error); 1405 } 1406 int 1407 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1408 { 1409 struct m_ext2fs *fs = mp->um_e2fs; 1410 struct buf *bp; 1411 int i, j, g_count = 0, error = 0, allerror = 0; 1412 1413 allerror = ext2_sbupdate(mp, waitfor); 1414 1415 /* Update gd csums */ 1416 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1417 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1418 ext2_gd_csum_set(fs); 1419 1420 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1421 bp = getblk(mp->um_devvp, fsbtodb(fs, 1422 ext2_cg_location(fs, i)), 1423 fs->e2fs_bsize, 0, 0, 0); 1424 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1425 memcpy(bp->b_data, &fs->e2fs_gd[ 1426 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1427 fs->e2fs_bsize); 1428 } else { 1429 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1430 g_count < fs->e2fs_gcount; j++, g_count++) 1431 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1432 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1433 } 1434 if (waitfor == MNT_WAIT) 1435 error = bwrite(bp); 1436 else 1437 bawrite(bp); 1438 } 1439 1440 if (!allerror && error) 1441 allerror = error; 1442 return (allerror); 1443 } 1444 1445 /* 1446 * Return the root of a filesystem. 1447 */ 1448 static int 1449 ext2_root(struct mount *mp, int flags, struct vnode **vpp) 1450 { 1451 struct vnode *nvp; 1452 int error; 1453 1454 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp); 1455 if (error) 1456 return (error); 1457 *vpp = nvp; 1458 return (0); 1459 } 1460