1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* 27 * Copyright (c) 2017 by Delphix. All rights reserved. 28 * Copyright 2024 Oxide Computer Company 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/kmem.h> 34 #include <sys/user.h> 35 #include <sys/proc.h> 36 #include <sys/cred.h> 37 #include <sys/disp.h> 38 #include <sys/buf.h> 39 #include <sys/vfs.h> 40 #include <sys/vfs_opreg.h> 41 #include <sys/vnode.h> 42 #include <sys/fdio.h> 43 #include <sys/file.h> 44 #include <sys/uio.h> 45 #include <sys/conf.h> 46 #include <sys/statvfs.h> 47 #include <sys/mount.h> 48 #include <sys/pathname.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/sysmacros.h> 52 #include <sys/conf.h> 53 #include <sys/mkdev.h> 54 #include <sys/swap.h> 55 #include <sys/sunddi.h> 56 #include <sys/sunldi.h> 57 #include <sys/dktp/fdisk.h> 58 #include <sys/fs/pc_label.h> 59 #include <sys/fs/pc_fs.h> 60 #include <sys/fs/pc_dir.h> 61 #include <sys/fs/pc_node.h> 62 #include <fs/fs_subr.h> 63 #include <sys/modctl.h> 64 #include <sys/dkio.h> 65 #include <sys/open.h> 66 #include <sys/mntent.h> 67 #include <sys/policy.h> 68 #include <sys/atomic.h> 69 #include <sys/sdt.h> 70 71 /* 72 * The majority of PC media use a 512 sector size, but 73 * occasionally you will run across a 1k sector size. 74 * For media with a 1k sector size, fd_strategy() requires 75 * the I/O size to be a 1k multiple; so when the sector size 76 * is not yet known, always read 1k. 77 */ 78 #define PC_SAFESECSIZE (PC_SECSIZE * 2) 79 80 static int pcfs_pseudo_floppy(dev_t); 81 82 static int pcfsinit(int, char *); 83 static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *, 84 struct cred *); 85 static int pcfs_unmount(struct vfs *, int, struct cred *); 86 static int pcfs_root(struct vfs *, struct vnode **); 87 static int pcfs_statvfs(struct vfs *, struct statvfs64 *); 88 static int pc_syncfsnodes(struct pcfs *); 89 static int pcfs_sync(struct vfs *, short, struct cred *); 90 static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp); 91 static void pcfs_freevfs(vfs_t *vfsp); 92 static int pcfs_syncfs(struct vfs *, uint64_t, struct cred *); 93 94 static int pc_readfat(struct pcfs *fsp, uchar_t *fatp); 95 static int pc_writefat(struct pcfs *fsp, daddr_t start); 96 97 static int pc_getfattype(struct pcfs *fsp); 98 static void pcfs_parse_mntopts(struct pcfs *fsp); 99 100 101 /* 102 * pcfs mount options table 103 */ 104 105 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL }; 106 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL }; 107 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL }; 108 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL }; 109 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL }; 110 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL }; 111 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 112 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 113 114 static mntopt_t mntopts[] = { 115 /* 116 * option name cancel option default arg flags opt data 117 */ 118 { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL }, 119 { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL }, 120 { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL }, 121 { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL }, 122 { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL }, 123 { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, 0, NULL }, 124 { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 125 { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL }, 126 { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL }, 127 { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL } 128 }; 129 130 static mntopts_t pcfs_mntopts = { 131 sizeof (mntopts) / sizeof (mntopt_t), 132 mntopts 133 }; 134 135 int pcfsdebuglevel = 0; 136 137 /* 138 * pcfslock: protects the list of mounted pc filesystems "pc_mounttab. 139 * pcfs_lock: (inside per filesystem structure "pcfs") 140 * per filesystem lock. Most of the vfsops and vnodeops are 141 * protected by this lock. 142 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead". 143 * 144 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock 145 * 146 * pcfs_mountcount: used to prevent module unloads while there is still 147 * pcfs state from a former mount hanging around. With 148 * forced umount support, the filesystem module must not 149 * be allowed to go away before the last VFS_FREEVFS() 150 * call has been made. 151 * Since this is just an atomic counter, there's no need 152 * for locking. 153 */ 154 kmutex_t pcfslock; 155 krwlock_t pcnodes_lock; 156 uint32_t pcfs_mountcount; 157 158 static int pcfstype; 159 160 static vfsdef_t vfw = { 161 VFSDEF_VERSION, 162 "pcfs", 163 pcfsinit, 164 VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI|VSW_MOUNTDEV, 165 &pcfs_mntopts 166 }; 167 168 extern struct mod_ops mod_fsops; 169 170 static struct modlfs modlfs = { 171 &mod_fsops, 172 "PC filesystem", 173 &vfw 174 }; 175 176 static struct modlinkage modlinkage = { 177 MODREV_1, 178 &modlfs, 179 NULL 180 }; 181 182 int 183 _init(void) 184 { 185 int error; 186 187 #if !defined(lint) 188 /* make sure the on-disk structures are sane */ 189 ASSERT(sizeof (struct pcdir) == 32); 190 ASSERT(sizeof (struct pcdir_lfn) == 32); 191 #endif 192 mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL); 193 rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL); 194 error = mod_install(&modlinkage); 195 if (error) { 196 mutex_destroy(&pcfslock); 197 rw_destroy(&pcnodes_lock); 198 } 199 return (error); 200 } 201 202 int 203 _fini(void) 204 { 205 int error; 206 207 /* 208 * If a forcedly unmounted instance is still hanging around, 209 * we cannot allow the module to be unloaded because that would 210 * cause panics once the VFS framework decides it's time to call 211 * into VFS_FREEVFS(). 212 */ 213 if (pcfs_mountcount) 214 return (EBUSY); 215 216 error = mod_remove(&modlinkage); 217 if (error) 218 return (error); 219 mutex_destroy(&pcfslock); 220 rw_destroy(&pcnodes_lock); 221 /* 222 * Tear down the operations vectors 223 */ 224 (void) vfs_freevfsops_by_type(pcfstype); 225 vn_freevnodeops(pcfs_fvnodeops); 226 vn_freevnodeops(pcfs_dvnodeops); 227 return (0); 228 } 229 230 int 231 _info(struct modinfo *modinfop) 232 { 233 return (mod_info(&modlinkage, modinfop)); 234 } 235 236 /* ARGSUSED1 */ 237 static int 238 pcfsinit(int fstype, char *name) 239 { 240 static const fs_operation_def_t pcfs_vfsops_template[] = { 241 VFSNAME_MOUNT, { .vfs_mount = pcfs_mount }, 242 VFSNAME_UNMOUNT, { .vfs_unmount = pcfs_unmount }, 243 VFSNAME_ROOT, { .vfs_root = pcfs_root }, 244 VFSNAME_STATVFS, { .vfs_statvfs = pcfs_statvfs }, 245 VFSNAME_SYNC, { .vfs_sync = pcfs_sync }, 246 VFSNAME_VGET, { .vfs_vget = pcfs_vget }, 247 VFSNAME_FREEVFS, { .vfs_freevfs = pcfs_freevfs }, 248 VFSNAME_SYNCFS, { .vfs_syncfs = pcfs_syncfs }, 249 NULL, NULL 250 }; 251 int error; 252 253 error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL); 254 if (error != 0) { 255 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template"); 256 return (error); 257 } 258 259 error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops); 260 if (error != 0) { 261 (void) vfs_freevfsops_by_type(fstype); 262 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template"); 263 return (error); 264 } 265 266 error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops); 267 if (error != 0) { 268 (void) vfs_freevfsops_by_type(fstype); 269 vn_freevnodeops(pcfs_fvnodeops); 270 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template"); 271 return (error); 272 } 273 274 pcfstype = fstype; 275 (void) pc_init(); 276 pcfs_mountcount = 0; 277 return (0); 278 } 279 280 static struct pcfs *pc_mounttab = NULL; 281 282 extern struct pcfs_args pc_tz; 283 284 /* 285 * Define some special logical drives we use internal to this file. 286 */ 287 #define BOOT_PARTITION_DRIVE 99 288 #define PRIMARY_DOS_DRIVE 1 289 #define UNPARTITIONED_DRIVE 0 290 291 static int 292 pcfs_device_identify( 293 struct vfs *vfsp, 294 struct mounta *uap, 295 struct cred *cr, 296 int *dos_ldrive, 297 dev_t *xdev) 298 { 299 struct pathname special; 300 char *c; 301 struct vnode *svp = NULL; 302 struct vnode *lvp = NULL; 303 int oflag, aflag; 304 int error; 305 306 /* 307 * Resolve path name of special file being mounted. 308 */ 309 if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) { 310 return (error); 311 } 312 313 *dos_ldrive = -1; 314 315 if (error = 316 lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) { 317 /* 318 * If there's no device node, the name specified most likely 319 * maps to a PCFS-style "partition specifier" to select a 320 * harddisk primary/logical partition. Disable floppy-specific 321 * checks in such cases unless an explicit :A or :B is 322 * requested. 323 */ 324 325 /* 326 * Split the pathname string at the last ':' separator. 327 * If there's no ':' in the device name, or the ':' is the 328 * last character in the string, the name is invalid and 329 * the error from the previous lookup will be returned. 330 */ 331 c = strrchr(special.pn_path, ':'); 332 if (c == NULL || strlen(c) == 0) 333 goto devlookup_done; 334 335 *c++ = '\0'; 336 337 /* 338 * PCFS partition name suffixes can be: 339 * - "boot" to indicate the X86BOOT partition 340 * - a drive letter [c-z] for the "DOS logical drive" 341 * - a drive number 1..24 for the "DOS logical drive" 342 * - a "floppy name letter", 'a' or 'b' (just strip this) 343 */ 344 if (strcasecmp(c, "boot") == 0) { 345 /* 346 * The Solaris boot partition is requested. 347 */ 348 *dos_ldrive = BOOT_PARTITION_DRIVE; 349 } else if (strspn(c, "0123456789") == strlen(c)) { 350 /* 351 * All digits - parse the partition number. 352 */ 353 long drvnum = 0; 354 355 if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) { 356 /* 357 * A number alright - in the allowed range ? 358 */ 359 if (drvnum > 24 || drvnum == 0) 360 error = ENXIO; 361 } 362 if (error) 363 goto devlookup_done; 364 *dos_ldrive = (int)drvnum; 365 } else if (strlen(c) == 1) { 366 /* 367 * A single trailing character was specified. 368 * - [c-zC-Z] means a harddisk partition, and 369 * we retrieve the partition number. 370 * - [abAB] means a floppy drive, so we swallow 371 * the "drive specifier" and test later 372 * whether the physical device is a floppy. 373 */ 374 *c = tolower(*c); 375 if (*c == 'a' || *c == 'b') { 376 *dos_ldrive = UNPARTITIONED_DRIVE; 377 } else if (*c < 'c' || *c > 'z') { 378 error = ENXIO; 379 goto devlookup_done; 380 } else { 381 *dos_ldrive = 1 + *c - 'c'; 382 } 383 } else { 384 /* 385 * Can't parse this - pass through previous error. 386 */ 387 goto devlookup_done; 388 } 389 390 391 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, 392 NULLVPP, &svp); 393 } else { 394 *dos_ldrive = UNPARTITIONED_DRIVE; 395 } 396 devlookup_done: 397 pn_free(&special); 398 if (error) 399 return (error); 400 401 ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE); 402 403 /* 404 * Verify caller's permission to open the device special file. 405 */ 406 if ((vfsp->vfs_flag & VFS_RDONLY) != 0 || 407 ((uap->flags & MS_RDONLY) != 0)) { 408 oflag = FREAD; 409 aflag = VREAD; 410 } else { 411 oflag = FREAD | FWRITE; 412 aflag = VREAD | VWRITE; 413 } 414 415 error = vfs_get_lofi(vfsp, &lvp); 416 417 if (error > 0) { 418 if (error == ENOENT) 419 error = ENODEV; 420 goto out; 421 } else if (error == 0) { 422 *xdev = lvp->v_rdev; 423 } else { 424 *xdev = svp->v_rdev; 425 426 if (svp->v_type != VBLK) { 427 error = ENOTBLK; 428 goto out; 429 } 430 431 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0) 432 goto out; 433 } 434 435 if (getmajor(*xdev) >= devcnt) { 436 error = ENXIO; 437 goto out; 438 } 439 440 if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0) 441 goto out; 442 443 out: 444 if (svp != NULL) 445 VN_RELE(svp); 446 if (lvp != NULL) 447 VN_RELE(lvp); 448 return (error); 449 } 450 451 static int 452 pcfs_device_ismounted( 453 struct vfs *vfsp, 454 int dos_ldrive, 455 dev_t xdev, 456 int *remounting, 457 dev_t *pseudodev) 458 { 459 struct pcfs *fsp; 460 int remount = *remounting; 461 462 /* 463 * Ensure that this logical drive isn't already mounted, unless 464 * this is a REMOUNT request. 465 * Note: The framework will perform this check if the "...:c" 466 * PCFS-style "logical drive" syntax has not been used and an 467 * actually existing physical device is backing this filesystem. 468 * Once all block device drivers support PC-style partitioning, 469 * this codeblock can be dropped. 470 */ 471 *pseudodev = xdev; 472 473 if (dos_ldrive) { 474 mutex_enter(&pcfslock); 475 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt) 476 if (fsp->pcfs_xdev == xdev && 477 fsp->pcfs_ldrive == dos_ldrive) { 478 mutex_exit(&pcfslock); 479 if (remount) { 480 return (0); 481 } else { 482 return (EBUSY); 483 } 484 } 485 /* 486 * Assign a unique device number for the vfs 487 * The old way (getudev() + a constantly incrementing 488 * major number) was wrong because it changes vfs_dev 489 * across mounts and reboots, which breaks nfs file handles. 490 * UFS just uses the real dev_t. We can't do that because 491 * of the way pcfs opens fdisk partitons (the :c and :d 492 * partitions are on the same dev_t). Though that _might_ 493 * actually be ok, since the file handle contains an 494 * absolute block number, it's probably better to make them 495 * different. So I think we should retain the original 496 * dev_t, but come up with a different minor number based 497 * on the logical drive that will _always_ come up the same. 498 * For now, we steal the upper 6 bits. 499 */ 500 #ifdef notdef 501 /* what should we do here? */ 502 if (((getminor(xdev) >> 12) & 0x3F) != 0) 503 printf("whoops - upper bits used!\n"); 504 #endif 505 *pseudodev = makedevice(getmajor(xdev), 506 ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32); 507 if (vfs_devmounting(*pseudodev, vfsp)) { 508 mutex_exit(&pcfslock); 509 return (EBUSY); 510 } 511 if (vfs_devismounted(*pseudodev)) { 512 mutex_exit(&pcfslock); 513 if (remount) { 514 return (0); 515 } else { 516 return (EBUSY); 517 } 518 } 519 mutex_exit(&pcfslock); 520 } else { 521 *pseudodev = xdev; 522 if (vfs_devmounting(*pseudodev, vfsp)) { 523 return (EBUSY); 524 } 525 if (vfs_devismounted(*pseudodev)) 526 if (remount) { 527 return (0); 528 } else { 529 return (EBUSY); 530 } 531 } 532 533 /* 534 * This is not a remount. Even if MS_REMOUNT was requested, 535 * the caller needs to proceed as it would on an ordinary 536 * mount. 537 */ 538 *remounting = 0; 539 540 ASSERT(*pseudodev); 541 return (0); 542 } 543 544 /* 545 * Get the PCFS-specific mount options from the VFS framework. 546 * For "timezone" and "secsize", we need to parse the number 547 * ourselves and ensure its validity. 548 * Note: "secsize" is deliberately undocumented at this time, 549 * it's a workaround for devices (particularly: lofi image files) 550 * that don't support the DKIOCGMEDIAINFO ioctl for autodetection. 551 */ 552 static void 553 pcfs_parse_mntopts(struct pcfs *fsp) 554 { 555 char *c; 556 char *endptr; 557 long l; 558 struct vfs *vfsp = fsp->pcfs_vfs; 559 560 ASSERT(fsp->pcfs_secondswest == 0); 561 ASSERT(fsp->pcfs_secsize == 0); 562 563 if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL)) 564 fsp->pcfs_flags |= PCFS_HIDDEN; 565 if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL)) 566 fsp->pcfs_flags |= PCFS_FOLDCASE; 567 if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL)) 568 fsp->pcfs_flags |= PCFS_NOCLAMPTIME; 569 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 570 fsp->pcfs_flags |= PCFS_NOATIME; 571 572 if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) { 573 if (ddi_strtol(c, &endptr, 10, &l) == 0 && 574 endptr == c + strlen(c)) { 575 /* 576 * A number alright - in the allowed range ? 577 */ 578 if (l <= -12*3600 || l >= 12*3600) { 579 cmn_err(CE_WARN, "!pcfs: invalid use of " 580 "'timezone' mount option - %ld " 581 "is out of range. Assuming 0.", l); 582 l = 0; 583 } 584 } else { 585 cmn_err(CE_WARN, "!pcfs: invalid use of " 586 "'timezone' mount option - argument %s " 587 "is not a valid number. Assuming 0.", c); 588 l = 0; 589 } 590 fsp->pcfs_secondswest = l; 591 } 592 593 /* 594 * The "secsize=..." mount option is a workaround for the lack of 595 * lofi(4D) support for DKIOCGMEDIAINFO. If PCFS wants to parse the 596 * partition table of a disk image and it has been partitioned with 597 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk 598 * images. 599 * That should really be fixed in lofi ... this is a workaround. 600 */ 601 if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) { 602 if (ddi_strtol(c, &endptr, 10, &l) == 0 && 603 endptr == c + strlen(c)) { 604 /* 605 * A number alright - a valid sector size as well ? 606 */ 607 if (!VALID_SECSIZE(l)) { 608 cmn_err(CE_WARN, "!pcfs: invalid use of " 609 "'secsize' mount option - %ld is " 610 "unsupported. Autodetecting.", l); 611 l = 0; 612 } 613 } else { 614 cmn_err(CE_WARN, "!pcfs: invalid use of " 615 "'secsize' mount option - argument %s " 616 "is not a valid number. Autodetecting.", c); 617 l = 0; 618 } 619 fsp->pcfs_secsize = l; 620 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1; 621 } 622 } 623 624 /* 625 * vfs operations 626 */ 627 628 /* 629 * pcfs_mount - backend for VFS_MOUNT() on PCFS. 630 */ 631 static int 632 pcfs_mount( 633 struct vfs *vfsp, 634 struct vnode *mvp, 635 struct mounta *uap, 636 struct cred *cr) 637 { 638 struct pcfs *fsp; 639 struct vnode *devvp; 640 dev_t pseudodev; 641 dev_t xdev; 642 int dos_ldrive = 0; 643 int error; 644 int remounting; 645 646 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 647 return (error); 648 649 if (mvp->v_type != VDIR) 650 return (ENOTDIR); 651 652 mutex_enter(&mvp->v_lock); 653 if ((uap->flags & MS_REMOUNT) == 0 && 654 (uap->flags & MS_OVERLAY) == 0 && 655 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 656 mutex_exit(&mvp->v_lock); 657 return (EBUSY); 658 } 659 mutex_exit(&mvp->v_lock); 660 661 /* 662 * PCFS doesn't do mount arguments anymore - everything's a mount 663 * option these days. In order not to break existing callers, we 664 * don't reject it yet, just warn that the data (if any) is ignored. 665 */ 666 if (uap->datalen != 0) 667 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with " 668 "mount argument structures instead of mount options. " 669 "Ignoring mount(2) 'dataptr' argument."); 670 671 /* 672 * This is needed early, to make sure the access / open calls 673 * are done using the correct mode. Processing this mount option 674 * only when calling pcfs_parse_mntopts() would lead us to attempt 675 * a read/write access to a possibly writeprotected device, and 676 * a readonly mount attempt might fail because of that. 677 */ 678 if (uap->flags & MS_RDONLY) { 679 vfsp->vfs_flag |= VFS_RDONLY; 680 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 681 } 682 683 /* 684 * For most filesystems, this is just a lookupname() on the 685 * mount pathname string. PCFS historically has to do its own 686 * partition table parsing because not all Solaris architectures 687 * support all styles of partitioning that PC media can have, and 688 * hence PCFS understands "device names" that don't map to actual 689 * physical device nodes. Parsing the "PCFS syntax" for device 690 * names is done in pcfs_device_identify() - see there. 691 * 692 * Once all block device drivers that can host FAT filesystems have 693 * been enhanced to create device nodes for all PC-style partitions, 694 * this code can go away. 695 */ 696 if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev)) 697 return (error); 698 699 /* 700 * As with looking up the actual device to mount, PCFS cannot rely 701 * on just the checks done by vfs_ismounted() whether a given device 702 * is mounted already. The additional check against the "PCFS syntax" 703 * is done in pcfs_device_ismounted(). 704 */ 705 remounting = (uap->flags & MS_REMOUNT); 706 707 if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting, 708 &pseudodev)) 709 return (error); 710 711 if (remounting) 712 return (0); 713 714 /* 715 * Mount the filesystem. 716 * An instance structure is required before the attempt to locate 717 * and parse the FAT BPB. This is because mount options may change 718 * the behaviour of the filesystem type matching code. Precreate 719 * it and fill it in to a degree that allows parsing the mount 720 * options. 721 */ 722 devvp = makespecvp(xdev, VBLK); 723 if (IS_SWAPVP(devvp)) { 724 VN_RELE(devvp); 725 return (EBUSY); 726 } 727 error = VOP_OPEN(&devvp, 728 (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL); 729 if (error) { 730 VN_RELE(devvp); 731 return (error); 732 } 733 734 fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP); 735 fsp->pcfs_vfs = vfsp; 736 fsp->pcfs_xdev = xdev; 737 fsp->pcfs_devvp = devvp; 738 fsp->pcfs_ldrive = dos_ldrive; 739 mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL); 740 741 pcfs_parse_mntopts(fsp); 742 743 /* 744 * This is the actual "mount" - the PCFS superblock check. 745 * 746 * Find the requested logical drive and the FAT BPB therein. 747 * Check device type and flag the instance if media is removeable. 748 * 749 * Initializes most members of the filesystem instance structure. 750 * Returns EINVAL if no valid BPB can be found. Other errors may 751 * occur after I/O failures, or when invalid / unparseable partition 752 * tables are encountered. 753 */ 754 if (error = pc_getfattype(fsp)) 755 goto errout; 756 757 /* 758 * Now that the BPB has been parsed, this structural information 759 * is available and known to be valid. Initialize the VFS. 760 */ 761 vfsp->vfs_data = fsp; 762 vfsp->vfs_dev = pseudodev; 763 vfsp->vfs_fstype = pcfstype; 764 vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype); 765 vfsp->vfs_bcount = 0; 766 vfsp->vfs_bsize = fsp->pcfs_clsize; 767 768 /* 769 * Validate that we can access the FAT and that it is, to the 770 * degree we can verify here, self-consistent. 771 */ 772 if (error = pc_verify(fsp)) 773 goto errout; 774 775 /* 776 * Record the time of the mount, to return as an "approximate" 777 * timestamp for the FAT root directory. Since FAT roots don't 778 * have timestamps, this is less confusing to the user than 779 * claiming "zero" / Jan/01/1970. 780 */ 781 gethrestime(&fsp->pcfs_mounttime); 782 783 /* 784 * Fix up the mount options. Because "noatime" is made default on 785 * removeable media only, a fixed disk will have neither "atime" 786 * nor "noatime" set. We set the options explicitly depending on 787 * the PCFS_NOATIME flag, to inform the user of what applies. 788 * Mount option cancellation will take care that the mutually 789 * exclusive 'other' is cleared. 790 */ 791 vfs_setmntopt(vfsp, 792 fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME, 793 NULL, 0); 794 795 /* 796 * All clear - insert the FS instance into PCFS' list. 797 */ 798 mutex_enter(&pcfslock); 799 fsp->pcfs_nxt = pc_mounttab; 800 pc_mounttab = fsp; 801 mutex_exit(&pcfslock); 802 atomic_inc_32(&pcfs_mountcount); 803 return (0); 804 805 errout: 806 (void) VOP_CLOSE(devvp, 807 vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE, 808 1, (offset_t)0, cr, NULL); 809 VN_RELE(devvp); 810 mutex_destroy(&fsp->pcfs_lock); 811 kmem_free(fsp, sizeof (*fsp)); 812 return (error); 813 814 } 815 816 static int 817 pcfs_unmount( 818 struct vfs *vfsp, 819 int flag, 820 struct cred *cr) 821 { 822 struct pcfs *fsp, *fsp1; 823 824 if (secpolicy_fs_unmount(cr, vfsp) != 0) 825 return (EPERM); 826 827 fsp = VFSTOPCFS(vfsp); 828 829 /* 830 * We don't have to lock fsp because the VVFSLOCK in vfs layer will 831 * prevent lookuppn from crossing the mount point. 832 * If this is not a forced umount request and there's ongoing I/O, 833 * don't allow the mount to proceed. 834 */ 835 if (flag & MS_FORCE) 836 vfsp->vfs_flag |= VFS_UNMOUNTED; 837 else if (fsp->pcfs_nrefs) 838 return (EBUSY); 839 840 mutex_enter(&pcfslock); 841 842 /* 843 * If this is a forced umount request or if the fs instance has 844 * been marked as beyond recovery, allow the umount to proceed 845 * regardless of state. pc_diskchanged() forcibly releases all 846 * inactive vnodes/pcnodes. 847 */ 848 if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) { 849 rw_enter(&pcnodes_lock, RW_WRITER); 850 pc_diskchanged(fsp); 851 rw_exit(&pcnodes_lock); 852 } 853 854 /* now there should be no pcp node on pcfhead or pcdhead. */ 855 856 if (fsp == pc_mounttab) { 857 pc_mounttab = fsp->pcfs_nxt; 858 } else { 859 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt) 860 if (fsp1->pcfs_nxt == fsp) 861 fsp1->pcfs_nxt = fsp->pcfs_nxt; 862 } 863 864 mutex_exit(&pcfslock); 865 866 /* 867 * Since we support VFS_FREEVFS(), there's no need to 868 * free the fsp right now. The framework will tell us 869 * when the right time to do so has arrived by calling 870 * into pcfs_freevfs. 871 */ 872 return (0); 873 } 874 875 /* 876 * find root of pcfs 877 */ 878 static int 879 pcfs_root( 880 struct vfs *vfsp, 881 struct vnode **vpp) 882 { 883 struct pcfs *fsp; 884 struct pcnode *pcp; 885 int error; 886 887 fsp = VFSTOPCFS(vfsp); 888 if (error = pc_lockfs(fsp, 0, 0)) 889 return (error); 890 891 pcp = pc_getnode(fsp, (daddr_t)0, 0, NULL); 892 pc_unlockfs(fsp); 893 *vpp = PCTOV(pcp); 894 pcp->pc_flags |= PC_EXTERNAL; 895 return (0); 896 } 897 898 /* 899 * Get file system statistics. 900 */ 901 static int 902 pcfs_statvfs( 903 struct vfs *vfsp, 904 struct statvfs64 *sp) 905 { 906 struct pcfs *fsp; 907 int error; 908 dev32_t d32; 909 910 fsp = VFSTOPCFS(vfsp); 911 error = pc_getfat(fsp); 912 if (error) 913 return (error); 914 bzero(sp, sizeof (*sp)); 915 sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize; 916 sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster; 917 sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp); 918 sp->f_files = (fsfilcnt64_t)-1; 919 sp->f_ffree = (fsfilcnt64_t)-1; 920 sp->f_favail = (fsfilcnt64_t)-1; 921 #ifdef notdef 922 (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev); 923 #endif /* notdef */ 924 (void) cmpldev(&d32, vfsp->vfs_dev); 925 sp->f_fsid = d32; 926 (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 927 sp->f_flag = vf_to_stf(vfsp->vfs_flag); 928 sp->f_namemax = PCMAXNAMLEN; 929 return (0); 930 } 931 932 static int 933 pc_syncfsnodes(struct pcfs *fsp) 934 { 935 struct pchead *hp; 936 struct pcnode *pcp; 937 int error; 938 939 if (error = pc_lockfs(fsp, 0, 0)) 940 return (error); 941 942 if (!(error = pc_syncfat(fsp))) { 943 hp = pcfhead; 944 while (hp < & pcfhead [ NPCHASH ]) { 945 rw_enter(&pcnodes_lock, RW_READER); 946 pcp = hp->pch_forw; 947 while (pcp != (struct pcnode *)hp) { 948 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp) 949 if (error = pc_nodesync(pcp)) 950 break; 951 pcp = pcp -> pc_forw; 952 } 953 rw_exit(&pcnodes_lock); 954 if (error) 955 break; 956 hp++; 957 } 958 } 959 pc_unlockfs(fsp); 960 return (error); 961 } 962 963 /* 964 * Flush any pending I/O. 965 */ 966 static int 967 pcfs_sync(struct vfs *vfsp, short flag, struct cred *cr) 968 { 969 struct pcfs *fsp; 970 int error = 0; 971 972 /* this prevents the filesystem from being umounted. */ 973 mutex_enter(&pcfslock); 974 if (vfsp != NULL) { 975 fsp = VFSTOPCFS(vfsp); 976 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) { 977 error = pc_syncfsnodes(fsp); 978 } else { 979 rw_enter(&pcnodes_lock, RW_WRITER); 980 pc_diskchanged(fsp); 981 rw_exit(&pcnodes_lock); 982 error = EIO; 983 } 984 } else { 985 fsp = pc_mounttab; 986 while (fsp != NULL) { 987 if (fsp->pcfs_flags & PCFS_IRRECOV) { 988 rw_enter(&pcnodes_lock, RW_WRITER); 989 pc_diskchanged(fsp); 990 rw_exit(&pcnodes_lock); 991 error = EIO; 992 break; 993 } 994 error = pc_syncfsnodes(fsp); 995 if (error) break; 996 fsp = fsp->pcfs_nxt; 997 } 998 } 999 mutex_exit(&pcfslock); 1000 return (error); 1001 } 1002 1003 static int 1004 pcfs_syncfs(vfs_t *vfsp, uint64_t flags, cred_t *cr) 1005 { 1006 int ret; 1007 struct pcfs *fsp; 1008 1009 if (flags != 0) { 1010 return (ENOTSUP); 1011 } 1012 1013 fsp = VFSTOPCFS(vfsp); 1014 if ((fsp->pcfs_flags & PCFS_IRRECOV) == 0) { 1015 ret = pc_syncfsnodes(fsp); 1016 } else { 1017 rw_enter(&pcnodes_lock, RW_WRITER); 1018 pc_diskchanged(fsp); 1019 rw_exit(&pcnodes_lock); 1020 ret = EIO; 1021 } 1022 1023 return (ret); 1024 } 1025 1026 int 1027 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing) 1028 { 1029 int err; 1030 1031 if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing) 1032 return (EIO); 1033 1034 if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) { 1035 fsp->pcfs_count++; 1036 } else { 1037 mutex_enter(&fsp->pcfs_lock); 1038 if (fsp->pcfs_flags & PCFS_LOCKED) 1039 panic("pc_lockfs"); 1040 /* 1041 * We check the IRRECOV bit again just in case somebody 1042 * snuck past the initial check but then got held up before 1043 * they could grab the lock. (And in the meantime someone 1044 * had grabbed the lock and set the bit) 1045 */ 1046 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) { 1047 if ((err = pc_getfat(fsp))) { 1048 mutex_exit(&fsp->pcfs_lock); 1049 return (err); 1050 } 1051 } 1052 fsp->pcfs_flags |= PCFS_LOCKED; 1053 fsp->pcfs_owner = curthread; 1054 fsp->pcfs_count++; 1055 } 1056 return (0); 1057 } 1058 1059 void 1060 pc_unlockfs(struct pcfs *fsp) 1061 { 1062 1063 if ((fsp->pcfs_flags & PCFS_LOCKED) == 0) 1064 panic("pc_unlockfs"); 1065 if (--fsp->pcfs_count < 0) 1066 panic("pc_unlockfs: count"); 1067 if (fsp->pcfs_count == 0) { 1068 fsp->pcfs_flags &= ~PCFS_LOCKED; 1069 fsp->pcfs_owner = 0; 1070 mutex_exit(&fsp->pcfs_lock); 1071 } 1072 } 1073 1074 int 1075 pc_syncfat(struct pcfs *fsp) 1076 { 1077 struct buf *bp; 1078 int nfat; 1079 int error = 0; 1080 struct fat_od_fsi *fsinfo_disk; 1081 1082 if ((fsp->pcfs_fatp == NULL) || 1083 !(fsp->pcfs_flags & PCFS_FATMOD)) 1084 return (0); 1085 /* 1086 * write out all copies of FATs 1087 */ 1088 fsp->pcfs_flags &= ~PCFS_FATMOD; 1089 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT; 1090 for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) { 1091 error = pc_writefat(fsp, pc_dbdaddr(fsp, 1092 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec)); 1093 if (error) { 1094 pc_mark_irrecov(fsp); 1095 return (EIO); 1096 } 1097 } 1098 pc_clear_fatchanges(fsp); 1099 1100 /* 1101 * Write out fsinfo sector. 1102 */ 1103 if (IS_FAT32(fsp)) { 1104 bp = bread(fsp->pcfs_xdev, 1105 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize); 1106 if (bp->b_flags & (B_ERROR | B_STALE)) { 1107 error = geterror(bp); 1108 } 1109 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr); 1110 if (!error && FSISIG_OK(fsinfo_disk)) { 1111 fsinfo_disk->fsi_incore.fs_free_clusters = 1112 LE_32(fsp->pcfs_fsinfo.fs_free_clusters); 1113 fsinfo_disk->fsi_incore.fs_next_free = 1114 LE_32(FSINFO_UNKNOWN); 1115 bwrite2(bp); 1116 error = geterror(bp); 1117 } 1118 brelse(bp); 1119 if (error) { 1120 pc_mark_irrecov(fsp); 1121 return (EIO); 1122 } 1123 } 1124 return (0); 1125 } 1126 1127 void 1128 pc_invalfat(struct pcfs *fsp) 1129 { 1130 struct pcfs *xfsp; 1131 int mount_cnt = 0; 1132 1133 if (fsp->pcfs_fatp == NULL) 1134 panic("pc_invalfat"); 1135 /* 1136 * Release FAT 1137 */ 1138 kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize); 1139 fsp->pcfs_fatp = NULL; 1140 kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize); 1141 fsp->pcfs_fat_changemap = NULL; 1142 /* 1143 * Invalidate all the blocks associated with the device. 1144 * Not needed if stateless. 1145 */ 1146 for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt) 1147 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev) 1148 mount_cnt++; 1149 1150 if (!mount_cnt) 1151 binval(fsp->pcfs_xdev); 1152 /* 1153 * close mounted device 1154 */ 1155 (void) VOP_CLOSE(fsp->pcfs_devvp, 1156 (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE, 1157 1, (offset_t)0, CRED(), NULL); 1158 } 1159 1160 void 1161 pc_badfs(struct pcfs *fsp) 1162 { 1163 cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n", 1164 getmajor(fsp->pcfs_devvp->v_rdev), 1165 getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive); 1166 } 1167 1168 /* 1169 * The problem with supporting NFS on the PCFS filesystem is that there 1170 * is no good place to keep the generation number. The only possible 1171 * place is inside a directory entry. There are a few words that we 1172 * don't use - they store NT & OS/2 attributes, and the creation/last access 1173 * time of the file - but it seems wrong to use them. In addition, directory 1174 * entries come and go. If a directory is removed completely, its directory 1175 * blocks are freed and the generation numbers are lost. Whereas in ufs, 1176 * inode blocks are dedicated for inodes, so the generation numbers are 1177 * permanently kept on the disk. 1178 */ 1179 static int 1180 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 1181 { 1182 struct pcnode *pcp; 1183 struct pc_fid *pcfid; 1184 struct pcfs *fsp; 1185 struct pcdir *ep; 1186 daddr_t eblkno; 1187 int eoffset; 1188 struct buf *bp; 1189 int error; 1190 pc_cluster32_t cn; 1191 1192 pcfid = (struct pc_fid *)fidp; 1193 fsp = VFSTOPCFS(vfsp); 1194 1195 error = pc_lockfs(fsp, 0, 0); 1196 if (error) { 1197 *vpp = NULL; 1198 return (error); 1199 } 1200 1201 if (pcfid->pcfid_block == 0) { 1202 pcp = pc_getnode(fsp, (daddr_t)0, 0, NULL); 1203 pcp->pc_flags |= PC_EXTERNAL; 1204 *vpp = PCTOV(pcp); 1205 pc_unlockfs(fsp); 1206 return (0); 1207 } 1208 eblkno = pcfid->pcfid_block; 1209 eoffset = pcfid->pcfid_offset; 1210 1211 if ((pc_dbtocl(fsp, 1212 eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) || 1213 (eoffset > fsp->pcfs_clsize)) { 1214 pc_unlockfs(fsp); 1215 *vpp = NULL; 1216 return (EINVAL); 1217 } 1218 1219 if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart) 1220 < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) { 1221 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno), 1222 fsp->pcfs_clsize); 1223 } else { 1224 /* 1225 * This is an access "backwards" into the FAT12/FAT16 1226 * root directory. A better code structure would 1227 * significantly improve maintainability here ... 1228 */ 1229 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno), 1230 (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize); 1231 } 1232 if (bp->b_flags & (B_ERROR | B_STALE)) { 1233 error = geterror(bp); 1234 brelse(bp); 1235 if (error) 1236 pc_mark_irrecov(fsp); 1237 *vpp = NULL; 1238 pc_unlockfs(fsp); 1239 return (error); 1240 } 1241 ep = (struct pcdir *)(bp->b_un.b_addr + eoffset); 1242 /* 1243 * Ok, if this is a valid file handle that we gave out, 1244 * then simply ensuring that the creation time matches, 1245 * the entry has not been deleted, and it has a valid first 1246 * character should be enough. 1247 * 1248 * Unfortunately, verifying that the <blkno, offset> _still_ 1249 * refers to a directory entry is not easy, since we'd have 1250 * to search _all_ directories starting from root to find it. 1251 * That's a high price to pay just in case somebody is forging 1252 * file handles. So instead we verify that as much of the 1253 * entry is valid as we can: 1254 * 1255 * 1. The starting cluster is 0 (unallocated) or valid 1256 * 2. It is not an LFN entry 1257 * 3. It is not hidden (unless mounted as such) 1258 * 4. It is not the label 1259 */ 1260 cn = pc_getstartcluster(fsp, ep); 1261 /* 1262 * if the starting cluster is valid, but not valid according 1263 * to pc_validcl(), force it to be to simplify the following if. 1264 */ 1265 if (cn == 0) 1266 cn = PCF_FIRSTCLUSTER; 1267 if (IS_FAT32(fsp)) { 1268 if (cn >= PCF_LASTCLUSTER32) 1269 cn = PCF_FIRSTCLUSTER; 1270 } else { 1271 if (cn >= PCF_LASTCLUSTER) 1272 cn = PCF_FIRSTCLUSTER; 1273 } 1274 if ((!pc_validcl(fsp, cn)) || 1275 (PCDL_IS_LFN(ep)) || 1276 (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) || 1277 ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) { 1278 bp->b_flags |= B_STALE | B_AGE; 1279 brelse(bp); 1280 pc_unlockfs(fsp); 1281 return (EINVAL); 1282 } 1283 if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) && 1284 (ep->pcd_filename[0] != PCD_ERASED) && 1285 (pc_validchar(ep->pcd_filename[0]) || 1286 (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) { 1287 pcp = pc_getnode(fsp, eblkno, eoffset, ep); 1288 pcp->pc_flags |= PC_EXTERNAL; 1289 *vpp = PCTOV(pcp); 1290 } else { 1291 *vpp = NULL; 1292 } 1293 bp->b_flags |= B_STALE | B_AGE; 1294 brelse(bp); 1295 pc_unlockfs(fsp); 1296 return (0); 1297 } 1298 1299 /* 1300 * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about 1301 * a meg), so we can't bread() it all in at once. This routine reads a 1302 * fat a chunk at a time. 1303 */ 1304 static int 1305 pc_readfat(struct pcfs *fsp, uchar_t *fatp) 1306 { 1307 struct buf *bp; 1308 size_t off; 1309 size_t readsize; 1310 daddr_t diskblk; 1311 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize; 1312 daddr_t start = fsp->pcfs_fatstart; 1313 1314 readsize = fsp->pcfs_clsize; 1315 for (off = 0; off < fatsize; off += readsize, fatp += readsize) { 1316 if (readsize > (fatsize - off)) 1317 readsize = fatsize - off; 1318 diskblk = pc_dbdaddr(fsp, start + 1319 pc_cltodb(fsp, pc_lblkno(fsp, off))); 1320 bp = bread(fsp->pcfs_xdev, diskblk, readsize); 1321 if (bp->b_flags & (B_ERROR | B_STALE)) { 1322 brelse(bp); 1323 return (EIO); 1324 } 1325 bp->b_flags |= B_STALE | B_AGE; 1326 bcopy(bp->b_un.b_addr, fatp, readsize); 1327 brelse(bp); 1328 } 1329 return (0); 1330 } 1331 1332 /* 1333 * We write the FAT out a _lot_, in order to make sure that it 1334 * is up-to-date. But on a FAT32 system (large drive, small clusters) 1335 * the FAT might be a couple of megabytes, and writing it all out just 1336 * because we created or deleted a small file is painful (especially 1337 * since we do it for each alternate FAT too). So instead, for FAT16 and 1338 * FAT32 we only write out the bit that has changed. We don't clear 1339 * the 'updated' fields here because the caller might be writing out 1340 * several FATs, so the caller must use pc_clear_fatchanges() after 1341 * all FATs have been updated. 1342 * This function doesn't take "start" from fsp->pcfs_dosstart because 1343 * callers can use it to write either the primary or any of the alternate 1344 * FAT tables. 1345 */ 1346 static int 1347 pc_writefat(struct pcfs *fsp, daddr_t start) 1348 { 1349 struct buf *bp; 1350 size_t off; 1351 size_t writesize; 1352 int error; 1353 uchar_t *fatp = fsp->pcfs_fatp; 1354 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize; 1355 1356 writesize = fsp->pcfs_clsize; 1357 for (off = 0; off < fatsize; off += writesize, fatp += writesize) { 1358 if (writesize > (fatsize - off)) 1359 writesize = fatsize - off; 1360 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) { 1361 continue; 1362 } 1363 bp = ngeteblk(writesize); 1364 bp->b_edev = fsp->pcfs_xdev; 1365 bp->b_dev = cmpdev(bp->b_edev); 1366 bp->b_blkno = start + pc_dbdaddr(fsp, 1367 pc_cltodb(fsp, pc_lblkno(fsp, off))); 1368 DTRACE_PROBE3(pc_writefat, longlong_t, bp->b_blkno, 1369 uchar_t *, fatp, 1370 size_t, writesize); 1371 bcopy(fatp, bp->b_un.b_addr, writesize); 1372 bwrite2(bp); 1373 error = geterror(bp); 1374 brelse(bp); 1375 if (error) { 1376 return (error); 1377 } 1378 } 1379 return (0); 1380 } 1381 1382 /* 1383 * Mark the FAT cluster that 'cn' is stored in as modified. 1384 */ 1385 void 1386 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn) 1387 { 1388 pc_cluster32_t bn; 1389 size_t size; 1390 1391 /* which fat block is the cluster number stored in? */ 1392 if (IS_FAT32(fsp)) { 1393 size = sizeof (pc_cluster32_t); 1394 bn = pc_lblkno(fsp, cn * size); 1395 fsp->pcfs_fat_changemap[bn] = 1; 1396 } else if (IS_FAT16(fsp)) { 1397 size = sizeof (pc_cluster16_t); 1398 bn = pc_lblkno(fsp, cn * size); 1399 fsp->pcfs_fat_changemap[bn] = 1; 1400 } else { 1401 offset_t off; 1402 pc_cluster32_t nbn; 1403 1404 ASSERT(IS_FAT12(fsp)); 1405 off = cn + (cn >> 1); 1406 bn = pc_lblkno(fsp, off); 1407 fsp->pcfs_fat_changemap[bn] = 1; 1408 /* does this field wrap into the next fat cluster? */ 1409 nbn = pc_lblkno(fsp, off + 1); 1410 if (nbn != bn) { 1411 fsp->pcfs_fat_changemap[nbn] = 1; 1412 } 1413 } 1414 } 1415 1416 /* 1417 * return whether the FAT cluster 'bn' is updated and needs to 1418 * be written out. 1419 */ 1420 int 1421 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn) 1422 { 1423 return (fsp->pcfs_fat_changemap[bn] == 1); 1424 } 1425 1426 /* 1427 * Implementation of VFS_FREEVFS() to support forced umounts. 1428 * This is called by the vfs framework after umount, to trigger 1429 * the release of any resources still associated with the given 1430 * vfs_t once the need to keep them has gone away. 1431 */ 1432 void 1433 pcfs_freevfs(vfs_t *vfsp) 1434 { 1435 struct pcfs *fsp = VFSTOPCFS(vfsp); 1436 1437 mutex_enter(&pcfslock); 1438 /* 1439 * Purging the FAT closes the device - can't do any more 1440 * I/O after this. 1441 */ 1442 if (fsp->pcfs_fatp != NULL) 1443 pc_invalfat(fsp); 1444 mutex_exit(&pcfslock); 1445 1446 VN_RELE(fsp->pcfs_devvp); 1447 mutex_destroy(&fsp->pcfs_lock); 1448 kmem_free(fsp, sizeof (*fsp)); 1449 1450 /* 1451 * Allow _fini() to succeed now, if so desired. 1452 */ 1453 atomic_dec_32(&pcfs_mountcount); 1454 } 1455 1456 1457 /* 1458 * PC-style partition parsing and FAT BPB identification/validation code. 1459 * The partition parsers here assume: 1460 * - a FAT filesystem will be in a partition that has one of a set of 1461 * recognized partition IDs 1462 * - the user wants the 'numbering' (C:, D:, ...) that one would get 1463 * on MSDOS 6.x. 1464 * That means any non-FAT partition type (NTFS, HPFS, or any Linux fs) 1465 * will not factor in the enumeration. 1466 * These days, such assumptions should be revisited. FAT is no longer the 1467 * only game in 'PC town'. 1468 */ 1469 /* 1470 * isDosDrive() 1471 * Boolean function. Give it the systid field for an fdisk partition 1472 * and it decides if that's a systid that describes a DOS drive. We 1473 * use systid values defined in sys/dktp/fdisk.h. 1474 */ 1475 static int 1476 isDosDrive(uchar_t checkMe) 1477 { 1478 return ((checkMe == DOSOS12) || (checkMe == DOSOS16) || 1479 (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) || 1480 (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) || 1481 (checkMe == DIAGPART)); 1482 } 1483 1484 1485 /* 1486 * isDosExtended() 1487 * Boolean function. Give it the systid field for an fdisk partition 1488 * and it decides if that's a systid that describes an extended DOS 1489 * partition. 1490 */ 1491 static int 1492 isDosExtended(uchar_t checkMe) 1493 { 1494 return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA)); 1495 } 1496 1497 1498 /* 1499 * isBootPart() 1500 * Boolean function. Give it the systid field for an fdisk partition 1501 * and it decides if that's a systid that describes a Solaris boot 1502 * partition. 1503 */ 1504 static int 1505 isBootPart(uchar_t checkMe) 1506 { 1507 return (checkMe == X86BOOT); 1508 } 1509 1510 1511 /* 1512 * noLogicalDrive() 1513 * Display error message about not being able to find a logical 1514 * drive. 1515 */ 1516 static void 1517 noLogicalDrive(int ldrive) 1518 { 1519 if (ldrive == BOOT_PARTITION_DRIVE) { 1520 cmn_err(CE_NOTE, "!pcfs: no boot partition"); 1521 } else { 1522 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive); 1523 } 1524 } 1525 1526 1527 /* 1528 * findTheDrive() 1529 * Discover offset of the requested logical drive, and return 1530 * that offset (startSector), the systid of that drive (sysid), 1531 * and a buffer pointer (bp), with the buffer contents being 1532 * the first sector of the logical drive (i.e., the sector that 1533 * contains the BPB for that drive). 1534 * 1535 * Note: this code is not capable of addressing >2TB disks, as it uses 1536 * daddr_t not diskaddr_t, some of the calculations would overflow 1537 */ 1538 #define COPY_PTBL(mbr, ptblp) \ 1539 bcopy(&(((struct mboot *)(mbr))->parts), (ptblp), \ 1540 FD_NUMPART * sizeof (struct ipart)) 1541 1542 static int 1543 findTheDrive(struct pcfs *fsp, buf_t **bp) 1544 { 1545 int ldrive = fsp->pcfs_ldrive; 1546 dev_t dev = fsp->pcfs_devvp->v_rdev; 1547 1548 struct ipart dosp[FD_NUMPART]; /* incore fdisk partition structure */ 1549 daddr_t lastseek = 0; /* Disk block we sought previously */ 1550 daddr_t diskblk = 0; /* Disk block to get */ 1551 daddr_t xstartsect; /* base of Extended DOS partition */ 1552 int logicalDriveCount = 0; /* Count of logical drives seen */ 1553 int extendedPart = -1; /* index of extended dos partition */ 1554 int primaryPart = -1; /* index of primary dos partition */ 1555 int bootPart = -1; /* index of a Solaris boot partition */ 1556 uint32_t xnumsect = 0; /* length of extended DOS partition */ 1557 int driveIndex; /* computed FDISK table index */ 1558 daddr_t startsec; 1559 len_t mediasize; 1560 int i; 1561 /* 1562 * Count of drives in the current extended partition's 1563 * FDISK table, and indexes of the drives themselves. 1564 */ 1565 int extndDrives[FD_NUMPART]; 1566 int numDrives = 0; 1567 1568 /* 1569 * Count of drives (beyond primary) in master boot record's 1570 * FDISK table, and indexes of the drives themselves. 1571 */ 1572 int extraDrives[FD_NUMPART]; 1573 int numExtraDrives = 0; 1574 1575 /* 1576 * "ldrive == 0" should never happen, as this is a request to 1577 * mount the physical device (and ignore partitioning). The code 1578 * in pcfs_mount() should have made sure that a logical drive number 1579 * is at least 1, meaning we're looking for drive "C:". It is not 1580 * safe (and a bug in the callers of this function) to request logical 1581 * drive number 0; we could ASSERT() but a graceful EIO is a more 1582 * polite way. 1583 */ 1584 if (ldrive == 0) { 1585 cmn_err(CE_NOTE, "!pcfs: request for logical partition zero"); 1586 noLogicalDrive(ldrive); 1587 return (EIO); 1588 } 1589 1590 /* 1591 * Copy from disk block into memory aligned structure for fdisk usage. 1592 */ 1593 COPY_PTBL((*bp)->b_un.b_addr, dosp); 1594 1595 /* 1596 * This check is ok because a FAT BPB and a master boot record (MBB) 1597 * have the same signature, in the same position within the block. 1598 */ 1599 if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) { 1600 cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, " 1601 "device (%x.%x):%d\n", 1602 getmajor(dev), getminor(dev), ldrive); 1603 return (EINVAL); 1604 } 1605 1606 /* 1607 * Get a summary of what is in the Master FDISK table. 1608 * Normally we expect to find one partition marked as a DOS drive. 1609 * This partition is the one Windows calls the primary dos partition. 1610 * If the machine has any logical drives then we also expect 1611 * to find a partition marked as an extended DOS partition. 1612 * 1613 * Sometimes we'll find multiple partitions marked as DOS drives. 1614 * The Solaris fdisk program allows these partitions 1615 * to be created, but Windows fdisk no longer does. We still need 1616 * to support these, though, since Windows does. We also need to fix 1617 * our fdisk to behave like the Windows version. 1618 * 1619 * It turns out that some off-the-shelf media have *only* an 1620 * Extended partition, so we need to deal with that case as well. 1621 * 1622 * Only a single (the first) Extended or Boot Partition will 1623 * be recognized. Any others will be ignored. 1624 */ 1625 for (i = 0; i < FD_NUMPART; i++) { 1626 DTRACE_PROBE4(primarypart, struct pcfs *, fsp, 1627 uint_t, (uint_t)dosp[i].systid, 1628 uint_t, LE_32(dosp[i].relsect), 1629 uint_t, LE_32(dosp[i].numsect)); 1630 1631 if (isDosDrive(dosp[i].systid)) { 1632 if (primaryPart < 0) { 1633 logicalDriveCount++; 1634 primaryPart = i; 1635 } else { 1636 extraDrives[numExtraDrives++] = i; 1637 } 1638 continue; 1639 } 1640 if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) { 1641 extendedPart = i; 1642 continue; 1643 } 1644 if ((bootPart < 0) && isBootPart(dosp[i].systid)) { 1645 bootPart = i; 1646 continue; 1647 } 1648 } 1649 1650 if (ldrive == BOOT_PARTITION_DRIVE) { 1651 if (bootPart < 0) { 1652 noLogicalDrive(ldrive); 1653 return (EINVAL); 1654 } 1655 startsec = LE_32(dosp[bootPart].relsect); 1656 mediasize = LE_32(dosp[bootPart].numsect); 1657 goto found; 1658 } 1659 1660 if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) { 1661 startsec = LE_32(dosp[primaryPart].relsect); 1662 mediasize = LE_32(dosp[primaryPart].numsect); 1663 goto found; 1664 } 1665 1666 /* 1667 * We are not looking for the C: drive (or the primary drive 1668 * was not found), so we had better have an extended partition 1669 * or extra drives in the Master FDISK table. 1670 */ 1671 if ((extendedPart < 0) && (numExtraDrives == 0)) { 1672 cmn_err(CE_NOTE, "!pcfs: no extended dos partition"); 1673 noLogicalDrive(ldrive); 1674 return (EINVAL); 1675 } 1676 1677 if (extendedPart >= 0) { 1678 diskblk = xstartsect = LE_32(dosp[extendedPart].relsect); 1679 xnumsect = LE_32(dosp[extendedPart].numsect); 1680 do { 1681 /* 1682 * If the seek would not cause us to change 1683 * position on the drive, then we're out of 1684 * extended partitions to examine. 1685 */ 1686 if (diskblk == lastseek) 1687 break; 1688 logicalDriveCount += numDrives; 1689 /* 1690 * Seek the next extended partition, and find 1691 * logical drives within it. 1692 */ 1693 brelse(*bp); 1694 /* 1695 * bread() block numbers are multiples of DEV_BSIZE 1696 * but the device sector size (the unit of partitioning) 1697 * might be larger than that; pcfs_get_device_info() 1698 * has calculated the multiplicator for us. 1699 */ 1700 *bp = bread(dev, 1701 pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize); 1702 if ((*bp)->b_flags & B_ERROR) { 1703 return (EIO); 1704 } 1705 1706 lastseek = diskblk; 1707 COPY_PTBL((*bp)->b_un.b_addr, dosp); 1708 if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) { 1709 cmn_err(CE_NOTE, "!pcfs: " 1710 "extended partition table signature err, " 1711 "device (%x.%x):%d, LBA %u", 1712 getmajor(dev), getminor(dev), ldrive, 1713 (uint_t)pc_dbdaddr(fsp, diskblk)); 1714 return (EINVAL); 1715 } 1716 /* 1717 * Count up drives, and track where the next 1718 * extended partition is in case we need it. We 1719 * are expecting only one extended partition. If 1720 * there is more than one we'll only go to the 1721 * first one we see, but warn about ignoring. 1722 */ 1723 numDrives = 0; 1724 for (i = 0; i < FD_NUMPART; i++) { 1725 DTRACE_PROBE4(extendedpart, 1726 struct pcfs *, fsp, 1727 uint_t, (uint_t)dosp[i].systid, 1728 uint_t, LE_32(dosp[i].relsect), 1729 uint_t, LE_32(dosp[i].numsect)); 1730 if (isDosDrive(dosp[i].systid)) { 1731 extndDrives[numDrives++] = i; 1732 } else if (isDosExtended(dosp[i].systid)) { 1733 if (diskblk != lastseek) { 1734 /* 1735 * Already found an extended 1736 * partition in this table. 1737 */ 1738 cmn_err(CE_NOTE, 1739 "!pcfs: ignoring unexpected" 1740 " additional extended" 1741 " partition"); 1742 } else { 1743 diskblk = xstartsect + 1744 LE_32(dosp[i].relsect); 1745 } 1746 } 1747 } 1748 } while (ldrive > logicalDriveCount + numDrives); 1749 1750 ASSERT(numDrives <= FD_NUMPART); 1751 1752 if (ldrive <= logicalDriveCount + numDrives) { 1753 /* 1754 * The number of logical drives we've found thus 1755 * far is enough to get us to the one we were 1756 * searching for. 1757 */ 1758 driveIndex = logicalDriveCount + numDrives - ldrive; 1759 mediasize = 1760 LE_32(dosp[extndDrives[driveIndex]].numsect); 1761 startsec = 1762 LE_32(dosp[extndDrives[driveIndex]].relsect) + 1763 lastseek; 1764 if (startsec > (xstartsect + xnumsect)) { 1765 cmn_err(CE_NOTE, "!pcfs: extended partition " 1766 "values bad"); 1767 return (EINVAL); 1768 } 1769 goto found; 1770 } else { 1771 /* 1772 * We ran out of extended dos partition 1773 * drives. The only hope now is to go 1774 * back to extra drives defined in the master 1775 * fdisk table. But we overwrote that table 1776 * already, so we must load it in again. 1777 */ 1778 logicalDriveCount += numDrives; 1779 brelse(*bp); 1780 ASSERT(fsp->pcfs_dosstart == 0); 1781 *bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), 1782 fsp->pcfs_secsize); 1783 if ((*bp)->b_flags & B_ERROR) { 1784 return (EIO); 1785 } 1786 COPY_PTBL((*bp)->b_un.b_addr, dosp); 1787 } 1788 } 1789 /* 1790 * Still haven't found the drive, is it an extra 1791 * drive defined in the main FDISK table? 1792 */ 1793 if (ldrive <= logicalDriveCount + numExtraDrives) { 1794 driveIndex = logicalDriveCount + numExtraDrives - ldrive; 1795 ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART)); 1796 mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect); 1797 startsec = LE_32(dosp[extraDrives[driveIndex]].relsect); 1798 goto found; 1799 } 1800 /* 1801 * Still haven't found the drive, and there is 1802 * nowhere else to look. 1803 */ 1804 noLogicalDrive(ldrive); 1805 return (EINVAL); 1806 1807 found: 1808 /* 1809 * We need this value in units of sectorsize, because PCFS' internal 1810 * offset calculations go haywire for > 512Byte sectors unless all 1811 * pcfs_.*start values are in units of sectors. 1812 * So, assign before the capacity check (that's done in DEV_BSIZE) 1813 */ 1814 fsp->pcfs_dosstart = startsec; 1815 1816 /* 1817 * convert from device sectors to proper units: 1818 * - starting sector: DEV_BSIZE (as argument to bread()) 1819 * - media size: Bytes 1820 */ 1821 startsec = pc_dbdaddr(fsp, startsec); 1822 mediasize *= fsp->pcfs_secsize; 1823 1824 /* 1825 * some additional validation / warnings in case the partition table 1826 * and the actual media capacity are not in accordance ... 1827 */ 1828 if (fsp->pcfs_mediasize != 0) { 1829 diskaddr_t startoff = 1830 (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE; 1831 1832 if (startoff >= fsp->pcfs_mediasize || 1833 startoff + mediasize > fsp->pcfs_mediasize) { 1834 cmn_err(CE_WARN, 1835 "!pcfs: partition size (LBA start %u, %lld bytes, " 1836 "device (%x.%x):%d) smaller than " 1837 "mediasize (%lld bytes).\n" 1838 "filesystem may be truncated, access errors " 1839 "may result.\n", 1840 (uint_t)startsec, (long long)mediasize, 1841 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev), 1842 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize); 1843 } 1844 } else { 1845 fsp->pcfs_mediasize = mediasize; 1846 } 1847 1848 return (0); 1849 } 1850 1851 1852 static fattype_t 1853 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize) 1854 { 1855 uint32_t ncl = fsp->pcfs_ncluster; 1856 1857 if (ncl <= 4096) { 1858 if (bpb_get_FatSz16(bpb) == 0) 1859 return (FAT_UNKNOWN); 1860 1861 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 && 1862 bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2)) 1863 return (FAT12); 1864 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0) 1865 return (FAT12); 1866 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0) 1867 return (FAT16); 1868 1869 switch (bpb_get_Media(bpb)) { 1870 case SS8SPT: 1871 case DS8SPT: 1872 case SS9SPT: 1873 case DS9SPT: 1874 case DS18SPT: 1875 case DS9_15SPT: 1876 /* 1877 * Is this reliable - all floppies are FAT12 ? 1878 */ 1879 return (FAT12); 1880 case MD_FIXED: 1881 /* 1882 * Is this reliable - disks are always FAT16 ? 1883 */ 1884 return (FAT16); 1885 default: 1886 break; 1887 } 1888 } else if (ncl <= 65536) { 1889 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0) 1890 return (FAT32); 1891 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb))) 1892 return (FAT32); 1893 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb))) 1894 return (FAT32); 1895 1896 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb))) 1897 return (FAT16); 1898 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4) 1899 return (FAT16); 1900 } 1901 1902 /* 1903 * We don't know 1904 */ 1905 return (FAT_UNKNOWN); 1906 } 1907 1908 /* 1909 * Check to see if the BPB we found is correct. 1910 * 1911 * This looks far more complicated that it needs to be for pure structural 1912 * validation. The reason for this is that parseBPB() is also used for 1913 * debugging purposes (mdb dcmd) and we therefore want a bitmap of which 1914 * BPB fields (do not) have 'known good' values, even if we (do not) reject 1915 * the BPB when attempting to mount the filesystem. 1916 * 1917 * Real-world usage of FAT shows there are a lot of corner-case situations 1918 * and, following the specification strictly, invalid filesystems out there. 1919 * Known are situations such as: 1920 * - FAT12/FAT16 filesystems with garbage in either totsec16/32 1921 * instead of the zero in one of the fields mandated by the spec 1922 * - filesystems that claim to be larger than the partition they're in 1923 * - filesystems without valid media descriptor 1924 * - FAT32 filesystems with RootEntCnt != 0 1925 * - FAT32 filesystems with less than 65526 clusters 1926 * - FAT32 filesystems without valid FSI sector 1927 * - FAT32 filesystems with FAT size in fatsec16 instead of fatsec32 1928 * 1929 * Such filesystems are accessible by PCFS - if it'd know to start with that 1930 * the filesystem should be treated as a specific FAT type. Before S10, it 1931 * relied on the PC/fdisk partition type for the purpose and almost completely 1932 * ignored the BPB; now it ignores the partition type for anything else but 1933 * logical drive enumeration, which can result in rejection of (invalid) 1934 * FAT32 - if the partition ID says FAT32, but the filesystem, for example 1935 * has less than 65526 clusters. 1936 * 1937 * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's 1938 * not possible to allow all such mostly-compliant filesystems in unless one 1939 * accepts false positives (definitely invalid filesystems that cause problems 1940 * later). This at least allows to pinpoint why the mount failed. 1941 * 1942 * Due to the use of FAT on removeable media, all relaxations of the rules 1943 * here need to be carefully evaluated wrt. to potential effects on PCFS 1944 * resilience. A faulty/"mis-crafted" filesystem must not cause a panic, so 1945 * beware. 1946 */ 1947 static int 1948 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid) 1949 { 1950 fattype_t type; 1951 1952 uint32_t ncl; /* number of clusters in file area */ 1953 uint32_t rec; 1954 uint32_t reserved; 1955 uint32_t fsisec, bkbootsec; 1956 blkcnt_t totsec, totsec16, totsec32, datasec; 1957 size_t fatsec, fatsec16, fatsec32, rdirsec; 1958 size_t secsize; 1959 len_t mediasize; 1960 uint64_t validflags = 0; 1961 1962 if (VALID_BPBSIG(bpb_get_BPBSig(bpb))) 1963 validflags |= BPB_BPBSIG_OK; 1964 1965 rec = bpb_get_RootEntCnt(bpb); 1966 reserved = bpb_get_RsvdSecCnt(bpb); 1967 fsisec = bpb_get_FSInfo32(bpb); 1968 bkbootsec = bpb_get_BkBootSec32(bpb); 1969 totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb); 1970 totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb); 1971 fatsec16 = bpb_get_FatSz16(bpb); 1972 fatsec32 = bpb_get_FatSz32(bpb); 1973 1974 totsec = totsec16 ? totsec16 : totsec32; 1975 fatsec = fatsec16 ? fatsec16 : fatsec32; 1976 1977 secsize = bpb_get_BytesPerSec(bpb); 1978 if (!VALID_SECSIZE(secsize)) 1979 secsize = fsp->pcfs_secsize; 1980 if (secsize != fsp->pcfs_secsize) { 1981 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n", 1982 getmajor(fsp->pcfs_xdev), 1983 getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive); 1984 PC_DPRINTF2(3, "!BPB secsize %d != " 1985 "autodetected media block size %d\n", 1986 (int)secsize, (int)fsp->pcfs_secsize); 1987 if (fsp->pcfs_ldrive) { 1988 /* 1989 * We've already attempted to parse the partition 1990 * table. If the block size used for that don't match 1991 * the PCFS sector size, we're hosed one way or the 1992 * other. Just try what happens. 1993 */ 1994 secsize = fsp->pcfs_secsize; 1995 PC_DPRINTF1(3, 1996 "!pcfs: Using autodetected secsize %d\n", 1997 (int)secsize); 1998 } else { 1999 /* 2000 * This allows mounting lofi images of PCFS partitions 2001 * with sectorsize != DEV_BSIZE. We can't parse the 2002 * partition table on whole-disk images unless the 2003 * (undocumented) "secsize=..." mount option is used, 2004 * but at least this allows us to mount if we have 2005 * an image of a partition. 2006 */ 2007 PC_DPRINTF1(3, 2008 "!pcfs: Using BPB secsize %d\n", (int)secsize); 2009 } 2010 } 2011 2012 if (fsp->pcfs_mediasize == 0) { 2013 mediasize = (len_t)totsec * (len_t)secsize; 2014 /* 2015 * This is not an error because not all devices support the 2016 * dkio(4I) mediasize queries, and/or not all devices are 2017 * partitioned. If we have not been able to figure out the 2018 * size of the underlaying medium, we have to trust the BPB. 2019 */ 2020 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed " 2021 "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n", 2022 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev), 2023 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize); 2024 } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) { 2025 cmn_err(CE_WARN, 2026 "!pcfs: autodetected mediasize (%lld Bytes) smaller than " 2027 "FAT BPB mediasize (%lld Bytes).\n" 2028 "truncated filesystem on device (%x.%x):%d, access errors " 2029 "possible.\n", 2030 (long long)fsp->pcfs_mediasize, 2031 (long long)(totsec * (blkcnt_t)secsize), 2032 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev), 2033 fsp->pcfs_ldrive); 2034 mediasize = fsp->pcfs_mediasize; 2035 } else { 2036 /* 2037 * This is actually ok. A FAT needs not occupy the maximum 2038 * space available in its partition, it can be shorter. 2039 */ 2040 mediasize = (len_t)totsec * (len_t)secsize; 2041 } 2042 2043 /* 2044 * Since we let just about anything pass through this function, 2045 * fence against divide-by-zero here. 2046 */ 2047 if (secsize) 2048 rdirsec = roundup(rec * 32, secsize) / secsize; 2049 else 2050 rdirsec = 0; 2051 2052 /* 2053 * This assignment is necessary before pc_dbdaddr() can first be 2054 * used. Must initialize the value here. 2055 */ 2056 fsp->pcfs_secsize = secsize; 2057 fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1; 2058 2059 fsp->pcfs_mediasize = mediasize; 2060 2061 fsp->pcfs_spcl = bpb_get_SecPerClus(bpb); 2062 fsp->pcfs_numfat = bpb_get_NumFATs(bpb); 2063 fsp->pcfs_mediadesc = bpb_get_Media(bpb); 2064 fsp->pcfs_clsize = secsize * fsp->pcfs_spcl; 2065 fsp->pcfs_rdirsec = rdirsec; 2066 2067 /* 2068 * Remember: All PCFS offset calculations in sectors. Before I/O 2069 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is 2070 * necessary so that media with > 512Byte sector sizes work correctly. 2071 */ 2072 fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved; 2073 fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec; 2074 fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec; 2075 datasec = totsec - 2076 (blkcnt_t)fatsec * fsp->pcfs_numfat - 2077 (blkcnt_t)rdirsec - 2078 (blkcnt_t)reserved; 2079 2080 DTRACE_PROBE4(fatgeometry, 2081 blkcnt_t, totsec, size_t, fatsec, 2082 size_t, rdirsec, blkcnt_t, datasec); 2083 2084 /* 2085 * 'totsec' is taken directly from the BPB and guaranteed to fit 2086 * into a 32bit unsigned integer. The calculation of 'datasec', 2087 * on the other hand, could underflow for incorrect values in 2088 * rdirsec/reserved/fatsec. Check for that. 2089 * We also check that the BPB conforms to the FAT specification's 2090 * requirement that either of the 16/32bit total sector counts 2091 * must be zero. 2092 */ 2093 if (totsec != 0 && 2094 (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) && 2095 datasec < totsec && datasec <= UINT32_MAX) 2096 validflags |= BPB_TOTSEC_OK; 2097 2098 if ((len_t)totsec * (len_t)secsize <= mediasize) 2099 validflags |= BPB_MEDIASZ_OK; 2100 2101 if (VALID_SECSIZE(secsize)) 2102 validflags |= BPB_SECSIZE_OK; 2103 if (VALID_SPCL(fsp->pcfs_spcl)) 2104 validflags |= BPB_SECPERCLUS_OK; 2105 if (VALID_CLSIZE(fsp->pcfs_clsize)) 2106 validflags |= BPB_CLSIZE_OK; 2107 if (VALID_NUMFATS(fsp->pcfs_numfat)) 2108 validflags |= BPB_NUMFAT_OK; 2109 if (VALID_RSVDSEC(reserved) && reserved < totsec) 2110 validflags |= BPB_RSVDSECCNT_OK; 2111 if (VALID_MEDIA(fsp->pcfs_mediadesc)) 2112 validflags |= BPB_MEDIADESC_OK; 2113 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb))) 2114 validflags |= BPB_BOOTSIG16_OK; 2115 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb))) 2116 validflags |= BPB_BOOTSIG32_OK; 2117 if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb))) 2118 validflags |= BPB_FSTYPSTR16_OK; 2119 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb))) 2120 validflags |= BPB_FSTYPSTR32_OK; 2121 if (VALID_OEMNAME(bpb_OEMName(bpb))) 2122 validflags |= BPB_OEMNAME_OK; 2123 if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec) 2124 validflags |= BPB_BKBOOTSEC_OK; 2125 if (fsisec > 0 && fsisec <= reserved) 2126 validflags |= BPB_FSISEC_OK; 2127 if (VALID_JMPBOOT(bpb_jmpBoot(bpb))) 2128 validflags |= BPB_JMPBOOT_OK; 2129 if (VALID_FSVER32(bpb_get_FSVer32(bpb))) 2130 validflags |= BPB_FSVER_OK; 2131 if (VALID_VOLLAB(bpb_VolLab16(bpb))) 2132 validflags |= BPB_VOLLAB16_OK; 2133 if (VALID_VOLLAB(bpb_VolLab32(bpb))) 2134 validflags |= BPB_VOLLAB32_OK; 2135 if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb))) 2136 validflags |= BPB_EXTFLAGS_OK; 2137 2138 /* 2139 * Try to determine which FAT format to use. 2140 * 2141 * Calculate the number of clusters in order to determine 2142 * the type of FAT we are looking at. This is the only 2143 * recommended way of determining FAT type, though there 2144 * are other hints in the data, this is the best way. 2145 * 2146 * Since we let just about "anything" pass through this function 2147 * without early exits, fence against divide-by-zero here. 2148 * 2149 * datasec was already validated against UINT32_MAX so we know 2150 * the result will not overflow the 32bit calculation. 2151 */ 2152 if (fsp->pcfs_spcl) 2153 ncl = (uint32_t)datasec / fsp->pcfs_spcl; 2154 else 2155 ncl = 0; 2156 2157 fsp->pcfs_ncluster = ncl; 2158 2159 /* 2160 * From the Microsoft FAT specification: 2161 * In the following example, when it says <, it does not mean <=. 2162 * Note also that the numbers are correct. The first number for 2163 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers 2164 * and the '<' signs are not wrong. 2165 * 2166 * We "specialdetect" the corner cases, and use at least one "extra" 2167 * criterion to decide whether it's FAT16 or FAT32 if the cluster 2168 * count is dangerously close to the boundaries. 2169 */ 2170 2171 if (ncl <= PCF_FIRSTCLUSTER) { 2172 type = FAT_UNKNOWN; 2173 } else if (ncl < 4085) { 2174 type = FAT12; 2175 } else if (ncl <= 4096) { 2176 type = FAT_QUESTIONABLE; 2177 } else if (ncl < 65525) { 2178 type = FAT16; 2179 } else if (ncl <= 65536) { 2180 type = FAT_QUESTIONABLE; 2181 } else if (ncl < PCF_LASTCLUSTER32) { 2182 type = FAT32; 2183 } else { 2184 type = FAT_UNKNOWN; 2185 } 2186 2187 DTRACE_PROBE4(parseBPB__initial, 2188 struct pcfs *, fsp, unsigned char *, bpb, 2189 int, validflags, fattype_t, type); 2190 2191 recheck: 2192 fsp->pcfs_fatsec = fatsec; 2193 2194 /* Do some final sanity checks for each specific type of FAT */ 2195 switch (type) { 2196 case FAT12: 2197 if (rec != 0) 2198 validflags |= BPB_ROOTENTCNT_OK; 2199 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec || 2200 bpb_get_TotSec16(bpb) == 0) 2201 validflags |= BPB_TOTSEC16_OK; 2202 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec || 2203 bpb_get_TotSec32(bpb) == 0) 2204 validflags |= BPB_TOTSEC32_OK; 2205 if (bpb_get_FatSz16(bpb) == fatsec) 2206 validflags |= BPB_FATSZ16_OK; 2207 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) 2208 * 3 / 2) 2209 validflags |= BPB_FATSZ_OK; 2210 if (ncl < 4085) 2211 validflags |= BPB_NCLUSTERS_OK; 2212 2213 fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff); 2214 fsp->pcfs_rootblksize = 2215 fsp->pcfs_rdirsec * secsize; 2216 fsp->pcfs_fsistart = 0; 2217 2218 if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK) 2219 type = FAT_UNKNOWN; 2220 break; 2221 case FAT16: 2222 if (rec != 0) 2223 validflags |= BPB_ROOTENTCNT_OK; 2224 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec || 2225 bpb_get_TotSec16(bpb) == 0) 2226 validflags |= BPB_TOTSEC16_OK; 2227 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec || 2228 bpb_get_TotSec32(bpb) == 0) 2229 validflags |= BPB_TOTSEC32_OK; 2230 if (bpb_get_FatSz16(bpb) == fatsec) 2231 validflags |= BPB_FATSZ16_OK; 2232 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2) 2233 validflags |= BPB_FATSZ_OK; 2234 if (ncl >= 4085 && ncl < 65525) 2235 validflags |= BPB_NCLUSTERS_OK; 2236 2237 fsp->pcfs_lastclmark = PCF_LASTCLUSTER; 2238 fsp->pcfs_rootblksize = 2239 fsp->pcfs_rdirsec * secsize; 2240 fsp->pcfs_fsistart = 0; 2241 2242 if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK) 2243 type = FAT_UNKNOWN; 2244 break; 2245 case FAT32: 2246 if (rec == 0) 2247 validflags |= BPB_ROOTENTCNT_OK; 2248 if (bpb_get_TotSec16(bpb) == 0) 2249 validflags |= BPB_TOTSEC16_OK; 2250 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec) 2251 validflags |= BPB_TOTSEC32_OK; 2252 if (bpb_get_FatSz16(bpb) == 0) 2253 validflags |= BPB_FATSZ16_OK; 2254 if (bpb_get_FatSz32(bpb) == fatsec) 2255 validflags |= BPB_FATSZ32_OK; 2256 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4) 2257 validflags |= BPB_FATSZ_OK; 2258 if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32) 2259 validflags |= BPB_NCLUSTERS_OK; 2260 2261 fsp->pcfs_lastclmark = PCF_LASTCLUSTER32; 2262 fsp->pcfs_rootblksize = fsp->pcfs_clsize; 2263 fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec; 2264 if (validflags & BPB_FSISEC_OK) 2265 fsp->pcfs_flags |= PCFS_FSINFO_OK; 2266 fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb); 2267 if (pc_validcl(fsp, fsp->pcfs_rootclnum)) 2268 validflags |= BPB_ROOTCLUSTER_OK; 2269 2270 /* 2271 * Current PCFS code only works if 'pcfs_rdirstart' 2272 * contains the root cluster number on FAT32. 2273 * That's a mis-use and would better be changed. 2274 */ 2275 fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum; 2276 2277 if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK) 2278 type = FAT_UNKNOWN; 2279 break; 2280 case FAT_QUESTIONABLE: 2281 type = secondaryBPBChecks(fsp, bpb, secsize); 2282 goto recheck; 2283 default: 2284 ASSERT(type == FAT_UNKNOWN); 2285 break; 2286 } 2287 2288 ASSERT(type != FAT_QUESTIONABLE); 2289 2290 fsp->pcfs_fattype = type; 2291 2292 if (valid) 2293 *valid = validflags; 2294 2295 DTRACE_PROBE4(parseBPB__final, 2296 struct pcfs *, fsp, unsigned char *, bpb, 2297 int, validflags, fattype_t, type); 2298 2299 if (type != FAT_UNKNOWN) { 2300 ASSERT((secsize & (DEV_BSIZE - 1)) == 0); 2301 ASSERT(ISP2(secsize / DEV_BSIZE)); 2302 return (1); 2303 } 2304 2305 return (0); 2306 } 2307 2308 2309 /* 2310 * Detect the device's native block size (sector size). 2311 * 2312 * Test whether the device is: 2313 * - a floppy device from a known controller type via DKIOCINFO 2314 * - a real floppy using the fd(4D) driver and capable of fdio(4I) ioctls 2315 * - a USB floppy drive (identified by drive geometry) 2316 * 2317 * Detecting a floppy will make PCFS metadata updates on such media synchronous, 2318 * to minimize risks due to slow I/O and user hotplugging / device ejection. 2319 * 2320 * This might be a bit wasteful on kernel stack space; if anyone's 2321 * bothered by this, kmem_alloc/kmem_free the ioctl arguments... 2322 */ 2323 static void 2324 pcfs_device_getinfo(struct pcfs *fsp) 2325 { 2326 dev_t rdev = fsp->pcfs_xdev; 2327 int error; 2328 union { 2329 struct dk_minfo mi; 2330 struct dk_cinfo ci; 2331 struct dk_geom gi; 2332 struct fd_char fc; 2333 } arg; /* save stackspace ... */ 2334 intptr_t argp = (intptr_t)&arg; 2335 ldi_handle_t lh; 2336 ldi_ident_t li; 2337 int isfloppy, isremoveable, ishotpluggable; 2338 cred_t *cr = CRED(); 2339 2340 if (ldi_ident_from_dev(rdev, &li)) 2341 goto out; 2342 2343 error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li); 2344 ldi_ident_release(li); 2345 if (error) 2346 goto out; 2347 2348 /* 2349 * Not sure if this could possibly happen. It'd be a bit like 2350 * VOP_OPEN() changing the passed-in vnode ptr. We're just not 2351 * expecting it, needs some thought if triggered ... 2352 */ 2353 ASSERT(fsp->pcfs_xdev == rdev); 2354 2355 /* 2356 * Check for removeable/hotpluggable media. 2357 */ 2358 if (ldi_ioctl(lh, DKIOCREMOVABLE, 2359 (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) { 2360 isremoveable = 0; 2361 } 2362 if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE, 2363 (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) { 2364 ishotpluggable = 0; 2365 } 2366 2367 /* 2368 * Make sure we don't use "half-initialized" values if the ioctls fail. 2369 */ 2370 if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) { 2371 bzero(&arg, sizeof (arg)); 2372 fsp->pcfs_mediasize = 0; 2373 } else { 2374 fsp->pcfs_mediasize = 2375 (len_t)arg.mi.dki_lbsize * 2376 (len_t)arg.mi.dki_capacity; 2377 } 2378 2379 if (VALID_SECSIZE(arg.mi.dki_lbsize)) { 2380 if (fsp->pcfs_secsize == 0) { 2381 fsp->pcfs_secsize = arg.mi.dki_lbsize; 2382 fsp->pcfs_sdshift = 2383 ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1; 2384 } else { 2385 PC_DPRINTF4(1, "!pcfs: autodetected media block size " 2386 "%d, device (%x.%x), different from user-provided " 2387 "%d. User override - ignoring autodetect result.\n", 2388 arg.mi.dki_lbsize, 2389 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev), 2390 fsp->pcfs_secsize); 2391 } 2392 } else if (arg.mi.dki_lbsize) { 2393 PC_DPRINTF3(1, "!pcfs: autodetected media block size " 2394 "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). " 2395 "Ignoring autodetect result.\n", 2396 arg.mi.dki_lbsize, 2397 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev)); 2398 } 2399 2400 /* 2401 * We treat the following media types as a floppy by default. 2402 */ 2403 isfloppy = 2404 (arg.mi.dki_media_type == DK_FLOPPY || 2405 arg.mi.dki_media_type == DK_ZIP || 2406 arg.mi.dki_media_type == DK_JAZ); 2407 2408 /* 2409 * if this device understands fdio(4I) requests it's 2410 * obviously a floppy drive. 2411 */ 2412 if (!isfloppy && 2413 !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL)) 2414 isfloppy = 1; 2415 2416 /* 2417 * some devices we like to treat as floppies, but they don't 2418 * understand fdio(4I) requests. 2419 */ 2420 if (!isfloppy && 2421 !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) && 2422 (arg.ci.dki_ctype == DKC_WDC2880 || 2423 arg.ci.dki_ctype == DKC_NCRFLOPPY || 2424 arg.ci.dki_ctype == DKC_SMSFLOPPY || 2425 arg.ci.dki_ctype == DKC_INTEL82077)) 2426 isfloppy = 1; 2427 2428 /* 2429 * This is the "final fallback" test - media with 2430 * 2 heads and 80 cylinders are assumed to be floppies. 2431 * This is normally true for USB floppy drives ... 2432 */ 2433 if (!isfloppy && 2434 !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) && 2435 (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2)) 2436 isfloppy = 1; 2437 2438 /* 2439 * This is similar to the "old" PCFS code that sets this flag 2440 * just based on the media descriptor being 0xf8 (MD_FIXED). 2441 * Should be re-worked. We really need some specialcasing for 2442 * removeable media. 2443 */ 2444 if (!isfloppy) { 2445 fsp->pcfs_flags |= PCFS_NOCHK; 2446 } 2447 2448 /* 2449 * We automatically disable access time updates if the medium is 2450 * removeable and/or hotpluggable, and the admin did not explicitly 2451 * request access time updates (via the "atime" mount option). 2452 * The majority of flash-based media should fit this category. 2453 * Minimizing write access extends the lifetime of your memory stick ! 2454 */ 2455 if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) && 2456 (isremoveable || ishotpluggable | isfloppy)) { 2457 fsp->pcfs_flags |= PCFS_NOATIME; 2458 } 2459 2460 (void) ldi_close(lh, FREAD, cr); 2461 out: 2462 if (fsp->pcfs_secsize == 0) { 2463 PC_DPRINTF3(1, "!pcfs: media block size autodetection " 2464 "device (%x.%x) failed, no user-provided fallback. " 2465 "Using %d bytes.\n", 2466 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev), 2467 DEV_BSIZE); 2468 fsp->pcfs_secsize = DEV_BSIZE; 2469 fsp->pcfs_sdshift = 0; 2470 } 2471 ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0); 2472 ASSERT(VALID_SECSIZE(fsp->pcfs_secsize)); 2473 } 2474 2475 /* 2476 * Get the FAT type for the DOS medium. 2477 * 2478 * ------------------------- 2479 * According to Microsoft: 2480 * The FAT type one of FAT12, FAT16, or FAT32 is determined by the 2481 * count of clusters on the volume and nothing else. 2482 * ------------------------- 2483 * 2484 */ 2485 static int 2486 pc_getfattype(struct pcfs *fsp) 2487 { 2488 int error = 0; 2489 buf_t *bp = NULL; 2490 struct vnode *devvp = fsp->pcfs_devvp; 2491 dev_t dev = devvp->v_rdev; 2492 2493 /* 2494 * Detect the native block size of the medium, and attempt to 2495 * detect whether the medium is removeable. 2496 * We do treat removable media (floppies, USB and FireWire disks) 2497 * differently wrt. to the frequency and synchronicity of FAT updates. 2498 * We need to know the media block size in order to be able to 2499 * parse the partition table. 2500 */ 2501 pcfs_device_getinfo(fsp); 2502 2503 /* 2504 * Unpartitioned media (floppies and some removeable devices) 2505 * don't have a partition table, the FAT BPB is at disk block 0. 2506 * Start out by reading block 0. 2507 */ 2508 fsp->pcfs_dosstart = 0; 2509 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize); 2510 2511 if (error = geterror(bp)) 2512 goto out; 2513 2514 /* 2515 * If a logical drive number is requested, parse the partition table 2516 * and attempt to locate it. Otherwise, proceed immediately to the 2517 * BPB check. findTheDrive(), if successful, returns the disk block 2518 * number where the requested partition starts in "startsec". 2519 */ 2520 if (fsp->pcfs_ldrive != 0) { 2521 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on " 2522 "device (%x,%x):%d to find BPB\n", 2523 getmajor(dev), getminor(dev), fsp->pcfs_ldrive); 2524 2525 if (error = findTheDrive(fsp, &bp)) 2526 goto out; 2527 2528 ASSERT(fsp->pcfs_dosstart != 0); 2529 2530 brelse(bp); 2531 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), 2532 fsp->pcfs_secsize); 2533 if (error = geterror(bp)) 2534 goto out; 2535 } 2536 2537 /* 2538 * Validate the BPB and fill in the instance structure. 2539 */ 2540 if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) { 2541 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on " 2542 "device (%x.%x):%d, disk LBA %u\n", 2543 getmajor(dev), getminor(dev), fsp->pcfs_ldrive, 2544 (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart)); 2545 error = EINVAL; 2546 goto out; 2547 } 2548 2549 ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN); 2550 2551 out: 2552 /* 2553 * Release the buffer used 2554 */ 2555 if (bp != NULL) 2556 brelse(bp); 2557 return (error); 2558 } 2559 2560 2561 /* 2562 * Get the file allocation table. 2563 * If there is an old FAT, invalidate it. 2564 */ 2565 int 2566 pc_getfat(struct pcfs *fsp) 2567 { 2568 struct buf *bp = NULL; 2569 uchar_t *fatp = NULL; 2570 uchar_t *fat_changemap = NULL; 2571 int error; 2572 int fat_changemapsize; 2573 int flags = 0; 2574 int nfat; 2575 int altfat_mustmatch = 0; 2576 int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize; 2577 2578 if (fsp->pcfs_fatp) { 2579 /* 2580 * There is a FAT in core. 2581 * If there are open file pcnodes or we have modified it or 2582 * it hasn't timed out yet use the in core FAT. 2583 * Otherwise invalidate it and get a new one 2584 */ 2585 #ifdef notdef 2586 if (fsp->pcfs_frefs || 2587 (fsp->pcfs_flags & PCFS_FATMOD) || 2588 (gethrestime_sec() < fsp->pcfs_fattime)) { 2589 return (0); 2590 } else { 2591 mutex_enter(&pcfslock); 2592 pc_invalfat(fsp); 2593 mutex_exit(&pcfslock); 2594 } 2595 #endif /* notdef */ 2596 return (0); 2597 } 2598 2599 /* 2600 * Get FAT and check it for validity 2601 */ 2602 fatp = kmem_alloc(fatsize, KM_SLEEP); 2603 error = pc_readfat(fsp, fatp); 2604 if (error) { 2605 flags = B_ERROR; 2606 goto out; 2607 } 2608 fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1; 2609 fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP); 2610 fsp->pcfs_fatp = fatp; 2611 fsp->pcfs_fat_changemapsize = fat_changemapsize; 2612 fsp->pcfs_fat_changemap = fat_changemap; 2613 2614 /* 2615 * The only definite signature check is that the 2616 * media descriptor byte should match the first byte 2617 * of the FAT block. 2618 */ 2619 if (fatp[0] != fsp->pcfs_mediadesc) { 2620 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, " 2621 "media descriptor %x, FAT[0] lowbyte %x\n", 2622 (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]); 2623 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n"); 2624 altfat_mustmatch = 1; 2625 } 2626 2627 /* 2628 * Get alternate FATs and check for consistency 2629 * This is an inlined version of pc_readfat(). 2630 * Since we're only comparing FAT and alternate FAT, 2631 * there's no reason to let pc_readfat() copy data out 2632 * of the buf. Instead, compare in-situ, one cluster 2633 * at a time. 2634 */ 2635 for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) { 2636 size_t startsec; 2637 size_t off; 2638 2639 startsec = pc_dbdaddr(fsp, 2640 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec); 2641 2642 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) { 2643 daddr_t fatblk = startsec + pc_dbdaddr(fsp, 2644 pc_cltodb(fsp, pc_lblkno(fsp, off))); 2645 2646 bp = bread(fsp->pcfs_xdev, fatblk, 2647 MIN(fsp->pcfs_clsize, fatsize - off)); 2648 if (bp->b_flags & (B_ERROR | B_STALE)) { 2649 cmn_err(CE_NOTE, 2650 "!pcfs: alternate FAT #%d (start LBA %p)" 2651 " read error at offset %ld on device" 2652 " (%x.%x):%d", 2653 nfat, (void *)(uintptr_t)startsec, off, 2654 getmajor(fsp->pcfs_xdev), 2655 getminor(fsp->pcfs_xdev), 2656 fsp->pcfs_ldrive); 2657 flags = B_ERROR; 2658 error = EIO; 2659 goto out; 2660 } 2661 bp->b_flags |= B_STALE | B_AGE; 2662 if (bcmp(bp->b_un.b_addr, fatp + off, 2663 MIN(fsp->pcfs_clsize, fatsize - off))) { 2664 cmn_err(CE_NOTE, 2665 "!pcfs: alternate FAT #%d (start LBA %p)" 2666 " corrupted at offset %ld on device" 2667 " (%x.%x):%d", 2668 nfat, (void *)(uintptr_t)startsec, off, 2669 getmajor(fsp->pcfs_xdev), 2670 getminor(fsp->pcfs_xdev), 2671 fsp->pcfs_ldrive); 2672 if (altfat_mustmatch) { 2673 flags = B_ERROR; 2674 error = EIO; 2675 goto out; 2676 } 2677 } 2678 brelse(bp); 2679 bp = NULL; /* prevent double release */ 2680 } 2681 } 2682 2683 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT; 2684 fsp->pcfs_fatjustread = 1; 2685 2686 /* 2687 * Retrieve FAT32 fsinfo sector. 2688 * A failure to read this is not fatal to accessing the volume. 2689 * It simply means operations that count or search free blocks 2690 * will have to do a full FAT walk, vs. a possibly quicker lookup 2691 * of the summary information. 2692 * Hence, we log a message but return success overall after this point. 2693 */ 2694 if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) { 2695 struct fat_od_fsi *fsinfo_disk; 2696 2697 bp = bread(fsp->pcfs_xdev, 2698 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize); 2699 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr; 2700 if (bp->b_flags & (B_ERROR | B_STALE) || 2701 !FSISIG_OK(fsinfo_disk)) { 2702 cmn_err(CE_NOTE, 2703 "!pcfs: error reading fat32 fsinfo from " 2704 "device (%x.%x):%d, block %lld", 2705 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev), 2706 fsp->pcfs_ldrive, 2707 (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart)); 2708 fsp->pcfs_flags &= ~PCFS_FSINFO_OK; 2709 fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN; 2710 fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN; 2711 } else { 2712 bp->b_flags |= B_STALE | B_AGE; 2713 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr); 2714 fsp->pcfs_fsinfo.fs_free_clusters = 2715 LE_32(fsinfo_disk->fsi_incore.fs_free_clusters); 2716 fsp->pcfs_fsinfo.fs_next_free = 2717 LE_32(fsinfo_disk->fsi_incore.fs_next_free); 2718 } 2719 brelse(bp); 2720 bp = NULL; 2721 } 2722 2723 if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free)) 2724 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free; 2725 else 2726 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER; 2727 2728 return (0); 2729 2730 out: 2731 cmn_err(CE_NOTE, "!pcfs: illegal disk format"); 2732 if (bp) 2733 brelse(bp); 2734 if (fatp) 2735 kmem_free(fatp, fatsize); 2736 if (fat_changemap) 2737 kmem_free(fat_changemap, fat_changemapsize); 2738 2739 if (flags) { 2740 pc_mark_irrecov(fsp); 2741 } 2742 return (error); 2743 } 2744