1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26 /*
27 * Copyright (c) 2017 by Delphix. All rights reserved.
28 * Copyright 2024 Oxide Computer Company
29 */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kmem.h>
34 #include <sys/user.h>
35 #include <sys/proc.h>
36 #include <sys/cred.h>
37 #include <sys/disp.h>
38 #include <sys/buf.h>
39 #include <sys/vfs.h>
40 #include <sys/vfs_opreg.h>
41 #include <sys/vnode.h>
42 #include <sys/fdio.h>
43 #include <sys/file.h>
44 #include <sys/uio.h>
45 #include <sys/conf.h>
46 #include <sys/statvfs.h>
47 #include <sys/mount.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/sysmacros.h>
52 #include <sys/conf.h>
53 #include <sys/mkdev.h>
54 #include <sys/swap.h>
55 #include <sys/sunddi.h>
56 #include <sys/sunldi.h>
57 #include <sys/dktp/fdisk.h>
58 #include <sys/fs/pc_label.h>
59 #include <sys/fs/pc_fs.h>
60 #include <sys/fs/pc_dir.h>
61 #include <sys/fs/pc_node.h>
62 #include <fs/fs_subr.h>
63 #include <sys/modctl.h>
64 #include <sys/dkio.h>
65 #include <sys/open.h>
66 #include <sys/mntent.h>
67 #include <sys/policy.h>
68 #include <sys/atomic.h>
69 #include <sys/sdt.h>
70
/*
 * Most PC media have 512-byte sectors, but media with 1k sectors do
 * turn up occasionally.  fd_strategy() insists that I/O on 1k-sector
 * media is sized in multiples of 1k, so while the real sector size is
 * still unknown every read is made 1k long.
 */
#define	PC_SAFESECSIZE	(2 * (PC_SECSIZE))
79
80 static int pcfs_pseudo_floppy(dev_t);
81
82 static int pcfsinit(int, char *);
83 static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
84 struct cred *);
85 static int pcfs_unmount(struct vfs *, int, struct cred *);
86 static int pcfs_root(struct vfs *, struct vnode **);
87 static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
88 static int pc_syncfsnodes(struct pcfs *);
89 static int pcfs_sync(struct vfs *, short, struct cred *);
90 static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
91 static void pcfs_freevfs(vfs_t *vfsp);
92 static int pcfs_syncfs(struct vfs *, uint64_t, struct cred *);
93
94 static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
95 static int pc_writefat(struct pcfs *fsp, daddr_t start);
96
97 static int pc_getfattype(struct pcfs *fsp);
98 static void pcfs_parse_mntopts(struct pcfs *fsp);
99
100
101 /*
102 * pcfs mount options table
103 */
104
105 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
106 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
107 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
108 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
109 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
110 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
111 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
112 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
113
114 static mntopt_t mntopts[] = {
115 /*
116 * option name cancel option default arg flags opt data
117 */
118 { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
119 { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
120 { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
121 { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
122 { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
123 { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, 0, NULL },
124 { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL },
125 { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL },
126 { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
127 { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
128 };
129
130 static mntopts_t pcfs_mntopts = {
131 sizeof (mntopts) / sizeof (mntopt_t),
132 mntopts
133 };
134
135 int pcfsdebuglevel = 0;
136
137 /*
138 * pcfslock: protects the list of mounted pc filesystems "pc_mounttab.
139 * pcfs_lock: (inside per filesystem structure "pcfs")
140 * per filesystem lock. Most of the vfsops and vnodeops are
141 * protected by this lock.
142 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
143 *
144 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
145 *
146 * pcfs_mountcount: used to prevent module unloads while there is still
147 * pcfs state from a former mount hanging around. With
148 * forced umount support, the filesystem module must not
149 * be allowed to go away before the last VFS_FREEVFS()
150 * call has been made.
151 * Since this is just an atomic counter, there's no need
152 * for locking.
153 */
154 kmutex_t pcfslock;
155 krwlock_t pcnodes_lock;
156 uint32_t pcfs_mountcount;
157
158 static int pcfstype;
159
160 static vfsdef_t vfw = {
161 VFSDEF_VERSION,
162 "pcfs",
163 pcfsinit,
164 VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI|VSW_MOUNTDEV,
165 &pcfs_mntopts
166 };
167
168 extern struct mod_ops mod_fsops;
169
170 static struct modlfs modlfs = {
171 &mod_fsops,
172 "PC filesystem",
173 &vfw
174 };
175
176 static struct modlinkage modlinkage = {
177 MODREV_1,
178 &modlfs,
179 NULL
180 };
181
182 int
_init(void)183 _init(void)
184 {
185 int error;
186
187 #if !defined(lint)
188 /* make sure the on-disk structures are sane */
189 ASSERT(sizeof (struct pcdir) == 32);
190 ASSERT(sizeof (struct pcdir_lfn) == 32);
191 #endif
192 mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
193 rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
194 error = mod_install(&modlinkage);
195 if (error) {
196 mutex_destroy(&pcfslock);
197 rw_destroy(&pcnodes_lock);
198 }
199 return (error);
200 }
201
202 int
_fini(void)203 _fini(void)
204 {
205 int error;
206
207 /*
208 * If a forcedly unmounted instance is still hanging around,
209 * we cannot allow the module to be unloaded because that would
210 * cause panics once the VFS framework decides it's time to call
211 * into VFS_FREEVFS().
212 */
213 if (pcfs_mountcount)
214 return (EBUSY);
215
216 error = mod_remove(&modlinkage);
217 if (error)
218 return (error);
219 mutex_destroy(&pcfslock);
220 rw_destroy(&pcnodes_lock);
221 /*
222 * Tear down the operations vectors
223 */
224 (void) vfs_freevfsops_by_type(pcfstype);
225 vn_freevnodeops(pcfs_fvnodeops);
226 vn_freevnodeops(pcfs_dvnodeops);
227 return (0);
228 }
229
230 int
_info(struct modinfo * modinfop)231 _info(struct modinfo *modinfop)
232 {
233 return (mod_info(&modlinkage, modinfop));
234 }
235
236 /* ARGSUSED1 */
237 static int
pcfsinit(int fstype,char * name)238 pcfsinit(int fstype, char *name)
239 {
240 static const fs_operation_def_t pcfs_vfsops_template[] = {
241 VFSNAME_MOUNT, { .vfs_mount = pcfs_mount },
242 VFSNAME_UNMOUNT, { .vfs_unmount = pcfs_unmount },
243 VFSNAME_ROOT, { .vfs_root = pcfs_root },
244 VFSNAME_STATVFS, { .vfs_statvfs = pcfs_statvfs },
245 VFSNAME_SYNC, { .vfs_sync = pcfs_sync },
246 VFSNAME_VGET, { .vfs_vget = pcfs_vget },
247 VFSNAME_FREEVFS, { .vfs_freevfs = pcfs_freevfs },
248 VFSNAME_SYNCFS, { .vfs_syncfs = pcfs_syncfs },
249 NULL, NULL
250 };
251 int error;
252
253 error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
254 if (error != 0) {
255 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
256 return (error);
257 }
258
259 error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
260 if (error != 0) {
261 (void) vfs_freevfsops_by_type(fstype);
262 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
263 return (error);
264 }
265
266 error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
267 if (error != 0) {
268 (void) vfs_freevfsops_by_type(fstype);
269 vn_freevnodeops(pcfs_fvnodeops);
270 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
271 return (error);
272 }
273
274 pcfstype = fstype;
275 (void) pc_init();
276 pcfs_mountcount = 0;
277 return (0);
278 }
279
280 static struct pcfs *pc_mounttab = NULL;
281
282 extern struct pcfs_args pc_tz;
283
284 /*
285 * Define some special logical drives we use internal to this file.
286 */
287 #define BOOT_PARTITION_DRIVE 99
288 #define PRIMARY_DOS_DRIVE 1
289 #define UNPARTITIONED_DRIVE 0
290
291 static int
pcfs_device_identify(struct vfs * vfsp,struct mounta * uap,struct cred * cr,int * dos_ldrive,dev_t * xdev)292 pcfs_device_identify(
293 struct vfs *vfsp,
294 struct mounta *uap,
295 struct cred *cr,
296 int *dos_ldrive,
297 dev_t *xdev)
298 {
299 struct pathname special;
300 char *c;
301 struct vnode *svp = NULL;
302 struct vnode *lvp = NULL;
303 int oflag, aflag;
304 int error;
305
306 /*
307 * Resolve path name of special file being mounted.
308 */
309 if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
310 return (error);
311 }
312
313 *dos_ldrive = -1;
314
315 if (error =
316 lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
317 /*
318 * If there's no device node, the name specified most likely
319 * maps to a PCFS-style "partition specifier" to select a
320 * harddisk primary/logical partition. Disable floppy-specific
321 * checks in such cases unless an explicit :A or :B is
322 * requested.
323 */
324
325 /*
326 * Split the pathname string at the last ':' separator.
327 * If there's no ':' in the device name, or the ':' is the
328 * last character in the string, the name is invalid and
329 * the error from the previous lookup will be returned.
330 */
331 c = strrchr(special.pn_path, ':');
332 if (c == NULL || strlen(c) == 0)
333 goto devlookup_done;
334
335 *c++ = '\0';
336
337 /*
338 * PCFS partition name suffixes can be:
339 * - "boot" to indicate the X86BOOT partition
340 * - a drive letter [c-z] for the "DOS logical drive"
341 * - a drive number 1..24 for the "DOS logical drive"
342 * - a "floppy name letter", 'a' or 'b' (just strip this)
343 */
344 if (strcasecmp(c, "boot") == 0) {
345 /*
346 * The Solaris boot partition is requested.
347 */
348 *dos_ldrive = BOOT_PARTITION_DRIVE;
349 } else if (strspn(c, "0123456789") == strlen(c)) {
350 /*
351 * All digits - parse the partition number.
352 */
353 long drvnum = 0;
354
355 if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
356 /*
357 * A number alright - in the allowed range ?
358 */
359 if (drvnum > 24 || drvnum == 0)
360 error = ENXIO;
361 }
362 if (error)
363 goto devlookup_done;
364 *dos_ldrive = (int)drvnum;
365 } else if (strlen(c) == 1) {
366 /*
367 * A single trailing character was specified.
368 * - [c-zC-Z] means a harddisk partition, and
369 * we retrieve the partition number.
370 * - [abAB] means a floppy drive, so we swallow
371 * the "drive specifier" and test later
372 * whether the physical device is a floppy.
373 */
374 *c = tolower(*c);
375 if (*c == 'a' || *c == 'b') {
376 *dos_ldrive = UNPARTITIONED_DRIVE;
377 } else if (*c < 'c' || *c > 'z') {
378 error = ENXIO;
379 goto devlookup_done;
380 } else {
381 *dos_ldrive = 1 + *c - 'c';
382 }
383 } else {
384 /*
385 * Can't parse this - pass through previous error.
386 */
387 goto devlookup_done;
388 }
389
390
391 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
392 NULLVPP, &svp);
393 } else {
394 *dos_ldrive = UNPARTITIONED_DRIVE;
395 }
396 devlookup_done:
397 pn_free(&special);
398 if (error)
399 return (error);
400
401 ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
402
403 /*
404 * Verify caller's permission to open the device special file.
405 */
406 if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
407 ((uap->flags & MS_RDONLY) != 0)) {
408 oflag = FREAD;
409 aflag = VREAD;
410 } else {
411 oflag = FREAD | FWRITE;
412 aflag = VREAD | VWRITE;
413 }
414
415 error = vfs_get_lofi(vfsp, &lvp);
416
417 if (error > 0) {
418 if (error == ENOENT)
419 error = ENODEV;
420 goto out;
421 } else if (error == 0) {
422 *xdev = lvp->v_rdev;
423 } else {
424 *xdev = svp->v_rdev;
425
426 if (svp->v_type != VBLK) {
427 error = ENOTBLK;
428 goto out;
429 }
430
431 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
432 goto out;
433 }
434
435 if (getmajor(*xdev) >= devcnt) {
436 error = ENXIO;
437 goto out;
438 }
439
440 if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
441 goto out;
442
443 out:
444 if (svp != NULL)
445 VN_RELE(svp);
446 if (lvp != NULL)
447 VN_RELE(lvp);
448 return (error);
449 }
450
451 static int
pcfs_device_ismounted(struct vfs * vfsp,int dos_ldrive,dev_t xdev,int * remounting,dev_t * pseudodev)452 pcfs_device_ismounted(
453 struct vfs *vfsp,
454 int dos_ldrive,
455 dev_t xdev,
456 int *remounting,
457 dev_t *pseudodev)
458 {
459 struct pcfs *fsp;
460 int remount = *remounting;
461
462 /*
463 * Ensure that this logical drive isn't already mounted, unless
464 * this is a REMOUNT request.
465 * Note: The framework will perform this check if the "...:c"
466 * PCFS-style "logical drive" syntax has not been used and an
467 * actually existing physical device is backing this filesystem.
468 * Once all block device drivers support PC-style partitioning,
469 * this codeblock can be dropped.
470 */
471 *pseudodev = xdev;
472
473 if (dos_ldrive) {
474 mutex_enter(&pcfslock);
475 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
476 if (fsp->pcfs_xdev == xdev &&
477 fsp->pcfs_ldrive == dos_ldrive) {
478 mutex_exit(&pcfslock);
479 if (remount) {
480 return (0);
481 } else {
482 return (EBUSY);
483 }
484 }
485 /*
486 * Assign a unique device number for the vfs
487 * The old way (getudev() + a constantly incrementing
488 * major number) was wrong because it changes vfs_dev
489 * across mounts and reboots, which breaks nfs file handles.
490 * UFS just uses the real dev_t. We can't do that because
491 * of the way pcfs opens fdisk partitons (the :c and :d
492 * partitions are on the same dev_t). Though that _might_
493 * actually be ok, since the file handle contains an
494 * absolute block number, it's probably better to make them
495 * different. So I think we should retain the original
496 * dev_t, but come up with a different minor number based
497 * on the logical drive that will _always_ come up the same.
498 * For now, we steal the upper 6 bits.
499 */
500 #ifdef notdef
501 /* what should we do here? */
502 if (((getminor(xdev) >> 12) & 0x3F) != 0)
503 printf("whoops - upper bits used!\n");
504 #endif
505 *pseudodev = makedevice(getmajor(xdev),
506 ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
507 if (vfs_devmounting(*pseudodev, vfsp)) {
508 mutex_exit(&pcfslock);
509 return (EBUSY);
510 }
511 if (vfs_devismounted(*pseudodev)) {
512 mutex_exit(&pcfslock);
513 if (remount) {
514 return (0);
515 } else {
516 return (EBUSY);
517 }
518 }
519 mutex_exit(&pcfslock);
520 } else {
521 *pseudodev = xdev;
522 if (vfs_devmounting(*pseudodev, vfsp)) {
523 return (EBUSY);
524 }
525 if (vfs_devismounted(*pseudodev))
526 if (remount) {
527 return (0);
528 } else {
529 return (EBUSY);
530 }
531 }
532
533 /*
534 * This is not a remount. Even if MS_REMOUNT was requested,
535 * the caller needs to proceed as it would on an ordinary
536 * mount.
537 */
538 *remounting = 0;
539
540 ASSERT(*pseudodev);
541 return (0);
542 }
543
544 /*
545 * Get the PCFS-specific mount options from the VFS framework.
546 * For "timezone" and "secsize", we need to parse the number
547 * ourselves and ensure its validity.
548 * Note: "secsize" is deliberately undocumented at this time,
549 * it's a workaround for devices (particularly: lofi image files)
550 * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
551 */
552 static void
pcfs_parse_mntopts(struct pcfs * fsp)553 pcfs_parse_mntopts(struct pcfs *fsp)
554 {
555 char *c;
556 char *endptr;
557 long l;
558 struct vfs *vfsp = fsp->pcfs_vfs;
559
560 ASSERT(fsp->pcfs_secondswest == 0);
561 ASSERT(fsp->pcfs_secsize == 0);
562
563 if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
564 fsp->pcfs_flags |= PCFS_HIDDEN;
565 if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
566 fsp->pcfs_flags |= PCFS_FOLDCASE;
567 if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
568 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
569 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
570 fsp->pcfs_flags |= PCFS_NOATIME;
571
572 if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
573 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
574 endptr == c + strlen(c)) {
575 /*
576 * A number alright - in the allowed range ?
577 */
578 if (l <= -12*3600 || l >= 12*3600) {
579 cmn_err(CE_WARN, "!pcfs: invalid use of "
580 "'timezone' mount option - %ld "
581 "is out of range. Assuming 0.", l);
582 l = 0;
583 }
584 } else {
585 cmn_err(CE_WARN, "!pcfs: invalid use of "
586 "'timezone' mount option - argument %s "
587 "is not a valid number. Assuming 0.", c);
588 l = 0;
589 }
590 fsp->pcfs_secondswest = l;
591 }
592
593 /*
594 * The "secsize=..." mount option is a workaround for the lack of
595 * lofi(4D) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
596 * partition table of a disk image and it has been partitioned with
597 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
598 * images.
599 * That should really be fixed in lofi ... this is a workaround.
600 */
601 if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
602 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
603 endptr == c + strlen(c)) {
604 /*
605 * A number alright - a valid sector size as well ?
606 */
607 if (!VALID_SECSIZE(l)) {
608 cmn_err(CE_WARN, "!pcfs: invalid use of "
609 "'secsize' mount option - %ld is "
610 "unsupported. Autodetecting.", l);
611 l = 0;
612 }
613 } else {
614 cmn_err(CE_WARN, "!pcfs: invalid use of "
615 "'secsize' mount option - argument %s "
616 "is not a valid number. Autodetecting.", c);
617 l = 0;
618 }
619 fsp->pcfs_secsize = l;
620 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
621 }
622 }
623
624 /*
625 * vfs operations
626 */
627
628 /*
629 * pcfs_mount - backend for VFS_MOUNT() on PCFS.
630 */
631 static int
pcfs_mount(struct vfs * vfsp,struct vnode * mvp,struct mounta * uap,struct cred * cr)632 pcfs_mount(
633 struct vfs *vfsp,
634 struct vnode *mvp,
635 struct mounta *uap,
636 struct cred *cr)
637 {
638 struct pcfs *fsp;
639 struct vnode *devvp;
640 dev_t pseudodev;
641 dev_t xdev;
642 int dos_ldrive = 0;
643 int error;
644 int remounting;
645
646 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
647 return (error);
648
649 if (mvp->v_type != VDIR)
650 return (ENOTDIR);
651
652 mutex_enter(&mvp->v_lock);
653 if ((uap->flags & MS_REMOUNT) == 0 &&
654 (uap->flags & MS_OVERLAY) == 0 &&
655 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
656 mutex_exit(&mvp->v_lock);
657 return (EBUSY);
658 }
659 mutex_exit(&mvp->v_lock);
660
661 /*
662 * PCFS doesn't do mount arguments anymore - everything's a mount
663 * option these days. In order not to break existing callers, we
664 * don't reject it yet, just warn that the data (if any) is ignored.
665 */
666 if (uap->datalen != 0)
667 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
668 "mount argument structures instead of mount options. "
669 "Ignoring mount(2) 'dataptr' argument.");
670
671 /*
672 * This is needed early, to make sure the access / open calls
673 * are done using the correct mode. Processing this mount option
674 * only when calling pcfs_parse_mntopts() would lead us to attempt
675 * a read/write access to a possibly writeprotected device, and
676 * a readonly mount attempt might fail because of that.
677 */
678 if (uap->flags & MS_RDONLY) {
679 vfsp->vfs_flag |= VFS_RDONLY;
680 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
681 }
682
683 /*
684 * For most filesystems, this is just a lookupname() on the
685 * mount pathname string. PCFS historically has to do its own
686 * partition table parsing because not all Solaris architectures
687 * support all styles of partitioning that PC media can have, and
688 * hence PCFS understands "device names" that don't map to actual
689 * physical device nodes. Parsing the "PCFS syntax" for device
690 * names is done in pcfs_device_identify() - see there.
691 *
692 * Once all block device drivers that can host FAT filesystems have
693 * been enhanced to create device nodes for all PC-style partitions,
694 * this code can go away.
695 */
696 if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
697 return (error);
698
699 /*
700 * As with looking up the actual device to mount, PCFS cannot rely
701 * on just the checks done by vfs_ismounted() whether a given device
702 * is mounted already. The additional check against the "PCFS syntax"
703 * is done in pcfs_device_ismounted().
704 */
705 remounting = (uap->flags & MS_REMOUNT);
706
707 if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
708 &pseudodev))
709 return (error);
710
711 if (remounting)
712 return (0);
713
714 /*
715 * Mount the filesystem.
716 * An instance structure is required before the attempt to locate
717 * and parse the FAT BPB. This is because mount options may change
718 * the behaviour of the filesystem type matching code. Precreate
719 * it and fill it in to a degree that allows parsing the mount
720 * options.
721 */
722 devvp = makespecvp(xdev, VBLK);
723 if (IS_SWAPVP(devvp)) {
724 VN_RELE(devvp);
725 return (EBUSY);
726 }
727 error = VOP_OPEN(&devvp,
728 (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
729 if (error) {
730 VN_RELE(devvp);
731 return (error);
732 }
733
734 fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
735 fsp->pcfs_vfs = vfsp;
736 fsp->pcfs_xdev = xdev;
737 fsp->pcfs_devvp = devvp;
738 fsp->pcfs_ldrive = dos_ldrive;
739 mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
740
741 pcfs_parse_mntopts(fsp);
742
743 /*
744 * This is the actual "mount" - the PCFS superblock check.
745 *
746 * Find the requested logical drive and the FAT BPB therein.
747 * Check device type and flag the instance if media is removeable.
748 *
749 * Initializes most members of the filesystem instance structure.
750 * Returns EINVAL if no valid BPB can be found. Other errors may
751 * occur after I/O failures, or when invalid / unparseable partition
752 * tables are encountered.
753 */
754 if (error = pc_getfattype(fsp))
755 goto errout;
756
757 /*
758 * Now that the BPB has been parsed, this structural information
759 * is available and known to be valid. Initialize the VFS.
760 */
761 vfsp->vfs_data = fsp;
762 vfsp->vfs_dev = pseudodev;
763 vfsp->vfs_fstype = pcfstype;
764 vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
765 vfsp->vfs_bcount = 0;
766 vfsp->vfs_bsize = fsp->pcfs_clsize;
767
768 /*
769 * Validate that we can access the FAT and that it is, to the
770 * degree we can verify here, self-consistent.
771 */
772 if (error = pc_verify(fsp))
773 goto errout;
774
775 /*
776 * Record the time of the mount, to return as an "approximate"
777 * timestamp for the FAT root directory. Since FAT roots don't
778 * have timestamps, this is less confusing to the user than
779 * claiming "zero" / Jan/01/1970.
780 */
781 gethrestime(&fsp->pcfs_mounttime);
782
783 /*
784 * Fix up the mount options. Because "noatime" is made default on
785 * removeable media only, a fixed disk will have neither "atime"
786 * nor "noatime" set. We set the options explicitly depending on
787 * the PCFS_NOATIME flag, to inform the user of what applies.
788 * Mount option cancellation will take care that the mutually
789 * exclusive 'other' is cleared.
790 */
791 vfs_setmntopt(vfsp,
792 fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
793 NULL, 0);
794
795 /*
796 * All clear - insert the FS instance into PCFS' list.
797 */
798 mutex_enter(&pcfslock);
799 fsp->pcfs_nxt = pc_mounttab;
800 pc_mounttab = fsp;
801 mutex_exit(&pcfslock);
802 atomic_inc_32(&pcfs_mountcount);
803 return (0);
804
805 errout:
806 (void) VOP_CLOSE(devvp,
807 vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
808 1, (offset_t)0, cr, NULL);
809 VN_RELE(devvp);
810 mutex_destroy(&fsp->pcfs_lock);
811 kmem_free(fsp, sizeof (*fsp));
812 return (error);
813
814 }
815
816 static int
pcfs_unmount(struct vfs * vfsp,int flag,struct cred * cr)817 pcfs_unmount(
818 struct vfs *vfsp,
819 int flag,
820 struct cred *cr)
821 {
822 struct pcfs *fsp, *fsp1;
823
824 if (secpolicy_fs_unmount(cr, vfsp) != 0)
825 return (EPERM);
826
827 fsp = VFSTOPCFS(vfsp);
828
829 /*
830 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
831 * prevent lookuppn from crossing the mount point.
832 * If this is not a forced umount request and there's ongoing I/O,
833 * don't allow the mount to proceed.
834 */
835 if (flag & MS_FORCE)
836 vfsp->vfs_flag |= VFS_UNMOUNTED;
837 else if (fsp->pcfs_nrefs)
838 return (EBUSY);
839
840 mutex_enter(&pcfslock);
841
842 /*
843 * If this is a forced umount request or if the fs instance has
844 * been marked as beyond recovery, allow the umount to proceed
845 * regardless of state. pc_diskchanged() forcibly releases all
846 * inactive vnodes/pcnodes.
847 */
848 if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
849 rw_enter(&pcnodes_lock, RW_WRITER);
850 pc_diskchanged(fsp);
851 rw_exit(&pcnodes_lock);
852 }
853
854 /* now there should be no pcp node on pcfhead or pcdhead. */
855
856 if (fsp == pc_mounttab) {
857 pc_mounttab = fsp->pcfs_nxt;
858 } else {
859 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
860 if (fsp1->pcfs_nxt == fsp)
861 fsp1->pcfs_nxt = fsp->pcfs_nxt;
862 }
863
864 mutex_exit(&pcfslock);
865
866 /*
867 * Since we support VFS_FREEVFS(), there's no need to
868 * free the fsp right now. The framework will tell us
869 * when the right time to do so has arrived by calling
870 * into pcfs_freevfs.
871 */
872 return (0);
873 }
874
875 /*
876 * find root of pcfs
877 */
878 static int
pcfs_root(struct vfs * vfsp,struct vnode ** vpp)879 pcfs_root(
880 struct vfs *vfsp,
881 struct vnode **vpp)
882 {
883 struct pcfs *fsp;
884 struct pcnode *pcp;
885 int error;
886
887 fsp = VFSTOPCFS(vfsp);
888 if (error = pc_lockfs(fsp, 0, 0))
889 return (error);
890
891 pcp = pc_getnode(fsp, (daddr_t)0, 0, NULL);
892 pc_unlockfs(fsp);
893 *vpp = PCTOV(pcp);
894 pcp->pc_flags |= PC_EXTERNAL;
895 return (0);
896 }
897
898 /*
899 * Get file system statistics.
900 */
901 static int
pcfs_statvfs(struct vfs * vfsp,struct statvfs64 * sp)902 pcfs_statvfs(
903 struct vfs *vfsp,
904 struct statvfs64 *sp)
905 {
906 struct pcfs *fsp;
907 int error;
908 dev32_t d32;
909
910 fsp = VFSTOPCFS(vfsp);
911 error = pc_getfat(fsp);
912 if (error)
913 return (error);
914 bzero(sp, sizeof (*sp));
915 sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
916 sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
917 sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
918 sp->f_files = (fsfilcnt64_t)-1;
919 sp->f_ffree = (fsfilcnt64_t)-1;
920 sp->f_favail = (fsfilcnt64_t)-1;
921 #ifdef notdef
922 (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
923 #endif /* notdef */
924 (void) cmpldev(&d32, vfsp->vfs_dev);
925 sp->f_fsid = d32;
926 (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
927 sp->f_flag = vf_to_stf(vfsp->vfs_flag);
928 sp->f_namemax = PCMAXNAMLEN;
929 return (0);
930 }
931
932 static int
pc_syncfsnodes(struct pcfs * fsp)933 pc_syncfsnodes(struct pcfs *fsp)
934 {
935 struct pchead *hp;
936 struct pcnode *pcp;
937 int error;
938
939 if (error = pc_lockfs(fsp, 0, 0))
940 return (error);
941
942 if (!(error = pc_syncfat(fsp))) {
943 hp = pcfhead;
944 while (hp < & pcfhead [ NPCHASH ]) {
945 rw_enter(&pcnodes_lock, RW_READER);
946 pcp = hp->pch_forw;
947 while (pcp != (struct pcnode *)hp) {
948 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
949 if (error = pc_nodesync(pcp))
950 break;
951 pcp = pcp -> pc_forw;
952 }
953 rw_exit(&pcnodes_lock);
954 if (error)
955 break;
956 hp++;
957 }
958 }
959 pc_unlockfs(fsp);
960 return (error);
961 }
962
963 /*
964 * Flush any pending I/O.
965 */
966 static int
pcfs_sync(struct vfs * vfsp,short flag,struct cred * cr)967 pcfs_sync(struct vfs *vfsp, short flag, struct cred *cr)
968 {
969 struct pcfs *fsp;
970 int error = 0;
971
972 /* this prevents the filesystem from being umounted. */
973 mutex_enter(&pcfslock);
974 if (vfsp != NULL) {
975 fsp = VFSTOPCFS(vfsp);
976 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
977 error = pc_syncfsnodes(fsp);
978 } else {
979 rw_enter(&pcnodes_lock, RW_WRITER);
980 pc_diskchanged(fsp);
981 rw_exit(&pcnodes_lock);
982 error = EIO;
983 }
984 } else {
985 fsp = pc_mounttab;
986 while (fsp != NULL) {
987 if (fsp->pcfs_flags & PCFS_IRRECOV) {
988 rw_enter(&pcnodes_lock, RW_WRITER);
989 pc_diskchanged(fsp);
990 rw_exit(&pcnodes_lock);
991 error = EIO;
992 break;
993 }
994 error = pc_syncfsnodes(fsp);
995 if (error) break;
996 fsp = fsp->pcfs_nxt;
997 }
998 }
999 mutex_exit(&pcfslock);
1000 return (error);
1001 }
1002
1003 static int
pcfs_syncfs(vfs_t * vfsp,uint64_t flags,cred_t * cr)1004 pcfs_syncfs(vfs_t *vfsp, uint64_t flags, cred_t *cr)
1005 {
1006 int ret;
1007 struct pcfs *fsp;
1008
1009 if (flags != 0) {
1010 return (ENOTSUP);
1011 }
1012
1013 fsp = VFSTOPCFS(vfsp);
1014 if ((fsp->pcfs_flags & PCFS_IRRECOV) == 0) {
1015 ret = pc_syncfsnodes(fsp);
1016 } else {
1017 rw_enter(&pcnodes_lock, RW_WRITER);
1018 pc_diskchanged(fsp);
1019 rw_exit(&pcnodes_lock);
1020 ret = EIO;
1021 }
1022
1023 return (ret);
1024 }
1025
1026 int
pc_lockfs(struct pcfs * fsp,int diskchanged,int releasing)1027 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
1028 {
1029 int err;
1030
1031 if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
1032 return (EIO);
1033
1034 if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
1035 fsp->pcfs_count++;
1036 } else {
1037 mutex_enter(&fsp->pcfs_lock);
1038 if (fsp->pcfs_flags & PCFS_LOCKED)
1039 panic("pc_lockfs");
1040 /*
1041 * We check the IRRECOV bit again just in case somebody
1042 * snuck past the initial check but then got held up before
1043 * they could grab the lock. (And in the meantime someone
1044 * had grabbed the lock and set the bit)
1045 */
1046 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1047 if ((err = pc_getfat(fsp))) {
1048 mutex_exit(&fsp->pcfs_lock);
1049 return (err);
1050 }
1051 }
1052 fsp->pcfs_flags |= PCFS_LOCKED;
1053 fsp->pcfs_owner = curthread;
1054 fsp->pcfs_count++;
1055 }
1056 return (0);
1057 }
1058
1059 void
pc_unlockfs(struct pcfs * fsp)1060 pc_unlockfs(struct pcfs *fsp)
1061 {
1062
1063 if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1064 panic("pc_unlockfs");
1065 if (--fsp->pcfs_count < 0)
1066 panic("pc_unlockfs: count");
1067 if (fsp->pcfs_count == 0) {
1068 fsp->pcfs_flags &= ~PCFS_LOCKED;
1069 fsp->pcfs_owner = 0;
1070 mutex_exit(&fsp->pcfs_lock);
1071 }
1072 }
1073
1074 int
pc_syncfat(struct pcfs * fsp)1075 pc_syncfat(struct pcfs *fsp)
1076 {
1077 struct buf *bp;
1078 int nfat;
1079 int error = 0;
1080 struct fat_od_fsi *fsinfo_disk;
1081
1082 if ((fsp->pcfs_fatp == NULL) ||
1083 !(fsp->pcfs_flags & PCFS_FATMOD))
1084 return (0);
1085 /*
1086 * write out all copies of FATs
1087 */
1088 fsp->pcfs_flags &= ~PCFS_FATMOD;
1089 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1090 for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1091 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1092 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1093 if (error) {
1094 pc_mark_irrecov(fsp);
1095 return (EIO);
1096 }
1097 }
1098 pc_clear_fatchanges(fsp);
1099
1100 /*
1101 * Write out fsinfo sector.
1102 */
1103 if (IS_FAT32(fsp)) {
1104 bp = bread(fsp->pcfs_xdev,
1105 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1106 if (bp->b_flags & (B_ERROR | B_STALE)) {
1107 error = geterror(bp);
1108 }
1109 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1110 if (!error && FSISIG_OK(fsinfo_disk)) {
1111 fsinfo_disk->fsi_incore.fs_free_clusters =
1112 LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1113 fsinfo_disk->fsi_incore.fs_next_free =
1114 LE_32(FSINFO_UNKNOWN);
1115 bwrite2(bp);
1116 error = geterror(bp);
1117 }
1118 brelse(bp);
1119 if (error) {
1120 pc_mark_irrecov(fsp);
1121 return (EIO);
1122 }
1123 }
1124 return (0);
1125 }
1126
1127 void
pc_invalfat(struct pcfs * fsp)1128 pc_invalfat(struct pcfs *fsp)
1129 {
1130 struct pcfs *xfsp;
1131 int mount_cnt = 0;
1132
1133 if (fsp->pcfs_fatp == NULL)
1134 panic("pc_invalfat");
1135 /*
1136 * Release FAT
1137 */
1138 kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1139 fsp->pcfs_fatp = NULL;
1140 kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1141 fsp->pcfs_fat_changemap = NULL;
1142 /*
1143 * Invalidate all the blocks associated with the device.
1144 * Not needed if stateless.
1145 */
1146 for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1147 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1148 mount_cnt++;
1149
1150 if (!mount_cnt)
1151 binval(fsp->pcfs_xdev);
1152 /*
1153 * close mounted device
1154 */
1155 (void) VOP_CLOSE(fsp->pcfs_devvp,
1156 (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1157 1, (offset_t)0, CRED(), NULL);
1158 }
1159
1160 void
pc_badfs(struct pcfs * fsp)1161 pc_badfs(struct pcfs *fsp)
1162 {
1163 cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1164 getmajor(fsp->pcfs_devvp->v_rdev),
1165 getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1166 }
1167
1168 /*
1169 * The problem with supporting NFS on the PCFS filesystem is that there
1170 * is no good place to keep the generation number. The only possible
1171 * place is inside a directory entry. There are a few words that we
1172 * don't use - they store NT & OS/2 attributes, and the creation/last access
1173 * time of the file - but it seems wrong to use them. In addition, directory
1174 * entries come and go. If a directory is removed completely, its directory
1175 * blocks are freed and the generation numbers are lost. Whereas in ufs,
1176 * inode blocks are dedicated for inodes, so the generation numbers are
1177 * permanently kept on the disk.
1178 */
1179 static int
pcfs_vget(struct vfs * vfsp,struct vnode ** vpp,struct fid * fidp)1180 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1181 {
1182 struct pcnode *pcp;
1183 struct pc_fid *pcfid;
1184 struct pcfs *fsp;
1185 struct pcdir *ep;
1186 daddr_t eblkno;
1187 int eoffset;
1188 struct buf *bp;
1189 int error;
1190 pc_cluster32_t cn;
1191
1192 pcfid = (struct pc_fid *)fidp;
1193 fsp = VFSTOPCFS(vfsp);
1194
1195 error = pc_lockfs(fsp, 0, 0);
1196 if (error) {
1197 *vpp = NULL;
1198 return (error);
1199 }
1200
1201 if (pcfid->pcfid_block == 0) {
1202 pcp = pc_getnode(fsp, (daddr_t)0, 0, NULL);
1203 pcp->pc_flags |= PC_EXTERNAL;
1204 *vpp = PCTOV(pcp);
1205 pc_unlockfs(fsp);
1206 return (0);
1207 }
1208 eblkno = pcfid->pcfid_block;
1209 eoffset = pcfid->pcfid_offset;
1210
1211 if ((pc_dbtocl(fsp,
1212 eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1213 (eoffset > fsp->pcfs_clsize)) {
1214 pc_unlockfs(fsp);
1215 *vpp = NULL;
1216 return (EINVAL);
1217 }
1218
1219 if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1220 < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1221 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1222 fsp->pcfs_clsize);
1223 } else {
1224 /*
1225 * This is an access "backwards" into the FAT12/FAT16
1226 * root directory. A better code structure would
1227 * significantly improve maintainability here ...
1228 */
1229 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1230 (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1231 }
1232 if (bp->b_flags & (B_ERROR | B_STALE)) {
1233 error = geterror(bp);
1234 brelse(bp);
1235 if (error)
1236 pc_mark_irrecov(fsp);
1237 *vpp = NULL;
1238 pc_unlockfs(fsp);
1239 return (error);
1240 }
1241 ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1242 /*
1243 * Ok, if this is a valid file handle that we gave out,
1244 * then simply ensuring that the creation time matches,
1245 * the entry has not been deleted, and it has a valid first
1246 * character should be enough.
1247 *
1248 * Unfortunately, verifying that the <blkno, offset> _still_
1249 * refers to a directory entry is not easy, since we'd have
1250 * to search _all_ directories starting from root to find it.
1251 * That's a high price to pay just in case somebody is forging
1252 * file handles. So instead we verify that as much of the
1253 * entry is valid as we can:
1254 *
1255 * 1. The starting cluster is 0 (unallocated) or valid
1256 * 2. It is not an LFN entry
1257 * 3. It is not hidden (unless mounted as such)
1258 * 4. It is not the label
1259 */
1260 cn = pc_getstartcluster(fsp, ep);
1261 /*
1262 * if the starting cluster is valid, but not valid according
1263 * to pc_validcl(), force it to be to simplify the following if.
1264 */
1265 if (cn == 0)
1266 cn = PCF_FIRSTCLUSTER;
1267 if (IS_FAT32(fsp)) {
1268 if (cn >= PCF_LASTCLUSTER32)
1269 cn = PCF_FIRSTCLUSTER;
1270 } else {
1271 if (cn >= PCF_LASTCLUSTER)
1272 cn = PCF_FIRSTCLUSTER;
1273 }
1274 if ((!pc_validcl(fsp, cn)) ||
1275 (PCDL_IS_LFN(ep)) ||
1276 (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1277 ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1278 bp->b_flags |= B_STALE | B_AGE;
1279 brelse(bp);
1280 pc_unlockfs(fsp);
1281 return (EINVAL);
1282 }
1283 if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1284 (ep->pcd_filename[0] != PCD_ERASED) &&
1285 (pc_validchar(ep->pcd_filename[0]) ||
1286 (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1287 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1288 pcp->pc_flags |= PC_EXTERNAL;
1289 *vpp = PCTOV(pcp);
1290 } else {
1291 *vpp = NULL;
1292 }
1293 bp->b_flags |= B_STALE | B_AGE;
1294 brelse(bp);
1295 pc_unlockfs(fsp);
1296 return (0);
1297 }
1298
1299 /*
1300 * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1301 * a meg), so we can't bread() it all in at once. This routine reads a
1302 * fat a chunk at a time.
1303 */
1304 static int
pc_readfat(struct pcfs * fsp,uchar_t * fatp)1305 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1306 {
1307 struct buf *bp;
1308 size_t off;
1309 size_t readsize;
1310 daddr_t diskblk;
1311 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1312 daddr_t start = fsp->pcfs_fatstart;
1313
1314 readsize = fsp->pcfs_clsize;
1315 for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1316 if (readsize > (fatsize - off))
1317 readsize = fatsize - off;
1318 diskblk = pc_dbdaddr(fsp, start +
1319 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1320 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1321 if (bp->b_flags & (B_ERROR | B_STALE)) {
1322 brelse(bp);
1323 return (EIO);
1324 }
1325 bp->b_flags |= B_STALE | B_AGE;
1326 bcopy(bp->b_un.b_addr, fatp, readsize);
1327 brelse(bp);
1328 }
1329 return (0);
1330 }
1331
1332 /*
1333 * We write the FAT out a _lot_, in order to make sure that it
1334 * is up-to-date. But on a FAT32 system (large drive, small clusters)
1335 * the FAT might be a couple of megabytes, and writing it all out just
1336 * because we created or deleted a small file is painful (especially
1337 * since we do it for each alternate FAT too). So instead, for FAT16 and
1338 * FAT32 we only write out the bit that has changed. We don't clear
1339 * the 'updated' fields here because the caller might be writing out
1340 * several FATs, so the caller must use pc_clear_fatchanges() after
1341 * all FATs have been updated.
1342 * This function doesn't take "start" from fsp->pcfs_dosstart because
1343 * callers can use it to write either the primary or any of the alternate
1344 * FAT tables.
1345 */
1346 static int
pc_writefat(struct pcfs * fsp,daddr_t start)1347 pc_writefat(struct pcfs *fsp, daddr_t start)
1348 {
1349 struct buf *bp;
1350 size_t off;
1351 size_t writesize;
1352 int error;
1353 uchar_t *fatp = fsp->pcfs_fatp;
1354 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1355
1356 writesize = fsp->pcfs_clsize;
1357 for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1358 if (writesize > (fatsize - off))
1359 writesize = fatsize - off;
1360 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1361 continue;
1362 }
1363 bp = ngeteblk(writesize);
1364 bp->b_edev = fsp->pcfs_xdev;
1365 bp->b_dev = cmpdev(bp->b_edev);
1366 bp->b_blkno = start + pc_dbdaddr(fsp,
1367 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1368 DTRACE_PROBE3(pc_writefat, longlong_t, bp->b_blkno,
1369 uchar_t *, fatp,
1370 size_t, writesize);
1371 bcopy(fatp, bp->b_un.b_addr, writesize);
1372 bwrite2(bp);
1373 error = geterror(bp);
1374 brelse(bp);
1375 if (error) {
1376 return (error);
1377 }
1378 }
1379 return (0);
1380 }
1381
1382 /*
1383 * Mark the FAT cluster that 'cn' is stored in as modified.
1384 */
1385 void
pc_mark_fat_updated(struct pcfs * fsp,pc_cluster32_t cn)1386 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1387 {
1388 pc_cluster32_t bn;
1389 size_t size;
1390
1391 /* which fat block is the cluster number stored in? */
1392 if (IS_FAT32(fsp)) {
1393 size = sizeof (pc_cluster32_t);
1394 bn = pc_lblkno(fsp, cn * size);
1395 fsp->pcfs_fat_changemap[bn] = 1;
1396 } else if (IS_FAT16(fsp)) {
1397 size = sizeof (pc_cluster16_t);
1398 bn = pc_lblkno(fsp, cn * size);
1399 fsp->pcfs_fat_changemap[bn] = 1;
1400 } else {
1401 offset_t off;
1402 pc_cluster32_t nbn;
1403
1404 ASSERT(IS_FAT12(fsp));
1405 off = cn + (cn >> 1);
1406 bn = pc_lblkno(fsp, off);
1407 fsp->pcfs_fat_changemap[bn] = 1;
1408 /* does this field wrap into the next fat cluster? */
1409 nbn = pc_lblkno(fsp, off + 1);
1410 if (nbn != bn) {
1411 fsp->pcfs_fat_changemap[nbn] = 1;
1412 }
1413 }
1414 }
1415
1416 /*
1417 * return whether the FAT cluster 'bn' is updated and needs to
1418 * be written out.
1419 */
1420 int
pc_fat_is_changed(struct pcfs * fsp,pc_cluster32_t bn)1421 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1422 {
1423 return (fsp->pcfs_fat_changemap[bn] == 1);
1424 }
1425
1426 /*
1427 * Implementation of VFS_FREEVFS() to support forced umounts.
1428 * This is called by the vfs framework after umount, to trigger
1429 * the release of any resources still associated with the given
1430 * vfs_t once the need to keep them has gone away.
1431 */
1432 void
pcfs_freevfs(vfs_t * vfsp)1433 pcfs_freevfs(vfs_t *vfsp)
1434 {
1435 struct pcfs *fsp = VFSTOPCFS(vfsp);
1436
1437 mutex_enter(&pcfslock);
1438 /*
1439 * Purging the FAT closes the device - can't do any more
1440 * I/O after this.
1441 */
1442 if (fsp->pcfs_fatp != NULL)
1443 pc_invalfat(fsp);
1444 mutex_exit(&pcfslock);
1445
1446 VN_RELE(fsp->pcfs_devvp);
1447 mutex_destroy(&fsp->pcfs_lock);
1448 kmem_free(fsp, sizeof (*fsp));
1449
1450 /*
1451 * Allow _fini() to succeed now, if so desired.
1452 */
1453 atomic_dec_32(&pcfs_mountcount);
1454 }
1455
1456
1457 /*
1458 * PC-style partition parsing and FAT BPB identification/validation code.
1459 * The partition parsers here assume:
1460 * - a FAT filesystem will be in a partition that has one of a set of
1461 * recognized partition IDs
1462 * - the user wants the 'numbering' (C:, D:, ...) that one would get
1463 * on MSDOS 6.x.
1464 * That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1465 * will not factor in the enumeration.
1466 * These days, such assumptions should be revisited. FAT is no longer the
1467 * only game in 'PC town'.
1468 */
1469 /*
1470 * isDosDrive()
1471 * Boolean function. Give it the systid field for an fdisk partition
1472 * and it decides if that's a systid that describes a DOS drive. We
1473 * use systid values defined in sys/dktp/fdisk.h.
1474 */
1475 static int
isDosDrive(uchar_t checkMe)1476 isDosDrive(uchar_t checkMe)
1477 {
1478 return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1479 (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1480 (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1481 (checkMe == DIAGPART));
1482 }
1483
1484
1485 /*
1486 * isDosExtended()
1487 * Boolean function. Give it the systid field for an fdisk partition
1488 * and it decides if that's a systid that describes an extended DOS
1489 * partition.
1490 */
1491 static int
isDosExtended(uchar_t checkMe)1492 isDosExtended(uchar_t checkMe)
1493 {
1494 return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1495 }
1496
1497
1498 /*
1499 * isBootPart()
1500 * Boolean function. Give it the systid field for an fdisk partition
1501 * and it decides if that's a systid that describes a Solaris boot
1502 * partition.
1503 */
1504 static int
isBootPart(uchar_t checkMe)1505 isBootPart(uchar_t checkMe)
1506 {
1507 return (checkMe == X86BOOT);
1508 }
1509
1510
1511 /*
1512 * noLogicalDrive()
1513 * Display error message about not being able to find a logical
1514 * drive.
1515 */
1516 static void
noLogicalDrive(int ldrive)1517 noLogicalDrive(int ldrive)
1518 {
1519 if (ldrive == BOOT_PARTITION_DRIVE) {
1520 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1521 } else {
1522 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1523 }
1524 }
1525
1526
1527 /*
1528 * findTheDrive()
1529 * Discover offset of the requested logical drive, and return
1530 * that offset (startSector), the systid of that drive (sysid),
1531 * and a buffer pointer (bp), with the buffer contents being
1532 * the first sector of the logical drive (i.e., the sector that
1533 * contains the BPB for that drive).
1534 *
1535 * Note: this code is not capable of addressing >2TB disks, as it uses
1536 * daddr_t not diskaddr_t, some of the calculations would overflow
1537 */
1538 #define COPY_PTBL(mbr, ptblp) \
1539 bcopy(&(((struct mboot *)(mbr))->parts), (ptblp), \
1540 FD_NUMPART * sizeof (struct ipart))
1541
1542 static int
findTheDrive(struct pcfs * fsp,buf_t ** bp)1543 findTheDrive(struct pcfs *fsp, buf_t **bp)
1544 {
1545 int ldrive = fsp->pcfs_ldrive;
1546 dev_t dev = fsp->pcfs_devvp->v_rdev;
1547
1548 struct ipart dosp[FD_NUMPART]; /* incore fdisk partition structure */
1549 daddr_t lastseek = 0; /* Disk block we sought previously */
1550 daddr_t diskblk = 0; /* Disk block to get */
1551 daddr_t xstartsect; /* base of Extended DOS partition */
1552 int logicalDriveCount = 0; /* Count of logical drives seen */
1553 int extendedPart = -1; /* index of extended dos partition */
1554 int primaryPart = -1; /* index of primary dos partition */
1555 int bootPart = -1; /* index of a Solaris boot partition */
1556 uint32_t xnumsect = 0; /* length of extended DOS partition */
1557 int driveIndex; /* computed FDISK table index */
1558 daddr_t startsec;
1559 len_t mediasize;
1560 int i;
1561 /*
1562 * Count of drives in the current extended partition's
1563 * FDISK table, and indexes of the drives themselves.
1564 */
1565 int extndDrives[FD_NUMPART];
1566 int numDrives = 0;
1567
1568 /*
1569 * Count of drives (beyond primary) in master boot record's
1570 * FDISK table, and indexes of the drives themselves.
1571 */
1572 int extraDrives[FD_NUMPART];
1573 int numExtraDrives = 0;
1574
1575 /*
1576 * "ldrive == 0" should never happen, as this is a request to
1577 * mount the physical device (and ignore partitioning). The code
1578 * in pcfs_mount() should have made sure that a logical drive number
1579 * is at least 1, meaning we're looking for drive "C:". It is not
1580 * safe (and a bug in the callers of this function) to request logical
1581 * drive number 0; we could ASSERT() but a graceful EIO is a more
1582 * polite way.
1583 */
1584 if (ldrive == 0) {
1585 cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1586 noLogicalDrive(ldrive);
1587 return (EIO);
1588 }
1589
1590 /*
1591 * Copy from disk block into memory aligned structure for fdisk usage.
1592 */
1593 COPY_PTBL((*bp)->b_un.b_addr, dosp);
1594
1595 /*
1596 * This check is ok because a FAT BPB and a master boot record (MBB)
1597 * have the same signature, in the same position within the block.
1598 */
1599 if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1600 cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1601 "device (%x.%x):%d\n",
1602 getmajor(dev), getminor(dev), ldrive);
1603 return (EINVAL);
1604 }
1605
1606 /*
1607 * Get a summary of what is in the Master FDISK table.
1608 * Normally we expect to find one partition marked as a DOS drive.
1609 * This partition is the one Windows calls the primary dos partition.
1610 * If the machine has any logical drives then we also expect
1611 * to find a partition marked as an extended DOS partition.
1612 *
1613 * Sometimes we'll find multiple partitions marked as DOS drives.
1614 * The Solaris fdisk program allows these partitions
1615 * to be created, but Windows fdisk no longer does. We still need
1616 * to support these, though, since Windows does. We also need to fix
1617 * our fdisk to behave like the Windows version.
1618 *
1619 * It turns out that some off-the-shelf media have *only* an
1620 * Extended partition, so we need to deal with that case as well.
1621 *
1622 * Only a single (the first) Extended or Boot Partition will
1623 * be recognized. Any others will be ignored.
1624 */
1625 for (i = 0; i < FD_NUMPART; i++) {
1626 DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1627 uint_t, (uint_t)dosp[i].systid,
1628 uint_t, LE_32(dosp[i].relsect),
1629 uint_t, LE_32(dosp[i].numsect));
1630
1631 if (isDosDrive(dosp[i].systid)) {
1632 if (primaryPart < 0) {
1633 logicalDriveCount++;
1634 primaryPart = i;
1635 } else {
1636 extraDrives[numExtraDrives++] = i;
1637 }
1638 continue;
1639 }
1640 if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1641 extendedPart = i;
1642 continue;
1643 }
1644 if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1645 bootPart = i;
1646 continue;
1647 }
1648 }
1649
1650 if (ldrive == BOOT_PARTITION_DRIVE) {
1651 if (bootPart < 0) {
1652 noLogicalDrive(ldrive);
1653 return (EINVAL);
1654 }
1655 startsec = LE_32(dosp[bootPart].relsect);
1656 mediasize = LE_32(dosp[bootPart].numsect);
1657 goto found;
1658 }
1659
1660 if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1661 startsec = LE_32(dosp[primaryPart].relsect);
1662 mediasize = LE_32(dosp[primaryPart].numsect);
1663 goto found;
1664 }
1665
1666 /*
1667 * We are not looking for the C: drive (or the primary drive
1668 * was not found), so we had better have an extended partition
1669 * or extra drives in the Master FDISK table.
1670 */
1671 if ((extendedPart < 0) && (numExtraDrives == 0)) {
1672 cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1673 noLogicalDrive(ldrive);
1674 return (EINVAL);
1675 }
1676
1677 if (extendedPart >= 0) {
1678 diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1679 xnumsect = LE_32(dosp[extendedPart].numsect);
1680 do {
1681 /*
1682 * If the seek would not cause us to change
1683 * position on the drive, then we're out of
1684 * extended partitions to examine.
1685 */
1686 if (diskblk == lastseek)
1687 break;
1688 logicalDriveCount += numDrives;
1689 /*
1690 * Seek the next extended partition, and find
1691 * logical drives within it.
1692 */
1693 brelse(*bp);
1694 /*
1695 * bread() block numbers are multiples of DEV_BSIZE
1696 * but the device sector size (the unit of partitioning)
1697 * might be larger than that; pcfs_get_device_info()
1698 * has calculated the multiplicator for us.
1699 */
1700 *bp = bread(dev,
1701 pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1702 if ((*bp)->b_flags & B_ERROR) {
1703 return (EIO);
1704 }
1705
1706 lastseek = diskblk;
1707 COPY_PTBL((*bp)->b_un.b_addr, dosp);
1708 if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1709 cmn_err(CE_NOTE, "!pcfs: "
1710 "extended partition table signature err, "
1711 "device (%x.%x):%d, LBA %u",
1712 getmajor(dev), getminor(dev), ldrive,
1713 (uint_t)pc_dbdaddr(fsp, diskblk));
1714 return (EINVAL);
1715 }
1716 /*
1717 * Count up drives, and track where the next
1718 * extended partition is in case we need it. We
1719 * are expecting only one extended partition. If
1720 * there is more than one we'll only go to the
1721 * first one we see, but warn about ignoring.
1722 */
1723 numDrives = 0;
1724 for (i = 0; i < FD_NUMPART; i++) {
1725 DTRACE_PROBE4(extendedpart,
1726 struct pcfs *, fsp,
1727 uint_t, (uint_t)dosp[i].systid,
1728 uint_t, LE_32(dosp[i].relsect),
1729 uint_t, LE_32(dosp[i].numsect));
1730 if (isDosDrive(dosp[i].systid)) {
1731 extndDrives[numDrives++] = i;
1732 } else if (isDosExtended(dosp[i].systid)) {
1733 if (diskblk != lastseek) {
1734 /*
1735 * Already found an extended
1736 * partition in this table.
1737 */
1738 cmn_err(CE_NOTE,
1739 "!pcfs: ignoring unexpected"
1740 " additional extended"
1741 " partition");
1742 } else {
1743 diskblk = xstartsect +
1744 LE_32(dosp[i].relsect);
1745 }
1746 }
1747 }
1748 } while (ldrive > logicalDriveCount + numDrives);
1749
1750 ASSERT(numDrives <= FD_NUMPART);
1751
1752 if (ldrive <= logicalDriveCount + numDrives) {
1753 /*
1754 * The number of logical drives we've found thus
1755 * far is enough to get us to the one we were
1756 * searching for.
1757 */
1758 driveIndex = logicalDriveCount + numDrives - ldrive;
1759 mediasize =
1760 LE_32(dosp[extndDrives[driveIndex]].numsect);
1761 startsec =
1762 LE_32(dosp[extndDrives[driveIndex]].relsect) +
1763 lastseek;
1764 if (startsec > (xstartsect + xnumsect)) {
1765 cmn_err(CE_NOTE, "!pcfs: extended partition "
1766 "values bad");
1767 return (EINVAL);
1768 }
1769 goto found;
1770 } else {
1771 /*
1772 * We ran out of extended dos partition
1773 * drives. The only hope now is to go
1774 * back to extra drives defined in the master
1775 * fdisk table. But we overwrote that table
1776 * already, so we must load it in again.
1777 */
1778 logicalDriveCount += numDrives;
1779 brelse(*bp);
1780 ASSERT(fsp->pcfs_dosstart == 0);
1781 *bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1782 fsp->pcfs_secsize);
1783 if ((*bp)->b_flags & B_ERROR) {
1784 return (EIO);
1785 }
1786 COPY_PTBL((*bp)->b_un.b_addr, dosp);
1787 }
1788 }
1789 /*
1790 * Still haven't found the drive, is it an extra
1791 * drive defined in the main FDISK table?
1792 */
1793 if (ldrive <= logicalDriveCount + numExtraDrives) {
1794 driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1795 ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1796 mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1797 startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1798 goto found;
1799 }
1800 /*
1801 * Still haven't found the drive, and there is
1802 * nowhere else to look.
1803 */
1804 noLogicalDrive(ldrive);
1805 return (EINVAL);
1806
1807 found:
1808 /*
1809 * We need this value in units of sectorsize, because PCFS' internal
1810 * offset calculations go haywire for > 512Byte sectors unless all
1811 * pcfs_.*start values are in units of sectors.
1812 * So, assign before the capacity check (that's done in DEV_BSIZE)
1813 */
1814 fsp->pcfs_dosstart = startsec;
1815
1816 /*
1817 * convert from device sectors to proper units:
1818 * - starting sector: DEV_BSIZE (as argument to bread())
1819 * - media size: Bytes
1820 */
1821 startsec = pc_dbdaddr(fsp, startsec);
1822 mediasize *= fsp->pcfs_secsize;
1823
1824 /*
1825 * some additional validation / warnings in case the partition table
1826 * and the actual media capacity are not in accordance ...
1827 */
1828 if (fsp->pcfs_mediasize != 0) {
1829 diskaddr_t startoff =
1830 (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1831
1832 if (startoff >= fsp->pcfs_mediasize ||
1833 startoff + mediasize > fsp->pcfs_mediasize) {
1834 cmn_err(CE_WARN,
1835 "!pcfs: partition size (LBA start %u, %lld bytes, "
1836 "device (%x.%x):%d) smaller than "
1837 "mediasize (%lld bytes).\n"
1838 "filesystem may be truncated, access errors "
1839 "may result.\n",
1840 (uint_t)startsec, (long long)mediasize,
1841 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1842 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1843 }
1844 } else {
1845 fsp->pcfs_mediasize = mediasize;
1846 }
1847
1848 return (0);
1849 }
1850
1851
1852 static fattype_t
secondaryBPBChecks(struct pcfs * fsp,uchar_t * bpb,size_t secsize)1853 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1854 {
1855 uint32_t ncl = fsp->pcfs_ncluster;
1856
1857 if (ncl <= 4096) {
1858 if (bpb_get_FatSz16(bpb) == 0)
1859 return (FAT_UNKNOWN);
1860
1861 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1862 bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1863 return (FAT12);
1864 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1865 return (FAT12);
1866 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1867 return (FAT16);
1868
1869 switch (bpb_get_Media(bpb)) {
1870 case SS8SPT:
1871 case DS8SPT:
1872 case SS9SPT:
1873 case DS9SPT:
1874 case DS18SPT:
1875 case DS9_15SPT:
1876 /*
1877 * Is this reliable - all floppies are FAT12 ?
1878 */
1879 return (FAT12);
1880 case MD_FIXED:
1881 /*
1882 * Is this reliable - disks are always FAT16 ?
1883 */
1884 return (FAT16);
1885 default:
1886 break;
1887 }
1888 } else if (ncl <= 65536) {
1889 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1890 return (FAT32);
1891 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1892 return (FAT32);
1893 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1894 return (FAT32);
1895
1896 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1897 return (FAT16);
1898 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1899 return (FAT16);
1900 }
1901
1902 /*
1903 * We don't know
1904 */
1905 return (FAT_UNKNOWN);
1906 }
1907
1908 /*
1909 * Check to see if the BPB we found is correct.
1910 *
 * This looks far more complicated than it needs to be for pure structural
1912 * validation. The reason for this is that parseBPB() is also used for
1913 * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1914 * BPB fields (do not) have 'known good' values, even if we (do not) reject
1915 * the BPB when attempting to mount the filesystem.
1916 *
1917 * Real-world usage of FAT shows there are a lot of corner-case situations
1918 * and, following the specification strictly, invalid filesystems out there.
1919 * Known are situations such as:
1920 * - FAT12/FAT16 filesystems with garbage in either totsec16/32
1921 * instead of the zero in one of the fields mandated by the spec
1922 * - filesystems that claim to be larger than the partition they're in
1923 * - filesystems without valid media descriptor
1924 * - FAT32 filesystems with RootEntCnt != 0
1925 * - FAT32 filesystems with less than 65526 clusters
1926 * - FAT32 filesystems without valid FSI sector
1927 * - FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1928 *
1929 * Such filesystems are accessible by PCFS - if it'd know to start with that
1930 * the filesystem should be treated as a specific FAT type. Before S10, it
1931 * relied on the PC/fdisk partition type for the purpose and almost completely
1932 * ignored the BPB; now it ignores the partition type for anything else but
1933 * logical drive enumeration, which can result in rejection of (invalid)
1934 * FAT32 - if the partition ID says FAT32, but the filesystem, for example
1935 * has less than 65526 clusters.
1936 *
1937 * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1938 * not possible to allow all such mostly-compliant filesystems in unless one
1939 * accepts false positives (definitely invalid filesystems that cause problems
1940 * later). This at least allows to pinpoint why the mount failed.
1941 *
 * Due to the use of FAT on removable media, all relaxations of the rules
 * here need to be carefully evaluated regarding potential effects on PCFS
1944 * resilience. A faulty/"mis-crafted" filesystem must not cause a panic, so
1945 * beware.
1946 */
1947 static int
parseBPB(struct pcfs * fsp,uchar_t * bpb,int * valid)1948 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1949 {
1950 fattype_t type;
1951
1952 uint32_t ncl; /* number of clusters in file area */
1953 uint32_t rec;
1954 uint32_t reserved;
1955 uint32_t fsisec, bkbootsec;
1956 blkcnt_t totsec, totsec16, totsec32, datasec;
1957 size_t fatsec, fatsec16, fatsec32, rdirsec;
1958 size_t secsize;
1959 len_t mediasize;
1960 uint64_t validflags = 0;
1961
1962 if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1963 validflags |= BPB_BPBSIG_OK;
1964
1965 rec = bpb_get_RootEntCnt(bpb);
1966 reserved = bpb_get_RsvdSecCnt(bpb);
1967 fsisec = bpb_get_FSInfo32(bpb);
1968 bkbootsec = bpb_get_BkBootSec32(bpb);
1969 totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1970 totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1971 fatsec16 = bpb_get_FatSz16(bpb);
1972 fatsec32 = bpb_get_FatSz32(bpb);
1973
1974 totsec = totsec16 ? totsec16 : totsec32;
1975 fatsec = fatsec16 ? fatsec16 : fatsec32;
1976
1977 secsize = bpb_get_BytesPerSec(bpb);
1978 if (!VALID_SECSIZE(secsize))
1979 secsize = fsp->pcfs_secsize;
1980 if (secsize != fsp->pcfs_secsize) {
1981 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1982 getmajor(fsp->pcfs_xdev),
1983 getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1984 PC_DPRINTF2(3, "!BPB secsize %d != "
1985 "autodetected media block size %d\n",
1986 (int)secsize, (int)fsp->pcfs_secsize);
1987 if (fsp->pcfs_ldrive) {
1988 /*
1989 * We've already attempted to parse the partition
1990 * table. If the block size used for that don't match
1991 * the PCFS sector size, we're hosed one way or the
1992 * other. Just try what happens.
1993 */
1994 secsize = fsp->pcfs_secsize;
1995 PC_DPRINTF1(3,
1996 "!pcfs: Using autodetected secsize %d\n",
1997 (int)secsize);
1998 } else {
1999 /*
2000 * This allows mounting lofi images of PCFS partitions
2001 * with sectorsize != DEV_BSIZE. We can't parse the
2002 * partition table on whole-disk images unless the
2003 * (undocumented) "secsize=..." mount option is used,
2004 * but at least this allows us to mount if we have
2005 * an image of a partition.
2006 */
2007 PC_DPRINTF1(3,
2008 "!pcfs: Using BPB secsize %d\n", (int)secsize);
2009 }
2010 }
2011
2012 if (fsp->pcfs_mediasize == 0) {
2013 mediasize = (len_t)totsec * (len_t)secsize;
2014 /*
2015 * This is not an error because not all devices support the
2016 * dkio(4I) mediasize queries, and/or not all devices are
2017 * partitioned. If we have not been able to figure out the
2018 * size of the underlaying medium, we have to trust the BPB.
2019 */
2020 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
2021 "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
2022 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2023 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
2024 } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
2025 cmn_err(CE_WARN,
2026 "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
2027 "FAT BPB mediasize (%lld Bytes).\n"
2028 "truncated filesystem on device (%x.%x):%d, access errors "
2029 "possible.\n",
2030 (long long)fsp->pcfs_mediasize,
2031 (long long)(totsec * (blkcnt_t)secsize),
2032 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2033 fsp->pcfs_ldrive);
2034 mediasize = fsp->pcfs_mediasize;
2035 } else {
2036 /*
2037 * This is actually ok. A FAT needs not occupy the maximum
2038 * space available in its partition, it can be shorter.
2039 */
2040 mediasize = (len_t)totsec * (len_t)secsize;
2041 }
2042
2043 /*
2044 * Since we let just about anything pass through this function,
2045 * fence against divide-by-zero here.
2046 */
2047 if (secsize)
2048 rdirsec = roundup(rec * 32, secsize) / secsize;
2049 else
2050 rdirsec = 0;
2051
2052 /*
2053 * This assignment is necessary before pc_dbdaddr() can first be
2054 * used. Must initialize the value here.
2055 */
2056 fsp->pcfs_secsize = secsize;
2057 fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2058
2059 fsp->pcfs_mediasize = mediasize;
2060
2061 fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2062 fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2063 fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2064 fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2065 fsp->pcfs_rdirsec = rdirsec;
2066
2067 /*
2068 * Remember: All PCFS offset calculations in sectors. Before I/O
2069 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2070 * necessary so that media with > 512Byte sector sizes work correctly.
2071 */
2072 fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2073 fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2074 fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2075 datasec = totsec -
2076 (blkcnt_t)fatsec * fsp->pcfs_numfat -
2077 (blkcnt_t)rdirsec -
2078 (blkcnt_t)reserved;
2079
2080 DTRACE_PROBE4(fatgeometry,
2081 blkcnt_t, totsec, size_t, fatsec,
2082 size_t, rdirsec, blkcnt_t, datasec);
2083
2084 /*
2085 * 'totsec' is taken directly from the BPB and guaranteed to fit
2086 * into a 32bit unsigned integer. The calculation of 'datasec',
2087 * on the other hand, could underflow for incorrect values in
2088 * rdirsec/reserved/fatsec. Check for that.
2089 * We also check that the BPB conforms to the FAT specification's
2090 * requirement that either of the 16/32bit total sector counts
2091 * must be zero.
2092 */
2093 if (totsec != 0 &&
2094 (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2095 datasec < totsec && datasec <= UINT32_MAX)
2096 validflags |= BPB_TOTSEC_OK;
2097
2098 if ((len_t)totsec * (len_t)secsize <= mediasize)
2099 validflags |= BPB_MEDIASZ_OK;
2100
2101 if (VALID_SECSIZE(secsize))
2102 validflags |= BPB_SECSIZE_OK;
2103 if (VALID_SPCL(fsp->pcfs_spcl))
2104 validflags |= BPB_SECPERCLUS_OK;
2105 if (VALID_CLSIZE(fsp->pcfs_clsize))
2106 validflags |= BPB_CLSIZE_OK;
2107 if (VALID_NUMFATS(fsp->pcfs_numfat))
2108 validflags |= BPB_NUMFAT_OK;
2109 if (VALID_RSVDSEC(reserved) && reserved < totsec)
2110 validflags |= BPB_RSVDSECCNT_OK;
2111 if (VALID_MEDIA(fsp->pcfs_mediadesc))
2112 validflags |= BPB_MEDIADESC_OK;
2113 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2114 validflags |= BPB_BOOTSIG16_OK;
2115 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2116 validflags |= BPB_BOOTSIG32_OK;
2117 if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2118 validflags |= BPB_FSTYPSTR16_OK;
2119 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2120 validflags |= BPB_FSTYPSTR32_OK;
2121 if (VALID_OEMNAME(bpb_OEMName(bpb)))
2122 validflags |= BPB_OEMNAME_OK;
2123 if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2124 validflags |= BPB_BKBOOTSEC_OK;
2125 if (fsisec > 0 && fsisec <= reserved)
2126 validflags |= BPB_FSISEC_OK;
2127 if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2128 validflags |= BPB_JMPBOOT_OK;
2129 if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2130 validflags |= BPB_FSVER_OK;
2131 if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2132 validflags |= BPB_VOLLAB16_OK;
2133 if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2134 validflags |= BPB_VOLLAB32_OK;
2135 if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2136 validflags |= BPB_EXTFLAGS_OK;
2137
2138 /*
2139 * Try to determine which FAT format to use.
2140 *
2141 * Calculate the number of clusters in order to determine
2142 * the type of FAT we are looking at. This is the only
2143 * recommended way of determining FAT type, though there
2144 * are other hints in the data, this is the best way.
2145 *
2146 * Since we let just about "anything" pass through this function
2147 * without early exits, fence against divide-by-zero here.
2148 *
2149 * datasec was already validated against UINT32_MAX so we know
2150 * the result will not overflow the 32bit calculation.
2151 */
2152 if (fsp->pcfs_spcl)
2153 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2154 else
2155 ncl = 0;
2156
2157 fsp->pcfs_ncluster = ncl;
2158
2159 /*
2160 * From the Microsoft FAT specification:
2161 * In the following example, when it says <, it does not mean <=.
2162 * Note also that the numbers are correct. The first number for
2163 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2164 * and the '<' signs are not wrong.
2165 *
2166 * We "specialdetect" the corner cases, and use at least one "extra"
2167 * criterion to decide whether it's FAT16 or FAT32 if the cluster
2168 * count is dangerously close to the boundaries.
2169 */
2170
2171 if (ncl <= PCF_FIRSTCLUSTER) {
2172 type = FAT_UNKNOWN;
2173 } else if (ncl < 4085) {
2174 type = FAT12;
2175 } else if (ncl <= 4096) {
2176 type = FAT_QUESTIONABLE;
2177 } else if (ncl < 65525) {
2178 type = FAT16;
2179 } else if (ncl <= 65536) {
2180 type = FAT_QUESTIONABLE;
2181 } else if (ncl < PCF_LASTCLUSTER32) {
2182 type = FAT32;
2183 } else {
2184 type = FAT_UNKNOWN;
2185 }
2186
2187 DTRACE_PROBE4(parseBPB__initial,
2188 struct pcfs *, fsp, unsigned char *, bpb,
2189 int, validflags, fattype_t, type);
2190
2191 recheck:
2192 fsp->pcfs_fatsec = fatsec;
2193
2194 /* Do some final sanity checks for each specific type of FAT */
2195 switch (type) {
2196 case FAT12:
2197 if (rec != 0)
2198 validflags |= BPB_ROOTENTCNT_OK;
2199 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2200 bpb_get_TotSec16(bpb) == 0)
2201 validflags |= BPB_TOTSEC16_OK;
2202 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2203 bpb_get_TotSec32(bpb) == 0)
2204 validflags |= BPB_TOTSEC32_OK;
2205 if (bpb_get_FatSz16(bpb) == fatsec)
2206 validflags |= BPB_FATSZ16_OK;
2207 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER)
2208 * 3 / 2)
2209 validflags |= BPB_FATSZ_OK;
2210 if (ncl < 4085)
2211 validflags |= BPB_NCLUSTERS_OK;
2212
2213 fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2214 fsp->pcfs_rootblksize =
2215 fsp->pcfs_rdirsec * secsize;
2216 fsp->pcfs_fsistart = 0;
2217
2218 if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2219 type = FAT_UNKNOWN;
2220 break;
2221 case FAT16:
2222 if (rec != 0)
2223 validflags |= BPB_ROOTENTCNT_OK;
2224 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2225 bpb_get_TotSec16(bpb) == 0)
2226 validflags |= BPB_TOTSEC16_OK;
2227 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2228 bpb_get_TotSec32(bpb) == 0)
2229 validflags |= BPB_TOTSEC32_OK;
2230 if (bpb_get_FatSz16(bpb) == fatsec)
2231 validflags |= BPB_FATSZ16_OK;
2232 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2)
2233 validflags |= BPB_FATSZ_OK;
2234 if (ncl >= 4085 && ncl < 65525)
2235 validflags |= BPB_NCLUSTERS_OK;
2236
2237 fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2238 fsp->pcfs_rootblksize =
2239 fsp->pcfs_rdirsec * secsize;
2240 fsp->pcfs_fsistart = 0;
2241
2242 if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2243 type = FAT_UNKNOWN;
2244 break;
2245 case FAT32:
2246 if (rec == 0)
2247 validflags |= BPB_ROOTENTCNT_OK;
2248 if (bpb_get_TotSec16(bpb) == 0)
2249 validflags |= BPB_TOTSEC16_OK;
2250 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2251 validflags |= BPB_TOTSEC32_OK;
2252 if (bpb_get_FatSz16(bpb) == 0)
2253 validflags |= BPB_FATSZ16_OK;
2254 if (bpb_get_FatSz32(bpb) == fatsec)
2255 validflags |= BPB_FATSZ32_OK;
2256 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4)
2257 validflags |= BPB_FATSZ_OK;
2258 if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2259 validflags |= BPB_NCLUSTERS_OK;
2260
2261 fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2262 fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2263 fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2264 if (validflags & BPB_FSISEC_OK)
2265 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2266 fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2267 if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2268 validflags |= BPB_ROOTCLUSTER_OK;
2269
2270 /*
2271 * Current PCFS code only works if 'pcfs_rdirstart'
2272 * contains the root cluster number on FAT32.
2273 * That's a mis-use and would better be changed.
2274 */
2275 fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2276
2277 if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2278 type = FAT_UNKNOWN;
2279 break;
2280 case FAT_QUESTIONABLE:
2281 type = secondaryBPBChecks(fsp, bpb, secsize);
2282 goto recheck;
2283 default:
2284 ASSERT(type == FAT_UNKNOWN);
2285 break;
2286 }
2287
2288 ASSERT(type != FAT_QUESTIONABLE);
2289
2290 fsp->pcfs_fattype = type;
2291
2292 if (valid)
2293 *valid = validflags;
2294
2295 DTRACE_PROBE4(parseBPB__final,
2296 struct pcfs *, fsp, unsigned char *, bpb,
2297 int, validflags, fattype_t, type);
2298
2299 if (type != FAT_UNKNOWN) {
2300 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2301 ASSERT(ISP2(secsize / DEV_BSIZE));
2302 return (1);
2303 }
2304
2305 return (0);
2306 }
2307
2308
2309 /*
2310 * Detect the device's native block size (sector size).
2311 *
2312 * Test whether the device is:
2313 * - a floppy device from a known controller type via DKIOCINFO
2314 * - a real floppy using the fd(4D) driver and capable of fdio(4I) ioctls
2315 * - a USB floppy drive (identified by drive geometry)
2316 *
2317 * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2318 * to minimize risks due to slow I/O and user hotplugging / device ejection.
2319 *
2320 * This might be a bit wasteful on kernel stack space; if anyone's
2321 * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2322 */
2323 static void
pcfs_device_getinfo(struct pcfs * fsp)2324 pcfs_device_getinfo(struct pcfs *fsp)
2325 {
2326 dev_t rdev = fsp->pcfs_xdev;
2327 int error;
2328 union {
2329 struct dk_minfo mi;
2330 struct dk_cinfo ci;
2331 struct dk_geom gi;
2332 struct fd_char fc;
2333 } arg; /* save stackspace ... */
2334 intptr_t argp = (intptr_t)&arg;
2335 ldi_handle_t lh;
2336 ldi_ident_t li;
2337 int isfloppy, isremoveable, ishotpluggable;
2338 cred_t *cr = CRED();
2339
2340 if (ldi_ident_from_dev(rdev, &li))
2341 goto out;
2342
2343 error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2344 ldi_ident_release(li);
2345 if (error)
2346 goto out;
2347
2348 /*
2349 * Not sure if this could possibly happen. It'd be a bit like
2350 * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2351 * expecting it, needs some thought if triggered ...
2352 */
2353 ASSERT(fsp->pcfs_xdev == rdev);
2354
2355 /*
2356 * Check for removeable/hotpluggable media.
2357 */
2358 if (ldi_ioctl(lh, DKIOCREMOVABLE,
2359 (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2360 isremoveable = 0;
2361 }
2362 if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2363 (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2364 ishotpluggable = 0;
2365 }
2366
2367 /*
2368 * Make sure we don't use "half-initialized" values if the ioctls fail.
2369 */
2370 if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2371 bzero(&arg, sizeof (arg));
2372 fsp->pcfs_mediasize = 0;
2373 } else {
2374 fsp->pcfs_mediasize =
2375 (len_t)arg.mi.dki_lbsize *
2376 (len_t)arg.mi.dki_capacity;
2377 }
2378
2379 if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2380 if (fsp->pcfs_secsize == 0) {
2381 fsp->pcfs_secsize = arg.mi.dki_lbsize;
2382 fsp->pcfs_sdshift =
2383 ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2384 } else {
2385 PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2386 "%d, device (%x.%x), different from user-provided "
2387 "%d. User override - ignoring autodetect result.\n",
2388 arg.mi.dki_lbsize,
2389 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2390 fsp->pcfs_secsize);
2391 }
2392 } else if (arg.mi.dki_lbsize) {
2393 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2394 "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2395 "Ignoring autodetect result.\n",
2396 arg.mi.dki_lbsize,
2397 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2398 }
2399
2400 /*
2401 * We treat the following media types as a floppy by default.
2402 */
2403 isfloppy =
2404 (arg.mi.dki_media_type == DK_FLOPPY ||
2405 arg.mi.dki_media_type == DK_ZIP ||
2406 arg.mi.dki_media_type == DK_JAZ);
2407
2408 /*
2409 * if this device understands fdio(4I) requests it's
2410 * obviously a floppy drive.
2411 */
2412 if (!isfloppy &&
2413 !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2414 isfloppy = 1;
2415
2416 /*
2417 * some devices we like to treat as floppies, but they don't
2418 * understand fdio(4I) requests.
2419 */
2420 if (!isfloppy &&
2421 !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2422 (arg.ci.dki_ctype == DKC_WDC2880 ||
2423 arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2424 arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2425 arg.ci.dki_ctype == DKC_INTEL82077))
2426 isfloppy = 1;
2427
2428 /*
2429 * This is the "final fallback" test - media with
2430 * 2 heads and 80 cylinders are assumed to be floppies.
2431 * This is normally true for USB floppy drives ...
2432 */
2433 if (!isfloppy &&
2434 !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2435 (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2436 isfloppy = 1;
2437
2438 /*
2439 * This is similar to the "old" PCFS code that sets this flag
2440 * just based on the media descriptor being 0xf8 (MD_FIXED).
2441 * Should be re-worked. We really need some specialcasing for
2442 * removeable media.
2443 */
2444 if (!isfloppy) {
2445 fsp->pcfs_flags |= PCFS_NOCHK;
2446 }
2447
2448 /*
2449 * We automatically disable access time updates if the medium is
2450 * removeable and/or hotpluggable, and the admin did not explicitly
2451 * request access time updates (via the "atime" mount option).
2452 * The majority of flash-based media should fit this category.
2453 * Minimizing write access extends the lifetime of your memory stick !
2454 */
2455 if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2456 (isremoveable || ishotpluggable | isfloppy)) {
2457 fsp->pcfs_flags |= PCFS_NOATIME;
2458 }
2459
2460 (void) ldi_close(lh, FREAD, cr);
2461 out:
2462 if (fsp->pcfs_secsize == 0) {
2463 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2464 "device (%x.%x) failed, no user-provided fallback. "
2465 "Using %d bytes.\n",
2466 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2467 DEV_BSIZE);
2468 fsp->pcfs_secsize = DEV_BSIZE;
2469 fsp->pcfs_sdshift = 0;
2470 }
2471 ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2472 ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2473 }
2474
2475 /*
2476 * Get the FAT type for the DOS medium.
2477 *
2478 * -------------------------
2479 * According to Microsoft:
2480 * The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2481 * count of clusters on the volume and nothing else.
2482 * -------------------------
2483 *
2484 */
2485 static int
pc_getfattype(struct pcfs * fsp)2486 pc_getfattype(struct pcfs *fsp)
2487 {
2488 int error = 0;
2489 buf_t *bp = NULL;
2490 struct vnode *devvp = fsp->pcfs_devvp;
2491 dev_t dev = devvp->v_rdev;
2492
2493 /*
2494 * Detect the native block size of the medium, and attempt to
2495 * detect whether the medium is removeable.
2496 * We do treat removable media (floppies, USB and FireWire disks)
2497 * differently wrt. to the frequency and synchronicity of FAT updates.
2498 * We need to know the media block size in order to be able to
2499 * parse the partition table.
2500 */
2501 pcfs_device_getinfo(fsp);
2502
2503 /*
2504 * Unpartitioned media (floppies and some removeable devices)
2505 * don't have a partition table, the FAT BPB is at disk block 0.
2506 * Start out by reading block 0.
2507 */
2508 fsp->pcfs_dosstart = 0;
2509 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2510
2511 if (error = geterror(bp))
2512 goto out;
2513
2514 /*
2515 * If a logical drive number is requested, parse the partition table
2516 * and attempt to locate it. Otherwise, proceed immediately to the
2517 * BPB check. findTheDrive(), if successful, returns the disk block
2518 * number where the requested partition starts in "startsec".
2519 */
2520 if (fsp->pcfs_ldrive != 0) {
2521 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2522 "device (%x,%x):%d to find BPB\n",
2523 getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2524
2525 if (error = findTheDrive(fsp, &bp))
2526 goto out;
2527
2528 ASSERT(fsp->pcfs_dosstart != 0);
2529
2530 brelse(bp);
2531 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2532 fsp->pcfs_secsize);
2533 if (error = geterror(bp))
2534 goto out;
2535 }
2536
2537 /*
2538 * Validate the BPB and fill in the instance structure.
2539 */
2540 if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2541 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2542 "device (%x.%x):%d, disk LBA %u\n",
2543 getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2544 (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2545 error = EINVAL;
2546 goto out;
2547 }
2548
2549 ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2550
2551 out:
2552 /*
2553 * Release the buffer used
2554 */
2555 if (bp != NULL)
2556 brelse(bp);
2557 return (error);
2558 }
2559
2560
2561 /*
2562 * Get the file allocation table.
2563 * If there is an old FAT, invalidate it.
2564 */
2565 int
pc_getfat(struct pcfs * fsp)2566 pc_getfat(struct pcfs *fsp)
2567 {
2568 struct buf *bp = NULL;
2569 uchar_t *fatp = NULL;
2570 uchar_t *fat_changemap = NULL;
2571 int error;
2572 int fat_changemapsize;
2573 int flags = 0;
2574 int nfat;
2575 int altfat_mustmatch = 0;
2576 int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2577
2578 if (fsp->pcfs_fatp) {
2579 /*
2580 * There is a FAT in core.
2581 * If there are open file pcnodes or we have modified it or
2582 * it hasn't timed out yet use the in core FAT.
2583 * Otherwise invalidate it and get a new one
2584 */
2585 #ifdef notdef
2586 if (fsp->pcfs_frefs ||
2587 (fsp->pcfs_flags & PCFS_FATMOD) ||
2588 (gethrestime_sec() < fsp->pcfs_fattime)) {
2589 return (0);
2590 } else {
2591 mutex_enter(&pcfslock);
2592 pc_invalfat(fsp);
2593 mutex_exit(&pcfslock);
2594 }
2595 #endif /* notdef */
2596 return (0);
2597 }
2598
2599 /*
2600 * Get FAT and check it for validity
2601 */
2602 fatp = kmem_alloc(fatsize, KM_SLEEP);
2603 error = pc_readfat(fsp, fatp);
2604 if (error) {
2605 flags = B_ERROR;
2606 goto out;
2607 }
2608 fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2609 fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2610 fsp->pcfs_fatp = fatp;
2611 fsp->pcfs_fat_changemapsize = fat_changemapsize;
2612 fsp->pcfs_fat_changemap = fat_changemap;
2613
2614 /*
2615 * The only definite signature check is that the
2616 * media descriptor byte should match the first byte
2617 * of the FAT block.
2618 */
2619 if (fatp[0] != fsp->pcfs_mediadesc) {
2620 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2621 "media descriptor %x, FAT[0] lowbyte %x\n",
2622 (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2623 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2624 altfat_mustmatch = 1;
2625 }
2626
2627 /*
2628 * Get alternate FATs and check for consistency
2629 * This is an inlined version of pc_readfat().
2630 * Since we're only comparing FAT and alternate FAT,
2631 * there's no reason to let pc_readfat() copy data out
2632 * of the buf. Instead, compare in-situ, one cluster
2633 * at a time.
2634 */
2635 for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2636 size_t startsec;
2637 size_t off;
2638
2639 startsec = pc_dbdaddr(fsp,
2640 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2641
2642 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2643 daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2644 pc_cltodb(fsp, pc_lblkno(fsp, off)));
2645
2646 bp = bread(fsp->pcfs_xdev, fatblk,
2647 MIN(fsp->pcfs_clsize, fatsize - off));
2648 if (bp->b_flags & (B_ERROR | B_STALE)) {
2649 cmn_err(CE_NOTE,
2650 "!pcfs: alternate FAT #%d (start LBA %p)"
2651 " read error at offset %ld on device"
2652 " (%x.%x):%d",
2653 nfat, (void *)(uintptr_t)startsec, off,
2654 getmajor(fsp->pcfs_xdev),
2655 getminor(fsp->pcfs_xdev),
2656 fsp->pcfs_ldrive);
2657 flags = B_ERROR;
2658 error = EIO;
2659 goto out;
2660 }
2661 bp->b_flags |= B_STALE | B_AGE;
2662 if (bcmp(bp->b_un.b_addr, fatp + off,
2663 MIN(fsp->pcfs_clsize, fatsize - off))) {
2664 cmn_err(CE_NOTE,
2665 "!pcfs: alternate FAT #%d (start LBA %p)"
2666 " corrupted at offset %ld on device"
2667 " (%x.%x):%d",
2668 nfat, (void *)(uintptr_t)startsec, off,
2669 getmajor(fsp->pcfs_xdev),
2670 getminor(fsp->pcfs_xdev),
2671 fsp->pcfs_ldrive);
2672 if (altfat_mustmatch) {
2673 flags = B_ERROR;
2674 error = EIO;
2675 goto out;
2676 }
2677 }
2678 brelse(bp);
2679 bp = NULL; /* prevent double release */
2680 }
2681 }
2682
2683 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2684 fsp->pcfs_fatjustread = 1;
2685
2686 /*
2687 * Retrieve FAT32 fsinfo sector.
2688 * A failure to read this is not fatal to accessing the volume.
2689 * It simply means operations that count or search free blocks
2690 * will have to do a full FAT walk, vs. a possibly quicker lookup
2691 * of the summary information.
2692 * Hence, we log a message but return success overall after this point.
2693 */
2694 if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2695 struct fat_od_fsi *fsinfo_disk;
2696
2697 bp = bread(fsp->pcfs_xdev,
2698 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2699 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2700 if (bp->b_flags & (B_ERROR | B_STALE) ||
2701 !FSISIG_OK(fsinfo_disk)) {
2702 cmn_err(CE_NOTE,
2703 "!pcfs: error reading fat32 fsinfo from "
2704 "device (%x.%x):%d, block %lld",
2705 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2706 fsp->pcfs_ldrive,
2707 (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2708 fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2709 fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2710 fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2711 } else {
2712 bp->b_flags |= B_STALE | B_AGE;
2713 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2714 fsp->pcfs_fsinfo.fs_free_clusters =
2715 LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2716 fsp->pcfs_fsinfo.fs_next_free =
2717 LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2718 }
2719 brelse(bp);
2720 bp = NULL;
2721 }
2722
2723 if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2724 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2725 else
2726 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2727
2728 return (0);
2729
2730 out:
2731 cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2732 if (bp)
2733 brelse(bp);
2734 if (fatp)
2735 kmem_free(fatp, fatsize);
2736 if (fat_changemap)
2737 kmem_free(fat_changemap, fat_changemapsize);
2738
2739 if (flags) {
2740 pc_mark_irrecov(fsp);
2741 }
2742 return (error);
2743 }
2744