1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26 /*
27 * Copyright (c) 2017 by Delphix. All rights reserved.
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kmem.h>
33 #include <sys/user.h>
34 #include <sys/proc.h>
35 #include <sys/cred.h>
36 #include <sys/disp.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/fdio.h>
42 #include <sys/file.h>
43 #include <sys/uio.h>
44 #include <sys/conf.h>
45 #include <sys/statvfs.h>
46 #include <sys/mount.h>
47 #include <sys/pathname.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/sysmacros.h>
51 #include <sys/conf.h>
52 #include <sys/mkdev.h>
53 #include <sys/swap.h>
54 #include <sys/sunddi.h>
55 #include <sys/sunldi.h>
56 #include <sys/dktp/fdisk.h>
57 #include <sys/fs/pc_label.h>
58 #include <sys/fs/pc_fs.h>
59 #include <sys/fs/pc_dir.h>
60 #include <sys/fs/pc_node.h>
61 #include <fs/fs_subr.h>
62 #include <sys/modctl.h>
63 #include <sys/dkio.h>
64 #include <sys/open.h>
65 #include <sys/mntent.h>
66 #include <sys/policy.h>
67 #include <sys/atomic.h>
68 #include <sys/sdt.h>
69
/*
 * The majority of PC media use a 512-byte sector size, but
 * occasionally you will run across a 1k sector size.
 * For media with a 1k sector size, fd_strategy() requires
 * the I/O size to be a 1k multiple; so when the sector size
 * is not yet known, always read 1k (twice PC_SECSIZE).
 */
#define	PC_SAFESECSIZE	(PC_SECSIZE * 2)

static int pcfs_pseudo_floppy(dev_t);

/*
 * Module init routine and the VFS operations that pcfsinit() registers
 * with the framework, plus the node sync helper used by pcfs_sync().
 */
static int pcfsinit(int, char *);
static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
    struct cred *);
static int pcfs_unmount(struct vfs *, int, struct cred *);
static int pcfs_root(struct vfs *, struct vnode **);
static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
static int pc_syncfsnodes(struct pcfs *);
static int pcfs_sync(struct vfs *, short, struct cred *);
static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
static void pcfs_freevfs(vfs_t *vfsp);

/* FAT I/O helpers; pc_writefat() is what pc_syncfat() uses per FAT copy. */
static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
static int pc_writefat(struct pcfs *fsp, daddr_t start);

/* Mount-time helpers called from pcfs_mount(). */
static int pc_getfattype(struct pcfs *fsp);
static void pcfs_parse_mntopts(struct pcfs *fsp);
97
98
/*
 * pcfs mount options table
 */

/*
 * Cancel lists.  Each array is named after the option it is attached to
 * in mntopts[] below and lists the opposite option of the pair, so that
 * the two mutually exclusive options are never both set.
 */
static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };

/*
 * "hidden", "nofoldcase", "clamptime" and "timezone=+0" carry MO_DEFAULT.
 * Neither "atime" nor "noatime" is a default here; pcfs_mount() sets one
 * of them explicitly once the mount has succeeded.
 */
static mntopt_t mntopts[] = {
/*
 *	option name	cancel option	default arg	flags	opt data
 */
	{ MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
	{ MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
	{ MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
	{ MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
	{ MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
	{ MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, 0, NULL },
	{ MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL },
	{ MNTOPT_ATIME, atime_cancel, NULL, 0, NULL },
	{ MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
	{ MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
};

/* Option prototype: entry count and table, referenced from vfw. */
static mntopts_t pcfs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	mntopts
};
132
int pcfsdebuglevel = 0;

/*
 * pcfslock:	protects the list of mounted pc filesystems "pc_mounttab".
 * pcfs_lock:	(inside per filesystem structure "pcfs")
 *		per filesystem lock. Most of the vfsops and vnodeops are
 *		protected by this lock.
 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
 *
 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 *
 * pcfs_mountcount:	used to prevent module unloads while there is still
 *			pcfs state from a former mount hanging around. With
 *			forced umount support, the filesystem module must not
 *			be allowed to go away before the last VFS_FREEVFS()
 *			call has been made.
 *			Since this is just an atomic counter, there's no need
 *			for locking.
 */
kmutex_t	pcfslock;
krwlock_t	pcnodes_lock;
uint32_t	pcfs_mountcount;

/* Filesystem type number recorded by pcfsinit(); used by mount and _fini. */
static int pcfstype;
157
/*
 * Filesystem definition: version, name, init routine, VFS switch flags
 * and the mount option prototype.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	"pcfs",
	pcfsinit,
	VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI|VSW_MOUNTDEV,
	&pcfs_mntopts
};

extern struct mod_ops mod_fsops;

/* Ties the filesystem definition to the filesystem module operations. */
static struct modlfs modlfs = {
	&mod_fsops,
	"PC filesystem",
	&vfw
};

/* Linkage structure passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlfs,
	NULL
};
179
180 int
_init(void)181 _init(void)
182 {
183 int error;
184
185 #if !defined(lint)
186 /* make sure the on-disk structures are sane */
187 ASSERT(sizeof (struct pcdir) == 32);
188 ASSERT(sizeof (struct pcdir_lfn) == 32);
189 #endif
190 mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
191 rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
192 error = mod_install(&modlinkage);
193 if (error) {
194 mutex_destroy(&pcfslock);
195 rw_destroy(&pcnodes_lock);
196 }
197 return (error);
198 }
199
200 int
_fini(void)201 _fini(void)
202 {
203 int error;
204
205 /*
206 * If a forcedly unmounted instance is still hanging around,
207 * we cannot allow the module to be unloaded because that would
208 * cause panics once the VFS framework decides it's time to call
209 * into VFS_FREEVFS().
210 */
211 if (pcfs_mountcount)
212 return (EBUSY);
213
214 error = mod_remove(&modlinkage);
215 if (error)
216 return (error);
217 mutex_destroy(&pcfslock);
218 rw_destroy(&pcnodes_lock);
219 /*
220 * Tear down the operations vectors
221 */
222 (void) vfs_freevfsops_by_type(pcfstype);
223 vn_freevnodeops(pcfs_fvnodeops);
224 vn_freevnodeops(pcfs_dvnodeops);
225 return (0);
226 }
227
228 int
_info(struct modinfo * modinfop)229 _info(struct modinfo *modinfop)
230 {
231 return (mod_info(&modlinkage, modinfop));
232 }
233
234 /* ARGSUSED1 */
235 static int
pcfsinit(int fstype,char * name)236 pcfsinit(int fstype, char *name)
237 {
238 static const fs_operation_def_t pcfs_vfsops_template[] = {
239 VFSNAME_MOUNT, { .vfs_mount = pcfs_mount },
240 VFSNAME_UNMOUNT, { .vfs_unmount = pcfs_unmount },
241 VFSNAME_ROOT, { .vfs_root = pcfs_root },
242 VFSNAME_STATVFS, { .vfs_statvfs = pcfs_statvfs },
243 VFSNAME_SYNC, { .vfs_sync = pcfs_sync },
244 VFSNAME_VGET, { .vfs_vget = pcfs_vget },
245 VFSNAME_FREEVFS, { .vfs_freevfs = pcfs_freevfs },
246 NULL, NULL
247 };
248 int error;
249
250 error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
251 if (error != 0) {
252 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
253 return (error);
254 }
255
256 error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
257 if (error != 0) {
258 (void) vfs_freevfsops_by_type(fstype);
259 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
260 return (error);
261 }
262
263 error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
264 if (error != 0) {
265 (void) vfs_freevfsops_by_type(fstype);
266 vn_freevnodeops(pcfs_fvnodeops);
267 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
268 return (error);
269 }
270
271 pcfstype = fstype;
272 (void) pc_init();
273 pcfs_mountcount = 0;
274 return (0);
275 }
276
/*
 * Head of the list of mounted instances, linked through pcfs_nxt.
 * Protected by pcfslock.
 */
static struct pcfs *pc_mounttab = NULL;

extern struct pcfs_args pc_tz;

/*
 * Define some special logical drives we use internal to this file.
 * Drive numbers parsed from a ":N" or ":letter" suffix are in 1..24;
 * UNPARTITIONED_DRIVE is used when the device name has no usable suffix
 * or names a floppy ("a"/"b").
 */
#define	BOOT_PARTITION_DRIVE	99
#define	PRIMARY_DOS_DRIVE	1
#define	UNPARTITIONED_DRIVE	0
287
288 static int
pcfs_device_identify(struct vfs * vfsp,struct mounta * uap,struct cred * cr,int * dos_ldrive,dev_t * xdev)289 pcfs_device_identify(
290 struct vfs *vfsp,
291 struct mounta *uap,
292 struct cred *cr,
293 int *dos_ldrive,
294 dev_t *xdev)
295 {
296 struct pathname special;
297 char *c;
298 struct vnode *svp = NULL;
299 struct vnode *lvp = NULL;
300 int oflag, aflag;
301 int error;
302
303 /*
304 * Resolve path name of special file being mounted.
305 */
306 if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
307 return (error);
308 }
309
310 *dos_ldrive = -1;
311
312 if (error =
313 lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
314 /*
315 * If there's no device node, the name specified most likely
316 * maps to a PCFS-style "partition specifier" to select a
317 * harddisk primary/logical partition. Disable floppy-specific
318 * checks in such cases unless an explicit :A or :B is
319 * requested.
320 */
321
322 /*
323 * Split the pathname string at the last ':' separator.
324 * If there's no ':' in the device name, or the ':' is the
325 * last character in the string, the name is invalid and
326 * the error from the previous lookup will be returned.
327 */
328 c = strrchr(special.pn_path, ':');
329 if (c == NULL || strlen(c) == 0)
330 goto devlookup_done;
331
332 *c++ = '\0';
333
334 /*
335 * PCFS partition name suffixes can be:
336 * - "boot" to indicate the X86BOOT partition
337 * - a drive letter [c-z] for the "DOS logical drive"
338 * - a drive number 1..24 for the "DOS logical drive"
339 * - a "floppy name letter", 'a' or 'b' (just strip this)
340 */
341 if (strcasecmp(c, "boot") == 0) {
342 /*
343 * The Solaris boot partition is requested.
344 */
345 *dos_ldrive = BOOT_PARTITION_DRIVE;
346 } else if (strspn(c, "0123456789") == strlen(c)) {
347 /*
348 * All digits - parse the partition number.
349 */
350 long drvnum = 0;
351
352 if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
353 /*
354 * A number alright - in the allowed range ?
355 */
356 if (drvnum > 24 || drvnum == 0)
357 error = ENXIO;
358 }
359 if (error)
360 goto devlookup_done;
361 *dos_ldrive = (int)drvnum;
362 } else if (strlen(c) == 1) {
363 /*
364 * A single trailing character was specified.
365 * - [c-zC-Z] means a harddisk partition, and
366 * we retrieve the partition number.
367 * - [abAB] means a floppy drive, so we swallow
368 * the "drive specifier" and test later
369 * whether the physical device is a floppy.
370 */
371 *c = tolower(*c);
372 if (*c == 'a' || *c == 'b') {
373 *dos_ldrive = UNPARTITIONED_DRIVE;
374 } else if (*c < 'c' || *c > 'z') {
375 error = ENXIO;
376 goto devlookup_done;
377 } else {
378 *dos_ldrive = 1 + *c - 'c';
379 }
380 } else {
381 /*
382 * Can't parse this - pass through previous error.
383 */
384 goto devlookup_done;
385 }
386
387
388 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
389 NULLVPP, &svp);
390 } else {
391 *dos_ldrive = UNPARTITIONED_DRIVE;
392 }
393 devlookup_done:
394 pn_free(&special);
395 if (error)
396 return (error);
397
398 ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
399
400 /*
401 * Verify caller's permission to open the device special file.
402 */
403 if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
404 ((uap->flags & MS_RDONLY) != 0)) {
405 oflag = FREAD;
406 aflag = VREAD;
407 } else {
408 oflag = FREAD | FWRITE;
409 aflag = VREAD | VWRITE;
410 }
411
412 error = vfs_get_lofi(vfsp, &lvp);
413
414 if (error > 0) {
415 if (error == ENOENT)
416 error = ENODEV;
417 goto out;
418 } else if (error == 0) {
419 *xdev = lvp->v_rdev;
420 } else {
421 *xdev = svp->v_rdev;
422
423 if (svp->v_type != VBLK) {
424 error = ENOTBLK;
425 goto out;
426 }
427
428 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
429 goto out;
430 }
431
432 if (getmajor(*xdev) >= devcnt) {
433 error = ENXIO;
434 goto out;
435 }
436
437 if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
438 goto out;
439
440 out:
441 if (svp != NULL)
442 VN_RELE(svp);
443 if (lvp != NULL)
444 VN_RELE(lvp);
445 return (error);
446 }
447
448 static int
pcfs_device_ismounted(struct vfs * vfsp,int dos_ldrive,dev_t xdev,int * remounting,dev_t * pseudodev)449 pcfs_device_ismounted(
450 struct vfs *vfsp,
451 int dos_ldrive,
452 dev_t xdev,
453 int *remounting,
454 dev_t *pseudodev)
455 {
456 struct pcfs *fsp;
457 int remount = *remounting;
458
459 /*
460 * Ensure that this logical drive isn't already mounted, unless
461 * this is a REMOUNT request.
462 * Note: The framework will perform this check if the "...:c"
463 * PCFS-style "logical drive" syntax has not been used and an
464 * actually existing physical device is backing this filesystem.
465 * Once all block device drivers support PC-style partitioning,
466 * this codeblock can be dropped.
467 */
468 *pseudodev = xdev;
469
470 if (dos_ldrive) {
471 mutex_enter(&pcfslock);
472 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
473 if (fsp->pcfs_xdev == xdev &&
474 fsp->pcfs_ldrive == dos_ldrive) {
475 mutex_exit(&pcfslock);
476 if (remount) {
477 return (0);
478 } else {
479 return (EBUSY);
480 }
481 }
482 /*
483 * Assign a unique device number for the vfs
484 * The old way (getudev() + a constantly incrementing
485 * major number) was wrong because it changes vfs_dev
486 * across mounts and reboots, which breaks nfs file handles.
487 * UFS just uses the real dev_t. We can't do that because
488 * of the way pcfs opens fdisk partitons (the :c and :d
489 * partitions are on the same dev_t). Though that _might_
490 * actually be ok, since the file handle contains an
491 * absolute block number, it's probably better to make them
492 * different. So I think we should retain the original
493 * dev_t, but come up with a different minor number based
494 * on the logical drive that will _always_ come up the same.
495 * For now, we steal the upper 6 bits.
496 */
497 #ifdef notdef
498 /* what should we do here? */
499 if (((getminor(xdev) >> 12) & 0x3F) != 0)
500 printf("whoops - upper bits used!\n");
501 #endif
502 *pseudodev = makedevice(getmajor(xdev),
503 ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
504 if (vfs_devmounting(*pseudodev, vfsp)) {
505 mutex_exit(&pcfslock);
506 return (EBUSY);
507 }
508 if (vfs_devismounted(*pseudodev)) {
509 mutex_exit(&pcfslock);
510 if (remount) {
511 return (0);
512 } else {
513 return (EBUSY);
514 }
515 }
516 mutex_exit(&pcfslock);
517 } else {
518 *pseudodev = xdev;
519 if (vfs_devmounting(*pseudodev, vfsp)) {
520 return (EBUSY);
521 }
522 if (vfs_devismounted(*pseudodev))
523 if (remount) {
524 return (0);
525 } else {
526 return (EBUSY);
527 }
528 }
529
530 /*
531 * This is not a remount. Even if MS_REMOUNT was requested,
532 * the caller needs to proceed as it would on an ordinary
533 * mount.
534 */
535 *remounting = 0;
536
537 ASSERT(*pseudodev);
538 return (0);
539 }
540
541 /*
542 * Get the PCFS-specific mount options from the VFS framework.
543 * For "timezone" and "secsize", we need to parse the number
544 * ourselves and ensure its validity.
545 * Note: "secsize" is deliberately undocumented at this time,
546 * it's a workaround for devices (particularly: lofi image files)
547 * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
548 */
549 static void
pcfs_parse_mntopts(struct pcfs * fsp)550 pcfs_parse_mntopts(struct pcfs *fsp)
551 {
552 char *c;
553 char *endptr;
554 long l;
555 struct vfs *vfsp = fsp->pcfs_vfs;
556
557 ASSERT(fsp->pcfs_secondswest == 0);
558 ASSERT(fsp->pcfs_secsize == 0);
559
560 if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
561 fsp->pcfs_flags |= PCFS_HIDDEN;
562 if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
563 fsp->pcfs_flags |= PCFS_FOLDCASE;
564 if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
565 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
566 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
567 fsp->pcfs_flags |= PCFS_NOATIME;
568
569 if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
570 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
571 endptr == c + strlen(c)) {
572 /*
573 * A number alright - in the allowed range ?
574 */
575 if (l <= -12*3600 || l >= 12*3600) {
576 cmn_err(CE_WARN, "!pcfs: invalid use of "
577 "'timezone' mount option - %ld "
578 "is out of range. Assuming 0.", l);
579 l = 0;
580 }
581 } else {
582 cmn_err(CE_WARN, "!pcfs: invalid use of "
583 "'timezone' mount option - argument %s "
584 "is not a valid number. Assuming 0.", c);
585 l = 0;
586 }
587 fsp->pcfs_secondswest = l;
588 }
589
590 /*
591 * The "secsize=..." mount option is a workaround for the lack of
592 * lofi(4D) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
593 * partition table of a disk image and it has been partitioned with
594 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
595 * images.
596 * That should really be fixed in lofi ... this is a workaround.
597 */
598 if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
599 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
600 endptr == c + strlen(c)) {
601 /*
602 * A number alright - a valid sector size as well ?
603 */
604 if (!VALID_SECSIZE(l)) {
605 cmn_err(CE_WARN, "!pcfs: invalid use of "
606 "'secsize' mount option - %ld is "
607 "unsupported. Autodetecting.", l);
608 l = 0;
609 }
610 } else {
611 cmn_err(CE_WARN, "!pcfs: invalid use of "
612 "'secsize' mount option - argument %s "
613 "is not a valid number. Autodetecting.", c);
614 l = 0;
615 }
616 fsp->pcfs_secsize = l;
617 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
618 }
619 }
620
621 /*
622 * vfs operations
623 */
624
625 /*
626 * pcfs_mount - backend for VFS_MOUNT() on PCFS.
627 */
628 static int
pcfs_mount(struct vfs * vfsp,struct vnode * mvp,struct mounta * uap,struct cred * cr)629 pcfs_mount(
630 struct vfs *vfsp,
631 struct vnode *mvp,
632 struct mounta *uap,
633 struct cred *cr)
634 {
635 struct pcfs *fsp;
636 struct vnode *devvp;
637 dev_t pseudodev;
638 dev_t xdev;
639 int dos_ldrive = 0;
640 int error;
641 int remounting;
642
643 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
644 return (error);
645
646 if (mvp->v_type != VDIR)
647 return (ENOTDIR);
648
649 mutex_enter(&mvp->v_lock);
650 if ((uap->flags & MS_REMOUNT) == 0 &&
651 (uap->flags & MS_OVERLAY) == 0 &&
652 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
653 mutex_exit(&mvp->v_lock);
654 return (EBUSY);
655 }
656 mutex_exit(&mvp->v_lock);
657
658 /*
659 * PCFS doesn't do mount arguments anymore - everything's a mount
660 * option these days. In order not to break existing callers, we
661 * don't reject it yet, just warn that the data (if any) is ignored.
662 */
663 if (uap->datalen != 0)
664 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
665 "mount argument structures instead of mount options. "
666 "Ignoring mount(2) 'dataptr' argument.");
667
668 /*
669 * This is needed early, to make sure the access / open calls
670 * are done using the correct mode. Processing this mount option
671 * only when calling pcfs_parse_mntopts() would lead us to attempt
672 * a read/write access to a possibly writeprotected device, and
673 * a readonly mount attempt might fail because of that.
674 */
675 if (uap->flags & MS_RDONLY) {
676 vfsp->vfs_flag |= VFS_RDONLY;
677 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
678 }
679
680 /*
681 * For most filesystems, this is just a lookupname() on the
682 * mount pathname string. PCFS historically has to do its own
683 * partition table parsing because not all Solaris architectures
684 * support all styles of partitioning that PC media can have, and
685 * hence PCFS understands "device names" that don't map to actual
686 * physical device nodes. Parsing the "PCFS syntax" for device
687 * names is done in pcfs_device_identify() - see there.
688 *
689 * Once all block device drivers that can host FAT filesystems have
690 * been enhanced to create device nodes for all PC-style partitions,
691 * this code can go away.
692 */
693 if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
694 return (error);
695
696 /*
697 * As with looking up the actual device to mount, PCFS cannot rely
698 * on just the checks done by vfs_ismounted() whether a given device
699 * is mounted already. The additional check against the "PCFS syntax"
700 * is done in pcfs_device_ismounted().
701 */
702 remounting = (uap->flags & MS_REMOUNT);
703
704 if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
705 &pseudodev))
706 return (error);
707
708 if (remounting)
709 return (0);
710
711 /*
712 * Mount the filesystem.
713 * An instance structure is required before the attempt to locate
714 * and parse the FAT BPB. This is because mount options may change
715 * the behaviour of the filesystem type matching code. Precreate
716 * it and fill it in to a degree that allows parsing the mount
717 * options.
718 */
719 devvp = makespecvp(xdev, VBLK);
720 if (IS_SWAPVP(devvp)) {
721 VN_RELE(devvp);
722 return (EBUSY);
723 }
724 error = VOP_OPEN(&devvp,
725 (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
726 if (error) {
727 VN_RELE(devvp);
728 return (error);
729 }
730
731 fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
732 fsp->pcfs_vfs = vfsp;
733 fsp->pcfs_xdev = xdev;
734 fsp->pcfs_devvp = devvp;
735 fsp->pcfs_ldrive = dos_ldrive;
736 mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
737
738 pcfs_parse_mntopts(fsp);
739
740 /*
741 * This is the actual "mount" - the PCFS superblock check.
742 *
743 * Find the requested logical drive and the FAT BPB therein.
744 * Check device type and flag the instance if media is removeable.
745 *
746 * Initializes most members of the filesystem instance structure.
747 * Returns EINVAL if no valid BPB can be found. Other errors may
748 * occur after I/O failures, or when invalid / unparseable partition
749 * tables are encountered.
750 */
751 if (error = pc_getfattype(fsp))
752 goto errout;
753
754 /*
755 * Now that the BPB has been parsed, this structural information
756 * is available and known to be valid. Initialize the VFS.
757 */
758 vfsp->vfs_data = fsp;
759 vfsp->vfs_dev = pseudodev;
760 vfsp->vfs_fstype = pcfstype;
761 vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
762 vfsp->vfs_bcount = 0;
763 vfsp->vfs_bsize = fsp->pcfs_clsize;
764
765 /*
766 * Validate that we can access the FAT and that it is, to the
767 * degree we can verify here, self-consistent.
768 */
769 if (error = pc_verify(fsp))
770 goto errout;
771
772 /*
773 * Record the time of the mount, to return as an "approximate"
774 * timestamp for the FAT root directory. Since FAT roots don't
775 * have timestamps, this is less confusing to the user than
776 * claiming "zero" / Jan/01/1970.
777 */
778 gethrestime(&fsp->pcfs_mounttime);
779
780 /*
781 * Fix up the mount options. Because "noatime" is made default on
782 * removeable media only, a fixed disk will have neither "atime"
783 * nor "noatime" set. We set the options explicitly depending on
784 * the PCFS_NOATIME flag, to inform the user of what applies.
785 * Mount option cancellation will take care that the mutually
786 * exclusive 'other' is cleared.
787 */
788 vfs_setmntopt(vfsp,
789 fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
790 NULL, 0);
791
792 /*
793 * All clear - insert the FS instance into PCFS' list.
794 */
795 mutex_enter(&pcfslock);
796 fsp->pcfs_nxt = pc_mounttab;
797 pc_mounttab = fsp;
798 mutex_exit(&pcfslock);
799 atomic_inc_32(&pcfs_mountcount);
800 return (0);
801
802 errout:
803 (void) VOP_CLOSE(devvp,
804 vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
805 1, (offset_t)0, cr, NULL);
806 VN_RELE(devvp);
807 mutex_destroy(&fsp->pcfs_lock);
808 kmem_free(fsp, sizeof (*fsp));
809 return (error);
810
811 }
812
813 static int
pcfs_unmount(struct vfs * vfsp,int flag,struct cred * cr)814 pcfs_unmount(
815 struct vfs *vfsp,
816 int flag,
817 struct cred *cr)
818 {
819 struct pcfs *fsp, *fsp1;
820
821 if (secpolicy_fs_unmount(cr, vfsp) != 0)
822 return (EPERM);
823
824 fsp = VFSTOPCFS(vfsp);
825
826 /*
827 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
828 * prevent lookuppn from crossing the mount point.
829 * If this is not a forced umount request and there's ongoing I/O,
830 * don't allow the mount to proceed.
831 */
832 if (flag & MS_FORCE)
833 vfsp->vfs_flag |= VFS_UNMOUNTED;
834 else if (fsp->pcfs_nrefs)
835 return (EBUSY);
836
837 mutex_enter(&pcfslock);
838
839 /*
840 * If this is a forced umount request or if the fs instance has
841 * been marked as beyond recovery, allow the umount to proceed
842 * regardless of state. pc_diskchanged() forcibly releases all
843 * inactive vnodes/pcnodes.
844 */
845 if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
846 rw_enter(&pcnodes_lock, RW_WRITER);
847 pc_diskchanged(fsp);
848 rw_exit(&pcnodes_lock);
849 }
850
851 /* now there should be no pcp node on pcfhead or pcdhead. */
852
853 if (fsp == pc_mounttab) {
854 pc_mounttab = fsp->pcfs_nxt;
855 } else {
856 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
857 if (fsp1->pcfs_nxt == fsp)
858 fsp1->pcfs_nxt = fsp->pcfs_nxt;
859 }
860
861 mutex_exit(&pcfslock);
862
863 /*
864 * Since we support VFS_FREEVFS(), there's no need to
865 * free the fsp right now. The framework will tell us
866 * when the right time to do so has arrived by calling
867 * into pcfs_freevfs.
868 */
869 return (0);
870 }
871
872 /*
873 * find root of pcfs
874 */
875 static int
pcfs_root(struct vfs * vfsp,struct vnode ** vpp)876 pcfs_root(
877 struct vfs *vfsp,
878 struct vnode **vpp)
879 {
880 struct pcfs *fsp;
881 struct pcnode *pcp;
882 int error;
883
884 fsp = VFSTOPCFS(vfsp);
885 if (error = pc_lockfs(fsp, 0, 0))
886 return (error);
887
888 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
889 pc_unlockfs(fsp);
890 *vpp = PCTOV(pcp);
891 pcp->pc_flags |= PC_EXTERNAL;
892 return (0);
893 }
894
895 /*
896 * Get file system statistics.
897 */
898 static int
pcfs_statvfs(struct vfs * vfsp,struct statvfs64 * sp)899 pcfs_statvfs(
900 struct vfs *vfsp,
901 struct statvfs64 *sp)
902 {
903 struct pcfs *fsp;
904 int error;
905 dev32_t d32;
906
907 fsp = VFSTOPCFS(vfsp);
908 error = pc_getfat(fsp);
909 if (error)
910 return (error);
911 bzero(sp, sizeof (*sp));
912 sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
913 sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
914 sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
915 sp->f_files = (fsfilcnt64_t)-1;
916 sp->f_ffree = (fsfilcnt64_t)-1;
917 sp->f_favail = (fsfilcnt64_t)-1;
918 #ifdef notdef
919 (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
920 #endif /* notdef */
921 (void) cmpldev(&d32, vfsp->vfs_dev);
922 sp->f_fsid = d32;
923 (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
924 sp->f_flag = vf_to_stf(vfsp->vfs_flag);
925 sp->f_namemax = PCMAXNAMLEN;
926 return (0);
927 }
928
929 static int
pc_syncfsnodes(struct pcfs * fsp)930 pc_syncfsnodes(struct pcfs *fsp)
931 {
932 struct pchead *hp;
933 struct pcnode *pcp;
934 int error;
935
936 if (error = pc_lockfs(fsp, 0, 0))
937 return (error);
938
939 if (!(error = pc_syncfat(fsp))) {
940 hp = pcfhead;
941 while (hp < & pcfhead [ NPCHASH ]) {
942 rw_enter(&pcnodes_lock, RW_READER);
943 pcp = hp->pch_forw;
944 while (pcp != (struct pcnode *)hp) {
945 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
946 if (error = pc_nodesync(pcp))
947 break;
948 pcp = pcp -> pc_forw;
949 }
950 rw_exit(&pcnodes_lock);
951 if (error)
952 break;
953 hp++;
954 }
955 }
956 pc_unlockfs(fsp);
957 return (error);
958 }
959
960 /*
961 * Flush any pending I/O.
962 */
963 /*ARGSUSED*/
964 static int
pcfs_sync(struct vfs * vfsp,short flag,struct cred * cr)965 pcfs_sync(
966 struct vfs *vfsp,
967 short flag,
968 struct cred *cr)
969 {
970 struct pcfs *fsp;
971 int error = 0;
972
973 /* this prevents the filesystem from being umounted. */
974 mutex_enter(&pcfslock);
975 if (vfsp != NULL) {
976 fsp = VFSTOPCFS(vfsp);
977 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
978 error = pc_syncfsnodes(fsp);
979 } else {
980 rw_enter(&pcnodes_lock, RW_WRITER);
981 pc_diskchanged(fsp);
982 rw_exit(&pcnodes_lock);
983 error = EIO;
984 }
985 } else {
986 fsp = pc_mounttab;
987 while (fsp != NULL) {
988 if (fsp->pcfs_flags & PCFS_IRRECOV) {
989 rw_enter(&pcnodes_lock, RW_WRITER);
990 pc_diskchanged(fsp);
991 rw_exit(&pcnodes_lock);
992 error = EIO;
993 break;
994 }
995 error = pc_syncfsnodes(fsp);
996 if (error) break;
997 fsp = fsp->pcfs_nxt;
998 }
999 }
1000 mutex_exit(&pcfslock);
1001 return (error);
1002 }
1003
1004 int
pc_lockfs(struct pcfs * fsp,int diskchanged,int releasing)1005 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
1006 {
1007 int err;
1008
1009 if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
1010 return (EIO);
1011
1012 if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
1013 fsp->pcfs_count++;
1014 } else {
1015 mutex_enter(&fsp->pcfs_lock);
1016 if (fsp->pcfs_flags & PCFS_LOCKED)
1017 panic("pc_lockfs");
1018 /*
1019 * We check the IRRECOV bit again just in case somebody
1020 * snuck past the initial check but then got held up before
1021 * they could grab the lock. (And in the meantime someone
1022 * had grabbed the lock and set the bit)
1023 */
1024 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1025 if ((err = pc_getfat(fsp))) {
1026 mutex_exit(&fsp->pcfs_lock);
1027 return (err);
1028 }
1029 }
1030 fsp->pcfs_flags |= PCFS_LOCKED;
1031 fsp->pcfs_owner = curthread;
1032 fsp->pcfs_count++;
1033 }
1034 return (0);
1035 }
1036
1037 void
pc_unlockfs(struct pcfs * fsp)1038 pc_unlockfs(struct pcfs *fsp)
1039 {
1040
1041 if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1042 panic("pc_unlockfs");
1043 if (--fsp->pcfs_count < 0)
1044 panic("pc_unlockfs: count");
1045 if (fsp->pcfs_count == 0) {
1046 fsp->pcfs_flags &= ~PCFS_LOCKED;
1047 fsp->pcfs_owner = 0;
1048 mutex_exit(&fsp->pcfs_lock);
1049 }
1050 }
1051
1052 int
pc_syncfat(struct pcfs * fsp)1053 pc_syncfat(struct pcfs *fsp)
1054 {
1055 struct buf *bp;
1056 int nfat;
1057 int error = 0;
1058 struct fat_od_fsi *fsinfo_disk;
1059
1060 if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1061 !(fsp->pcfs_flags & PCFS_FATMOD))
1062 return (0);
1063 /*
1064 * write out all copies of FATs
1065 */
1066 fsp->pcfs_flags &= ~PCFS_FATMOD;
1067 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1068 for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1069 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1070 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1071 if (error) {
1072 pc_mark_irrecov(fsp);
1073 return (EIO);
1074 }
1075 }
1076 pc_clear_fatchanges(fsp);
1077
1078 /*
1079 * Write out fsinfo sector.
1080 */
1081 if (IS_FAT32(fsp)) {
1082 bp = bread(fsp->pcfs_xdev,
1083 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1084 if (bp->b_flags & (B_ERROR | B_STALE)) {
1085 error = geterror(bp);
1086 }
1087 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1088 if (!error && FSISIG_OK(fsinfo_disk)) {
1089 fsinfo_disk->fsi_incore.fs_free_clusters =
1090 LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1091 fsinfo_disk->fsi_incore.fs_next_free =
1092 LE_32(FSINFO_UNKNOWN);
1093 bwrite2(bp);
1094 error = geterror(bp);
1095 }
1096 brelse(bp);
1097 if (error) {
1098 pc_mark_irrecov(fsp);
1099 return (EIO);
1100 }
1101 }
1102 return (0);
1103 }
1104
1105 void
pc_invalfat(struct pcfs * fsp)1106 pc_invalfat(struct pcfs *fsp)
1107 {
1108 struct pcfs *xfsp;
1109 int mount_cnt = 0;
1110
1111 if (fsp->pcfs_fatp == (uchar_t *)0)
1112 panic("pc_invalfat");
1113 /*
1114 * Release FAT
1115 */
1116 kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1117 fsp->pcfs_fatp = NULL;
1118 kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1119 fsp->pcfs_fat_changemap = NULL;
1120 /*
1121 * Invalidate all the blocks associated with the device.
1122 * Not needed if stateless.
1123 */
1124 for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1125 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1126 mount_cnt++;
1127
1128 if (!mount_cnt)
1129 binval(fsp->pcfs_xdev);
1130 /*
1131 * close mounted device
1132 */
1133 (void) VOP_CLOSE(fsp->pcfs_devvp,
1134 (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1135 1, (offset_t)0, CRED(), NULL);
1136 }
1137
1138 void
pc_badfs(struct pcfs * fsp)1139 pc_badfs(struct pcfs *fsp)
1140 {
1141 cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1142 getmajor(fsp->pcfs_devvp->v_rdev),
1143 getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1144 }
1145
1146 /*
1147 * The problem with supporting NFS on the PCFS filesystem is that there
1148 * is no good place to keep the generation number. The only possible
1149 * place is inside a directory entry. There are a few words that we
1150 * don't use - they store NT & OS/2 attributes, and the creation/last access
1151 * time of the file - but it seems wrong to use them. In addition, directory
1152 * entries come and go. If a directory is removed completely, its directory
1153 * blocks are freed and the generation numbers are lost. Whereas in ufs,
1154 * inode blocks are dedicated for inodes, so the generation numbers are
1155 * permanently kept on the disk.
1156 */
1157 static int
pcfs_vget(struct vfs * vfsp,struct vnode ** vpp,struct fid * fidp)1158 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1159 {
1160 struct pcnode *pcp;
1161 struct pc_fid *pcfid;
1162 struct pcfs *fsp;
1163 struct pcdir *ep;
1164 daddr_t eblkno;
1165 int eoffset;
1166 struct buf *bp;
1167 int error;
1168 pc_cluster32_t cn;
1169
1170 pcfid = (struct pc_fid *)fidp;
1171 fsp = VFSTOPCFS(vfsp);
1172
1173 error = pc_lockfs(fsp, 0, 0);
1174 if (error) {
1175 *vpp = NULL;
1176 return (error);
1177 }
1178
1179 if (pcfid->pcfid_block == 0) {
1180 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1181 pcp->pc_flags |= PC_EXTERNAL;
1182 *vpp = PCTOV(pcp);
1183 pc_unlockfs(fsp);
1184 return (0);
1185 }
1186 eblkno = pcfid->pcfid_block;
1187 eoffset = pcfid->pcfid_offset;
1188
1189 if ((pc_dbtocl(fsp,
1190 eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1191 (eoffset > fsp->pcfs_clsize)) {
1192 pc_unlockfs(fsp);
1193 *vpp = NULL;
1194 return (EINVAL);
1195 }
1196
1197 if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1198 < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1199 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1200 fsp->pcfs_clsize);
1201 } else {
1202 /*
1203 * This is an access "backwards" into the FAT12/FAT16
1204 * root directory. A better code structure would
1205 * significantly improve maintainability here ...
1206 */
1207 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1208 (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1209 }
1210 if (bp->b_flags & (B_ERROR | B_STALE)) {
1211 error = geterror(bp);
1212 brelse(bp);
1213 if (error)
1214 pc_mark_irrecov(fsp);
1215 *vpp = NULL;
1216 pc_unlockfs(fsp);
1217 return (error);
1218 }
1219 ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1220 /*
1221 * Ok, if this is a valid file handle that we gave out,
1222 * then simply ensuring that the creation time matches,
1223 * the entry has not been deleted, and it has a valid first
1224 * character should be enough.
1225 *
1226 * Unfortunately, verifying that the <blkno, offset> _still_
1227 * refers to a directory entry is not easy, since we'd have
1228 * to search _all_ directories starting from root to find it.
1229 * That's a high price to pay just in case somebody is forging
1230 * file handles. So instead we verify that as much of the
1231 * entry is valid as we can:
1232 *
1233 * 1. The starting cluster is 0 (unallocated) or valid
1234 * 2. It is not an LFN entry
1235 * 3. It is not hidden (unless mounted as such)
1236 * 4. It is not the label
1237 */
1238 cn = pc_getstartcluster(fsp, ep);
1239 /*
1240 * if the starting cluster is valid, but not valid according
1241 * to pc_validcl(), force it to be to simplify the following if.
1242 */
1243 if (cn == 0)
1244 cn = PCF_FIRSTCLUSTER;
1245 if (IS_FAT32(fsp)) {
1246 if (cn >= PCF_LASTCLUSTER32)
1247 cn = PCF_FIRSTCLUSTER;
1248 } else {
1249 if (cn >= PCF_LASTCLUSTER)
1250 cn = PCF_FIRSTCLUSTER;
1251 }
1252 if ((!pc_validcl(fsp, cn)) ||
1253 (PCDL_IS_LFN(ep)) ||
1254 (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1255 ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1256 bp->b_flags |= B_STALE | B_AGE;
1257 brelse(bp);
1258 pc_unlockfs(fsp);
1259 return (EINVAL);
1260 }
1261 if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1262 (ep->pcd_filename[0] != PCD_ERASED) &&
1263 (pc_validchar(ep->pcd_filename[0]) ||
1264 (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1265 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1266 pcp->pc_flags |= PC_EXTERNAL;
1267 *vpp = PCTOV(pcp);
1268 } else {
1269 *vpp = NULL;
1270 }
1271 bp->b_flags |= B_STALE | B_AGE;
1272 brelse(bp);
1273 pc_unlockfs(fsp);
1274 return (0);
1275 }
1276
1277 /*
1278 * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1279 * a meg), so we can't bread() it all in at once. This routine reads a
1280 * fat a chunk at a time.
1281 */
1282 static int
pc_readfat(struct pcfs * fsp,uchar_t * fatp)1283 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1284 {
1285 struct buf *bp;
1286 size_t off;
1287 size_t readsize;
1288 daddr_t diskblk;
1289 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1290 daddr_t start = fsp->pcfs_fatstart;
1291
1292 readsize = fsp->pcfs_clsize;
1293 for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1294 if (readsize > (fatsize - off))
1295 readsize = fatsize - off;
1296 diskblk = pc_dbdaddr(fsp, start +
1297 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1298 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1299 if (bp->b_flags & (B_ERROR | B_STALE)) {
1300 brelse(bp);
1301 return (EIO);
1302 }
1303 bp->b_flags |= B_STALE | B_AGE;
1304 bcopy(bp->b_un.b_addr, fatp, readsize);
1305 brelse(bp);
1306 }
1307 return (0);
1308 }
1309
1310 /*
1311 * We write the FAT out a _lot_, in order to make sure that it
1312 * is up-to-date. But on a FAT32 system (large drive, small clusters)
1313 * the FAT might be a couple of megabytes, and writing it all out just
1314 * because we created or deleted a small file is painful (especially
1315 * since we do it for each alternate FAT too). So instead, for FAT16 and
1316 * FAT32 we only write out the bit that has changed. We don't clear
1317 * the 'updated' fields here because the caller might be writing out
1318 * several FATs, so the caller must use pc_clear_fatchanges() after
1319 * all FATs have been updated.
1320 * This function doesn't take "start" from fsp->pcfs_dosstart because
1321 * callers can use it to write either the primary or any of the alternate
1322 * FAT tables.
1323 */
1324 static int
pc_writefat(struct pcfs * fsp,daddr_t start)1325 pc_writefat(struct pcfs *fsp, daddr_t start)
1326 {
1327 struct buf *bp;
1328 size_t off;
1329 size_t writesize;
1330 int error;
1331 uchar_t *fatp = fsp->pcfs_fatp;
1332 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1333
1334 writesize = fsp->pcfs_clsize;
1335 for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1336 if (writesize > (fatsize - off))
1337 writesize = fatsize - off;
1338 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1339 continue;
1340 }
1341 bp = ngeteblk(writesize);
1342 bp->b_edev = fsp->pcfs_xdev;
1343 bp->b_dev = cmpdev(bp->b_edev);
1344 bp->b_blkno = pc_dbdaddr(fsp, start +
1345 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1346 bcopy(fatp, bp->b_un.b_addr, writesize);
1347 bwrite2(bp);
1348 error = geterror(bp);
1349 brelse(bp);
1350 if (error) {
1351 return (error);
1352 }
1353 }
1354 return (0);
1355 }
1356
1357 /*
1358 * Mark the FAT cluster that 'cn' is stored in as modified.
1359 */
1360 void
pc_mark_fat_updated(struct pcfs * fsp,pc_cluster32_t cn)1361 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1362 {
1363 pc_cluster32_t bn;
1364 size_t size;
1365
1366 /* which fat block is the cluster number stored in? */
1367 if (IS_FAT32(fsp)) {
1368 size = sizeof (pc_cluster32_t);
1369 bn = pc_lblkno(fsp, cn * size);
1370 fsp->pcfs_fat_changemap[bn] = 1;
1371 } else if (IS_FAT16(fsp)) {
1372 size = sizeof (pc_cluster16_t);
1373 bn = pc_lblkno(fsp, cn * size);
1374 fsp->pcfs_fat_changemap[bn] = 1;
1375 } else {
1376 offset_t off;
1377 pc_cluster32_t nbn;
1378
1379 ASSERT(IS_FAT12(fsp));
1380 off = cn + (cn >> 1);
1381 bn = pc_lblkno(fsp, off);
1382 fsp->pcfs_fat_changemap[bn] = 1;
1383 /* does this field wrap into the next fat cluster? */
1384 nbn = pc_lblkno(fsp, off + 1);
1385 if (nbn != bn) {
1386 fsp->pcfs_fat_changemap[nbn] = 1;
1387 }
1388 }
1389 }
1390
1391 /*
1392 * return whether the FAT cluster 'bn' is updated and needs to
1393 * be written out.
1394 */
1395 int
pc_fat_is_changed(struct pcfs * fsp,pc_cluster32_t bn)1396 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1397 {
1398 return (fsp->pcfs_fat_changemap[bn] == 1);
1399 }
1400
1401 /*
1402 * Implementation of VFS_FREEVFS() to support forced umounts.
1403 * This is called by the vfs framework after umount, to trigger
1404 * the release of any resources still associated with the given
1405 * vfs_t once the need to keep them has gone away.
1406 */
1407 void
pcfs_freevfs(vfs_t * vfsp)1408 pcfs_freevfs(vfs_t *vfsp)
1409 {
1410 struct pcfs *fsp = VFSTOPCFS(vfsp);
1411
1412 mutex_enter(&pcfslock);
1413 /*
1414 * Purging the FAT closes the device - can't do any more
1415 * I/O after this.
1416 */
1417 if (fsp->pcfs_fatp != (uchar_t *)0)
1418 pc_invalfat(fsp);
1419 mutex_exit(&pcfslock);
1420
1421 VN_RELE(fsp->pcfs_devvp);
1422 mutex_destroy(&fsp->pcfs_lock);
1423 kmem_free(fsp, sizeof (*fsp));
1424
1425 /*
1426 * Allow _fini() to succeed now, if so desired.
1427 */
1428 atomic_dec_32(&pcfs_mountcount);
1429 }
1430
1431
1432 /*
1433 * PC-style partition parsing and FAT BPB identification/validation code.
1434 * The partition parsers here assume:
1435 * - a FAT filesystem will be in a partition that has one of a set of
1436 * recognized partition IDs
1437 * - the user wants the 'numbering' (C:, D:, ...) that one would get
1438 * on MSDOS 6.x.
1439 * That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1440 * will not factor in the enumeration.
1441 * These days, such assumptions should be revisited. FAT is no longer the
1442 * only game in 'PC town'.
1443 */
1444 /*
1445 * isDosDrive()
1446 * Boolean function. Give it the systid field for an fdisk partition
1447 * and it decides if that's a systid that describes a DOS drive. We
1448 * use systid values defined in sys/dktp/fdisk.h.
1449 */
1450 static int
isDosDrive(uchar_t checkMe)1451 isDosDrive(uchar_t checkMe)
1452 {
1453 return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1454 (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1455 (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1456 (checkMe == DIAGPART));
1457 }
1458
1459
1460 /*
1461 * isDosExtended()
1462 * Boolean function. Give it the systid field for an fdisk partition
1463 * and it decides if that's a systid that describes an extended DOS
1464 * partition.
1465 */
1466 static int
isDosExtended(uchar_t checkMe)1467 isDosExtended(uchar_t checkMe)
1468 {
1469 return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1470 }
1471
1472
1473 /*
1474 * isBootPart()
1475 * Boolean function. Give it the systid field for an fdisk partition
1476 * and it decides if that's a systid that describes a Solaris boot
1477 * partition.
1478 */
1479 static int
isBootPart(uchar_t checkMe)1480 isBootPart(uchar_t checkMe)
1481 {
1482 return (checkMe == X86BOOT);
1483 }
1484
1485
1486 /*
1487 * noLogicalDrive()
1488 * Display error message about not being able to find a logical
1489 * drive.
1490 */
1491 static void
noLogicalDrive(int ldrive)1492 noLogicalDrive(int ldrive)
1493 {
1494 if (ldrive == BOOT_PARTITION_DRIVE) {
1495 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1496 } else {
1497 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1498 }
1499 }
1500
1501
1502 /*
1503 * findTheDrive()
1504 * Discover offset of the requested logical drive, and return
1505 * that offset (startSector), the systid of that drive (sysid),
1506 * and a buffer pointer (bp), with the buffer contents being
1507 * the first sector of the logical drive (i.e., the sector that
1508 * contains the BPB for that drive).
1509 *
1510 * Note: this code is not capable of addressing >2TB disks, as it uses
1511 * daddr_t not diskaddr_t, some of the calculations would overflow
1512 */
1513 #define COPY_PTBL(mbr, ptblp) \
1514 bcopy(&(((struct mboot *)(mbr))->parts), (ptblp), \
1515 FD_NUMPART * sizeof (struct ipart))
1516
1517 static int
findTheDrive(struct pcfs * fsp,buf_t ** bp)1518 findTheDrive(struct pcfs *fsp, buf_t **bp)
1519 {
1520 int ldrive = fsp->pcfs_ldrive;
1521 dev_t dev = fsp->pcfs_devvp->v_rdev;
1522
1523 struct ipart dosp[FD_NUMPART]; /* incore fdisk partition structure */
1524 daddr_t lastseek = 0; /* Disk block we sought previously */
1525 daddr_t diskblk = 0; /* Disk block to get */
1526 daddr_t xstartsect; /* base of Extended DOS partition */
1527 int logicalDriveCount = 0; /* Count of logical drives seen */
1528 int extendedPart = -1; /* index of extended dos partition */
1529 int primaryPart = -1; /* index of primary dos partition */
1530 int bootPart = -1; /* index of a Solaris boot partition */
1531 uint32_t xnumsect = 0; /* length of extended DOS partition */
1532 int driveIndex; /* computed FDISK table index */
1533 daddr_t startsec;
1534 len_t mediasize;
1535 int i;
1536 /*
1537 * Count of drives in the current extended partition's
1538 * FDISK table, and indexes of the drives themselves.
1539 */
1540 int extndDrives[FD_NUMPART];
1541 int numDrives = 0;
1542
1543 /*
1544 * Count of drives (beyond primary) in master boot record's
1545 * FDISK table, and indexes of the drives themselves.
1546 */
1547 int extraDrives[FD_NUMPART];
1548 int numExtraDrives = 0;
1549
1550 /*
1551 * "ldrive == 0" should never happen, as this is a request to
1552 * mount the physical device (and ignore partitioning). The code
1553 * in pcfs_mount() should have made sure that a logical drive number
1554 * is at least 1, meaning we're looking for drive "C:". It is not
1555 * safe (and a bug in the callers of this function) to request logical
1556 * drive number 0; we could ASSERT() but a graceful EIO is a more
1557 * polite way.
1558 */
1559 if (ldrive == 0) {
1560 cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1561 noLogicalDrive(ldrive);
1562 return (EIO);
1563 }
1564
1565 /*
1566 * Copy from disk block into memory aligned structure for fdisk usage.
1567 */
1568 COPY_PTBL((*bp)->b_un.b_addr, dosp);
1569
1570 /*
1571 * This check is ok because a FAT BPB and a master boot record (MBB)
1572 * have the same signature, in the same position within the block.
1573 */
1574 if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1575 cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1576 "device (%x.%x):%d\n",
1577 getmajor(dev), getminor(dev), ldrive);
1578 return (EINVAL);
1579 }
1580
1581 /*
1582 * Get a summary of what is in the Master FDISK table.
1583 * Normally we expect to find one partition marked as a DOS drive.
1584 * This partition is the one Windows calls the primary dos partition.
1585 * If the machine has any logical drives then we also expect
1586 * to find a partition marked as an extended DOS partition.
1587 *
1588 * Sometimes we'll find multiple partitions marked as DOS drives.
1589 * The Solaris fdisk program allows these partitions
1590 * to be created, but Windows fdisk no longer does. We still need
1591 * to support these, though, since Windows does. We also need to fix
1592 * our fdisk to behave like the Windows version.
1593 *
1594 * It turns out that some off-the-shelf media have *only* an
1595 * Extended partition, so we need to deal with that case as well.
1596 *
1597 * Only a single (the first) Extended or Boot Partition will
1598 * be recognized. Any others will be ignored.
1599 */
1600 for (i = 0; i < FD_NUMPART; i++) {
1601 DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1602 uint_t, (uint_t)dosp[i].systid,
1603 uint_t, LE_32(dosp[i].relsect),
1604 uint_t, LE_32(dosp[i].numsect));
1605
1606 if (isDosDrive(dosp[i].systid)) {
1607 if (primaryPart < 0) {
1608 logicalDriveCount++;
1609 primaryPart = i;
1610 } else {
1611 extraDrives[numExtraDrives++] = i;
1612 }
1613 continue;
1614 }
1615 if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1616 extendedPart = i;
1617 continue;
1618 }
1619 if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1620 bootPart = i;
1621 continue;
1622 }
1623 }
1624
1625 if (ldrive == BOOT_PARTITION_DRIVE) {
1626 if (bootPart < 0) {
1627 noLogicalDrive(ldrive);
1628 return (EINVAL);
1629 }
1630 startsec = LE_32(dosp[bootPart].relsect);
1631 mediasize = LE_32(dosp[bootPart].numsect);
1632 goto found;
1633 }
1634
1635 if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1636 startsec = LE_32(dosp[primaryPart].relsect);
1637 mediasize = LE_32(dosp[primaryPart].numsect);
1638 goto found;
1639 }
1640
1641 /*
1642 * We are not looking for the C: drive (or the primary drive
1643 * was not found), so we had better have an extended partition
1644 * or extra drives in the Master FDISK table.
1645 */
1646 if ((extendedPart < 0) && (numExtraDrives == 0)) {
1647 cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1648 noLogicalDrive(ldrive);
1649 return (EINVAL);
1650 }
1651
1652 if (extendedPart >= 0) {
1653 diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1654 xnumsect = LE_32(dosp[extendedPart].numsect);
1655 do {
1656 /*
1657 * If the seek would not cause us to change
1658 * position on the drive, then we're out of
1659 * extended partitions to examine.
1660 */
1661 if (diskblk == lastseek)
1662 break;
1663 logicalDriveCount += numDrives;
1664 /*
1665 * Seek the next extended partition, and find
1666 * logical drives within it.
1667 */
1668 brelse(*bp);
1669 /*
1670 * bread() block numbers are multiples of DEV_BSIZE
1671 * but the device sector size (the unit of partitioning)
1672 * might be larger than that; pcfs_get_device_info()
1673 * has calculated the multiplicator for us.
1674 */
1675 *bp = bread(dev,
1676 pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1677 if ((*bp)->b_flags & B_ERROR) {
1678 return (EIO);
1679 }
1680
1681 lastseek = diskblk;
1682 COPY_PTBL((*bp)->b_un.b_addr, dosp);
1683 if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1684 cmn_err(CE_NOTE, "!pcfs: "
1685 "extended partition table signature err, "
1686 "device (%x.%x):%d, LBA %u",
1687 getmajor(dev), getminor(dev), ldrive,
1688 (uint_t)pc_dbdaddr(fsp, diskblk));
1689 return (EINVAL);
1690 }
1691 /*
1692 * Count up drives, and track where the next
1693 * extended partition is in case we need it. We
1694 * are expecting only one extended partition. If
1695 * there is more than one we'll only go to the
1696 * first one we see, but warn about ignoring.
1697 */
1698 numDrives = 0;
1699 for (i = 0; i < FD_NUMPART; i++) {
1700 DTRACE_PROBE4(extendedpart,
1701 struct pcfs *, fsp,
1702 uint_t, (uint_t)dosp[i].systid,
1703 uint_t, LE_32(dosp[i].relsect),
1704 uint_t, LE_32(dosp[i].numsect));
1705 if (isDosDrive(dosp[i].systid)) {
1706 extndDrives[numDrives++] = i;
1707 } else if (isDosExtended(dosp[i].systid)) {
1708 if (diskblk != lastseek) {
1709 /*
1710 * Already found an extended
1711 * partition in this table.
1712 */
1713 cmn_err(CE_NOTE,
1714 "!pcfs: ignoring unexpected"
1715 " additional extended"
1716 " partition");
1717 } else {
1718 diskblk = xstartsect +
1719 LE_32(dosp[i].relsect);
1720 }
1721 }
1722 }
1723 } while (ldrive > logicalDriveCount + numDrives);
1724
1725 ASSERT(numDrives <= FD_NUMPART);
1726
1727 if (ldrive <= logicalDriveCount + numDrives) {
1728 /*
1729 * The number of logical drives we've found thus
1730 * far is enough to get us to the one we were
1731 * searching for.
1732 */
1733 driveIndex = logicalDriveCount + numDrives - ldrive;
1734 mediasize =
1735 LE_32(dosp[extndDrives[driveIndex]].numsect);
1736 startsec =
1737 LE_32(dosp[extndDrives[driveIndex]].relsect) +
1738 lastseek;
1739 if (startsec > (xstartsect + xnumsect)) {
1740 cmn_err(CE_NOTE, "!pcfs: extended partition "
1741 "values bad");
1742 return (EINVAL);
1743 }
1744 goto found;
1745 } else {
1746 /*
1747 * We ran out of extended dos partition
1748 * drives. The only hope now is to go
1749 * back to extra drives defined in the master
1750 * fdisk table. But we overwrote that table
1751 * already, so we must load it in again.
1752 */
1753 logicalDriveCount += numDrives;
1754 brelse(*bp);
1755 ASSERT(fsp->pcfs_dosstart == 0);
1756 *bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1757 fsp->pcfs_secsize);
1758 if ((*bp)->b_flags & B_ERROR) {
1759 return (EIO);
1760 }
1761 COPY_PTBL((*bp)->b_un.b_addr, dosp);
1762 }
1763 }
1764 /*
1765 * Still haven't found the drive, is it an extra
1766 * drive defined in the main FDISK table?
1767 */
1768 if (ldrive <= logicalDriveCount + numExtraDrives) {
1769 driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1770 ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1771 mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1772 startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1773 goto found;
1774 }
1775 /*
1776 * Still haven't found the drive, and there is
1777 * nowhere else to look.
1778 */
1779 noLogicalDrive(ldrive);
1780 return (EINVAL);
1781
1782 found:
1783 /*
1784 * We need this value in units of sectorsize, because PCFS' internal
1785 * offset calculations go haywire for > 512Byte sectors unless all
1786 * pcfs_.*start values are in units of sectors.
1787 * So, assign before the capacity check (that's done in DEV_BSIZE)
1788 */
1789 fsp->pcfs_dosstart = startsec;
1790
1791 /*
1792 * convert from device sectors to proper units:
1793 * - starting sector: DEV_BSIZE (as argument to bread())
1794 * - media size: Bytes
1795 */
1796 startsec = pc_dbdaddr(fsp, startsec);
1797 mediasize *= fsp->pcfs_secsize;
1798
1799 /*
1800 * some additional validation / warnings in case the partition table
1801 * and the actual media capacity are not in accordance ...
1802 */
1803 if (fsp->pcfs_mediasize != 0) {
1804 diskaddr_t startoff =
1805 (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1806
1807 if (startoff >= fsp->pcfs_mediasize ||
1808 startoff + mediasize > fsp->pcfs_mediasize) {
1809 cmn_err(CE_WARN,
1810 "!pcfs: partition size (LBA start %u, %lld bytes, "
1811 "device (%x.%x):%d) smaller than "
1812 "mediasize (%lld bytes).\n"
1813 "filesystem may be truncated, access errors "
1814 "may result.\n",
1815 (uint_t)startsec, (long long)mediasize,
1816 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1817 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1818 }
1819 } else {
1820 fsp->pcfs_mediasize = mediasize;
1821 }
1822
1823 return (0);
1824 }
1825
1826
1827 static fattype_t
secondaryBPBChecks(struct pcfs * fsp,uchar_t * bpb,size_t secsize)1828 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1829 {
1830 uint32_t ncl = fsp->pcfs_ncluster;
1831
1832 if (ncl <= 4096) {
1833 if (bpb_get_FatSz16(bpb) == 0)
1834 return (FAT_UNKNOWN);
1835
1836 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1837 bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1838 return (FAT12);
1839 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1840 return (FAT12);
1841 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1842 return (FAT16);
1843
1844 switch (bpb_get_Media(bpb)) {
1845 case SS8SPT:
1846 case DS8SPT:
1847 case SS9SPT:
1848 case DS9SPT:
1849 case DS18SPT:
1850 case DS9_15SPT:
1851 /*
1852 * Is this reliable - all floppies are FAT12 ?
1853 */
1854 return (FAT12);
1855 case MD_FIXED:
1856 /*
1857 * Is this reliable - disks are always FAT16 ?
1858 */
1859 return (FAT16);
1860 default:
1861 break;
1862 }
1863 } else if (ncl <= 65536) {
1864 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1865 return (FAT32);
1866 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1867 return (FAT32);
1868 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1869 return (FAT32);
1870
1871 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1872 return (FAT16);
1873 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1874 return (FAT16);
1875 }
1876
1877 /*
1878 * We don't know
1879 */
1880 return (FAT_UNKNOWN);
1881 }
1882
1883 /*
1884 * Check to see if the BPB we found is correct.
1885 *
 * This looks far more complicated than it needs to be for pure structural
1887 * validation. The reason for this is that parseBPB() is also used for
1888 * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1889 * BPB fields (do not) have 'known good' values, even if we (do not) reject
1890 * the BPB when attempting to mount the filesystem.
1891 *
1892 * Real-world usage of FAT shows there are a lot of corner-case situations
1893 * and, following the specification strictly, invalid filesystems out there.
1894 * Known are situations such as:
1895 * - FAT12/FAT16 filesystems with garbage in either totsec16/32
1896 * instead of the zero in one of the fields mandated by the spec
1897 * - filesystems that claim to be larger than the partition they're in
1898 * - filesystems without valid media descriptor
1899 * - FAT32 filesystems with RootEntCnt != 0
1900 * - FAT32 filesystems with less than 65526 clusters
1901 * - FAT32 filesystems without valid FSI sector
1902 * - FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1903 *
1904 * Such filesystems are accessible by PCFS - if it'd know to start with that
1905 * the filesystem should be treated as a specific FAT type. Before S10, it
1906 * relied on the PC/fdisk partition type for the purpose and almost completely
1907 * ignored the BPB; now it ignores the partition type for anything else but
1908 * logical drive enumeration, which can result in rejection of (invalid)
1909 * FAT32 - if the partition ID says FAT32, but the filesystem, for example
1910 * has less than 65526 clusters.
1911 *
1912 * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1913 * not possible to allow all such mostly-compliant filesystems in unless one
1914 * accepts false positives (definitely invalid filesystems that cause problems
1915 * later). This at least allows to pinpoint why the mount failed.
1916 *
 * Due to the use of FAT on removable media, all relaxations of the rules
 * here need to be carefully evaluated with respect to potential effects on PCFS
1919 * resilience. A faulty/"mis-crafted" filesystem must not cause a panic, so
1920 * beware.
1921 */
1922 static int
parseBPB(struct pcfs * fsp,uchar_t * bpb,int * valid)1923 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1924 {
1925 fattype_t type;
1926
1927 uint32_t ncl; /* number of clusters in file area */
1928 uint32_t rec;
1929 uint32_t reserved;
1930 uint32_t fsisec, bkbootsec;
1931 blkcnt_t totsec, totsec16, totsec32, datasec;
1932 size_t fatsec, fatsec16, fatsec32, rdirsec;
1933 size_t secsize;
1934 len_t mediasize;
1935 uint64_t validflags = 0;
1936
1937 if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1938 validflags |= BPB_BPBSIG_OK;
1939
1940 rec = bpb_get_RootEntCnt(bpb);
1941 reserved = bpb_get_RsvdSecCnt(bpb);
1942 fsisec = bpb_get_FSInfo32(bpb);
1943 bkbootsec = bpb_get_BkBootSec32(bpb);
1944 totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1945 totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1946 fatsec16 = bpb_get_FatSz16(bpb);
1947 fatsec32 = bpb_get_FatSz32(bpb);
1948
1949 totsec = totsec16 ? totsec16 : totsec32;
1950 fatsec = fatsec16 ? fatsec16 : fatsec32;
1951
1952 secsize = bpb_get_BytesPerSec(bpb);
1953 if (!VALID_SECSIZE(secsize))
1954 secsize = fsp->pcfs_secsize;
1955 if (secsize != fsp->pcfs_secsize) {
1956 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1957 getmajor(fsp->pcfs_xdev),
1958 getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1959 PC_DPRINTF2(3, "!BPB secsize %d != "
1960 "autodetected media block size %d\n",
1961 (int)secsize, (int)fsp->pcfs_secsize);
1962 if (fsp->pcfs_ldrive) {
1963 /*
1964 * We've already attempted to parse the partition
1965 * table. If the block size used for that don't match
1966 * the PCFS sector size, we're hosed one way or the
1967 * other. Just try what happens.
1968 */
1969 secsize = fsp->pcfs_secsize;
1970 PC_DPRINTF1(3,
1971 "!pcfs: Using autodetected secsize %d\n",
1972 (int)secsize);
1973 } else {
1974 /*
1975 * This allows mounting lofi images of PCFS partitions
1976 * with sectorsize != DEV_BSIZE. We can't parse the
1977 * partition table on whole-disk images unless the
1978 * (undocumented) "secsize=..." mount option is used,
1979 * but at least this allows us to mount if we have
1980 * an image of a partition.
1981 */
1982 PC_DPRINTF1(3,
1983 "!pcfs: Using BPB secsize %d\n", (int)secsize);
1984 }
1985 }
1986
1987 if (fsp->pcfs_mediasize == 0) {
1988 mediasize = (len_t)totsec * (len_t)secsize;
1989 /*
1990 * This is not an error because not all devices support the
1991 * dkio(4I) mediasize queries, and/or not all devices are
1992 * partitioned. If we have not been able to figure out the
1993 * size of the underlaying medium, we have to trust the BPB.
1994 */
1995 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1996 "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1997 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1998 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1999 } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
2000 cmn_err(CE_WARN,
2001 "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
2002 "FAT BPB mediasize (%lld Bytes).\n"
2003 "truncated filesystem on device (%x.%x):%d, access errors "
2004 "possible.\n",
2005 (long long)fsp->pcfs_mediasize,
2006 (long long)(totsec * (blkcnt_t)secsize),
2007 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2008 fsp->pcfs_ldrive);
2009 mediasize = fsp->pcfs_mediasize;
2010 } else {
2011 /*
2012 * This is actually ok. A FAT needs not occupy the maximum
2013 * space available in its partition, it can be shorter.
2014 */
2015 mediasize = (len_t)totsec * (len_t)secsize;
2016 }
2017
2018 /*
2019 * Since we let just about anything pass through this function,
2020 * fence against divide-by-zero here.
2021 */
2022 if (secsize)
2023 rdirsec = roundup(rec * 32, secsize) / secsize;
2024 else
2025 rdirsec = 0;
2026
2027 /*
2028 * This assignment is necessary before pc_dbdaddr() can first be
2029 * used. Must initialize the value here.
2030 */
2031 fsp->pcfs_secsize = secsize;
2032 fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2033
2034 fsp->pcfs_mediasize = mediasize;
2035
2036 fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2037 fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2038 fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2039 fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2040 fsp->pcfs_rdirsec = rdirsec;
2041
2042 /*
2043 * Remember: All PCFS offset calculations in sectors. Before I/O
2044 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2045 * necessary so that media with > 512Byte sector sizes work correctly.
2046 */
2047 fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2048 fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2049 fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2050 datasec = totsec -
2051 (blkcnt_t)fatsec * fsp->pcfs_numfat -
2052 (blkcnt_t)rdirsec -
2053 (blkcnt_t)reserved;
2054
2055 DTRACE_PROBE4(fatgeometry,
2056 blkcnt_t, totsec, size_t, fatsec,
2057 size_t, rdirsec, blkcnt_t, datasec);
2058
2059 /*
2060 * 'totsec' is taken directly from the BPB and guaranteed to fit
2061 * into a 32bit unsigned integer. The calculation of 'datasec',
2062 * on the other hand, could underflow for incorrect values in
2063 * rdirsec/reserved/fatsec. Check for that.
2064 * We also check that the BPB conforms to the FAT specification's
2065 * requirement that either of the 16/32bit total sector counts
2066 * must be zero.
2067 */
2068 if (totsec != 0 &&
2069 (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2070 datasec < totsec && datasec <= UINT32_MAX)
2071 validflags |= BPB_TOTSEC_OK;
2072
2073 if ((len_t)totsec * (len_t)secsize <= mediasize)
2074 validflags |= BPB_MEDIASZ_OK;
2075
2076 if (VALID_SECSIZE(secsize))
2077 validflags |= BPB_SECSIZE_OK;
2078 if (VALID_SPCL(fsp->pcfs_spcl))
2079 validflags |= BPB_SECPERCLUS_OK;
2080 if (VALID_CLSIZE(fsp->pcfs_clsize))
2081 validflags |= BPB_CLSIZE_OK;
2082 if (VALID_NUMFATS(fsp->pcfs_numfat))
2083 validflags |= BPB_NUMFAT_OK;
2084 if (VALID_RSVDSEC(reserved) && reserved < totsec)
2085 validflags |= BPB_RSVDSECCNT_OK;
2086 if (VALID_MEDIA(fsp->pcfs_mediadesc))
2087 validflags |= BPB_MEDIADESC_OK;
2088 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2089 validflags |= BPB_BOOTSIG16_OK;
2090 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2091 validflags |= BPB_BOOTSIG32_OK;
2092 if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2093 validflags |= BPB_FSTYPSTR16_OK;
2094 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2095 validflags |= BPB_FSTYPSTR32_OK;
2096 if (VALID_OEMNAME(bpb_OEMName(bpb)))
2097 validflags |= BPB_OEMNAME_OK;
2098 if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2099 validflags |= BPB_BKBOOTSEC_OK;
2100 if (fsisec > 0 && fsisec <= reserved)
2101 validflags |= BPB_FSISEC_OK;
2102 if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2103 validflags |= BPB_JMPBOOT_OK;
2104 if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2105 validflags |= BPB_FSVER_OK;
2106 if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2107 validflags |= BPB_VOLLAB16_OK;
2108 if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2109 validflags |= BPB_VOLLAB32_OK;
2110 if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2111 validflags |= BPB_EXTFLAGS_OK;
2112
2113 /*
2114 * Try to determine which FAT format to use.
2115 *
2116 * Calculate the number of clusters in order to determine
2117 * the type of FAT we are looking at. This is the only
2118 * recommended way of determining FAT type, though there
2119 * are other hints in the data, this is the best way.
2120 *
2121 * Since we let just about "anything" pass through this function
2122 * without early exits, fence against divide-by-zero here.
2123 *
2124 * datasec was already validated against UINT32_MAX so we know
2125 * the result will not overflow the 32bit calculation.
2126 */
2127 if (fsp->pcfs_spcl)
2128 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2129 else
2130 ncl = 0;
2131
2132 fsp->pcfs_ncluster = ncl;
2133
2134 /*
2135 * From the Microsoft FAT specification:
2136 * In the following example, when it says <, it does not mean <=.
2137 * Note also that the numbers are correct. The first number for
2138 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2139 * and the '<' signs are not wrong.
2140 *
2141 * We "specialdetect" the corner cases, and use at least one "extra"
2142 * criterion to decide whether it's FAT16 or FAT32 if the cluster
2143 * count is dangerously close to the boundaries.
2144 */
2145
2146 if (ncl <= PCF_FIRSTCLUSTER) {
2147 type = FAT_UNKNOWN;
2148 } else if (ncl < 4085) {
2149 type = FAT12;
2150 } else if (ncl <= 4096) {
2151 type = FAT_QUESTIONABLE;
2152 } else if (ncl < 65525) {
2153 type = FAT16;
2154 } else if (ncl <= 65536) {
2155 type = FAT_QUESTIONABLE;
2156 } else if (ncl < PCF_LASTCLUSTER32) {
2157 type = FAT32;
2158 } else {
2159 type = FAT_UNKNOWN;
2160 }
2161
2162 DTRACE_PROBE4(parseBPB__initial,
2163 struct pcfs *, fsp, unsigned char *, bpb,
2164 int, validflags, fattype_t, type);
2165
2166 recheck:
2167 fsp->pcfs_fatsec = fatsec;
2168
2169 /* Do some final sanity checks for each specific type of FAT */
2170 switch (type) {
2171 case FAT12:
2172 if (rec != 0)
2173 validflags |= BPB_ROOTENTCNT_OK;
2174 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2175 bpb_get_TotSec16(bpb) == 0)
2176 validflags |= BPB_TOTSEC16_OK;
2177 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2178 bpb_get_TotSec32(bpb) == 0)
2179 validflags |= BPB_TOTSEC32_OK;
2180 if (bpb_get_FatSz16(bpb) == fatsec)
2181 validflags |= BPB_FATSZ16_OK;
2182 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER)
2183 * 3 / 2)
2184 validflags |= BPB_FATSZ_OK;
2185 if (ncl < 4085)
2186 validflags |= BPB_NCLUSTERS_OK;
2187
2188 fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2189 fsp->pcfs_rootblksize =
2190 fsp->pcfs_rdirsec * secsize;
2191 fsp->pcfs_fsistart = 0;
2192
2193 if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2194 type = FAT_UNKNOWN;
2195 break;
2196 case FAT16:
2197 if (rec != 0)
2198 validflags |= BPB_ROOTENTCNT_OK;
2199 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2200 bpb_get_TotSec16(bpb) == 0)
2201 validflags |= BPB_TOTSEC16_OK;
2202 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2203 bpb_get_TotSec32(bpb) == 0)
2204 validflags |= BPB_TOTSEC32_OK;
2205 if (bpb_get_FatSz16(bpb) == fatsec)
2206 validflags |= BPB_FATSZ16_OK;
2207 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2)
2208 validflags |= BPB_FATSZ_OK;
2209 if (ncl >= 4085 && ncl < 65525)
2210 validflags |= BPB_NCLUSTERS_OK;
2211
2212 fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2213 fsp->pcfs_rootblksize =
2214 fsp->pcfs_rdirsec * secsize;
2215 fsp->pcfs_fsistart = 0;
2216
2217 if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2218 type = FAT_UNKNOWN;
2219 break;
2220 case FAT32:
2221 if (rec == 0)
2222 validflags |= BPB_ROOTENTCNT_OK;
2223 if (bpb_get_TotSec16(bpb) == 0)
2224 validflags |= BPB_TOTSEC16_OK;
2225 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2226 validflags |= BPB_TOTSEC32_OK;
2227 if (bpb_get_FatSz16(bpb) == 0)
2228 validflags |= BPB_FATSZ16_OK;
2229 if (bpb_get_FatSz32(bpb) == fatsec)
2230 validflags |= BPB_FATSZ32_OK;
2231 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4)
2232 validflags |= BPB_FATSZ_OK;
2233 if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2234 validflags |= BPB_NCLUSTERS_OK;
2235
2236 fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2237 fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2238 fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2239 if (validflags & BPB_FSISEC_OK)
2240 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2241 fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2242 if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2243 validflags |= BPB_ROOTCLUSTER_OK;
2244
2245 /*
2246 * Current PCFS code only works if 'pcfs_rdirstart'
2247 * contains the root cluster number on FAT32.
2248 * That's a mis-use and would better be changed.
2249 */
2250 fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2251
2252 if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2253 type = FAT_UNKNOWN;
2254 break;
2255 case FAT_QUESTIONABLE:
2256 type = secondaryBPBChecks(fsp, bpb, secsize);
2257 goto recheck;
2258 default:
2259 ASSERT(type == FAT_UNKNOWN);
2260 break;
2261 }
2262
2263 ASSERT(type != FAT_QUESTIONABLE);
2264
2265 fsp->pcfs_fattype = type;
2266
2267 if (valid)
2268 *valid = validflags;
2269
2270 DTRACE_PROBE4(parseBPB__final,
2271 struct pcfs *, fsp, unsigned char *, bpb,
2272 int, validflags, fattype_t, type);
2273
2274 if (type != FAT_UNKNOWN) {
2275 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2276 ASSERT(ISP2(secsize / DEV_BSIZE));
2277 return (1);
2278 }
2279
2280 return (0);
2281 }
2282
2283
2284 /*
2285 * Detect the device's native block size (sector size).
2286 *
2287 * Test whether the device is:
2288 * - a floppy device from a known controller type via DKIOCINFO
2289 * - a real floppy using the fd(4D) driver and capable of fdio(4I) ioctls
2290 * - a USB floppy drive (identified by drive geometry)
2291 *
2292 * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2293 * to minimize risks due to slow I/O and user hotplugging / device ejection.
2294 *
2295 * This might be a bit wasteful on kernel stack space; if anyone's
2296 * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2297 */
2298 static void
pcfs_device_getinfo(struct pcfs * fsp)2299 pcfs_device_getinfo(struct pcfs *fsp)
2300 {
2301 dev_t rdev = fsp->pcfs_xdev;
2302 int error;
2303 union {
2304 struct dk_minfo mi;
2305 struct dk_cinfo ci;
2306 struct dk_geom gi;
2307 struct fd_char fc;
2308 } arg; /* save stackspace ... */
2309 intptr_t argp = (intptr_t)&arg;
2310 ldi_handle_t lh;
2311 ldi_ident_t li;
2312 int isfloppy, isremoveable, ishotpluggable;
2313 cred_t *cr = CRED();
2314
2315 if (ldi_ident_from_dev(rdev, &li))
2316 goto out;
2317
2318 error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2319 ldi_ident_release(li);
2320 if (error)
2321 goto out;
2322
2323 /*
2324 * Not sure if this could possibly happen. It'd be a bit like
2325 * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2326 * expecting it, needs some thought if triggered ...
2327 */
2328 ASSERT(fsp->pcfs_xdev == rdev);
2329
2330 /*
2331 * Check for removeable/hotpluggable media.
2332 */
2333 if (ldi_ioctl(lh, DKIOCREMOVABLE,
2334 (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2335 isremoveable = 0;
2336 }
2337 if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2338 (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2339 ishotpluggable = 0;
2340 }
2341
2342 /*
2343 * Make sure we don't use "half-initialized" values if the ioctls fail.
2344 */
2345 if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2346 bzero(&arg, sizeof (arg));
2347 fsp->pcfs_mediasize = 0;
2348 } else {
2349 fsp->pcfs_mediasize =
2350 (len_t)arg.mi.dki_lbsize *
2351 (len_t)arg.mi.dki_capacity;
2352 }
2353
2354 if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2355 if (fsp->pcfs_secsize == 0) {
2356 fsp->pcfs_secsize = arg.mi.dki_lbsize;
2357 fsp->pcfs_sdshift =
2358 ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2359 } else {
2360 PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2361 "%d, device (%x.%x), different from user-provided "
2362 "%d. User override - ignoring autodetect result.\n",
2363 arg.mi.dki_lbsize,
2364 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2365 fsp->pcfs_secsize);
2366 }
2367 } else if (arg.mi.dki_lbsize) {
2368 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2369 "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2370 "Ignoring autodetect result.\n",
2371 arg.mi.dki_lbsize,
2372 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2373 }
2374
2375 /*
2376 * We treat the following media types as a floppy by default.
2377 */
2378 isfloppy =
2379 (arg.mi.dki_media_type == DK_FLOPPY ||
2380 arg.mi.dki_media_type == DK_ZIP ||
2381 arg.mi.dki_media_type == DK_JAZ);
2382
2383 /*
2384 * if this device understands fdio(4I) requests it's
2385 * obviously a floppy drive.
2386 */
2387 if (!isfloppy &&
2388 !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2389 isfloppy = 1;
2390
2391 /*
2392 * some devices we like to treat as floppies, but they don't
2393 * understand fdio(4I) requests.
2394 */
2395 if (!isfloppy &&
2396 !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2397 (arg.ci.dki_ctype == DKC_WDC2880 ||
2398 arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2399 arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2400 arg.ci.dki_ctype == DKC_INTEL82077))
2401 isfloppy = 1;
2402
2403 /*
2404 * This is the "final fallback" test - media with
2405 * 2 heads and 80 cylinders are assumed to be floppies.
2406 * This is normally true for USB floppy drives ...
2407 */
2408 if (!isfloppy &&
2409 !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2410 (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2411 isfloppy = 1;
2412
2413 /*
2414 * This is similar to the "old" PCFS code that sets this flag
2415 * just based on the media descriptor being 0xf8 (MD_FIXED).
2416 * Should be re-worked. We really need some specialcasing for
2417 * removeable media.
2418 */
2419 if (!isfloppy) {
2420 fsp->pcfs_flags |= PCFS_NOCHK;
2421 }
2422
2423 /*
2424 * We automatically disable access time updates if the medium is
2425 * removeable and/or hotpluggable, and the admin did not explicitly
2426 * request access time updates (via the "atime" mount option).
2427 * The majority of flash-based media should fit this category.
2428 * Minimizing write access extends the lifetime of your memory stick !
2429 */
2430 if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2431 (isremoveable || ishotpluggable | isfloppy)) {
2432 fsp->pcfs_flags |= PCFS_NOATIME;
2433 }
2434
2435 (void) ldi_close(lh, FREAD, cr);
2436 out:
2437 if (fsp->pcfs_secsize == 0) {
2438 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2439 "device (%x.%x) failed, no user-provided fallback. "
2440 "Using %d bytes.\n",
2441 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2442 DEV_BSIZE);
2443 fsp->pcfs_secsize = DEV_BSIZE;
2444 fsp->pcfs_sdshift = 0;
2445 }
2446 ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2447 ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2448 }
2449
2450 /*
2451 * Get the FAT type for the DOS medium.
2452 *
2453 * -------------------------
2454 * According to Microsoft:
2455 * The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2456 * count of clusters on the volume and nothing else.
2457 * -------------------------
2458 *
2459 */
2460 static int
pc_getfattype(struct pcfs * fsp)2461 pc_getfattype(struct pcfs *fsp)
2462 {
2463 int error = 0;
2464 buf_t *bp = NULL;
2465 struct vnode *devvp = fsp->pcfs_devvp;
2466 dev_t dev = devvp->v_rdev;
2467
2468 /*
2469 * Detect the native block size of the medium, and attempt to
2470 * detect whether the medium is removeable.
2471 * We do treat removable media (floppies, USB and FireWire disks)
2472 * differently wrt. to the frequency and synchronicity of FAT updates.
2473 * We need to know the media block size in order to be able to
2474 * parse the partition table.
2475 */
2476 pcfs_device_getinfo(fsp);
2477
2478 /*
2479 * Unpartitioned media (floppies and some removeable devices)
2480 * don't have a partition table, the FAT BPB is at disk block 0.
2481 * Start out by reading block 0.
2482 */
2483 fsp->pcfs_dosstart = 0;
2484 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2485
2486 if (error = geterror(bp))
2487 goto out;
2488
2489 /*
2490 * If a logical drive number is requested, parse the partition table
2491 * and attempt to locate it. Otherwise, proceed immediately to the
2492 * BPB check. findTheDrive(), if successful, returns the disk block
2493 * number where the requested partition starts in "startsec".
2494 */
2495 if (fsp->pcfs_ldrive != 0) {
2496 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2497 "device (%x,%x):%d to find BPB\n",
2498 getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2499
2500 if (error = findTheDrive(fsp, &bp))
2501 goto out;
2502
2503 ASSERT(fsp->pcfs_dosstart != 0);
2504
2505 brelse(bp);
2506 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2507 fsp->pcfs_secsize);
2508 if (error = geterror(bp))
2509 goto out;
2510 }
2511
2512 /*
2513 * Validate the BPB and fill in the instance structure.
2514 */
2515 if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2516 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2517 "device (%x.%x):%d, disk LBA %u\n",
2518 getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2519 (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2520 error = EINVAL;
2521 goto out;
2522 }
2523
2524 ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2525
2526 out:
2527 /*
2528 * Release the buffer used
2529 */
2530 if (bp != NULL)
2531 brelse(bp);
2532 return (error);
2533 }
2534
2535
2536 /*
2537 * Get the file allocation table.
2538 * If there is an old FAT, invalidate it.
2539 */
2540 int
pc_getfat(struct pcfs * fsp)2541 pc_getfat(struct pcfs *fsp)
2542 {
2543 struct buf *bp = NULL;
2544 uchar_t *fatp = NULL;
2545 uchar_t *fat_changemap = NULL;
2546 int error;
2547 int fat_changemapsize;
2548 int flags = 0;
2549 int nfat;
2550 int altfat_mustmatch = 0;
2551 int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2552
2553 if (fsp->pcfs_fatp) {
2554 /*
2555 * There is a FAT in core.
2556 * If there are open file pcnodes or we have modified it or
2557 * it hasn't timed out yet use the in core FAT.
2558 * Otherwise invalidate it and get a new one
2559 */
2560 #ifdef notdef
2561 if (fsp->pcfs_frefs ||
2562 (fsp->pcfs_flags & PCFS_FATMOD) ||
2563 (gethrestime_sec() < fsp->pcfs_fattime)) {
2564 return (0);
2565 } else {
2566 mutex_enter(&pcfslock);
2567 pc_invalfat(fsp);
2568 mutex_exit(&pcfslock);
2569 }
2570 #endif /* notdef */
2571 return (0);
2572 }
2573
2574 /*
2575 * Get FAT and check it for validity
2576 */
2577 fatp = kmem_alloc(fatsize, KM_SLEEP);
2578 error = pc_readfat(fsp, fatp);
2579 if (error) {
2580 flags = B_ERROR;
2581 goto out;
2582 }
2583 fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2584 fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2585 fsp->pcfs_fatp = fatp;
2586 fsp->pcfs_fat_changemapsize = fat_changemapsize;
2587 fsp->pcfs_fat_changemap = fat_changemap;
2588
2589 /*
2590 * The only definite signature check is that the
2591 * media descriptor byte should match the first byte
2592 * of the FAT block.
2593 */
2594 if (fatp[0] != fsp->pcfs_mediadesc) {
2595 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2596 "media descriptor %x, FAT[0] lowbyte %x\n",
2597 (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2598 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2599 altfat_mustmatch = 1;
2600 }
2601
2602 /*
2603 * Get alternate FATs and check for consistency
2604 * This is an inlined version of pc_readfat().
2605 * Since we're only comparing FAT and alternate FAT,
2606 * there's no reason to let pc_readfat() copy data out
2607 * of the buf. Instead, compare in-situ, one cluster
2608 * at a time.
2609 */
2610 for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2611 size_t startsec;
2612 size_t off;
2613
2614 startsec = pc_dbdaddr(fsp,
2615 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2616
2617 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2618 daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2619 pc_cltodb(fsp, pc_lblkno(fsp, off)));
2620
2621 bp = bread(fsp->pcfs_xdev, fatblk,
2622 MIN(fsp->pcfs_clsize, fatsize - off));
2623 if (bp->b_flags & (B_ERROR | B_STALE)) {
2624 cmn_err(CE_NOTE,
2625 "!pcfs: alternate FAT #%d (start LBA %p)"
2626 " read error at offset %ld on device"
2627 " (%x.%x):%d",
2628 nfat, (void *)(uintptr_t)startsec, off,
2629 getmajor(fsp->pcfs_xdev),
2630 getminor(fsp->pcfs_xdev),
2631 fsp->pcfs_ldrive);
2632 flags = B_ERROR;
2633 error = EIO;
2634 goto out;
2635 }
2636 bp->b_flags |= B_STALE | B_AGE;
2637 if (bcmp(bp->b_un.b_addr, fatp + off,
2638 MIN(fsp->pcfs_clsize, fatsize - off))) {
2639 cmn_err(CE_NOTE,
2640 "!pcfs: alternate FAT #%d (start LBA %p)"
2641 " corrupted at offset %ld on device"
2642 " (%x.%x):%d",
2643 nfat, (void *)(uintptr_t)startsec, off,
2644 getmajor(fsp->pcfs_xdev),
2645 getminor(fsp->pcfs_xdev),
2646 fsp->pcfs_ldrive);
2647 if (altfat_mustmatch) {
2648 flags = B_ERROR;
2649 error = EIO;
2650 goto out;
2651 }
2652 }
2653 brelse(bp);
2654 bp = NULL; /* prevent double release */
2655 }
2656 }
2657
2658 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2659 fsp->pcfs_fatjustread = 1;
2660
2661 /*
2662 * Retrieve FAT32 fsinfo sector.
2663 * A failure to read this is not fatal to accessing the volume.
2664 * It simply means operations that count or search free blocks
2665 * will have to do a full FAT walk, vs. a possibly quicker lookup
2666 * of the summary information.
2667 * Hence, we log a message but return success overall after this point.
2668 */
2669 if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2670 struct fat_od_fsi *fsinfo_disk;
2671
2672 bp = bread(fsp->pcfs_xdev,
2673 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2674 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2675 if (bp->b_flags & (B_ERROR | B_STALE) ||
2676 !FSISIG_OK(fsinfo_disk)) {
2677 cmn_err(CE_NOTE,
2678 "!pcfs: error reading fat32 fsinfo from "
2679 "device (%x.%x):%d, block %lld",
2680 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2681 fsp->pcfs_ldrive,
2682 (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2683 fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2684 fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2685 fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2686 } else {
2687 bp->b_flags |= B_STALE | B_AGE;
2688 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2689 fsp->pcfs_fsinfo.fs_free_clusters =
2690 LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2691 fsp->pcfs_fsinfo.fs_next_free =
2692 LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2693 }
2694 brelse(bp);
2695 bp = NULL;
2696 }
2697
2698 if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2699 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2700 else
2701 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2702
2703 return (0);
2704
2705 out:
2706 cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2707 if (bp)
2708 brelse(bp);
2709 if (fatp)
2710 kmem_free(fatp, fatsize);
2711 if (fat_changemap)
2712 kmem_free(fat_changemap, fat_changemapsize);
2713
2714 if (flags) {
2715 pc_mark_irrecov(fsp);
2716 }
2717 return (error);
2718 }
2719