1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/kmem.h>
30 #include <sys/user.h>
31 #include <sys/proc.h>
32 #include <sys/cred.h>
33 #include <sys/disp.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/vnode.h>
38 #include <sys/fdio.h>
39 #include <sys/file.h>
40 #include <sys/uio.h>
41 #include <sys/conf.h>
42 #include <sys/statvfs.h>
43 #include <sys/mount.h>
44 #include <sys/pathname.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/sysmacros.h>
48 #include <sys/conf.h>
49 #include <sys/mkdev.h>
50 #include <sys/swap.h>
51 #include <sys/sunddi.h>
52 #include <sys/sunldi.h>
53 #include <sys/dktp/fdisk.h>
54 #include <sys/fs/pc_label.h>
55 #include <sys/fs/pc_fs.h>
56 #include <sys/fs/pc_dir.h>
57 #include <sys/fs/pc_node.h>
58 #include <fs/fs_subr.h>
59 #include <sys/modctl.h>
60 #include <sys/dkio.h>
61 #include <sys/open.h>
62 #include <sys/mntent.h>
63 #include <sys/policy.h>
64 #include <sys/atomic.h>
65 #include <sys/sdt.h>
66
67 /*
68 * The majority of PC media use a 512 sector size, but
69 * occasionally you will run across a 1k sector size.
70 * For media with a 1k sector size, fd_strategy() requires
71 * the I/O size to be a 1k multiple; so when the sector size
72 * is not yet known, always read 1k.
73 */
74 #define PC_SAFESECSIZE (PC_SECSIZE * 2)
75
76 static int pcfs_pseudo_floppy(dev_t);
77
78 static int pcfsinit(int, char *);
79 static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
80 struct cred *);
81 static int pcfs_unmount(struct vfs *, int, struct cred *);
82 static int pcfs_root(struct vfs *, struct vnode **);
83 static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
84 static int pc_syncfsnodes(struct pcfs *);
85 static int pcfs_sync(struct vfs *, short, struct cred *);
86 static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
87 static void pcfs_freevfs(vfs_t *vfsp);
88
89 static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
90 static int pc_writefat(struct pcfs *fsp, daddr_t start);
91
92 static int pc_getfattype(struct pcfs *fsp);
93 static void pcfs_parse_mntopts(struct pcfs *fsp);
94
95
96 /*
97 * pcfs mount options table
98 */
99
100 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
101 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
102 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
103 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
104 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
105 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
106 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
107 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
108
109 static mntopt_t mntopts[] = {
110 /*
111 * option name cancel option default arg flags opt data
112 */
113 { MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
114 { MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
115 { MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
116 { MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
117 { MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
118 { MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
119 { MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
120 { MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
121 { MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
122 { MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
123 };
124
125 static mntopts_t pcfs_mntopts = {
126 sizeof (mntopts) / sizeof (mntopt_t),
127 mntopts
128 };
129
/* PCFS debug level; starts out as 0. */
int pcfsdebuglevel = 0;

/*
 * pcfslock:	protects the list of mounted pc filesystems "pc_mounttab".
 * pcfs_lock:	(inside per filesystem structure "pcfs")
 *		per filesystem lock. Most of the vfsops and vnodeops are
 *		protected by this lock.
 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
 *
 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 *
 * pcfs_mountcount:	used to prevent module unloads while there is still
 *			pcfs state from a former mount hanging around. With
 *			forced umount support, the filesystem module must not
 *			be allowed to go away before the last VFS_FREEVFS()
 *			call has been made.
 *			Since this is just an atomic counter, there's no need
 *			for locking.
 */
kmutex_t	pcfslock;
krwlock_t	pcnodes_lock;
uint32_t	pcfs_mountcount;

/* Filesystem type index; recorded by pcfsinit() and used again by _fini(). */
static int pcfstype;
154
/*
 * Filesystem definition: version, the name "pcfs", the init routine
 * pcfsinit(), the VSW_* capability flags and the mount option prototype.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	"pcfs",
	pcfsinit,
	VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI,
	&pcfs_mntopts
};

extern struct mod_ops mod_fsops;

/* Filesystem module description, tying mod_fsops to the definition above. */
static struct modlfs modlfs = {
	&mod_fsops,
	"PC filesystem",
	&vfw
};

/* Linkage structure passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlfs,
	NULL
};
176
177 int
_init(void)178 _init(void)
179 {
180 int error;
181
182 #if !defined(lint)
183 /* make sure the on-disk structures are sane */
184 ASSERT(sizeof (struct pcdir) == 32);
185 ASSERT(sizeof (struct pcdir_lfn) == 32);
186 #endif
187 mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
188 rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
189 error = mod_install(&modlinkage);
190 if (error) {
191 mutex_destroy(&pcfslock);
192 rw_destroy(&pcnodes_lock);
193 }
194 return (error);
195 }
196
197 int
_fini(void)198 _fini(void)
199 {
200 int error;
201
202 /*
203 * If a forcedly unmounted instance is still hanging around,
204 * we cannot allow the module to be unloaded because that would
205 * cause panics once the VFS framework decides it's time to call
206 * into VFS_FREEVFS().
207 */
208 if (pcfs_mountcount)
209 return (EBUSY);
210
211 error = mod_remove(&modlinkage);
212 if (error)
213 return (error);
214 mutex_destroy(&pcfslock);
215 rw_destroy(&pcnodes_lock);
216 /*
217 * Tear down the operations vectors
218 */
219 (void) vfs_freevfsops_by_type(pcfstype);
220 vn_freevnodeops(pcfs_fvnodeops);
221 vn_freevnodeops(pcfs_dvnodeops);
222 return (0);
223 }
224
225 int
_info(struct modinfo * modinfop)226 _info(struct modinfo *modinfop)
227 {
228 return (mod_info(&modlinkage, modinfop));
229 }
230
231 /* ARGSUSED1 */
232 static int
pcfsinit(int fstype,char * name)233 pcfsinit(int fstype, char *name)
234 {
235 static const fs_operation_def_t pcfs_vfsops_template[] = {
236 VFSNAME_MOUNT, { .vfs_mount = pcfs_mount },
237 VFSNAME_UNMOUNT, { .vfs_unmount = pcfs_unmount },
238 VFSNAME_ROOT, { .vfs_root = pcfs_root },
239 VFSNAME_STATVFS, { .vfs_statvfs = pcfs_statvfs },
240 VFSNAME_SYNC, { .vfs_sync = pcfs_sync },
241 VFSNAME_VGET, { .vfs_vget = pcfs_vget },
242 VFSNAME_FREEVFS, { .vfs_freevfs = pcfs_freevfs },
243 NULL, NULL
244 };
245 int error;
246
247 error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
248 if (error != 0) {
249 cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
250 return (error);
251 }
252
253 error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
254 if (error != 0) {
255 (void) vfs_freevfsops_by_type(fstype);
256 cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
257 return (error);
258 }
259
260 error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
261 if (error != 0) {
262 (void) vfs_freevfsops_by_type(fstype);
263 vn_freevnodeops(pcfs_fvnodeops);
264 cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
265 return (error);
266 }
267
268 pcfstype = fstype;
269 (void) pc_init();
270 pcfs_mountcount = 0;
271 return (0);
272 }
273
/*
 * Head of the singly linked list (chained through pcfs_nxt) of mounted
 * PCFS instances.  Protected by pcfslock.
 */
static struct pcfs *pc_mounttab = NULL;

extern struct pcfs_args pc_tz;

/*
 * Define some special logical drives we use internal to this file.
 * Logical drive 0 means the device is used as a whole (no partition
 * selected); 99 stands for the Solaris x86 boot partition.
 */
#define	BOOT_PARTITION_DRIVE	99
#define	PRIMARY_DOS_DRIVE	1
#define	UNPARTITIONED_DRIVE	0
284
285 static int
pcfs_device_identify(struct vfs * vfsp,struct mounta * uap,struct cred * cr,int * dos_ldrive,dev_t * xdev)286 pcfs_device_identify(
287 struct vfs *vfsp,
288 struct mounta *uap,
289 struct cred *cr,
290 int *dos_ldrive,
291 dev_t *xdev)
292 {
293 struct pathname special;
294 char *c;
295 struct vnode *svp = NULL;
296 struct vnode *lvp = NULL;
297 int oflag, aflag;
298 int error;
299
300 /*
301 * Resolve path name of special file being mounted.
302 */
303 if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
304 return (error);
305 }
306
307 *dos_ldrive = -1;
308
309 if (error =
310 lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
311 /*
312 * If there's no device node, the name specified most likely
313 * maps to a PCFS-style "partition specifier" to select a
314 * harddisk primary/logical partition. Disable floppy-specific
315 * checks in such cases unless an explicit :A or :B is
316 * requested.
317 */
318
319 /*
320 * Split the pathname string at the last ':' separator.
321 * If there's no ':' in the device name, or the ':' is the
322 * last character in the string, the name is invalid and
323 * the error from the previous lookup will be returned.
324 */
325 c = strrchr(special.pn_path, ':');
326 if (c == NULL || strlen(c) == 0)
327 goto devlookup_done;
328
329 *c++ = '\0';
330
331 /*
332 * PCFS partition name suffixes can be:
333 * - "boot" to indicate the X86BOOT partition
334 * - a drive letter [c-z] for the "DOS logical drive"
335 * - a drive number 1..24 for the "DOS logical drive"
336 * - a "floppy name letter", 'a' or 'b' (just strip this)
337 */
338 if (strcasecmp(c, "boot") == 0) {
339 /*
340 * The Solaris boot partition is requested.
341 */
342 *dos_ldrive = BOOT_PARTITION_DRIVE;
343 } else if (strspn(c, "0123456789") == strlen(c)) {
344 /*
345 * All digits - parse the partition number.
346 */
347 long drvnum = 0;
348
349 if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
350 /*
351 * A number alright - in the allowed range ?
352 */
353 if (drvnum > 24 || drvnum == 0)
354 error = ENXIO;
355 }
356 if (error)
357 goto devlookup_done;
358 *dos_ldrive = (int)drvnum;
359 } else if (strlen(c) == 1) {
360 /*
361 * A single trailing character was specified.
362 * - [c-zC-Z] means a harddisk partition, and
363 * we retrieve the partition number.
364 * - [abAB] means a floppy drive, so we swallow
365 * the "drive specifier" and test later
366 * whether the physical device is a floppy.
367 */
368 *c = tolower(*c);
369 if (*c == 'a' || *c == 'b') {
370 *dos_ldrive = UNPARTITIONED_DRIVE;
371 } else if (*c < 'c' || *c > 'z') {
372 error = ENXIO;
373 goto devlookup_done;
374 } else {
375 *dos_ldrive = 1 + *c - 'c';
376 }
377 } else {
378 /*
379 * Can't parse this - pass through previous error.
380 */
381 goto devlookup_done;
382 }
383
384
385 error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
386 NULLVPP, &svp);
387 } else {
388 *dos_ldrive = UNPARTITIONED_DRIVE;
389 }
390 devlookup_done:
391 pn_free(&special);
392 if (error)
393 return (error);
394
395 ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
396
397 /*
398 * Verify caller's permission to open the device special file.
399 */
400 if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
401 ((uap->flags & MS_RDONLY) != 0)) {
402 oflag = FREAD;
403 aflag = VREAD;
404 } else {
405 oflag = FREAD | FWRITE;
406 aflag = VREAD | VWRITE;
407 }
408
409 error = vfs_get_lofi(vfsp, &lvp);
410
411 if (error > 0) {
412 if (error == ENOENT)
413 error = ENODEV;
414 goto out;
415 } else if (error == 0) {
416 *xdev = lvp->v_rdev;
417 } else {
418 *xdev = svp->v_rdev;
419
420 if (svp->v_type != VBLK) {
421 error = ENOTBLK;
422 goto out;
423 }
424
425 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
426 goto out;
427 }
428
429 if (getmajor(*xdev) >= devcnt) {
430 error = ENXIO;
431 goto out;
432 }
433
434 if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
435 goto out;
436
437 out:
438 if (svp != NULL)
439 VN_RELE(svp);
440 if (lvp != NULL)
441 VN_RELE(lvp);
442 return (error);
443 }
444
445 static int
pcfs_device_ismounted(struct vfs * vfsp,int dos_ldrive,dev_t xdev,int * remounting,dev_t * pseudodev)446 pcfs_device_ismounted(
447 struct vfs *vfsp,
448 int dos_ldrive,
449 dev_t xdev,
450 int *remounting,
451 dev_t *pseudodev)
452 {
453 struct pcfs *fsp;
454 int remount = *remounting;
455
456 /*
457 * Ensure that this logical drive isn't already mounted, unless
458 * this is a REMOUNT request.
459 * Note: The framework will perform this check if the "...:c"
460 * PCFS-style "logical drive" syntax has not been used and an
461 * actually existing physical device is backing this filesystem.
462 * Once all block device drivers support PC-style partitioning,
463 * this codeblock can be dropped.
464 */
465 *pseudodev = xdev;
466
467 if (dos_ldrive) {
468 mutex_enter(&pcfslock);
469 for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
470 if (fsp->pcfs_xdev == xdev &&
471 fsp->pcfs_ldrive == dos_ldrive) {
472 mutex_exit(&pcfslock);
473 if (remount) {
474 return (0);
475 } else {
476 return (EBUSY);
477 }
478 }
479 /*
480 * Assign a unique device number for the vfs
481 * The old way (getudev() + a constantly incrementing
482 * major number) was wrong because it changes vfs_dev
483 * across mounts and reboots, which breaks nfs file handles.
484 * UFS just uses the real dev_t. We can't do that because
485 * of the way pcfs opens fdisk partitons (the :c and :d
486 * partitions are on the same dev_t). Though that _might_
487 * actually be ok, since the file handle contains an
488 * absolute block number, it's probably better to make them
489 * different. So I think we should retain the original
490 * dev_t, but come up with a different minor number based
491 * on the logical drive that will _always_ come up the same.
492 * For now, we steal the upper 6 bits.
493 */
494 #ifdef notdef
495 /* what should we do here? */
496 if (((getminor(xdev) >> 12) & 0x3F) != 0)
497 printf("whoops - upper bits used!\n");
498 #endif
499 *pseudodev = makedevice(getmajor(xdev),
500 ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
501 if (vfs_devmounting(*pseudodev, vfsp)) {
502 mutex_exit(&pcfslock);
503 return (EBUSY);
504 }
505 if (vfs_devismounted(*pseudodev)) {
506 mutex_exit(&pcfslock);
507 if (remount) {
508 return (0);
509 } else {
510 return (EBUSY);
511 }
512 }
513 mutex_exit(&pcfslock);
514 } else {
515 *pseudodev = xdev;
516 if (vfs_devmounting(*pseudodev, vfsp)) {
517 return (EBUSY);
518 }
519 if (vfs_devismounted(*pseudodev))
520 if (remount) {
521 return (0);
522 } else {
523 return (EBUSY);
524 }
525 }
526
527 /*
528 * This is not a remount. Even if MS_REMOUNT was requested,
529 * the caller needs to proceed as it would on an ordinary
530 * mount.
531 */
532 *remounting = 0;
533
534 ASSERT(*pseudodev);
535 return (0);
536 }
537
538 /*
539 * Get the PCFS-specific mount options from the VFS framework.
540 * For "timezone" and "secsize", we need to parse the number
541 * ourselves and ensure its validity.
542 * Note: "secsize" is deliberately undocumented at this time,
543 * it's a workaround for devices (particularly: lofi image files)
544 * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
545 */
546 static void
pcfs_parse_mntopts(struct pcfs * fsp)547 pcfs_parse_mntopts(struct pcfs *fsp)
548 {
549 char *c;
550 char *endptr;
551 long l;
552 struct vfs *vfsp = fsp->pcfs_vfs;
553
554 ASSERT(fsp->pcfs_secondswest == 0);
555 ASSERT(fsp->pcfs_secsize == 0);
556
557 if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
558 fsp->pcfs_flags |= PCFS_HIDDEN;
559 if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
560 fsp->pcfs_flags |= PCFS_FOLDCASE;
561 if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
562 fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
563 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
564 fsp->pcfs_flags |= PCFS_NOATIME;
565
566 if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
567 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
568 endptr == c + strlen(c)) {
569 /*
570 * A number alright - in the allowed range ?
571 */
572 if (l <= -12*3600 || l >= 12*3600) {
573 cmn_err(CE_WARN, "!pcfs: invalid use of "
574 "'timezone' mount option - %ld "
575 "is out of range. Assuming 0.", l);
576 l = 0;
577 }
578 } else {
579 cmn_err(CE_WARN, "!pcfs: invalid use of "
580 "'timezone' mount option - argument %s "
581 "is not a valid number. Assuming 0.", c);
582 l = 0;
583 }
584 fsp->pcfs_secondswest = l;
585 }
586
587 /*
588 * The "secsize=..." mount option is a workaround for the lack of
589 * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
590 * partition table of a disk image and it has been partitioned with
591 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
592 * images.
593 * That should really be fixed in lofi ... this is a workaround.
594 */
595 if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
596 if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
597 endptr == c + strlen(c)) {
598 /*
599 * A number alright - a valid sector size as well ?
600 */
601 if (!VALID_SECSIZE(l)) {
602 cmn_err(CE_WARN, "!pcfs: invalid use of "
603 "'secsize' mount option - %ld is "
604 "unsupported. Autodetecting.", l);
605 l = 0;
606 }
607 } else {
608 cmn_err(CE_WARN, "!pcfs: invalid use of "
609 "'secsize' mount option - argument %s "
610 "is not a valid number. Autodetecting.", c);
611 l = 0;
612 }
613 fsp->pcfs_secsize = l;
614 fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
615 }
616 }
617
618 /*
619 * vfs operations
620 */
621
622 /*
623 * pcfs_mount - backend for VFS_MOUNT() on PCFS.
624 */
625 static int
pcfs_mount(struct vfs * vfsp,struct vnode * mvp,struct mounta * uap,struct cred * cr)626 pcfs_mount(
627 struct vfs *vfsp,
628 struct vnode *mvp,
629 struct mounta *uap,
630 struct cred *cr)
631 {
632 struct pcfs *fsp;
633 struct vnode *devvp;
634 dev_t pseudodev;
635 dev_t xdev;
636 int dos_ldrive = 0;
637 int error;
638 int remounting;
639
640 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
641 return (error);
642
643 if (mvp->v_type != VDIR)
644 return (ENOTDIR);
645
646 mutex_enter(&mvp->v_lock);
647 if ((uap->flags & MS_REMOUNT) == 0 &&
648 (uap->flags & MS_OVERLAY) == 0 &&
649 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
650 mutex_exit(&mvp->v_lock);
651 return (EBUSY);
652 }
653 mutex_exit(&mvp->v_lock);
654
655 /*
656 * PCFS doesn't do mount arguments anymore - everything's a mount
657 * option these days. In order not to break existing callers, we
658 * don't reject it yet, just warn that the data (if any) is ignored.
659 */
660 if (uap->datalen != 0)
661 cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
662 "mount argument structures instead of mount options. "
663 "Ignoring mount(2) 'dataptr' argument.");
664
665 /*
666 * This is needed early, to make sure the access / open calls
667 * are done using the correct mode. Processing this mount option
668 * only when calling pcfs_parse_mntopts() would lead us to attempt
669 * a read/write access to a possibly writeprotected device, and
670 * a readonly mount attempt might fail because of that.
671 */
672 if (uap->flags & MS_RDONLY) {
673 vfsp->vfs_flag |= VFS_RDONLY;
674 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
675 }
676
677 /*
678 * For most filesystems, this is just a lookupname() on the
679 * mount pathname string. PCFS historically has to do its own
680 * partition table parsing because not all Solaris architectures
681 * support all styles of partitioning that PC media can have, and
682 * hence PCFS understands "device names" that don't map to actual
683 * physical device nodes. Parsing the "PCFS syntax" for device
684 * names is done in pcfs_device_identify() - see there.
685 *
686 * Once all block device drivers that can host FAT filesystems have
687 * been enhanced to create device nodes for all PC-style partitions,
688 * this code can go away.
689 */
690 if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
691 return (error);
692
693 /*
694 * As with looking up the actual device to mount, PCFS cannot rely
695 * on just the checks done by vfs_ismounted() whether a given device
696 * is mounted already. The additional check against the "PCFS syntax"
697 * is done in pcfs_device_ismounted().
698 */
699 remounting = (uap->flags & MS_REMOUNT);
700
701 if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
702 &pseudodev))
703 return (error);
704
705 if (remounting)
706 return (0);
707
708 /*
709 * Mount the filesystem.
710 * An instance structure is required before the attempt to locate
711 * and parse the FAT BPB. This is because mount options may change
712 * the behaviour of the filesystem type matching code. Precreate
713 * it and fill it in to a degree that allows parsing the mount
714 * options.
715 */
716 devvp = makespecvp(xdev, VBLK);
717 if (IS_SWAPVP(devvp)) {
718 VN_RELE(devvp);
719 return (EBUSY);
720 }
721 error = VOP_OPEN(&devvp,
722 (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
723 if (error) {
724 VN_RELE(devvp);
725 return (error);
726 }
727
728 fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
729 fsp->pcfs_vfs = vfsp;
730 fsp->pcfs_xdev = xdev;
731 fsp->pcfs_devvp = devvp;
732 fsp->pcfs_ldrive = dos_ldrive;
733 mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
734
735 pcfs_parse_mntopts(fsp);
736
737 /*
738 * This is the actual "mount" - the PCFS superblock check.
739 *
740 * Find the requested logical drive and the FAT BPB therein.
741 * Check device type and flag the instance if media is removeable.
742 *
743 * Initializes most members of the filesystem instance structure.
744 * Returns EINVAL if no valid BPB can be found. Other errors may
745 * occur after I/O failures, or when invalid / unparseable partition
746 * tables are encountered.
747 */
748 if (error = pc_getfattype(fsp))
749 goto errout;
750
751 /*
752 * Now that the BPB has been parsed, this structural information
753 * is available and known to be valid. Initialize the VFS.
754 */
755 vfsp->vfs_data = fsp;
756 vfsp->vfs_dev = pseudodev;
757 vfsp->vfs_fstype = pcfstype;
758 vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
759 vfsp->vfs_bcount = 0;
760 vfsp->vfs_bsize = fsp->pcfs_clsize;
761
762 /*
763 * Validate that we can access the FAT and that it is, to the
764 * degree we can verify here, self-consistent.
765 */
766 if (error = pc_verify(fsp))
767 goto errout;
768
769 /*
770 * Record the time of the mount, to return as an "approximate"
771 * timestamp for the FAT root directory. Since FAT roots don't
772 * have timestamps, this is less confusing to the user than
773 * claiming "zero" / Jan/01/1970.
774 */
775 gethrestime(&fsp->pcfs_mounttime);
776
777 /*
778 * Fix up the mount options. Because "noatime" is made default on
779 * removeable media only, a fixed disk will have neither "atime"
780 * nor "noatime" set. We set the options explicitly depending on
781 * the PCFS_NOATIME flag, to inform the user of what applies.
782 * Mount option cancellation will take care that the mutually
783 * exclusive 'other' is cleared.
784 */
785 vfs_setmntopt(vfsp,
786 fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
787 NULL, 0);
788
789 /*
790 * All clear - insert the FS instance into PCFS' list.
791 */
792 mutex_enter(&pcfslock);
793 fsp->pcfs_nxt = pc_mounttab;
794 pc_mounttab = fsp;
795 mutex_exit(&pcfslock);
796 atomic_inc_32(&pcfs_mountcount);
797 return (0);
798
799 errout:
800 (void) VOP_CLOSE(devvp,
801 vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
802 1, (offset_t)0, cr, NULL);
803 VN_RELE(devvp);
804 mutex_destroy(&fsp->pcfs_lock);
805 kmem_free(fsp, sizeof (*fsp));
806 return (error);
807
808 }
809
810 static int
pcfs_unmount(struct vfs * vfsp,int flag,struct cred * cr)811 pcfs_unmount(
812 struct vfs *vfsp,
813 int flag,
814 struct cred *cr)
815 {
816 struct pcfs *fsp, *fsp1;
817
818 if (secpolicy_fs_unmount(cr, vfsp) != 0)
819 return (EPERM);
820
821 fsp = VFSTOPCFS(vfsp);
822
823 /*
824 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
825 * prevent lookuppn from crossing the mount point.
826 * If this is not a forced umount request and there's ongoing I/O,
827 * don't allow the mount to proceed.
828 */
829 if (flag & MS_FORCE)
830 vfsp->vfs_flag |= VFS_UNMOUNTED;
831 else if (fsp->pcfs_nrefs)
832 return (EBUSY);
833
834 mutex_enter(&pcfslock);
835
836 /*
837 * If this is a forced umount request or if the fs instance has
838 * been marked as beyond recovery, allow the umount to proceed
839 * regardless of state. pc_diskchanged() forcibly releases all
840 * inactive vnodes/pcnodes.
841 */
842 if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
843 rw_enter(&pcnodes_lock, RW_WRITER);
844 pc_diskchanged(fsp);
845 rw_exit(&pcnodes_lock);
846 }
847
848 /* now there should be no pcp node on pcfhead or pcdhead. */
849
850 if (fsp == pc_mounttab) {
851 pc_mounttab = fsp->pcfs_nxt;
852 } else {
853 for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
854 if (fsp1->pcfs_nxt == fsp)
855 fsp1->pcfs_nxt = fsp->pcfs_nxt;
856 }
857
858 mutex_exit(&pcfslock);
859
860 /*
861 * Since we support VFS_FREEVFS(), there's no need to
862 * free the fsp right now. The framework will tell us
863 * when the right time to do so has arrived by calling
864 * into pcfs_freevfs.
865 */
866 return (0);
867 }
868
869 /*
870 * find root of pcfs
871 */
872 static int
pcfs_root(struct vfs * vfsp,struct vnode ** vpp)873 pcfs_root(
874 struct vfs *vfsp,
875 struct vnode **vpp)
876 {
877 struct pcfs *fsp;
878 struct pcnode *pcp;
879 int error;
880
881 fsp = VFSTOPCFS(vfsp);
882 if (error = pc_lockfs(fsp, 0, 0))
883 return (error);
884
885 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
886 pc_unlockfs(fsp);
887 *vpp = PCTOV(pcp);
888 pcp->pc_flags |= PC_EXTERNAL;
889 return (0);
890 }
891
892 /*
893 * Get file system statistics.
894 */
895 static int
pcfs_statvfs(struct vfs * vfsp,struct statvfs64 * sp)896 pcfs_statvfs(
897 struct vfs *vfsp,
898 struct statvfs64 *sp)
899 {
900 struct pcfs *fsp;
901 int error;
902 dev32_t d32;
903
904 fsp = VFSTOPCFS(vfsp);
905 error = pc_getfat(fsp);
906 if (error)
907 return (error);
908 bzero(sp, sizeof (*sp));
909 sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
910 sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
911 sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
912 sp->f_files = (fsfilcnt64_t)-1;
913 sp->f_ffree = (fsfilcnt64_t)-1;
914 sp->f_favail = (fsfilcnt64_t)-1;
915 #ifdef notdef
916 (void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
917 #endif /* notdef */
918 (void) cmpldev(&d32, vfsp->vfs_dev);
919 sp->f_fsid = d32;
920 (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
921 sp->f_flag = vf_to_stf(vfsp->vfs_flag);
922 sp->f_namemax = PCMAXNAMLEN;
923 return (0);
924 }
925
926 static int
pc_syncfsnodes(struct pcfs * fsp)927 pc_syncfsnodes(struct pcfs *fsp)
928 {
929 struct pchead *hp;
930 struct pcnode *pcp;
931 int error;
932
933 if (error = pc_lockfs(fsp, 0, 0))
934 return (error);
935
936 if (!(error = pc_syncfat(fsp))) {
937 hp = pcfhead;
938 while (hp < & pcfhead [ NPCHASH ]) {
939 rw_enter(&pcnodes_lock, RW_READER);
940 pcp = hp->pch_forw;
941 while (pcp != (struct pcnode *)hp) {
942 if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
943 if (error = pc_nodesync(pcp))
944 break;
945 pcp = pcp -> pc_forw;
946 }
947 rw_exit(&pcnodes_lock);
948 if (error)
949 break;
950 hp++;
951 }
952 }
953 pc_unlockfs(fsp);
954 return (error);
955 }
956
957 /*
958 * Flush any pending I/O.
959 */
960 /*ARGSUSED*/
961 static int
pcfs_sync(struct vfs * vfsp,short flag,struct cred * cr)962 pcfs_sync(
963 struct vfs *vfsp,
964 short flag,
965 struct cred *cr)
966 {
967 struct pcfs *fsp;
968 int error = 0;
969
970 /* this prevents the filesystem from being umounted. */
971 mutex_enter(&pcfslock);
972 if (vfsp != NULL) {
973 fsp = VFSTOPCFS(vfsp);
974 if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
975 error = pc_syncfsnodes(fsp);
976 } else {
977 rw_enter(&pcnodes_lock, RW_WRITER);
978 pc_diskchanged(fsp);
979 rw_exit(&pcnodes_lock);
980 error = EIO;
981 }
982 } else {
983 fsp = pc_mounttab;
984 while (fsp != NULL) {
985 if (fsp->pcfs_flags & PCFS_IRRECOV) {
986 rw_enter(&pcnodes_lock, RW_WRITER);
987 pc_diskchanged(fsp);
988 rw_exit(&pcnodes_lock);
989 error = EIO;
990 break;
991 }
992 error = pc_syncfsnodes(fsp);
993 if (error) break;
994 fsp = fsp->pcfs_nxt;
995 }
996 }
997 mutex_exit(&pcfslock);
998 return (error);
999 }
1000
1001 int
pc_lockfs(struct pcfs * fsp,int diskchanged,int releasing)1002 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
1003 {
1004 int err;
1005
1006 if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
1007 return (EIO);
1008
1009 if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
1010 fsp->pcfs_count++;
1011 } else {
1012 mutex_enter(&fsp->pcfs_lock);
1013 if (fsp->pcfs_flags & PCFS_LOCKED)
1014 panic("pc_lockfs");
1015 /*
1016 * We check the IRRECOV bit again just in case somebody
1017 * snuck past the initial check but then got held up before
1018 * they could grab the lock. (And in the meantime someone
1019 * had grabbed the lock and set the bit)
1020 */
1021 if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1022 if ((err = pc_getfat(fsp))) {
1023 mutex_exit(&fsp->pcfs_lock);
1024 return (err);
1025 }
1026 }
1027 fsp->pcfs_flags |= PCFS_LOCKED;
1028 fsp->pcfs_owner = curthread;
1029 fsp->pcfs_count++;
1030 }
1031 return (0);
1032 }
1033
1034 void
pc_unlockfs(struct pcfs * fsp)1035 pc_unlockfs(struct pcfs *fsp)
1036 {
1037
1038 if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1039 panic("pc_unlockfs");
1040 if (--fsp->pcfs_count < 0)
1041 panic("pc_unlockfs: count");
1042 if (fsp->pcfs_count == 0) {
1043 fsp->pcfs_flags &= ~PCFS_LOCKED;
1044 fsp->pcfs_owner = 0;
1045 mutex_exit(&fsp->pcfs_lock);
1046 }
1047 }
1048
1049 int
pc_syncfat(struct pcfs * fsp)1050 pc_syncfat(struct pcfs *fsp)
1051 {
1052 struct buf *bp;
1053 int nfat;
1054 int error = 0;
1055 struct fat_od_fsi *fsinfo_disk;
1056
1057 if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1058 !(fsp->pcfs_flags & PCFS_FATMOD))
1059 return (0);
1060 /*
1061 * write out all copies of FATs
1062 */
1063 fsp->pcfs_flags &= ~PCFS_FATMOD;
1064 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1065 for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1066 error = pc_writefat(fsp, pc_dbdaddr(fsp,
1067 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1068 if (error) {
1069 pc_mark_irrecov(fsp);
1070 return (EIO);
1071 }
1072 }
1073 pc_clear_fatchanges(fsp);
1074
1075 /*
1076 * Write out fsinfo sector.
1077 */
1078 if (IS_FAT32(fsp)) {
1079 bp = bread(fsp->pcfs_xdev,
1080 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1081 if (bp->b_flags & (B_ERROR | B_STALE)) {
1082 error = geterror(bp);
1083 }
1084 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1085 if (!error && FSISIG_OK(fsinfo_disk)) {
1086 fsinfo_disk->fsi_incore.fs_free_clusters =
1087 LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1088 fsinfo_disk->fsi_incore.fs_next_free =
1089 LE_32(FSINFO_UNKNOWN);
1090 bwrite2(bp);
1091 error = geterror(bp);
1092 }
1093 brelse(bp);
1094 if (error) {
1095 pc_mark_irrecov(fsp);
1096 return (EIO);
1097 }
1098 }
1099 return (0);
1100 }
1101
1102 void
pc_invalfat(struct pcfs * fsp)1103 pc_invalfat(struct pcfs *fsp)
1104 {
1105 struct pcfs *xfsp;
1106 int mount_cnt = 0;
1107
1108 if (fsp->pcfs_fatp == (uchar_t *)0)
1109 panic("pc_invalfat");
1110 /*
1111 * Release FAT
1112 */
1113 kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1114 fsp->pcfs_fatp = NULL;
1115 kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1116 fsp->pcfs_fat_changemap = NULL;
1117 /*
1118 * Invalidate all the blocks associated with the device.
1119 * Not needed if stateless.
1120 */
1121 for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1122 if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1123 mount_cnt++;
1124
1125 if (!mount_cnt)
1126 binval(fsp->pcfs_xdev);
1127 /*
1128 * close mounted device
1129 */
1130 (void) VOP_CLOSE(fsp->pcfs_devvp,
1131 (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1132 1, (offset_t)0, CRED(), NULL);
1133 }
1134
1135 void
pc_badfs(struct pcfs * fsp)1136 pc_badfs(struct pcfs *fsp)
1137 {
1138 cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1139 getmajor(fsp->pcfs_devvp->v_rdev),
1140 getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1141 }
1142
1143 /*
1144 * The problem with supporting NFS on the PCFS filesystem is that there
1145 * is no good place to keep the generation number. The only possible
1146 * place is inside a directory entry. There are a few words that we
1147 * don't use - they store NT & OS/2 attributes, and the creation/last access
1148 * time of the file - but it seems wrong to use them. In addition, directory
1149 * entries come and go. If a directory is removed completely, its directory
1150 * blocks are freed and the generation numbers are lost. Whereas in ufs,
1151 * inode blocks are dedicated for inodes, so the generation numbers are
1152 * permanently kept on the disk.
1153 */
1154 static int
pcfs_vget(struct vfs * vfsp,struct vnode ** vpp,struct fid * fidp)1155 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1156 {
1157 struct pcnode *pcp;
1158 struct pc_fid *pcfid;
1159 struct pcfs *fsp;
1160 struct pcdir *ep;
1161 daddr_t eblkno;
1162 int eoffset;
1163 struct buf *bp;
1164 int error;
1165 pc_cluster32_t cn;
1166
1167 pcfid = (struct pc_fid *)fidp;
1168 fsp = VFSTOPCFS(vfsp);
1169
1170 error = pc_lockfs(fsp, 0, 0);
1171 if (error) {
1172 *vpp = NULL;
1173 return (error);
1174 }
1175
1176 if (pcfid->pcfid_block == 0) {
1177 pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1178 pcp->pc_flags |= PC_EXTERNAL;
1179 *vpp = PCTOV(pcp);
1180 pc_unlockfs(fsp);
1181 return (0);
1182 }
1183 eblkno = pcfid->pcfid_block;
1184 eoffset = pcfid->pcfid_offset;
1185
1186 if ((pc_dbtocl(fsp,
1187 eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1188 (eoffset > fsp->pcfs_clsize)) {
1189 pc_unlockfs(fsp);
1190 *vpp = NULL;
1191 return (EINVAL);
1192 }
1193
1194 if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1195 < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1196 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1197 fsp->pcfs_clsize);
1198 } else {
1199 /*
1200 * This is an access "backwards" into the FAT12/FAT16
1201 * root directory. A better code structure would
1202 * significantly improve maintainability here ...
1203 */
1204 bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1205 (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1206 }
1207 if (bp->b_flags & (B_ERROR | B_STALE)) {
1208 error = geterror(bp);
1209 brelse(bp);
1210 if (error)
1211 pc_mark_irrecov(fsp);
1212 *vpp = NULL;
1213 pc_unlockfs(fsp);
1214 return (error);
1215 }
1216 ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1217 /*
1218 * Ok, if this is a valid file handle that we gave out,
1219 * then simply ensuring that the creation time matches,
1220 * the entry has not been deleted, and it has a valid first
1221 * character should be enough.
1222 *
1223 * Unfortunately, verifying that the <blkno, offset> _still_
1224 * refers to a directory entry is not easy, since we'd have
1225 * to search _all_ directories starting from root to find it.
1226 * That's a high price to pay just in case somebody is forging
1227 * file handles. So instead we verify that as much of the
1228 * entry is valid as we can:
1229 *
1230 * 1. The starting cluster is 0 (unallocated) or valid
1231 * 2. It is not an LFN entry
1232 * 3. It is not hidden (unless mounted as such)
1233 * 4. It is not the label
1234 */
1235 cn = pc_getstartcluster(fsp, ep);
1236 /*
1237 * if the starting cluster is valid, but not valid according
1238 * to pc_validcl(), force it to be to simplify the following if.
1239 */
1240 if (cn == 0)
1241 cn = PCF_FIRSTCLUSTER;
1242 if (IS_FAT32(fsp)) {
1243 if (cn >= PCF_LASTCLUSTER32)
1244 cn = PCF_FIRSTCLUSTER;
1245 } else {
1246 if (cn >= PCF_LASTCLUSTER)
1247 cn = PCF_FIRSTCLUSTER;
1248 }
1249 if ((!pc_validcl(fsp, cn)) ||
1250 (PCDL_IS_LFN(ep)) ||
1251 (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1252 ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1253 bp->b_flags |= B_STALE | B_AGE;
1254 brelse(bp);
1255 pc_unlockfs(fsp);
1256 return (EINVAL);
1257 }
1258 if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1259 (ep->pcd_filename[0] != PCD_ERASED) &&
1260 (pc_validchar(ep->pcd_filename[0]) ||
1261 (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1262 pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1263 pcp->pc_flags |= PC_EXTERNAL;
1264 *vpp = PCTOV(pcp);
1265 } else {
1266 *vpp = NULL;
1267 }
1268 bp->b_flags |= B_STALE | B_AGE;
1269 brelse(bp);
1270 pc_unlockfs(fsp);
1271 return (0);
1272 }
1273
1274 /*
1275 * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1276 * a meg), so we can't bread() it all in at once. This routine reads a
1277 * fat a chunk at a time.
1278 */
1279 static int
pc_readfat(struct pcfs * fsp,uchar_t * fatp)1280 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1281 {
1282 struct buf *bp;
1283 size_t off;
1284 size_t readsize;
1285 daddr_t diskblk;
1286 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1287 daddr_t start = fsp->pcfs_fatstart;
1288
1289 readsize = fsp->pcfs_clsize;
1290 for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1291 if (readsize > (fatsize - off))
1292 readsize = fatsize - off;
1293 diskblk = pc_dbdaddr(fsp, start +
1294 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1295 bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1296 if (bp->b_flags & (B_ERROR | B_STALE)) {
1297 brelse(bp);
1298 return (EIO);
1299 }
1300 bp->b_flags |= B_STALE | B_AGE;
1301 bcopy(bp->b_un.b_addr, fatp, readsize);
1302 brelse(bp);
1303 }
1304 return (0);
1305 }
1306
1307 /*
1308 * We write the FAT out a _lot_, in order to make sure that it
1309 * is up-to-date. But on a FAT32 system (large drive, small clusters)
1310 * the FAT might be a couple of megabytes, and writing it all out just
1311 * because we created or deleted a small file is painful (especially
1312 * since we do it for each alternate FAT too). So instead, for FAT16 and
1313 * FAT32 we only write out the bit that has changed. We don't clear
1314 * the 'updated' fields here because the caller might be writing out
1315 * several FATs, so the caller must use pc_clear_fatchanges() after
1316 * all FATs have been updated.
1317 * This function doesn't take "start" from fsp->pcfs_dosstart because
1318 * callers can use it to write either the primary or any of the alternate
1319 * FAT tables.
1320 */
1321 static int
pc_writefat(struct pcfs * fsp,daddr_t start)1322 pc_writefat(struct pcfs *fsp, daddr_t start)
1323 {
1324 struct buf *bp;
1325 size_t off;
1326 size_t writesize;
1327 int error;
1328 uchar_t *fatp = fsp->pcfs_fatp;
1329 size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1330
1331 writesize = fsp->pcfs_clsize;
1332 for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1333 if (writesize > (fatsize - off))
1334 writesize = fatsize - off;
1335 if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1336 continue;
1337 }
1338 bp = ngeteblk(writesize);
1339 bp->b_edev = fsp->pcfs_xdev;
1340 bp->b_dev = cmpdev(bp->b_edev);
1341 bp->b_blkno = pc_dbdaddr(fsp, start +
1342 pc_cltodb(fsp, pc_lblkno(fsp, off)));
1343 bcopy(fatp, bp->b_un.b_addr, writesize);
1344 bwrite2(bp);
1345 error = geterror(bp);
1346 brelse(bp);
1347 if (error) {
1348 return (error);
1349 }
1350 }
1351 return (0);
1352 }
1353
1354 /*
1355 * Mark the FAT cluster that 'cn' is stored in as modified.
1356 */
1357 void
pc_mark_fat_updated(struct pcfs * fsp,pc_cluster32_t cn)1358 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1359 {
1360 pc_cluster32_t bn;
1361 size_t size;
1362
1363 /* which fat block is the cluster number stored in? */
1364 if (IS_FAT32(fsp)) {
1365 size = sizeof (pc_cluster32_t);
1366 bn = pc_lblkno(fsp, cn * size);
1367 fsp->pcfs_fat_changemap[bn] = 1;
1368 } else if (IS_FAT16(fsp)) {
1369 size = sizeof (pc_cluster16_t);
1370 bn = pc_lblkno(fsp, cn * size);
1371 fsp->pcfs_fat_changemap[bn] = 1;
1372 } else {
1373 offset_t off;
1374 pc_cluster32_t nbn;
1375
1376 ASSERT(IS_FAT12(fsp));
1377 off = cn + (cn >> 1);
1378 bn = pc_lblkno(fsp, off);
1379 fsp->pcfs_fat_changemap[bn] = 1;
1380 /* does this field wrap into the next fat cluster? */
1381 nbn = pc_lblkno(fsp, off + 1);
1382 if (nbn != bn) {
1383 fsp->pcfs_fat_changemap[nbn] = 1;
1384 }
1385 }
1386 }
1387
1388 /*
1389 * return whether the FAT cluster 'bn' is updated and needs to
1390 * be written out.
1391 */
1392 int
pc_fat_is_changed(struct pcfs * fsp,pc_cluster32_t bn)1393 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1394 {
1395 return (fsp->pcfs_fat_changemap[bn] == 1);
1396 }
1397
1398 /*
1399 * Implementation of VFS_FREEVFS() to support forced umounts.
1400 * This is called by the vfs framework after umount, to trigger
1401 * the release of any resources still associated with the given
1402 * vfs_t once the need to keep them has gone away.
1403 */
1404 void
pcfs_freevfs(vfs_t * vfsp)1405 pcfs_freevfs(vfs_t *vfsp)
1406 {
1407 struct pcfs *fsp = VFSTOPCFS(vfsp);
1408
1409 mutex_enter(&pcfslock);
1410 /*
1411 * Purging the FAT closes the device - can't do any more
1412 * I/O after this.
1413 */
1414 if (fsp->pcfs_fatp != (uchar_t *)0)
1415 pc_invalfat(fsp);
1416 mutex_exit(&pcfslock);
1417
1418 VN_RELE(fsp->pcfs_devvp);
1419 mutex_destroy(&fsp->pcfs_lock);
1420 kmem_free(fsp, sizeof (*fsp));
1421
1422 /*
1423 * Allow _fini() to succeed now, if so desired.
1424 */
1425 atomic_dec_32(&pcfs_mountcount);
1426 }
1427
1428
1429 /*
1430 * PC-style partition parsing and FAT BPB identification/validation code.
1431 * The partition parsers here assume:
1432 * - a FAT filesystem will be in a partition that has one of a set of
1433 * recognized partition IDs
1434 * - the user wants the 'numbering' (C:, D:, ...) that one would get
1435 * on MSDOS 6.x.
1436 * That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1437 * will not factor in the enumeration.
1438 * These days, such assumptions should be revisited. FAT is no longer the
1439 * only game in 'PC town'.
1440 */
1441 /*
1442 * isDosDrive()
1443 * Boolean function. Give it the systid field for an fdisk partition
1444 * and it decides if that's a systid that describes a DOS drive. We
1445 * use systid values defined in sys/dktp/fdisk.h.
1446 */
1447 static int
isDosDrive(uchar_t checkMe)1448 isDosDrive(uchar_t checkMe)
1449 {
1450 return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1451 (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1452 (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1453 (checkMe == DIAGPART));
1454 }
1455
1456
1457 /*
1458 * isDosExtended()
1459 * Boolean function. Give it the systid field for an fdisk partition
1460 * and it decides if that's a systid that describes an extended DOS
1461 * partition.
1462 */
1463 static int
isDosExtended(uchar_t checkMe)1464 isDosExtended(uchar_t checkMe)
1465 {
1466 return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1467 }
1468
1469
1470 /*
1471 * isBootPart()
1472 * Boolean function. Give it the systid field for an fdisk partition
1473 * and it decides if that's a systid that describes a Solaris boot
1474 * partition.
1475 */
1476 static int
isBootPart(uchar_t checkMe)1477 isBootPart(uchar_t checkMe)
1478 {
1479 return (checkMe == X86BOOT);
1480 }
1481
1482
1483 /*
1484 * noLogicalDrive()
1485 * Display error message about not being able to find a logical
1486 * drive.
1487 */
1488 static void
noLogicalDrive(int ldrive)1489 noLogicalDrive(int ldrive)
1490 {
1491 if (ldrive == BOOT_PARTITION_DRIVE) {
1492 cmn_err(CE_NOTE, "!pcfs: no boot partition");
1493 } else {
1494 cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1495 }
1496 }
1497
1498
/*
 * findTheDrive()
 *	Locate the logical drive fsp->pcfs_ldrive in the fdisk partition
 *	tables of the device.
 *
 *	On entry, *bp is expected to hold the sector containing the master
 *	partition table; it is checked for the MBB_MAGIC signature.  While
 *	walking the extended partition chain, *bp is released and replaced
 *	by the buffer of the table read most recently.  This function never
 *	releases the buffer that *bp refers to when it returns, whether it
 *	succeeds or fails.
 *
 *	On success (return 0):
 *	- fsp->pcfs_dosstart is set to the first sector of the drive, in
 *	  units of device sectors
 *	- fsp->pcfs_mediasize, if it was zero, is set to the size of the
 *	  partition in bytes; if it was nonzero, a warning is logged when
 *	  the partition does not fit into it
 *
 *	Returns EIO if logical drive 0 is requested or a partition table
 *	cannot be read, and EINVAL for a bad table signature, inconsistent
 *	extended partition values or a drive that does not exist.
 *
 * Note: this code is not capable of addressing >2TB disks, as it uses
 * daddr_t not diskaddr_t, some of the calculations would overflow
 */

/*
 * Copy the FD_NUMPART partition entries out of the boot sector 'mbr' into
 * the array 'ptblp'.
 */
#define	COPY_PTBL(mbr, ptblp)					\
	bcopy(&(((struct mboot *)(mbr))->parts), (ptblp),	\
	    FD_NUMPART * sizeof (struct ipart))

static int
findTheDrive(struct pcfs *fsp, buf_t **bp)
{
	int ldrive = fsp->pcfs_ldrive;
	dev_t dev = fsp->pcfs_devvp->v_rdev;

	struct ipart dosp[FD_NUMPART];	/* incore fdisk partition structure */
	daddr_t lastseek = 0;		/* Disk block we sought previously */
	daddr_t diskblk = 0;		/* Disk block to get */
	daddr_t xstartsect;		/* base of Extended DOS partition */
	int logicalDriveCount = 0;	/* Count of logical drives seen */
	int extendedPart = -1;		/* index of extended dos partition */
	int primaryPart = -1;		/* index of primary dos partition */
	int bootPart = -1;		/* index of a Solaris boot partition */
	uint32_t xnumsect = 0;		/* length of extended DOS partition */
	int driveIndex;			/* computed FDISK table index */
	daddr_t startsec;
	len_t mediasize;
	int i;
	/*
	 * Count of drives in the current extended partition's
	 * FDISK table, and indexes of the drives themselves.
	 */
	int extndDrives[FD_NUMPART];
	int numDrives = 0;

	/*
	 * Count of drives (beyond primary) in master boot record's
	 * FDISK table, and indexes of the drives themselves.
	 */
	int extraDrives[FD_NUMPART];
	int numExtraDrives = 0;

	/*
	 * "ldrive == 0" should never happen, as this is a request to
	 * mount the physical device (and ignore partitioning). The code
	 * in pcfs_mount() should have made sure that a logical drive number
	 * is at least 1, meaning we're looking for drive "C:". It is not
	 * safe (and a bug in the callers of this function) to request logical
	 * drive number 0; we could ASSERT() but a graceful EIO is a more
	 * polite way.
	 */
	if (ldrive == 0) {
		cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
		noLogicalDrive(ldrive);
		return (EIO);
	}

	/*
	 * Copy from disk block into memory aligned structure for fdisk usage.
	 */
	COPY_PTBL((*bp)->b_un.b_addr, dosp);

	/*
	 * This check is ok because a FAT BPB and a master boot record (MBB)
	 * have the same signature, in the same position within the block.
	 */
	if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
		cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
		    "device (%x.%x):%d\n",
		    getmajor(dev), getminor(dev), ldrive);
		return (EINVAL);
	}

	/*
	 * Get a summary of what is in the Master FDISK table.
	 * Normally we expect to find one partition marked as a DOS drive.
	 * This partition is the one Windows calls the primary dos partition.
	 * If the machine has any logical drives then we also expect
	 * to find a partition marked as an extended DOS partition.
	 *
	 * Sometimes we'll find multiple partitions marked as DOS drives.
	 * The Solaris fdisk program allows these partitions
	 * to be created, but Windows fdisk no longer does.  We still need
	 * to support these, though, since Windows does.  We also need to fix
	 * our fdisk to behave like the Windows version.
	 *
	 * It turns out that some off-the-shelf media have *only* an
	 * Extended partition, so we need to deal with that case as well.
	 *
	 * Only a single (the first) Extended or Boot Partition will
	 * be recognized.  Any others will be ignored.
	 */
	for (i = 0; i < FD_NUMPART; i++) {
		DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
		    uint_t, (uint_t)dosp[i].systid,
		    uint_t, LE_32(dosp[i].relsect),
		    uint_t, LE_32(dosp[i].numsect));

		if (isDosDrive(dosp[i].systid)) {
			/* first DOS drive is the primary, the rest "extra" */
			if (primaryPart < 0) {
				logicalDriveCount++;
				primaryPart = i;
			} else {
				extraDrives[numExtraDrives++] = i;
			}
			continue;
		}
		if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
			extendedPart = i;
			continue;
		}
		if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
			bootPart = i;
			continue;
		}
	}

	if (ldrive == BOOT_PARTITION_DRIVE) {
		if (bootPart < 0) {
			noLogicalDrive(ldrive);
			return (EINVAL);
		}
		startsec = LE_32(dosp[bootPart].relsect);
		mediasize = LE_32(dosp[bootPart].numsect);
		goto found;
	}

	if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
		startsec = LE_32(dosp[primaryPart].relsect);
		mediasize = LE_32(dosp[primaryPart].numsect);
		goto found;
	}

	/*
	 * We are not looking for the C: drive (or the primary drive
	 * was not found), so we had better have an extended partition
	 * or extra drives in the Master FDISK table.
	 */
	if ((extendedPart < 0) && (numExtraDrives == 0)) {
		cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
		noLogicalDrive(ldrive);
		return (EINVAL);
	}

	if (extendedPart >= 0) {
		diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
		xnumsect = LE_32(dosp[extendedPart].numsect);
		/*
		 * NOTE(review): this loop ends only when the next table's
		 * address equals the one just read, or when enough drives
		 * have been counted.  A chain of tables that links back to
		 * an earlier table while listing no DOS drives does not
		 * appear to be bounded here - confirm whether crafted media
		 * can keep this loop running.
		 */
		do {
			/*
			 * If the seek would not cause us to change
			 * position on the drive, then we're out of
			 * extended partitions to examine.
			 */
			if (diskblk == lastseek)
				break;
			/* account for the drives of the previous table */
			logicalDriveCount += numDrives;
			/*
			 * Seek the next extended partition, and find
			 * logical drives within it.
			 */
			brelse(*bp);
			/*
			 * bread() block numbers are multiples of DEV_BSIZE
			 * but the device sector size (the unit of partitioning)
			 * might be larger than that; pcfs_get_device_info()
			 * has calculated the multiplicator for us.
			 */
			*bp = bread(dev,
			    pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
			if ((*bp)->b_flags & B_ERROR) {
				return (EIO);
			}

			lastseek = diskblk;
			COPY_PTBL((*bp)->b_un.b_addr, dosp);
			if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
				cmn_err(CE_NOTE, "!pcfs: "
				    "extended partition table signature err, "
				    "device (%x.%x):%d, LBA %u",
				    getmajor(dev), getminor(dev), ldrive,
				    (uint_t)pc_dbdaddr(fsp, diskblk));
				return (EINVAL);
			}
			/*
			 * Count up drives, and track where the next
			 * extended partition is in case we need it.  We
			 * are expecting only one extended partition.  If
			 * there is more than one we'll only go to the
			 * first one we see, but warn about ignoring.
			 */
			numDrives = 0;
			for (i = 0; i < FD_NUMPART; i++) {
				DTRACE_PROBE4(extendedpart,
				    struct pcfs *, fsp,
				    uint_t, (uint_t)dosp[i].systid,
				    uint_t, LE_32(dosp[i].relsect),
				    uint_t, LE_32(dosp[i].numsect));
				if (isDosDrive(dosp[i].systid)) {
					extndDrives[numDrives++] = i;
				} else if (isDosExtended(dosp[i].systid)) {
					if (diskblk != lastseek) {
						/*
						 * Already found an extended
						 * partition in this table.
						 */
						cmn_err(CE_NOTE,
						    "!pcfs: ignoring unexpected"
						    " additional extended"
						    " partition");
					} else {
						/*
						 * link is relative to the
						 * base of the extended
						 * partition
						 */
						diskblk = xstartsect +
						    LE_32(dosp[i].relsect);
					}
				}
			}
		} while (ldrive > logicalDriveCount + numDrives);

		ASSERT(numDrives <= FD_NUMPART);

		if (ldrive <= logicalDriveCount + numDrives) {
			/*
			 * The number of logical drives we've found thus
			 * far is enough to get us to the one we were
			 * searching for.
			 */
			driveIndex = logicalDriveCount + numDrives - ldrive;
			mediasize =
			    LE_32(dosp[extndDrives[driveIndex]].numsect);
			/* relsect is relative to the table just read */
			startsec =
			    LE_32(dosp[extndDrives[driveIndex]].relsect) +
			    lastseek;
			if (startsec > (xstartsect + xnumsect)) {
				cmn_err(CE_NOTE, "!pcfs: extended partition "
				    "values bad");
				return (EINVAL);
			}
			goto found;
		} else {
			/*
			 * We ran out of extended dos partition
			 * drives.  The only hope now is to go
			 * back to extra drives defined in the master
			 * fdisk table.  But we overwrote that table
			 * already, so we must load it in again.
			 */
			logicalDriveCount += numDrives;
			brelse(*bp);
			ASSERT(fsp->pcfs_dosstart == 0);
			*bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
			    fsp->pcfs_secsize);
			if ((*bp)->b_flags & B_ERROR) {
				return (EIO);
			}
			COPY_PTBL((*bp)->b_un.b_addr, dosp);
		}
	}
	/*
	 * Still haven't found the drive, is it an extra
	 * drive defined in the main FDISK table?
	 */
	if (ldrive <= logicalDriveCount + numExtraDrives) {
		driveIndex = logicalDriveCount + numExtraDrives - ldrive;
		ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
		mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
		startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
		goto found;
	}
	/*
	 * Still haven't found the drive, and there is
	 * nowhere else to look.
	 */
	noLogicalDrive(ldrive);
	return (EINVAL);

found:
	/*
	 * We need this value in units of sectorsize, because PCFS' internal
	 * offset calculations go haywire for > 512Byte sectors unless all
	 * pcfs_.*start values are in units of sectors.
	 * So, assign before the capacity check (that's done in DEV_BSIZE)
	 */
	fsp->pcfs_dosstart = startsec;

	/*
	 * convert from device sectors to proper units:
	 *	- starting sector: DEV_BSIZE (as argument to bread())
	 *	- media size: Bytes
	 */
	startsec = pc_dbdaddr(fsp, startsec);
	mediasize *= fsp->pcfs_secsize;

	/*
	 * some additional validation / warnings in case the partition table
	 * and the actual media capacity are not in accordance ...
	 */
	if (fsp->pcfs_mediasize != 0) {
		diskaddr_t startoff =
		    (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;

		/*
		 * NOTE(review): the condition fires when the partition
		 * starts or ends beyond the known media size, yet the
		 * message says the partition is "smaller than" the media
		 * size - the wording looks inverted; confirm.
		 */
		if (startoff >= fsp->pcfs_mediasize ||
		    startoff + mediasize > fsp->pcfs_mediasize) {
			cmn_err(CE_WARN,
			    "!pcfs: partition size (LBA start %u, %lld bytes, "
			    "device (%x.%x):%d) smaller than "
			    "mediasize (%lld bytes).\n"
			    "filesystem may be truncated, access errors "
			    "may result.\n",
			    (uint_t)startsec, (long long)mediasize,
			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
			    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
		}
	} else {
		fsp->pcfs_mediasize = mediasize;
	}

	return (0);
}
1822
1823
1824 static fattype_t
secondaryBPBChecks(struct pcfs * fsp,uchar_t * bpb,size_t secsize)1825 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1826 {
1827 uint32_t ncl = fsp->pcfs_ncluster;
1828
1829 if (ncl <= 4096) {
1830 if (bpb_get_FatSz16(bpb) == 0)
1831 return (FAT_UNKNOWN);
1832
1833 if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1834 bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1835 return (FAT12);
1836 if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1837 return (FAT12);
1838 if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1839 return (FAT16);
1840
1841 switch (bpb_get_Media(bpb)) {
1842 case SS8SPT:
1843 case DS8SPT:
1844 case SS9SPT:
1845 case DS9SPT:
1846 case DS18SPT:
1847 case DS9_15SPT:
1848 /*
1849 * Is this reliable - all floppies are FAT12 ?
1850 */
1851 return (FAT12);
1852 case MD_FIXED:
1853 /*
1854 * Is this reliable - disks are always FAT16 ?
1855 */
1856 return (FAT16);
1857 default:
1858 break;
1859 }
1860 } else if (ncl <= 65536) {
1861 if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1862 return (FAT32);
1863 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1864 return (FAT32);
1865 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1866 return (FAT32);
1867
1868 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1869 return (FAT16);
1870 if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1871 return (FAT16);
1872 }
1873
1874 /*
1875 * We don't know
1876 */
1877 return (FAT_UNKNOWN);
1878 }
1879
1880 /*
1881 * Check to see if the BPB we found is correct.
1882 *
 * This looks far more complicated than it needs to be for pure structural
1884 * validation. The reason for this is that parseBPB() is also used for
1885 * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1886 * BPB fields (do not) have 'known good' values, even if we (do not) reject
1887 * the BPB when attempting to mount the filesystem.
1888 *
1889 * Real-world usage of FAT shows there are a lot of corner-case situations
1890 * and, following the specification strictly, invalid filesystems out there.
1891 * Known are situations such as:
1892 * - FAT12/FAT16 filesystems with garbage in either totsec16/32
1893 * instead of the zero in one of the fields mandated by the spec
1894 * - filesystems that claim to be larger than the partition they're in
1895 * - filesystems without valid media descriptor
1896 * - FAT32 filesystems with RootEntCnt != 0
1897 * - FAT32 filesystems with less than 65526 clusters
1898 * - FAT32 filesystems without valid FSI sector
1899 * - FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1900 *
1901 * Such filesystems are accessible by PCFS - if it'd know to start with that
1902 * the filesystem should be treated as a specific FAT type. Before S10, it
1903 * relied on the PC/fdisk partition type for the purpose and almost completely
1904 * ignored the BPB; now it ignores the partition type for anything else but
1905 * logical drive enumeration, which can result in rejection of (invalid)
1906 * FAT32 - if the partition ID says FAT32, but the filesystem, for example
1907 * has less than 65526 clusters.
1908 *
1909 * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1910 * not possible to allow all such mostly-compliant filesystems in unless one
1911 * accepts false positives (definitely invalid filesystems that cause problems
1912 * later). This at least allows to pinpoint why the mount failed.
1913 *
 * Due to the use of FAT on removable media, all relaxations of the rules
 * here need to be carefully evaluated with respect to potential effects on
 * PCFS resilience. A faulty/"mis-crafted" filesystem must not cause a panic,
 * so beware.
1918 */
1919 static int
parseBPB(struct pcfs * fsp,uchar_t * bpb,int * valid)1920 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1921 {
1922 fattype_t type;
1923
1924 uint32_t ncl; /* number of clusters in file area */
1925 uint32_t rec;
1926 uint32_t reserved;
1927 uint32_t fsisec, bkbootsec;
1928 blkcnt_t totsec, totsec16, totsec32, datasec;
1929 size_t fatsec, fatsec16, fatsec32, rdirsec;
1930 size_t secsize;
1931 len_t mediasize;
1932 uint64_t validflags = 0;
1933
1934 if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1935 validflags |= BPB_BPBSIG_OK;
1936
1937 rec = bpb_get_RootEntCnt(bpb);
1938 reserved = bpb_get_RsvdSecCnt(bpb);
1939 fsisec = bpb_get_FSInfo32(bpb);
1940 bkbootsec = bpb_get_BkBootSec32(bpb);
1941 totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1942 totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1943 fatsec16 = bpb_get_FatSz16(bpb);
1944 fatsec32 = bpb_get_FatSz32(bpb);
1945
1946 totsec = totsec16 ? totsec16 : totsec32;
1947 fatsec = fatsec16 ? fatsec16 : fatsec32;
1948
1949 secsize = bpb_get_BytesPerSec(bpb);
1950 if (!VALID_SECSIZE(secsize))
1951 secsize = fsp->pcfs_secsize;
1952 if (secsize != fsp->pcfs_secsize) {
1953 PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1954 getmajor(fsp->pcfs_xdev),
1955 getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1956 PC_DPRINTF2(3, "!BPB secsize %d != "
1957 "autodetected media block size %d\n",
1958 (int)secsize, (int)fsp->pcfs_secsize);
1959 if (fsp->pcfs_ldrive) {
1960 /*
1961 * We've already attempted to parse the partition
1962 * table. If the block size used for that don't match
1963 * the PCFS sector size, we're hosed one way or the
1964 * other. Just try what happens.
1965 */
1966 secsize = fsp->pcfs_secsize;
1967 PC_DPRINTF1(3,
1968 "!pcfs: Using autodetected secsize %d\n",
1969 (int)secsize);
1970 } else {
1971 /*
1972 * This allows mounting lofi images of PCFS partitions
1973 * with sectorsize != DEV_BSIZE. We can't parse the
1974 * partition table on whole-disk images unless the
1975 * (undocumented) "secsize=..." mount option is used,
1976 * but at least this allows us to mount if we have
1977 * an image of a partition.
1978 */
1979 PC_DPRINTF1(3,
1980 "!pcfs: Using BPB secsize %d\n", (int)secsize);
1981 }
1982 }
1983
1984 if (fsp->pcfs_mediasize == 0) {
1985 mediasize = (len_t)totsec * (len_t)secsize;
1986 /*
1987 * This is not an error because not all devices support the
1988 * dkio(7i) mediasize queries, and/or not all devices are
1989 * partitioned. If we have not been able to figure out the
1990 * size of the underlaying medium, we have to trust the BPB.
1991 */
1992 PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1993 "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1994 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1995 fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1996 } else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1997 cmn_err(CE_WARN,
1998 "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1999 "FAT BPB mediasize (%lld Bytes).\n"
2000 "truncated filesystem on device (%x.%x):%d, access errors "
2001 "possible.\n",
2002 (long long)fsp->pcfs_mediasize,
2003 (long long)(totsec * (blkcnt_t)secsize),
2004 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2005 fsp->pcfs_ldrive);
2006 mediasize = fsp->pcfs_mediasize;
2007 } else {
2008 /*
2009 * This is actually ok. A FAT needs not occupy the maximum
2010 * space available in its partition, it can be shorter.
2011 */
2012 mediasize = (len_t)totsec * (len_t)secsize;
2013 }
2014
2015 /*
2016 * Since we let just about anything pass through this function,
2017 * fence against divide-by-zero here.
2018 */
2019 if (secsize)
2020 rdirsec = roundup(rec * 32, secsize) / secsize;
2021 else
2022 rdirsec = 0;
2023
2024 /*
2025 * This assignment is necessary before pc_dbdaddr() can first be
2026 * used. Must initialize the value here.
2027 */
2028 fsp->pcfs_secsize = secsize;
2029 fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2030
2031 fsp->pcfs_mediasize = mediasize;
2032
2033 fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2034 fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2035 fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2036 fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2037 fsp->pcfs_rdirsec = rdirsec;
2038
2039 /*
2040 * Remember: All PCFS offset calculations in sectors. Before I/O
2041 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2042 * necessary so that media with > 512Byte sector sizes work correctly.
2043 */
2044 fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2045 fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2046 fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2047 datasec = totsec -
2048 (blkcnt_t)fatsec * fsp->pcfs_numfat -
2049 (blkcnt_t)rdirsec -
2050 (blkcnt_t)reserved;
2051
2052 DTRACE_PROBE4(fatgeometry,
2053 blkcnt_t, totsec, size_t, fatsec,
2054 size_t, rdirsec, blkcnt_t, datasec);
2055
2056 /*
2057 * 'totsec' is taken directly from the BPB and guaranteed to fit
2058 * into a 32bit unsigned integer. The calculation of 'datasec',
2059 * on the other hand, could underflow for incorrect values in
2060 * rdirsec/reserved/fatsec. Check for that.
2061 * We also check that the BPB conforms to the FAT specification's
2062 * requirement that either of the 16/32bit total sector counts
2063 * must be zero.
2064 */
2065 if (totsec != 0 &&
2066 (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2067 datasec < totsec && datasec <= UINT32_MAX)
2068 validflags |= BPB_TOTSEC_OK;
2069
2070 if ((len_t)totsec * (len_t)secsize <= mediasize)
2071 validflags |= BPB_MEDIASZ_OK;
2072
2073 if (VALID_SECSIZE(secsize))
2074 validflags |= BPB_SECSIZE_OK;
2075 if (VALID_SPCL(fsp->pcfs_spcl))
2076 validflags |= BPB_SECPERCLUS_OK;
2077 if (VALID_CLSIZE(fsp->pcfs_clsize))
2078 validflags |= BPB_CLSIZE_OK;
2079 if (VALID_NUMFATS(fsp->pcfs_numfat))
2080 validflags |= BPB_NUMFAT_OK;
2081 if (VALID_RSVDSEC(reserved) && reserved < totsec)
2082 validflags |= BPB_RSVDSECCNT_OK;
2083 if (VALID_MEDIA(fsp->pcfs_mediadesc))
2084 validflags |= BPB_MEDIADESC_OK;
2085 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2086 validflags |= BPB_BOOTSIG16_OK;
2087 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2088 validflags |= BPB_BOOTSIG32_OK;
2089 if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2090 validflags |= BPB_FSTYPSTR16_OK;
2091 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2092 validflags |= BPB_FSTYPSTR32_OK;
2093 if (VALID_OEMNAME(bpb_OEMName(bpb)))
2094 validflags |= BPB_OEMNAME_OK;
2095 if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2096 validflags |= BPB_BKBOOTSEC_OK;
2097 if (fsisec > 0 && fsisec <= reserved)
2098 validflags |= BPB_FSISEC_OK;
2099 if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2100 validflags |= BPB_JMPBOOT_OK;
2101 if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2102 validflags |= BPB_FSVER_OK;
2103 if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2104 validflags |= BPB_VOLLAB16_OK;
2105 if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2106 validflags |= BPB_VOLLAB32_OK;
2107 if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2108 validflags |= BPB_EXTFLAGS_OK;
2109
2110 /*
2111 * Try to determine which FAT format to use.
2112 *
2113 * Calculate the number of clusters in order to determine
2114 * the type of FAT we are looking at. This is the only
2115 * recommended way of determining FAT type, though there
2116 * are other hints in the data, this is the best way.
2117 *
2118 * Since we let just about "anything" pass through this function
2119 * without early exits, fence against divide-by-zero here.
2120 *
2121 * datasec was already validated against UINT32_MAX so we know
2122 * the result will not overflow the 32bit calculation.
2123 */
2124 if (fsp->pcfs_spcl)
2125 ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2126 else
2127 ncl = 0;
2128
2129 fsp->pcfs_ncluster = ncl;
2130
2131 /*
2132 * From the Microsoft FAT specification:
2133 * In the following example, when it says <, it does not mean <=.
2134 * Note also that the numbers are correct. The first number for
2135 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2136 * and the '<' signs are not wrong.
2137 *
2138 * We "specialdetect" the corner cases, and use at least one "extra"
2139 * criterion to decide whether it's FAT16 or FAT32 if the cluster
2140 * count is dangerously close to the boundaries.
2141 */
2142
2143 if (ncl <= PCF_FIRSTCLUSTER) {
2144 type = FAT_UNKNOWN;
2145 } else if (ncl < 4085) {
2146 type = FAT12;
2147 } else if (ncl <= 4096) {
2148 type = FAT_QUESTIONABLE;
2149 } else if (ncl < 65525) {
2150 type = FAT16;
2151 } else if (ncl <= 65536) {
2152 type = FAT_QUESTIONABLE;
2153 } else if (ncl < PCF_LASTCLUSTER32) {
2154 type = FAT32;
2155 } else {
2156 type = FAT_UNKNOWN;
2157 }
2158
2159 DTRACE_PROBE4(parseBPB__initial,
2160 struct pcfs *, fsp, unsigned char *, bpb,
2161 int, validflags, fattype_t, type);
2162
2163 recheck:
2164 fsp->pcfs_fatsec = fatsec;
2165
2166 /* Do some final sanity checks for each specific type of FAT */
2167 switch (type) {
2168 case FAT12:
2169 if (rec != 0)
2170 validflags |= BPB_ROOTENTCNT_OK;
2171 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2172 bpb_get_TotSec16(bpb) == 0)
2173 validflags |= BPB_TOTSEC16_OK;
2174 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2175 bpb_get_TotSec32(bpb) == 0)
2176 validflags |= BPB_TOTSEC32_OK;
2177 if (bpb_get_FatSz16(bpb) == fatsec)
2178 validflags |= BPB_FATSZ16_OK;
2179 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER)
2180 * 3 / 2)
2181 validflags |= BPB_FATSZ_OK;
2182 if (ncl < 4085)
2183 validflags |= BPB_NCLUSTERS_OK;
2184
2185 fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2186 fsp->pcfs_rootblksize =
2187 fsp->pcfs_rdirsec * secsize;
2188 fsp->pcfs_fsistart = 0;
2189
2190 if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2191 type = FAT_UNKNOWN;
2192 break;
2193 case FAT16:
2194 if (rec != 0)
2195 validflags |= BPB_ROOTENTCNT_OK;
2196 if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2197 bpb_get_TotSec16(bpb) == 0)
2198 validflags |= BPB_TOTSEC16_OK;
2199 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2200 bpb_get_TotSec32(bpb) == 0)
2201 validflags |= BPB_TOTSEC32_OK;
2202 if (bpb_get_FatSz16(bpb) == fatsec)
2203 validflags |= BPB_FATSZ16_OK;
2204 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2)
2205 validflags |= BPB_FATSZ_OK;
2206 if (ncl >= 4085 && ncl < 65525)
2207 validflags |= BPB_NCLUSTERS_OK;
2208
2209 fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2210 fsp->pcfs_rootblksize =
2211 fsp->pcfs_rdirsec * secsize;
2212 fsp->pcfs_fsistart = 0;
2213
2214 if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2215 type = FAT_UNKNOWN;
2216 break;
2217 case FAT32:
2218 if (rec == 0)
2219 validflags |= BPB_ROOTENTCNT_OK;
2220 if (bpb_get_TotSec16(bpb) == 0)
2221 validflags |= BPB_TOTSEC16_OK;
2222 if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2223 validflags |= BPB_TOTSEC32_OK;
2224 if (bpb_get_FatSz16(bpb) == 0)
2225 validflags |= BPB_FATSZ16_OK;
2226 if (bpb_get_FatSz32(bpb) == fatsec)
2227 validflags |= BPB_FATSZ32_OK;
2228 if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4)
2229 validflags |= BPB_FATSZ_OK;
2230 if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2231 validflags |= BPB_NCLUSTERS_OK;
2232
2233 fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2234 fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2235 fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2236 if (validflags & BPB_FSISEC_OK)
2237 fsp->pcfs_flags |= PCFS_FSINFO_OK;
2238 fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2239 if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2240 validflags |= BPB_ROOTCLUSTER_OK;
2241
2242 /*
2243 * Current PCFS code only works if 'pcfs_rdirstart'
2244 * contains the root cluster number on FAT32.
2245 * That's a mis-use and would better be changed.
2246 */
2247 fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2248
2249 if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2250 type = FAT_UNKNOWN;
2251 break;
2252 case FAT_QUESTIONABLE:
2253 type = secondaryBPBChecks(fsp, bpb, secsize);
2254 goto recheck;
2255 default:
2256 ASSERT(type == FAT_UNKNOWN);
2257 break;
2258 }
2259
2260 ASSERT(type != FAT_QUESTIONABLE);
2261
2262 fsp->pcfs_fattype = type;
2263
2264 if (valid)
2265 *valid = validflags;
2266
2267 DTRACE_PROBE4(parseBPB__final,
2268 struct pcfs *, fsp, unsigned char *, bpb,
2269 int, validflags, fattype_t, type);
2270
2271 if (type != FAT_UNKNOWN) {
2272 ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2273 ASSERT(ISP2(secsize / DEV_BSIZE));
2274 return (1);
2275 }
2276
2277 return (0);
2278 }
2279
2280
2281 /*
2282 * Detect the device's native block size (sector size).
2283 *
2284 * Test whether the device is:
2285 * - a floppy device from a known controller type via DKIOCINFO
2286 * - a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2287 * - a USB floppy drive (identified by drive geometry)
2288 *
2289 * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2290 * to minimize risks due to slow I/O and user hotplugging / device ejection.
2291 *
2292 * This might be a bit wasteful on kernel stack space; if anyone's
2293 * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2294 */
2295 static void
pcfs_device_getinfo(struct pcfs * fsp)2296 pcfs_device_getinfo(struct pcfs *fsp)
2297 {
2298 dev_t rdev = fsp->pcfs_xdev;
2299 int error;
2300 union {
2301 struct dk_minfo mi;
2302 struct dk_cinfo ci;
2303 struct dk_geom gi;
2304 struct fd_char fc;
2305 } arg; /* save stackspace ... */
2306 intptr_t argp = (intptr_t)&arg;
2307 ldi_handle_t lh;
2308 ldi_ident_t li;
2309 int isfloppy, isremoveable, ishotpluggable;
2310 cred_t *cr = CRED();
2311
2312 if (ldi_ident_from_dev(rdev, &li))
2313 goto out;
2314
2315 error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2316 ldi_ident_release(li);
2317 if (error)
2318 goto out;
2319
2320 /*
2321 * Not sure if this could possibly happen. It'd be a bit like
2322 * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2323 * expecting it, needs some thought if triggered ...
2324 */
2325 ASSERT(fsp->pcfs_xdev == rdev);
2326
2327 /*
2328 * Check for removeable/hotpluggable media.
2329 */
2330 if (ldi_ioctl(lh, DKIOCREMOVABLE,
2331 (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2332 isremoveable = 0;
2333 }
2334 if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2335 (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2336 ishotpluggable = 0;
2337 }
2338
2339 /*
2340 * Make sure we don't use "half-initialized" values if the ioctls fail.
2341 */
2342 if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2343 bzero(&arg, sizeof (arg));
2344 fsp->pcfs_mediasize = 0;
2345 } else {
2346 fsp->pcfs_mediasize =
2347 (len_t)arg.mi.dki_lbsize *
2348 (len_t)arg.mi.dki_capacity;
2349 }
2350
2351 if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2352 if (fsp->pcfs_secsize == 0) {
2353 fsp->pcfs_secsize = arg.mi.dki_lbsize;
2354 fsp->pcfs_sdshift =
2355 ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2356 } else {
2357 PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2358 "%d, device (%x.%x), different from user-provided "
2359 "%d. User override - ignoring autodetect result.\n",
2360 arg.mi.dki_lbsize,
2361 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2362 fsp->pcfs_secsize);
2363 }
2364 } else if (arg.mi.dki_lbsize) {
2365 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2366 "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2367 "Ignoring autodetect result.\n",
2368 arg.mi.dki_lbsize,
2369 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2370 }
2371
2372 /*
2373 * We treat the following media types as a floppy by default.
2374 */
2375 isfloppy =
2376 (arg.mi.dki_media_type == DK_FLOPPY ||
2377 arg.mi.dki_media_type == DK_ZIP ||
2378 arg.mi.dki_media_type == DK_JAZ);
2379
2380 /*
2381 * if this device understands fdio(7I) requests it's
2382 * obviously a floppy drive.
2383 */
2384 if (!isfloppy &&
2385 !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2386 isfloppy = 1;
2387
2388 /*
2389 * some devices we like to treat as floppies, but they don't
2390 * understand fdio(7I) requests.
2391 */
2392 if (!isfloppy &&
2393 !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2394 (arg.ci.dki_ctype == DKC_WDC2880 ||
2395 arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2396 arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2397 arg.ci.dki_ctype == DKC_INTEL82077))
2398 isfloppy = 1;
2399
2400 /*
2401 * This is the "final fallback" test - media with
2402 * 2 heads and 80 cylinders are assumed to be floppies.
2403 * This is normally true for USB floppy drives ...
2404 */
2405 if (!isfloppy &&
2406 !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2407 (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2408 isfloppy = 1;
2409
2410 /*
2411 * This is similar to the "old" PCFS code that sets this flag
2412 * just based on the media descriptor being 0xf8 (MD_FIXED).
2413 * Should be re-worked. We really need some specialcasing for
2414 * removeable media.
2415 */
2416 if (!isfloppy) {
2417 fsp->pcfs_flags |= PCFS_NOCHK;
2418 }
2419
2420 /*
2421 * We automatically disable access time updates if the medium is
2422 * removeable and/or hotpluggable, and the admin did not explicitly
2423 * request access time updates (via the "atime" mount option).
2424 * The majority of flash-based media should fit this category.
2425 * Minimizing write access extends the lifetime of your memory stick !
2426 */
2427 if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2428 (isremoveable || ishotpluggable | isfloppy)) {
2429 fsp->pcfs_flags |= PCFS_NOATIME;
2430 }
2431
2432 (void) ldi_close(lh, FREAD, cr);
2433 out:
2434 if (fsp->pcfs_secsize == 0) {
2435 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2436 "device (%x.%x) failed, no user-provided fallback. "
2437 "Using %d bytes.\n",
2438 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2439 DEV_BSIZE);
2440 fsp->pcfs_secsize = DEV_BSIZE;
2441 fsp->pcfs_sdshift = 0;
2442 }
2443 ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2444 ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2445 }
2446
2447 /*
2448 * Get the FAT type for the DOS medium.
2449 *
2450 * -------------------------
2451 * According to Microsoft:
2452 * The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2453 * count of clusters on the volume and nothing else.
2454 * -------------------------
2455 *
2456 */
2457 static int
pc_getfattype(struct pcfs * fsp)2458 pc_getfattype(struct pcfs *fsp)
2459 {
2460 int error = 0;
2461 buf_t *bp = NULL;
2462 struct vnode *devvp = fsp->pcfs_devvp;
2463 dev_t dev = devvp->v_rdev;
2464
2465 /*
2466 * Detect the native block size of the medium, and attempt to
2467 * detect whether the medium is removeable.
2468 * We do treat removable media (floppies, USB and FireWire disks)
2469 * differently wrt. to the frequency and synchronicity of FAT updates.
2470 * We need to know the media block size in order to be able to
2471 * parse the partition table.
2472 */
2473 pcfs_device_getinfo(fsp);
2474
2475 /*
2476 * Unpartitioned media (floppies and some removeable devices)
2477 * don't have a partition table, the FAT BPB is at disk block 0.
2478 * Start out by reading block 0.
2479 */
2480 fsp->pcfs_dosstart = 0;
2481 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2482
2483 if (error = geterror(bp))
2484 goto out;
2485
2486 /*
2487 * If a logical drive number is requested, parse the partition table
2488 * and attempt to locate it. Otherwise, proceed immediately to the
2489 * BPB check. findTheDrive(), if successful, returns the disk block
2490 * number where the requested partition starts in "startsec".
2491 */
2492 if (fsp->pcfs_ldrive != 0) {
2493 PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2494 "device (%x,%x):%d to find BPB\n",
2495 getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2496
2497 if (error = findTheDrive(fsp, &bp))
2498 goto out;
2499
2500 ASSERT(fsp->pcfs_dosstart != 0);
2501
2502 brelse(bp);
2503 bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2504 fsp->pcfs_secsize);
2505 if (error = geterror(bp))
2506 goto out;
2507 }
2508
2509 /*
2510 * Validate the BPB and fill in the instance structure.
2511 */
2512 if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2513 PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2514 "device (%x.%x):%d, disk LBA %u\n",
2515 getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2516 (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2517 error = EINVAL;
2518 goto out;
2519 }
2520
2521 ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2522
2523 out:
2524 /*
2525 * Release the buffer used
2526 */
2527 if (bp != NULL)
2528 brelse(bp);
2529 return (error);
2530 }
2531
2532
2533 /*
2534 * Get the file allocation table.
2535 * If there is an old FAT, invalidate it.
2536 */
2537 int
pc_getfat(struct pcfs * fsp)2538 pc_getfat(struct pcfs *fsp)
2539 {
2540 struct buf *bp = NULL;
2541 uchar_t *fatp = NULL;
2542 uchar_t *fat_changemap = NULL;
2543 int error;
2544 int fat_changemapsize;
2545 int flags = 0;
2546 int nfat;
2547 int altfat_mustmatch = 0;
2548 int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2549
2550 if (fsp->pcfs_fatp) {
2551 /*
2552 * There is a FAT in core.
2553 * If there are open file pcnodes or we have modified it or
2554 * it hasn't timed out yet use the in core FAT.
2555 * Otherwise invalidate it and get a new one
2556 */
2557 #ifdef notdef
2558 if (fsp->pcfs_frefs ||
2559 (fsp->pcfs_flags & PCFS_FATMOD) ||
2560 (gethrestime_sec() < fsp->pcfs_fattime)) {
2561 return (0);
2562 } else {
2563 mutex_enter(&pcfslock);
2564 pc_invalfat(fsp);
2565 mutex_exit(&pcfslock);
2566 }
2567 #endif /* notdef */
2568 return (0);
2569 }
2570
2571 /*
2572 * Get FAT and check it for validity
2573 */
2574 fatp = kmem_alloc(fatsize, KM_SLEEP);
2575 error = pc_readfat(fsp, fatp);
2576 if (error) {
2577 flags = B_ERROR;
2578 goto out;
2579 }
2580 fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2581 fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2582 fsp->pcfs_fatp = fatp;
2583 fsp->pcfs_fat_changemapsize = fat_changemapsize;
2584 fsp->pcfs_fat_changemap = fat_changemap;
2585
2586 /*
2587 * The only definite signature check is that the
2588 * media descriptor byte should match the first byte
2589 * of the FAT block.
2590 */
2591 if (fatp[0] != fsp->pcfs_mediadesc) {
2592 cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2593 "media descriptor %x, FAT[0] lowbyte %x\n",
2594 (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2595 cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2596 altfat_mustmatch = 1;
2597 }
2598
2599 /*
2600 * Get alternate FATs and check for consistency
2601 * This is an inlined version of pc_readfat().
2602 * Since we're only comparing FAT and alternate FAT,
2603 * there's no reason to let pc_readfat() copy data out
2604 * of the buf. Instead, compare in-situ, one cluster
2605 * at a time.
2606 */
2607 for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2608 size_t startsec;
2609 size_t off;
2610
2611 startsec = pc_dbdaddr(fsp,
2612 fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2613
2614 for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2615 daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2616 pc_cltodb(fsp, pc_lblkno(fsp, off)));
2617
2618 bp = bread(fsp->pcfs_xdev, fatblk,
2619 MIN(fsp->pcfs_clsize, fatsize - off));
2620 if (bp->b_flags & (B_ERROR | B_STALE)) {
2621 cmn_err(CE_NOTE,
2622 "!pcfs: alternate FAT #%d (start LBA %p)"
2623 " read error at offset %ld on device"
2624 " (%x.%x):%d",
2625 nfat, (void *)(uintptr_t)startsec, off,
2626 getmajor(fsp->pcfs_xdev),
2627 getminor(fsp->pcfs_xdev),
2628 fsp->pcfs_ldrive);
2629 flags = B_ERROR;
2630 error = EIO;
2631 goto out;
2632 }
2633 bp->b_flags |= B_STALE | B_AGE;
2634 if (bcmp(bp->b_un.b_addr, fatp + off,
2635 MIN(fsp->pcfs_clsize, fatsize - off))) {
2636 cmn_err(CE_NOTE,
2637 "!pcfs: alternate FAT #%d (start LBA %p)"
2638 " corrupted at offset %ld on device"
2639 " (%x.%x):%d",
2640 nfat, (void *)(uintptr_t)startsec, off,
2641 getmajor(fsp->pcfs_xdev),
2642 getminor(fsp->pcfs_xdev),
2643 fsp->pcfs_ldrive);
2644 if (altfat_mustmatch) {
2645 flags = B_ERROR;
2646 error = EIO;
2647 goto out;
2648 }
2649 }
2650 brelse(bp);
2651 bp = NULL; /* prevent double release */
2652 }
2653 }
2654
2655 fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2656 fsp->pcfs_fatjustread = 1;
2657
2658 /*
2659 * Retrieve FAT32 fsinfo sector.
2660 * A failure to read this is not fatal to accessing the volume.
2661 * It simply means operations that count or search free blocks
2662 * will have to do a full FAT walk, vs. a possibly quicker lookup
2663 * of the summary information.
2664 * Hence, we log a message but return success overall after this point.
2665 */
2666 if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2667 struct fat_od_fsi *fsinfo_disk;
2668
2669 bp = bread(fsp->pcfs_xdev,
2670 pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2671 fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2672 if (bp->b_flags & (B_ERROR | B_STALE) ||
2673 !FSISIG_OK(fsinfo_disk)) {
2674 cmn_err(CE_NOTE,
2675 "!pcfs: error reading fat32 fsinfo from "
2676 "device (%x.%x):%d, block %lld",
2677 getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2678 fsp->pcfs_ldrive,
2679 (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2680 fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2681 fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2682 fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2683 } else {
2684 bp->b_flags |= B_STALE | B_AGE;
2685 fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2686 fsp->pcfs_fsinfo.fs_free_clusters =
2687 LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2688 fsp->pcfs_fsinfo.fs_next_free =
2689 LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2690 }
2691 brelse(bp);
2692 bp = NULL;
2693 }
2694
2695 if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2696 fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2697 else
2698 fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2699
2700 return (0);
2701
2702 out:
2703 cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2704 if (bp)
2705 brelse(bp);
2706 if (fatp)
2707 kmem_free(fatp, fatsize);
2708 if (fat_changemap)
2709 kmem_free(fat_changemap, fat_changemapsize);
2710
2711 if (flags) {
2712 pc_mark_irrecov(fsp);
2713 }
2714 return (error);
2715 }
2716