xref: /titanic_50/usr/src/uts/common/fs/pcfs/pc_vfsops.c (revision c48ac12eb805b96b1b8d7582c4168b4e4fa56865)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kmem.h>
31 #include <sys/user.h>
32 #include <sys/proc.h>
33 #include <sys/cred.h>
34 #include <sys/disp.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/vnode.h>
39 #include <sys/fdio.h>
40 #include <sys/file.h>
41 #include <sys/uio.h>
42 #include <sys/conf.h>
43 #include <sys/statvfs.h>
44 #include <sys/mount.h>
45 #include <sys/pathname.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/sysmacros.h>
49 #include <sys/conf.h>
50 #include <sys/mkdev.h>
51 #include <sys/swap.h>
52 #include <sys/sunddi.h>
53 #include <sys/sunldi.h>
54 #include <sys/dktp/fdisk.h>
55 #include <sys/fs/pc_label.h>
56 #include <sys/fs/pc_fs.h>
57 #include <sys/fs/pc_dir.h>
58 #include <sys/fs/pc_node.h>
59 #include <fs/fs_subr.h>
60 #include <sys/modctl.h>
61 #include <sys/dkio.h>
62 #include <sys/open.h>
63 #include <sys/mntent.h>
64 #include <sys/policy.h>
65 #include <sys/atomic.h>
66 #include <sys/sdt.h>
67 
68 /*
 * The majority of PC media use a 512-byte sector size, but
 * occasionally you will run across a 1k sector size.
71  * For media with a 1k sector size, fd_strategy() requires
72  * the I/O size to be a 1k multiple; so when the sector size
73  * is not yet known, always read 1k.
74  */
75 #define	PC_SAFESECSIZE	(PC_SECSIZE * 2)
76 
77 static int pcfs_pseudo_floppy(dev_t);
78 
79 static int pcfsinit(int, char *);
80 static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
81 	struct cred *);
82 static int pcfs_unmount(struct vfs *, int, struct cred *);
83 static int pcfs_root(struct vfs *, struct vnode **);
84 static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
85 static int pc_syncfsnodes(struct pcfs *);
86 static int pcfs_sync(struct vfs *, short, struct cred *);
87 static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
88 static void pcfs_freevfs(vfs_t *vfsp);
89 
90 static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
91 static int pc_writefat(struct pcfs *fsp, daddr_t start);
92 
93 static int pc_getfattype(struct pcfs *fsp);
94 static void pcfs_parse_mntopts(struct pcfs *fsp);
95 
96 
97 /*
98  * pcfs mount options table
99  */
100 
101 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
102 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
103 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
104 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
105 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
106 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
107 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
108 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
109 
110 static mntopt_t mntopts[] = {
111 /*
112  *	option name	cancel option	default arg	flags	opt data
113  */
114 	{ MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
115 	{ MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
116 	{ MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
117 	{ MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
118 	{ MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
119 	{ MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
120 	{ MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
121 	{ MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
122 	{ MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
123 	{ MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
124 };
125 
126 static mntopts_t pcfs_mntopts = {
127 	sizeof (mntopts) / sizeof (mntopt_t),
128 	mntopts
129 };
130 
131 int pcfsdebuglevel = 0;
132 
133 /*
 * pcfslock:	protects the list of mounted pc filesystems "pc_mounttab".
135  * pcfs_lock:	(inside per filesystem structure "pcfs")
136  *		per filesystem lock. Most of the vfsops and vnodeops are
137  *		protected by this lock.
138  * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
139  *
140  * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
141  *
142  * pcfs_mountcount:	used to prevent module unloads while there is still
143  *			pcfs state from a former mount hanging around. With
144  *			forced umount support, the filesystem module must not
145  *			be allowed to go away before the last VFS_FREEVFS()
146  *			call has been made.
147  *			Since this is just an atomic counter, there's no need
148  *			for locking.
149  */
150 kmutex_t	pcfslock;
151 krwlock_t	pcnodes_lock;
152 uint32_t	pcfs_mountcount;
153 
154 static int pcfstype;
155 
156 static vfsdef_t vfw = {
157 	VFSDEF_VERSION,
158 	"pcfs",
159 	pcfsinit,
160 	VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS,
161 	&pcfs_mntopts
162 };
163 
164 extern struct mod_ops mod_fsops;
165 
166 static struct modlfs modlfs = {
167 	&mod_fsops,
168 	"PC filesystem",
169 	&vfw
170 };
171 
172 static struct modlinkage modlinkage = {
173 	MODREV_1,
174 	&modlfs,
175 	NULL
176 };
177 
178 int
179 _init(void)
180 {
181 	int	error;
182 
183 #if !defined(lint)
184 	/* make sure the on-disk structures are sane */
185 	ASSERT(sizeof (struct pcdir) == 32);
186 	ASSERT(sizeof (struct pcdir_lfn) == 32);
187 #endif
188 	mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
189 	rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
190 	error = mod_install(&modlinkage);
191 	if (error) {
192 		mutex_destroy(&pcfslock);
193 		rw_destroy(&pcnodes_lock);
194 	}
195 	return (error);
196 }
197 
198 int
199 _fini(void)
200 {
201 	int	error;
202 
203 	/*
204 	 * If a forcedly unmounted instance is still hanging around,
205 	 * we cannot allow the module to be unloaded because that would
206 	 * cause panics once the VFS framework decides it's time to call
207 	 * into VFS_FREEVFS().
208 	 */
209 	if (pcfs_mountcount)
210 		return (EBUSY);
211 
212 	error = mod_remove(&modlinkage);
213 	if (error)
214 		return (error);
215 	mutex_destroy(&pcfslock);
216 	rw_destroy(&pcnodes_lock);
217 	/*
218 	 * Tear down the operations vectors
219 	 */
220 	(void) vfs_freevfsops_by_type(pcfstype);
221 	vn_freevnodeops(pcfs_fvnodeops);
222 	vn_freevnodeops(pcfs_dvnodeops);
223 	return (0);
224 }
225 
226 int
227 _info(struct modinfo *modinfop)
228 {
229 	return (mod_info(&modlinkage, modinfop));
230 }
231 
232 /* ARGSUSED1 */
233 static int
234 pcfsinit(int fstype, char *name)
235 {
236 	static const fs_operation_def_t pcfs_vfsops_template[] = {
237 		VFSNAME_MOUNT,		{ .vfs_mount = pcfs_mount },
238 		VFSNAME_UNMOUNT,	{ .vfs_unmount = pcfs_unmount },
239 		VFSNAME_ROOT,		{ .vfs_root = pcfs_root },
240 		VFSNAME_STATVFS,	{ .vfs_statvfs = pcfs_statvfs },
241 		VFSNAME_SYNC,		{ .vfs_sync = pcfs_sync },
242 		VFSNAME_VGET,		{ .vfs_vget = pcfs_vget },
243 		VFSNAME_FREEVFS,	{ .vfs_freevfs = pcfs_freevfs },
244 		NULL,			NULL
245 	};
246 	int error;
247 
248 	error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
249 	if (error != 0) {
250 		cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
251 		return (error);
252 	}
253 
254 	error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
255 	if (error != 0) {
256 		(void) vfs_freevfsops_by_type(fstype);
257 		cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
258 		return (error);
259 	}
260 
261 	error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
262 	if (error != 0) {
263 		(void) vfs_freevfsops_by_type(fstype);
264 		vn_freevnodeops(pcfs_fvnodeops);
265 		cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
266 		return (error);
267 	}
268 
269 	pcfstype = fstype;
270 	(void) pc_init();
271 	pcfs_mountcount = 0;
272 	return (0);
273 }
274 
275 static struct pcfs *pc_mounttab = NULL;
276 
277 extern struct pcfs_args pc_tz;
278 
279 /*
280  *  Define some special logical drives we use internal to this file.
281  */
282 #define	BOOT_PARTITION_DRIVE	99
283 #define	PRIMARY_DOS_DRIVE	1
284 #define	UNPARTITIONED_DRIVE	0
285 
286 static int
287 pcfs_device_identify(
288 	struct vfs *vfsp,
289 	struct mounta *uap,
290 	struct cred *cr,
291 	int *dos_ldrive,
292 	dev_t *xdev)
293 {
294 	struct pathname special;
295 	char *c;
296 	struct vnode *bvp;
297 	int oflag, aflag;
298 	int error;
299 
300 	/*
301 	 * Resolve path name of special file being mounted.
302 	 */
303 	if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
304 		return (error);
305 	}
306 
307 	*dos_ldrive = -1;
308 
309 	if (error =
310 	    lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &bvp)) {
311 		/*
312 		 * If there's no device node, the name specified most likely
313 		 * maps to a PCFS-style "partition specifier" to select a
314 		 * harddisk primary/logical partition. Disable floppy-specific
315 		 * checks in such cases unless an explicit :A or :B is
316 		 * requested.
317 		 */
318 
319 		/*
320 		 * Split the pathname string at the last ':' separator.
321 		 * If there's no ':' in the device name, or the ':' is the
322 		 * last character in the string, the name is invalid and
323 		 * the error from the previous lookup will be returned.
324 		 */
325 		c = strrchr(special.pn_path, ':');
326 		if (c == NULL || strlen(c) == 0)
327 			goto devlookup_done;
328 
329 		*c++ = '\0';
330 
331 		/*
332 		 * PCFS partition name suffixes can be:
333 		 *	- "boot" to indicate the X86BOOT partition
334 		 *	- a drive letter [c-z] for the "DOS logical drive"
335 		 *	- a drive number 1..24 for the "DOS logical drive"
336 		 *	- a "floppy name letter", 'a' or 'b' (just strip this)
337 		 */
338 		if (strcasecmp(c, "boot") == 0) {
339 			/*
340 			 * The Solaris boot partition is requested.
341 			 */
342 			*dos_ldrive = BOOT_PARTITION_DRIVE;
343 		} else if (strspn(c, "0123456789") == strlen(c)) {
344 			/*
345 			 * All digits - parse the partition number.
346 			 */
347 			long drvnum = 0;
348 
349 			if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
350 				/*
351 				 * A number alright - in the allowed range ?
352 				 */
353 				if (drvnum > 24 || drvnum == 0)
354 					error = ENXIO;
355 			}
356 			if (error)
357 				goto devlookup_done;
358 			*dos_ldrive = (int)drvnum;
359 		} else if (strlen(c) == 1) {
360 			/*
361 			 * A single trailing character was specified.
362 			 *	- [c-zC-Z] means a harddisk partition, and
363 			 *	  we retrieve the partition number.
364 			 *	- [abAB] means a floppy drive, so we swallow
365 			 *	  the "drive specifier" and test later
366 			 *	  whether the physical device is a floppy or
367 			 *	  PCMCIA pseudofloppy (sram card).
368 			 */
369 			*c = tolower(*c);
370 			if (*c == 'a' || *c == 'b') {
371 				*dos_ldrive = UNPARTITIONED_DRIVE;
372 			} else if (*c < 'c' || *c > 'z') {
373 				error = ENXIO;
374 				goto devlookup_done;
375 			} else {
376 				*dos_ldrive = 1 + *c - 'c';
377 			}
378 		} else {
379 			/*
380 			 * Can't parse this - pass through previous error.
381 			 */
382 			goto devlookup_done;
383 		}
384 
385 
386 		error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
387 		    NULLVPP, &bvp);
388 	} else {
389 		*dos_ldrive = UNPARTITIONED_DRIVE;
390 	}
391 devlookup_done:
392 	pn_free(&special);
393 	if (error)
394 		return (error);
395 
396 	ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
397 
398 	*xdev = bvp->v_rdev;
399 
400 	/*
401 	 * Verify caller's permission to open the device special file.
402 	 */
403 	if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
404 	    ((uap->flags & MS_RDONLY) != 0)) {
405 		oflag = FREAD;
406 		aflag = VREAD;
407 	} else {
408 		oflag = FREAD | FWRITE;
409 		aflag = VREAD | VWRITE;
410 	}
411 
412 	if (bvp->v_type != VBLK)
413 		error = ENOTBLK;
414 	else if (getmajor(*xdev) >= devcnt)
415 		error = ENXIO;
416 
417 	if ((error != 0) ||
418 	    (error = VOP_ACCESS(bvp, aflag, 0, cr, NULL)) != 0 ||
419 	    (error = secpolicy_spec_open(cr, bvp, oflag)) != 0) {
420 		VN_RELE(bvp);
421 		return (error);
422 	}
423 
424 	VN_RELE(bvp);
425 	return (0);
426 }
427 
428 static int
429 pcfs_device_ismounted(
430 	struct vfs *vfsp,
431 	int dos_ldrive,
432 	dev_t xdev,
433 	int *remounting,
434 	dev_t *pseudodev)
435 {
436 	struct pcfs *fsp;
437 	int remount = *remounting;
438 
439 	/*
440 	 * Ensure that this logical drive isn't already mounted, unless
441 	 * this is a REMOUNT request.
442 	 * Note: The framework will perform this check if the "...:c"
443 	 * PCFS-style "logical drive" syntax has not been used and an
444 	 * actually existing physical device is backing this filesystem.
445 	 * Once all block device drivers support PC-style partitioning,
446 	 * this codeblock can be dropped.
447 	 */
448 	*pseudodev = xdev;
449 
450 	if (dos_ldrive) {
451 		mutex_enter(&pcfslock);
452 		for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
453 			if (fsp->pcfs_xdev == xdev &&
454 			    fsp->pcfs_ldrive == dos_ldrive) {
455 				mutex_exit(&pcfslock);
456 				if (remount) {
457 					return (0);
458 				} else {
459 					return (EBUSY);
460 				}
461 			}
462 		/*
463 		 * Assign a unique device number for the vfs
464 		 * The old way (getudev() + a constantly incrementing
465 		 * major number) was wrong because it changes vfs_dev
466 		 * across mounts and reboots, which breaks nfs file handles.
467 		 * UFS just uses the real dev_t. We can't do that because
468 		 * of the way pcfs opens fdisk partitons (the :c and :d
469 		 * partitions are on the same dev_t). Though that _might_
470 		 * actually be ok, since the file handle contains an
471 		 * absolute block number, it's probably better to make them
472 		 * different. So I think we should retain the original
473 		 * dev_t, but come up with a different minor number based
474 		 * on the logical drive that will _always_ come up the same.
475 		 * For now, we steal the upper 6 bits.
476 		 */
477 #ifdef notdef
478 		/* what should we do here? */
479 		if (((getminor(xdev) >> 12) & 0x3F) != 0)
480 			printf("whoops - upper bits used!\n");
481 #endif
482 		*pseudodev = makedevice(getmajor(xdev),
483 		    ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
484 		if (vfs_devmounting(*pseudodev, vfsp)) {
485 			mutex_exit(&pcfslock);
486 			return (EBUSY);
487 		}
488 		if (vfs_devismounted(*pseudodev)) {
489 			mutex_exit(&pcfslock);
490 			if (remount) {
491 				return (0);
492 			} else {
493 				return (EBUSY);
494 			}
495 		}
496 		mutex_exit(&pcfslock);
497 	} else {
498 		*pseudodev = xdev;
499 		if (vfs_devmounting(*pseudodev, vfsp)) {
500 			return (EBUSY);
501 		}
502 		if (vfs_devismounted(*pseudodev))
503 			if (remount) {
504 				return (0);
505 			} else {
506 				return (EBUSY);
507 			}
508 	}
509 
510 	/*
511 	 * This is not a remount. Even if MS_REMOUNT was requested,
512 	 * the caller needs to proceed as it would on an ordinary
513 	 * mount.
514 	 */
515 	*remounting = 0;
516 
517 	ASSERT(*pseudodev);
518 	return (0);
519 }
520 
521 /*
522  * Get the PCFS-specific mount options from the VFS framework.
523  * For "timezone" and "secsize", we need to parse the number
524  * ourselves and ensure its validity.
525  * Note: "secsize" is deliberately undocumented at this time,
526  * it's a workaround for devices (particularly: lofi image files)
527  * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
528  */
529 static void
530 pcfs_parse_mntopts(struct pcfs *fsp)
531 {
532 	char *c;
533 	char *endptr;
534 	long l;
535 	struct vfs *vfsp = fsp->pcfs_vfs;
536 
537 	ASSERT(fsp->pcfs_secondswest == 0);
538 	ASSERT(fsp->pcfs_secsize == 0);
539 
540 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
541 		fsp->pcfs_flags |= PCFS_HIDDEN;
542 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
543 		fsp->pcfs_flags |= PCFS_FOLDCASE;
544 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
545 		fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
546 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
547 		fsp->pcfs_flags |= PCFS_NOATIME;
548 
549 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
550 		if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
551 		    endptr == c + strlen(c)) {
552 			/*
553 			 * A number alright - in the allowed range ?
554 			 */
555 			if (l <= -12*3600 || l >= 12*3600) {
556 				cmn_err(CE_WARN, "!pcfs: invalid use of "
557 				    "'timezone' mount option - %ld "
558 				    "is out of range. Assuming 0.", l);
559 				l = 0;
560 			}
561 		} else {
562 			cmn_err(CE_WARN, "!pcfs: invalid use of "
563 			    "'timezone' mount option - argument %s "
564 			    "is not a valid number. Assuming 0.", c);
565 			l = 0;
566 		}
567 		fsp->pcfs_secondswest = l;
568 	}
569 
570 	/*
571 	 * The "secsize=..." mount option is a workaround for the lack of
572 	 * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
573 	 * partition table of a disk image and it has been partitioned with
574 	 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
575 	 * images.
576 	 * That should really be fixed in lofi ... this is a workaround.
577 	 */
578 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
579 		if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
580 		    endptr == c + strlen(c)) {
581 			/*
582 			 * A number alright - a valid sector size as well ?
583 			 */
584 			if (!VALID_SECSIZE(l)) {
585 				cmn_err(CE_WARN, "!pcfs: invalid use of "
586 				    "'secsize' mount option - %ld is "
587 				    "unsupported. Autodetecting.", l);
588 				l = 0;
589 			}
590 		} else {
591 			cmn_err(CE_WARN, "!pcfs: invalid use of "
592 			    "'secsize' mount option - argument %s "
593 			    "is not a valid number. Autodetecting.", c);
594 			l = 0;
595 		}
596 		fsp->pcfs_secsize = l;
597 		fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
598 	}
599 }
600 
601 /*
602  * vfs operations
603  */
604 
605 /*
606  * pcfs_mount - backend for VFS_MOUNT() on PCFS.
607  */
608 static int
609 pcfs_mount(
610 	struct vfs *vfsp,
611 	struct vnode *mvp,
612 	struct mounta *uap,
613 	struct cred *cr)
614 {
615 	struct pcfs *fsp;
616 	struct vnode *devvp;
617 	dev_t pseudodev;
618 	dev_t xdev;
619 	int dos_ldrive = 0;
620 	int error;
621 	int remounting;
622 
623 	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
624 		return (error);
625 
626 	if (mvp->v_type != VDIR)
627 		return (ENOTDIR);
628 
629 	mutex_enter(&mvp->v_lock);
630 	if ((uap->flags & MS_REMOUNT) == 0 &&
631 	    (uap->flags & MS_OVERLAY) == 0 &&
632 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
633 		mutex_exit(&mvp->v_lock);
634 		return (EBUSY);
635 	}
636 	mutex_exit(&mvp->v_lock);
637 
638 	/*
639 	 * PCFS doesn't do mount arguments anymore - everything's a mount
640 	 * option these days. In order not to break existing callers, we
641 	 * don't reject it yet, just warn that the data (if any) is ignored.
642 	 */
643 	if (uap->datalen != 0)
644 		cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
645 		    "mount argument structures instead of mount options. "
646 		    "Ignoring mount(2) 'dataptr' argument.");
647 
648 	/*
649 	 * This is needed early, to make sure the access / open calls
650 	 * are done using the correct mode. Processing this mount option
651 	 * only when calling pcfs_parse_mntopts() would lead us to attempt
652 	 * a read/write access to a possibly writeprotected device, and
653 	 * a readonly mount attempt might fail because of that.
654 	 */
655 	if (uap->flags & MS_RDONLY) {
656 		vfsp->vfs_flag |= VFS_RDONLY;
657 		vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
658 	}
659 
660 	/*
661 	 * For most filesystems, this is just a lookupname() on the
662 	 * mount pathname string. PCFS historically has to do its own
663 	 * partition table parsing because not all Solaris architectures
664 	 * support all styles of partitioning that PC media can have, and
665 	 * hence PCFS understands "device names" that don't map to actual
666 	 * physical device nodes. Parsing the "PCFS syntax" for device
667 	 * names is done in pcfs_device_identify() - see there.
668 	 *
669 	 * Once all block device drivers that can host FAT filesystems have
670 	 * been enhanced to create device nodes for all PC-style partitions,
671 	 * this code can go away.
672 	 */
673 	if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
674 		return (error);
675 
676 	/*
677 	 * As with looking up the actual device to mount, PCFS cannot rely
678 	 * on just the checks done by vfs_ismounted() whether a given device
679 	 * is mounted already. The additional check against the "PCFS syntax"
680 	 * is done in  pcfs_device_ismounted().
681 	 */
682 	remounting = (uap->flags & MS_REMOUNT);
683 
684 	if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
685 	    &pseudodev))
686 		return (error);
687 
688 	if (remounting)
689 		return (0);
690 
691 	/*
692 	 * Mount the filesystem.
693 	 * An instance structure is required before the attempt to locate
694 	 * and parse the FAT BPB. This is because mount options may change
695 	 * the behaviour of the filesystem type matching code. Precreate
696 	 * it and fill it in to a degree that allows parsing the mount
697 	 * options.
698 	 */
699 	devvp = makespecvp(xdev, VBLK);
700 	if (IS_SWAPVP(devvp)) {
701 		VN_RELE(devvp);
702 		return (EBUSY);
703 	}
704 	error = VOP_OPEN(&devvp,
705 	    (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
706 	if (error) {
707 		VN_RELE(devvp);
708 		return (error);
709 	}
710 
711 	fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
712 	fsp->pcfs_vfs = vfsp;
713 	fsp->pcfs_xdev = xdev;
714 	fsp->pcfs_devvp = devvp;
715 	fsp->pcfs_ldrive = dos_ldrive;
716 	mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
717 
718 	pcfs_parse_mntopts(fsp);
719 
720 	/*
721 	 * This is the actual "mount" - the PCFS superblock check.
722 	 *
723 	 * Find the requested logical drive and the FAT BPB therein.
724 	 * Check device type and flag the instance if media is removeable.
725 	 *
726 	 * Initializes most members of the filesystem instance structure.
727 	 * Returns EINVAL if no valid BPB can be found. Other errors may
728 	 * occur after I/O failures, or when invalid / unparseable partition
729 	 * tables are encountered.
730 	 */
731 	if (error = pc_getfattype(fsp))
732 		goto errout;
733 
734 	/*
735 	 * Now that the BPB has been parsed, this structural information
736 	 * is available and known to be valid. Initialize the VFS.
737 	 */
738 	vfsp->vfs_data = fsp;
739 	vfsp->vfs_dev = pseudodev;
740 	vfsp->vfs_fstype = pcfstype;
741 	vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
742 	vfsp->vfs_bcount = 0;
743 	vfsp->vfs_bsize = fsp->pcfs_clsize;
744 
745 	/*
746 	 * Validate that we can access the FAT and that it is, to the
747 	 * degree we can verify here, self-consistent.
748 	 */
749 	if (error = pc_verify(fsp))
750 		goto errout;
751 
752 	/*
753 	 * Record the time of the mount, to return as an "approximate"
754 	 * timestamp for the FAT root directory. Since FAT roots don't
755 	 * have timestamps, this is less confusing to the user than
756 	 * claiming "zero" / Jan/01/1970.
757 	 */
758 	gethrestime(&fsp->pcfs_mounttime);
759 
760 	/*
761 	 * Fix up the mount options. Because "noatime" is made default on
762 	 * removeable media only, a fixed disk will have neither "atime"
763 	 * nor "noatime" set. We set the options explicitly depending on
764 	 * the PCFS_NOATIME flag, to inform the user of what applies.
765 	 * Mount option cancellation will take care that the mutually
766 	 * exclusive 'other' is cleared.
767 	 */
768 	vfs_setmntopt(vfsp,
769 	    fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
770 	    NULL, 0);
771 
772 	/*
773 	 * All clear - insert the FS instance into PCFS' list.
774 	 */
775 	mutex_enter(&pcfslock);
776 	fsp->pcfs_nxt = pc_mounttab;
777 	pc_mounttab = fsp;
778 	mutex_exit(&pcfslock);
779 	atomic_inc_32(&pcfs_mountcount);
780 	return (0);
781 
782 errout:
783 	(void) VOP_CLOSE(devvp,
784 	    vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
785 	    1, (offset_t)0, cr, NULL);
786 	VN_RELE(devvp);
787 	mutex_destroy(&fsp->pcfs_lock);
788 	kmem_free(fsp, sizeof (*fsp));
789 	return (error);
790 
791 }
792 
793 static int
794 pcfs_unmount(
795 	struct vfs *vfsp,
796 	int flag,
797 	struct cred *cr)
798 {
799 	struct pcfs *fsp, *fsp1;
800 
801 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
802 		return (EPERM);
803 
804 	fsp = VFSTOPCFS(vfsp);
805 
806 	/*
807 	 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
808 	 * prevent lookuppn from crossing the mount point.
809 	 * If this is not a forced umount request and there's ongoing I/O,
810 	 * don't allow the mount to proceed.
811 	 */
812 	if (flag & MS_FORCE)
813 		vfsp->vfs_flag |= VFS_UNMOUNTED;
814 	else if (fsp->pcfs_nrefs)
815 		return (EBUSY);
816 
817 	mutex_enter(&pcfslock);
818 
819 	/*
820 	 * If this is a forced umount request or if the fs instance has
821 	 * been marked as beyond recovery, allow the umount to proceed
822 	 * regardless of state. pc_diskchanged() forcibly releases all
823 	 * inactive vnodes/pcnodes.
824 	 */
825 	if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
826 		rw_enter(&pcnodes_lock, RW_WRITER);
827 		pc_diskchanged(fsp);
828 		rw_exit(&pcnodes_lock);
829 	}
830 
831 	/* now there should be no pcp node on pcfhead or pcdhead. */
832 
833 	if (fsp == pc_mounttab) {
834 		pc_mounttab = fsp->pcfs_nxt;
835 	} else {
836 		for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
837 			if (fsp1->pcfs_nxt == fsp)
838 				fsp1->pcfs_nxt = fsp->pcfs_nxt;
839 	}
840 
841 	mutex_exit(&pcfslock);
842 
843 	/*
844 	 * Since we support VFS_FREEVFS(), there's no need to
845 	 * free the fsp right now. The framework will tell us
846 	 * when the right time to do so has arrived by calling
847 	 * into pcfs_freevfs.
848 	 */
849 	return (0);
850 }
851 
852 /*
853  * find root of pcfs
854  */
855 static int
856 pcfs_root(
857 	struct vfs *vfsp,
858 	struct vnode **vpp)
859 {
860 	struct pcfs *fsp;
861 	struct pcnode *pcp;
862 	int error;
863 
864 	fsp = VFSTOPCFS(vfsp);
865 	if (error = pc_lockfs(fsp, 0, 0))
866 		return (error);
867 
868 	pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
869 	pc_unlockfs(fsp);
870 	*vpp = PCTOV(pcp);
871 	pcp->pc_flags |= PC_EXTERNAL;
872 	return (0);
873 }
874 
875 /*
876  * Get file system statistics.
877  */
878 static int
879 pcfs_statvfs(
880 	struct vfs *vfsp,
881 	struct statvfs64 *sp)
882 {
883 	struct pcfs *fsp;
884 	int error;
885 	dev32_t d32;
886 
887 	fsp = VFSTOPCFS(vfsp);
888 	error = pc_getfat(fsp);
889 	if (error)
890 		return (error);
891 	bzero(sp, sizeof (*sp));
892 	sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
893 	sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
894 	sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
895 	sp->f_files = (fsfilcnt64_t)-1;
896 	sp->f_ffree = (fsfilcnt64_t)-1;
897 	sp->f_favail = (fsfilcnt64_t)-1;
898 #ifdef notdef
899 	(void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
900 #endif /* notdef */
901 	(void) cmpldev(&d32, vfsp->vfs_dev);
902 	sp->f_fsid = d32;
903 	(void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
904 	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
905 	sp->f_namemax = PCFNAMESIZE;
906 	return (0);
907 }
908 
909 static int
910 pc_syncfsnodes(struct pcfs *fsp)
911 {
912 	struct pchead *hp;
913 	struct pcnode *pcp;
914 	int error;
915 
916 	if (error = pc_lockfs(fsp, 0, 0))
917 		return (error);
918 
919 	if (!(error = pc_syncfat(fsp))) {
920 		hp = pcfhead;
921 		while (hp < & pcfhead [ NPCHASH ]) {
922 			rw_enter(&pcnodes_lock, RW_READER);
923 			pcp = hp->pch_forw;
924 			while (pcp != (struct pcnode *)hp) {
925 				if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
926 					if (error = pc_nodesync(pcp))
927 						break;
928 				pcp = pcp -> pc_forw;
929 			}
930 			rw_exit(&pcnodes_lock);
931 			if (error)
932 				break;
933 			hp++;
934 		}
935 	}
936 	pc_unlockfs(fsp);
937 	return (error);
938 }
939 
940 /*
941  * Flush any pending I/O.
942  */
943 /*ARGSUSED*/
944 static int
945 pcfs_sync(
946 	struct vfs *vfsp,
947 	short flag,
948 	struct cred *cr)
949 {
950 	struct pcfs *fsp;
951 	int error = 0;
952 
953 	/* this prevents the filesystem from being umounted. */
954 	mutex_enter(&pcfslock);
955 	if (vfsp != NULL) {
956 		fsp = VFSTOPCFS(vfsp);
957 		if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
958 			error = pc_syncfsnodes(fsp);
959 		} else {
960 			rw_enter(&pcnodes_lock, RW_WRITER);
961 			pc_diskchanged(fsp);
962 			rw_exit(&pcnodes_lock);
963 			error = EIO;
964 		}
965 	} else {
966 		fsp = pc_mounttab;
967 		while (fsp != NULL) {
968 			if (fsp->pcfs_flags & PCFS_IRRECOV) {
969 				rw_enter(&pcnodes_lock, RW_WRITER);
970 				pc_diskchanged(fsp);
971 				rw_exit(&pcnodes_lock);
972 				error = EIO;
973 				break;
974 			}
975 			error = pc_syncfsnodes(fsp);
976 			if (error) break;
977 			fsp = fsp->pcfs_nxt;
978 		}
979 	}
980 	mutex_exit(&pcfslock);
981 	return (error);
982 }
983 
984 int
985 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
986 {
987 	int err;
988 
989 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
990 		return (EIO);
991 
992 	if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
993 		fsp->pcfs_count++;
994 	} else {
995 		mutex_enter(&fsp->pcfs_lock);
996 		if (fsp->pcfs_flags & PCFS_LOCKED)
997 			panic("pc_lockfs");
998 		/*
999 		 * We check the IRRECOV bit again just in case somebody
1000 		 * snuck past the initial check but then got held up before
1001 		 * they could grab the lock.  (And in the meantime someone
1002 		 * had grabbed the lock and set the bit)
1003 		 */
1004 		if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1005 			if ((err = pc_getfat(fsp))) {
1006 				mutex_exit(&fsp->pcfs_lock);
1007 				return (err);
1008 			}
1009 		}
1010 		fsp->pcfs_flags |= PCFS_LOCKED;
1011 		fsp->pcfs_owner = curthread;
1012 		fsp->pcfs_count++;
1013 	}
1014 	return (0);
1015 }
1016 
1017 void
1018 pc_unlockfs(struct pcfs *fsp)
1019 {
1020 
1021 	if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1022 		panic("pc_unlockfs");
1023 	if (--fsp->pcfs_count < 0)
1024 		panic("pc_unlockfs: count");
1025 	if (fsp->pcfs_count == 0) {
1026 		fsp->pcfs_flags &= ~PCFS_LOCKED;
1027 		fsp->pcfs_owner = 0;
1028 		mutex_exit(&fsp->pcfs_lock);
1029 	}
1030 }
1031 
1032 int
1033 pc_syncfat(struct pcfs *fsp)
1034 {
1035 	struct buf *bp;
1036 	int nfat;
1037 	int	error = 0;
1038 	struct fat_od_fsi *fsinfo_disk;
1039 
1040 	if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1041 	    !(fsp->pcfs_flags & PCFS_FATMOD))
1042 		return (0);
1043 	/*
1044 	 * write out all copies of FATs
1045 	 */
1046 	fsp->pcfs_flags &= ~PCFS_FATMOD;
1047 	fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1048 	for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1049 		error = pc_writefat(fsp, pc_dbdaddr(fsp,
1050 		    fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1051 		if (error) {
1052 			pc_mark_irrecov(fsp);
1053 			return (EIO);
1054 		}
1055 	}
1056 	pc_clear_fatchanges(fsp);
1057 
1058 	/*
1059 	 * Write out fsinfo sector.
1060 	 */
1061 	if (IS_FAT32(fsp)) {
1062 		bp = bread(fsp->pcfs_xdev,
1063 		    pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1064 		if (bp->b_flags & (B_ERROR | B_STALE)) {
1065 			error = geterror(bp);
1066 		}
1067 		fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1068 		if (!error && FSISIG_OK(fsinfo_disk)) {
1069 			fsinfo_disk->fsi_incore.fs_free_clusters =
1070 			    LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1071 			fsinfo_disk->fsi_incore.fs_next_free =
1072 			    LE_32(FSINFO_UNKNOWN);
1073 			bwrite2(bp);
1074 			error = geterror(bp);
1075 		}
1076 		brelse(bp);
1077 		if (error) {
1078 			pc_mark_irrecov(fsp);
1079 			return (EIO);
1080 		}
1081 	}
1082 	return (0);
1083 }
1084 
1085 void
1086 pc_invalfat(struct pcfs *fsp)
1087 {
1088 	struct pcfs *xfsp;
1089 	int mount_cnt = 0;
1090 
1091 	if (fsp->pcfs_fatp == (uchar_t *)0)
1092 		panic("pc_invalfat");
1093 	/*
1094 	 * Release FAT
1095 	 */
1096 	kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1097 	fsp->pcfs_fatp = NULL;
1098 	kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1099 	fsp->pcfs_fat_changemap = NULL;
1100 	/*
1101 	 * Invalidate all the blocks associated with the device.
1102 	 * Not needed if stateless.
1103 	 */
1104 	for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1105 		if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1106 			mount_cnt++;
1107 
1108 	if (!mount_cnt)
1109 		binval(fsp->pcfs_xdev);
1110 	/*
1111 	 * close mounted device
1112 	 */
1113 	(void) VOP_CLOSE(fsp->pcfs_devvp,
1114 	    (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1115 	    1, (offset_t)0, CRED(), NULL);
1116 }
1117 
1118 void
1119 pc_badfs(struct pcfs *fsp)
1120 {
1121 	cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1122 	    getmajor(fsp->pcfs_devvp->v_rdev),
1123 	    getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1124 }
1125 
1126 /*
1127  * The problem with supporting NFS on the PCFS filesystem is that there
1128  * is no good place to keep the generation number. The only possible
1129  * place is inside a directory entry. There are a few words that we
1130  * don't use - they store NT & OS/2 attributes, and the creation/last access
1131  * time of the file - but it seems wrong to use them. In addition, directory
1132  * entries come and go. If a directory is removed completely, its directory
1133  * blocks are freed and the generation numbers are lost. Whereas in ufs,
1134  * inode blocks are dedicated for inodes, so the generation numbers are
1135  * permanently kept on the disk.
1136  */
1137 static int
1138 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1139 {
1140 	struct pcnode *pcp;
1141 	struct pc_fid *pcfid;
1142 	struct pcfs *fsp;
1143 	struct pcdir *ep;
1144 	daddr_t eblkno;
1145 	int eoffset;
1146 	struct buf *bp;
1147 	int error;
1148 	pc_cluster32_t	cn;
1149 
1150 	pcfid = (struct pc_fid *)fidp;
1151 	fsp = VFSTOPCFS(vfsp);
1152 
1153 	error = pc_lockfs(fsp, 0, 0);
1154 	if (error) {
1155 		*vpp = NULL;
1156 		return (error);
1157 	}
1158 
1159 	if (pcfid->pcfid_block == 0) {
1160 		pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1161 		pcp->pc_flags |= PC_EXTERNAL;
1162 		*vpp = PCTOV(pcp);
1163 		pc_unlockfs(fsp);
1164 		return (0);
1165 	}
1166 	eblkno = pcfid->pcfid_block;
1167 	eoffset = pcfid->pcfid_offset;
1168 
1169 	if ((pc_dbtocl(fsp,
1170 	    eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1171 	    (eoffset > fsp->pcfs_clsize)) {
1172 		pc_unlockfs(fsp);
1173 		*vpp = NULL;
1174 		return (EINVAL);
1175 	}
1176 
1177 	if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1178 	    < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1179 		bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1180 		    fsp->pcfs_clsize);
1181 	} else {
1182 		/*
1183 		 * This is an access "backwards" into the FAT12/FAT16
1184 		 * root directory. A better code structure would
1185 		 * significantly improve maintainability here ...
1186 		 */
1187 		bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1188 		    (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1189 	}
1190 	if (bp->b_flags & (B_ERROR | B_STALE)) {
1191 		error = geterror(bp);
1192 		brelse(bp);
1193 		if (error)
1194 			pc_mark_irrecov(fsp);
1195 		*vpp = NULL;
1196 		pc_unlockfs(fsp);
1197 		return (error);
1198 	}
1199 	ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1200 	/*
1201 	 * Ok, if this is a valid file handle that we gave out,
1202 	 * then simply ensuring that the creation time matches,
1203 	 * the entry has not been deleted, and it has a valid first
1204 	 * character should be enough.
1205 	 *
1206 	 * Unfortunately, verifying that the <blkno, offset> _still_
1207 	 * refers to a directory entry is not easy, since we'd have
1208 	 * to search _all_ directories starting from root to find it.
1209 	 * That's a high price to pay just in case somebody is forging
1210 	 * file handles. So instead we verify that as much of the
1211 	 * entry is valid as we can:
1212 	 *
1213 	 * 1. The starting cluster is 0 (unallocated) or valid
1214 	 * 2. It is not an LFN entry
1215 	 * 3. It is not hidden (unless mounted as such)
1216 	 * 4. It is not the label
1217 	 */
1218 	cn = pc_getstartcluster(fsp, ep);
1219 	/*
1220 	 * if the starting cluster is valid, but not valid according
1221 	 * to pc_validcl(), force it to be to simplify the following if.
1222 	 */
1223 	if (cn == 0)
1224 		cn = PCF_FIRSTCLUSTER;
1225 	if (IS_FAT32(fsp)) {
1226 		if (cn >= PCF_LASTCLUSTER32)
1227 			cn = PCF_FIRSTCLUSTER;
1228 	} else {
1229 		if (cn >= PCF_LASTCLUSTER)
1230 			cn = PCF_FIRSTCLUSTER;
1231 	}
1232 	if ((!pc_validcl(fsp, cn)) ||
1233 	    (PCDL_IS_LFN(ep)) ||
1234 	    (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1235 	    ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1236 		bp->b_flags |= B_STALE | B_AGE;
1237 		brelse(bp);
1238 		pc_unlockfs(fsp);
1239 		return (EINVAL);
1240 	}
1241 	if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1242 	    (ep->pcd_filename[0] != PCD_ERASED) &&
1243 	    (pc_validchar(ep->pcd_filename[0]) ||
1244 	    (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1245 		pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1246 		pcp->pc_flags |= PC_EXTERNAL;
1247 		*vpp = PCTOV(pcp);
1248 	} else {
1249 		*vpp = NULL;
1250 	}
1251 	bp->b_flags |= B_STALE | B_AGE;
1252 	brelse(bp);
1253 	pc_unlockfs(fsp);
1254 	return (0);
1255 }
1256 
1257 /*
1258  * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1259  * a meg), so we can't bread() it all in at once. This routine reads a
1260  * fat a chunk at a time.
1261  */
1262 static int
1263 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1264 {
1265 	struct buf *bp;
1266 	size_t off;
1267 	size_t readsize;
1268 	daddr_t diskblk;
1269 	size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1270 	daddr_t start = fsp->pcfs_fatstart;
1271 
1272 	readsize = fsp->pcfs_clsize;
1273 	for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1274 		if (readsize > (fatsize - off))
1275 			readsize = fatsize - off;
1276 		diskblk = pc_dbdaddr(fsp, start +
1277 		    pc_cltodb(fsp, pc_lblkno(fsp, off)));
1278 		bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1279 		if (bp->b_flags & (B_ERROR | B_STALE)) {
1280 			brelse(bp);
1281 			return (EIO);
1282 		}
1283 		bp->b_flags |= B_STALE | B_AGE;
1284 		bcopy(bp->b_un.b_addr, fatp, readsize);
1285 		brelse(bp);
1286 	}
1287 	return (0);
1288 }
1289 
1290 /*
1291  * We write the FAT out a _lot_, in order to make sure that it
1292  * is up-to-date. But on a FAT32 system (large drive, small clusters)
1293  * the FAT might be a couple of megabytes, and writing it all out just
1294  * because we created or deleted a small file is painful (especially
1295  * since we do it for each alternate FAT too). So instead, for FAT16 and
1296  * FAT32 we only write out the bit that has changed. We don't clear
1297  * the 'updated' fields here because the caller might be writing out
1298  * several FATs, so the caller must use pc_clear_fatchanges() after
1299  * all FATs have been updated.
1300  * This function doesn't take "start" from fsp->pcfs_dosstart because
1301  * callers can use it to write either the primary or any of the alternate
1302  * FAT tables.
1303  */
1304 static int
1305 pc_writefat(struct pcfs *fsp, daddr_t start)
1306 {
1307 	struct buf *bp;
1308 	size_t off;
1309 	size_t writesize;
1310 	int	error;
1311 	uchar_t *fatp = fsp->pcfs_fatp;
1312 	size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1313 
1314 	writesize = fsp->pcfs_clsize;
1315 	for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1316 		if (writesize > (fatsize - off))
1317 			writesize = fatsize - off;
1318 		if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1319 			continue;
1320 		}
1321 		bp = ngeteblk(writesize);
1322 		bp->b_edev = fsp->pcfs_xdev;
1323 		bp->b_dev = cmpdev(bp->b_edev);
1324 		bp->b_blkno = pc_dbdaddr(fsp, start +
1325 		    pc_cltodb(fsp, pc_lblkno(fsp, off)));
1326 		bcopy(fatp, bp->b_un.b_addr, writesize);
1327 		bwrite2(bp);
1328 		error = geterror(bp);
1329 		brelse(bp);
1330 		if (error) {
1331 			return (error);
1332 		}
1333 	}
1334 	return (0);
1335 }
1336 
1337 /*
1338  * Mark the FAT cluster that 'cn' is stored in as modified.
1339  */
1340 void
1341 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1342 {
1343 	pc_cluster32_t	bn;
1344 	size_t		size;
1345 
1346 	/* which fat block is the cluster number stored in? */
1347 	if (IS_FAT32(fsp)) {
1348 		size = sizeof (pc_cluster32_t);
1349 		bn = pc_lblkno(fsp, cn * size);
1350 		fsp->pcfs_fat_changemap[bn] = 1;
1351 	} else if (IS_FAT16(fsp)) {
1352 		size = sizeof (pc_cluster16_t);
1353 		bn = pc_lblkno(fsp, cn * size);
1354 		fsp->pcfs_fat_changemap[bn] = 1;
1355 	} else {
1356 		offset_t off;
1357 		pc_cluster32_t nbn;
1358 
1359 		ASSERT(IS_FAT12(fsp));
1360 		off = cn + (cn >> 1);
1361 		bn = pc_lblkno(fsp, off);
1362 		fsp->pcfs_fat_changemap[bn] = 1;
1363 		/* does this field wrap into the next fat cluster? */
1364 		nbn = pc_lblkno(fsp, off + 1);
1365 		if (nbn != bn) {
1366 			fsp->pcfs_fat_changemap[nbn] = 1;
1367 		}
1368 	}
1369 }
1370 
1371 /*
1372  * return whether the FAT cluster 'bn' is updated and needs to
1373  * be written out.
1374  */
1375 int
1376 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1377 {
1378 	return (fsp->pcfs_fat_changemap[bn] == 1);
1379 }
1380 
1381 /*
1382  * Implementation of VFS_FREEVFS() to support forced umounts.
1383  * This is called by the vfs framework after umount, to trigger
1384  * the release of any resources still associated with the given
1385  * vfs_t once the need to keep them has gone away.
1386  */
1387 void
1388 pcfs_freevfs(vfs_t *vfsp)
1389 {
1390 	struct pcfs *fsp = VFSTOPCFS(vfsp);
1391 
1392 	mutex_enter(&pcfslock);
1393 	/*
1394 	 * Purging the FAT closes the device - can't do any more
1395 	 * I/O after this.
1396 	 */
1397 	if (fsp->pcfs_fatp != (uchar_t *)0)
1398 		pc_invalfat(fsp);
1399 	mutex_exit(&pcfslock);
1400 
1401 	VN_RELE(fsp->pcfs_devvp);
1402 	mutex_destroy(&fsp->pcfs_lock);
1403 	kmem_free(fsp, sizeof (*fsp));
1404 
1405 	/*
1406 	 * Allow _fini() to succeed now, if so desired.
1407 	 */
1408 	atomic_dec_32(&pcfs_mountcount);
1409 }
1410 
1411 
1412 /*
1413  * PC-style partition parsing and FAT BPB identification/validation code.
1414  * The partition parsers here assume:
1415  *	- a FAT filesystem will be in a partition that has one of a set of
1416  *	  recognized partition IDs
1417  *	- the user wants the 'numbering' (C:, D:, ...) that one would get
1418  *	  on MSDOS 6.x.
1419  *	  That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1420  *	  will not factor in the enumeration.
1421  * These days, such assumptions should be revisited. FAT is no longer the
1422  * only game in 'PC town'.
1423  */
1424 /*
1425  * isDosDrive()
1426  *	Boolean function.  Give it the systid field for an fdisk partition
1427  *	and it decides if that's a systid that describes a DOS drive.  We
1428  *	use systid values defined in sys/dktp/fdisk.h.
1429  */
1430 static int
1431 isDosDrive(uchar_t checkMe)
1432 {
1433 	return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1434 	    (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1435 	    (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1436 	    (checkMe == DIAGPART));
1437 }
1438 
1439 
1440 /*
1441  * isDosExtended()
1442  *	Boolean function.  Give it the systid field for an fdisk partition
1443  *	and it decides if that's a systid that describes an extended DOS
1444  *	partition.
1445  */
1446 static int
1447 isDosExtended(uchar_t checkMe)
1448 {
1449 	return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1450 }
1451 
1452 
1453 /*
1454  * isBootPart()
1455  *	Boolean function.  Give it the systid field for an fdisk partition
1456  *	and it decides if that's a systid that describes a Solaris boot
1457  *	partition.
1458  */
1459 static int
1460 isBootPart(uchar_t checkMe)
1461 {
1462 	return (checkMe == X86BOOT);
1463 }
1464 
1465 
1466 /*
1467  * noLogicalDrive()
1468  *	Display error message about not being able to find a logical
1469  *	drive.
1470  */
1471 static void
1472 noLogicalDrive(int ldrive)
1473 {
1474 	if (ldrive == BOOT_PARTITION_DRIVE) {
1475 		cmn_err(CE_NOTE, "!pcfs: no boot partition");
1476 	} else {
1477 		cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1478 	}
1479 }
1480 
1481 
1482 /*
1483  * findTheDrive()
1484  *	Discover offset of the requested logical drive, and return
1485  *	that offset (startSector), the systid of that drive (sysid),
1486  *	and a buffer pointer (bp), with the buffer contents being
1487  *	the first sector of the logical drive (i.e., the sector that
1488  *	contains the BPB for that drive).
1489  *
1490  * Note: this code is not capable of addressing >2TB disks, as it uses
1491  *       daddr_t not diskaddr_t, some of the calculations would overflow
1492  */
1493 #define	COPY_PTBL(mbr, ptblp)					\
1494 	bcopy(&(((struct mboot *)(mbr))->parts), (ptblp),	\
1495 	    FD_NUMPART * sizeof (struct ipart))
1496 
1497 static int
1498 findTheDrive(struct pcfs *fsp, buf_t **bp)
1499 {
1500 	int ldrive = fsp->pcfs_ldrive;
1501 	dev_t dev = fsp->pcfs_devvp->v_rdev;
1502 
1503 	struct ipart dosp[FD_NUMPART];	/* incore fdisk partition structure */
1504 	daddr_t lastseek = 0;		/* Disk block we sought previously */
1505 	daddr_t diskblk = 0;		/* Disk block to get */
1506 	daddr_t xstartsect;		/* base of Extended DOS partition */
1507 	int logicalDriveCount = 0;	/* Count of logical drives seen */
1508 	int extendedPart = -1;		/* index of extended dos partition */
1509 	int primaryPart = -1;		/* index of primary dos partition */
1510 	int bootPart = -1;		/* index of a Solaris boot partition */
1511 	int xnumsect = -1;		/* length of extended DOS partition */
1512 	int driveIndex;			/* computed FDISK table index */
1513 	daddr_t startsec;
1514 	len_t mediasize;
1515 	int i;
1516 	/*
1517 	 * Count of drives in the current extended partition's
1518 	 * FDISK table, and indexes of the drives themselves.
1519 	 */
1520 	int extndDrives[FD_NUMPART];
1521 	int numDrives = 0;
1522 
1523 	/*
1524 	 * Count of drives (beyond primary) in master boot record's
1525 	 * FDISK table, and indexes of the drives themselves.
1526 	 */
1527 	int extraDrives[FD_NUMPART];
1528 	int numExtraDrives = 0;
1529 
1530 	/*
1531 	 * "ldrive == 0" should never happen, as this is a request to
1532 	 * mount the physical device (and ignore partitioning). The code
1533 	 * in pcfs_mount() should have made sure that a logical drive number
1534 	 * is at least 1, meaning we're looking for drive "C:". It is not
1535 	 * safe (and a bug in the callers of this function) to request logical
1536 	 * drive number 0; we could ASSERT() but a graceful EIO is a more
1537 	 * polite way.
1538 	 */
1539 	if (ldrive == 0) {
1540 		cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1541 		noLogicalDrive(ldrive);
1542 		return (EIO);
1543 	}
1544 
1545 	/*
1546 	 *  Copy from disk block into memory aligned structure for fdisk usage.
1547 	 */
1548 	COPY_PTBL((*bp)->b_un.b_addr, dosp);
1549 
1550 	/*
1551 	 * This check is ok because a FAT BPB and a master boot record (MBB)
1552 	 * have the same signature, in the same position within the block.
1553 	 */
1554 	if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1555 		cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1556 		    "device (%x.%x):%d\n",
1557 		    getmajor(dev), getminor(dev), ldrive);
1558 		return (EINVAL);
1559 	}
1560 
1561 	/*
1562 	 * Get a summary of what is in the Master FDISK table.
1563 	 * Normally we expect to find one partition marked as a DOS drive.
1564 	 * This partition is the one Windows calls the primary dos partition.
1565 	 * If the machine has any logical drives then we also expect
1566 	 * to find a partition marked as an extended DOS partition.
1567 	 *
1568 	 * Sometimes we'll find multiple partitions marked as DOS drives.
1569 	 * The Solaris fdisk program allows these partitions
1570 	 * to be created, but Windows fdisk no longer does.  We still need
1571 	 * to support these, though, since Windows does.  We also need to fix
1572 	 * our fdisk to behave like the Windows version.
1573 	 *
1574 	 * It turns out that some off-the-shelf media have *only* an
1575 	 * Extended partition, so we need to deal with that case as well.
1576 	 *
1577 	 * Only a single (the first) Extended or Boot Partition will
1578 	 * be recognized.  Any others will be ignored.
1579 	 */
1580 	for (i = 0; i < FD_NUMPART; i++) {
1581 		DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1582 		    uint_t, (uint_t)dosp[i].systid,
1583 		    uint_t, LE_32(dosp[i].relsect),
1584 		    uint_t, LE_32(dosp[i].numsect));
1585 
1586 		if (isDosDrive(dosp[i].systid)) {
1587 			if (primaryPart < 0) {
1588 				logicalDriveCount++;
1589 				primaryPart = i;
1590 			} else {
1591 				extraDrives[numExtraDrives++] = i;
1592 			}
1593 			continue;
1594 		}
1595 		if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1596 			extendedPart = i;
1597 			continue;
1598 		}
1599 		if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1600 			bootPart = i;
1601 			continue;
1602 		}
1603 	}
1604 
1605 	if (ldrive == BOOT_PARTITION_DRIVE) {
1606 		if (bootPart < 0) {
1607 			noLogicalDrive(ldrive);
1608 			return (EINVAL);
1609 		}
1610 		startsec = LE_32(dosp[bootPart].relsect);
1611 		mediasize = LE_32(dosp[bootPart].numsect);
1612 		goto found;
1613 	}
1614 
1615 	if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1616 		startsec = LE_32(dosp[primaryPart].relsect);
1617 		mediasize = LE_32(dosp[primaryPart].numsect);
1618 		goto found;
1619 	}
1620 
1621 	/*
1622 	 * We are not looking for the C: drive (or the primary drive
1623 	 * was not found), so we had better have an extended partition
1624 	 * or extra drives in the Master FDISK table.
1625 	 */
1626 	if ((extendedPart < 0) && (numExtraDrives == 0)) {
1627 		cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1628 		noLogicalDrive(ldrive);
1629 		return (EINVAL);
1630 	}
1631 
1632 	if (extendedPart >= 0) {
1633 		diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1634 		xnumsect = LE_32(dosp[extendedPart].numsect);
1635 		do {
1636 			/*
1637 			 *  If the seek would not cause us to change
1638 			 *  position on the drive, then we're out of
1639 			 *  extended partitions to examine.
1640 			 */
1641 			if (diskblk == lastseek)
1642 				break;
1643 			logicalDriveCount += numDrives;
1644 			/*
1645 			 *  Seek the next extended partition, and find
1646 			 *  logical drives within it.
1647 			 */
1648 			brelse(*bp);
1649 			/*
1650 			 * bread() block numbers are multiples of DEV_BSIZE
1651 			 * but the device sector size (the unit of partitioning)
1652 			 * might be larger than that; pcfs_get_device_info()
1653 			 * has calculated the multiplicator for us.
1654 			 */
1655 			*bp = bread(dev,
1656 			    pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1657 			if ((*bp)->b_flags & B_ERROR) {
1658 				return (EIO);
1659 			}
1660 
1661 			lastseek = diskblk;
1662 			COPY_PTBL((*bp)->b_un.b_addr, dosp);
1663 			if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1664 				cmn_err(CE_NOTE, "!pcfs: "
1665 				    "extended partition table signature err, "
1666 				    "device (%x.%x):%d, LBA %u",
1667 				    getmajor(dev), getminor(dev), ldrive,
1668 				    (uint_t)pc_dbdaddr(fsp, diskblk));
1669 				return (EINVAL);
1670 			}
1671 			/*
1672 			 *  Count up drives, and track where the next
1673 			 *  extended partition is in case we need it.  We
1674 			 *  are expecting only one extended partition.  If
1675 			 *  there is more than one we'll only go to the
1676 			 *  first one we see, but warn about ignoring.
1677 			 */
1678 			numDrives = 0;
1679 			for (i = 0; i < FD_NUMPART; i++) {
1680 				DTRACE_PROBE4(extendedpart,
1681 				    struct pcfs *, fsp,
1682 				    uint_t, (uint_t)dosp[i].systid,
1683 				    uint_t, LE_32(dosp[i].relsect),
1684 				    uint_t, LE_32(dosp[i].numsect));
1685 				if (isDosDrive(dosp[i].systid)) {
1686 					extndDrives[numDrives++] = i;
1687 				} else if (isDosExtended(dosp[i].systid)) {
1688 					if (diskblk != lastseek) {
1689 						/*
1690 						 * Already found an extended
1691 						 * partition in this table.
1692 						 */
1693 						cmn_err(CE_NOTE,
1694 						    "!pcfs: ignoring unexpected"
1695 						    " additional extended"
1696 						    " partition");
1697 					} else {
1698 						diskblk = xstartsect +
1699 						    LE_32(dosp[i].relsect);
1700 					}
1701 				}
1702 			}
1703 		} while (ldrive > logicalDriveCount + numDrives);
1704 
1705 		ASSERT(numDrives <= FD_NUMPART);
1706 
1707 		if (ldrive <= logicalDriveCount + numDrives) {
1708 			/*
1709 			 * The number of logical drives we've found thus
1710 			 * far is enough to get us to the one we were
1711 			 * searching for.
1712 			 */
1713 			driveIndex = logicalDriveCount + numDrives - ldrive;
1714 			mediasize =
1715 			    LE_32(dosp[extndDrives[driveIndex]].numsect);
1716 			startsec =
1717 			    LE_32(dosp[extndDrives[driveIndex]].relsect) +
1718 			    lastseek;
1719 			if (startsec > (xstartsect + xnumsect)) {
1720 				cmn_err(CE_NOTE, "!pcfs: extended partition "
1721 				    "values bad");
1722 				return (EINVAL);
1723 			}
1724 			goto found;
1725 		} else {
1726 			/*
1727 			 * We ran out of extended dos partition
1728 			 * drives.  The only hope now is to go
1729 			 * back to extra drives defined in the master
1730 			 * fdisk table.  But we overwrote that table
1731 			 * already, so we must load it in again.
1732 			 */
1733 			logicalDriveCount += numDrives;
1734 			brelse(*bp);
1735 			ASSERT(fsp->pcfs_dosstart == 0);
1736 			*bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1737 			    fsp->pcfs_secsize);
1738 			if ((*bp)->b_flags & B_ERROR) {
1739 				return (EIO);
1740 			}
1741 			COPY_PTBL((*bp)->b_un.b_addr, dosp);
1742 		}
1743 	}
1744 	/*
1745 	 *  Still haven't found the drive, is it an extra
1746 	 *  drive defined in the main FDISK table?
1747 	 */
1748 	if (ldrive <= logicalDriveCount + numExtraDrives) {
1749 		driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1750 		ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1751 		mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1752 		startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1753 		goto found;
1754 	}
1755 	/*
1756 	 *  Still haven't found the drive, and there is
1757 	 *  nowhere else to look.
1758 	 */
1759 	noLogicalDrive(ldrive);
1760 	return (EINVAL);
1761 
1762 found:
1763 	/*
1764 	 * We need this value in units of sectorsize, because PCFS' internal
1765 	 * offset calculations go haywire for > 512Byte sectors unless all
1766 	 * pcfs_.*start values are in units of sectors.
1767 	 * So, assign before the capacity check (that's done in DEV_BSIZE)
1768 	 */
1769 	fsp->pcfs_dosstart = startsec;
1770 
1771 	/*
1772 	 * convert from device sectors to proper units:
1773 	 *	- starting sector: DEV_BSIZE (as argument to bread())
1774 	 *	- media size: Bytes
1775 	 */
1776 	startsec = pc_dbdaddr(fsp, startsec);
1777 	mediasize *= fsp->pcfs_secsize;
1778 
1779 	/*
1780 	 * some additional validation / warnings in case the partition table
1781 	 * and the actual media capacity are not in accordance ...
1782 	 */
1783 	if (fsp->pcfs_mediasize != 0) {
1784 		diskaddr_t startoff =
1785 		    (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1786 
1787 		if (startoff >= fsp->pcfs_mediasize ||
1788 		    startoff + mediasize > fsp->pcfs_mediasize) {
1789 			cmn_err(CE_WARN,
1790 			    "!pcfs: partition size (LBA start %u, %lld bytes, "
1791 			    "device (%x.%x):%d) smaller than "
1792 			    "mediasize (%lld bytes).\n"
1793 			    "filesystem may be truncated, access errors "
1794 			    "may result.\n",
1795 			    (uint_t)startsec, (long long)mediasize,
1796 			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1797 			    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1798 		}
1799 	} else {
1800 		fsp->pcfs_mediasize = mediasize;
1801 	}
1802 
1803 	return (0);
1804 }
1805 
1806 
1807 static fattype_t
1808 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1809 {
1810 	uint32_t ncl = fsp->pcfs_ncluster;
1811 
1812 	if (ncl <= 4096) {
1813 		if (bpb_get_FatSz16(bpb) == 0)
1814 			return (FAT_UNKNOWN);
1815 
1816 		if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1817 		    bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1818 			return (FAT12);
1819 		if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1820 			return (FAT12);
1821 		if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1822 			return (FAT16);
1823 
1824 		switch (bpb_get_Media(bpb)) {
1825 			case SS8SPT:
1826 			case DS8SPT:
1827 			case SS9SPT:
1828 			case DS9SPT:
1829 			case DS18SPT:
1830 			case DS9_15SPT:
1831 				/*
1832 				 * Is this reliable - all floppies are FAT12 ?
1833 				 */
1834 				return (FAT12);
1835 			case MD_FIXED:
1836 				/*
1837 				 * Is this reliable - disks are always FAT16 ?
1838 				 */
1839 				return (FAT16);
1840 			default:
1841 				break;
1842 		}
1843 	} else if (ncl <= 65536) {
1844 		if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1845 			return (FAT32);
1846 		if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1847 			return (FAT32);
1848 		if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1849 			return (FAT32);
1850 
1851 		if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1852 			return (FAT16);
1853 		if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1854 			return (FAT16);
1855 	}
1856 
1857 	/*
1858 	 * We don't know
1859 	 */
1860 	return (FAT_UNKNOWN);
1861 }
1862 
1863 /*
1864  * Check to see if the BPB we found is correct.
1865  *
 * This looks far more complicated than it needs to be for pure structural
1867  * validation. The reason for this is that parseBPB() is also used for
1868  * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1869  * BPB fields (do not) have 'known good' values, even if we (do not) reject
1870  * the BPB when attempting to mount the filesystem.
1871  *
1872  * Real-world usage of FAT shows there are a lot of corner-case situations
1873  * and, following the specification strictly, invalid filesystems out there.
1874  * Known are situations such as:
1875  *	- FAT12/FAT16 filesystems with garbage in either totsec16/32
1876  *	  instead of the zero in one of the fields mandated by the spec
1877  *	- filesystems that claim to be larger than the partition they're in
1878  *	- filesystems without valid media descriptor
1879  *	- FAT32 filesystems with RootEntCnt != 0
1880  *	- FAT32 filesystems with less than 65526 clusters
1881  *	- FAT32 filesystems without valid FSI sector
1882  *	- FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1883  *
1884  * Such filesystems are accessible by PCFS - if it'd know to start with that
1885  * the filesystem should be treated as a specific FAT type. Before S10, it
1886  * relied on the PC/fdisk partition type for the purpose and almost completely
1887  * ignored the BPB; now it ignores the partition type for anything else but
1888  * logical drive enumeration, which can result in rejection of (invalid)
1889  * FAT32 - if the partition ID says FAT32, but the filesystem, for example
1890  * has less than 65526 clusters.
1891  *
1892  * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1893  * not possible to allow all such mostly-compliant filesystems in unless one
1894  * accepts false positives (definitely invalid filesystems that cause problems
1895  * later). This at least allows to pinpoint why the mount failed.
1896  *
 * Due to the use of FAT on removable media, all relaxations of the rules
 * here need to be carefully evaluated with respect to their effect on PCFS
1899  * resilience. A faulty/"mis-crafted" filesystem must not cause a panic, so
1900  * beware.
1901  */
1902 static int
1903 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1904 {
1905 	fattype_t type;
1906 
1907 	uint32_t	ncl;	/* number of clusters in file area */
1908 	uint32_t	rec;
1909 	uint32_t	reserved;
1910 	uint32_t	fsisec, bkbootsec;
1911 	blkcnt_t	totsec, totsec16, totsec32, datasec;
1912 	size_t		fatsec, fatsec16, fatsec32, rdirsec;
1913 	size_t		secsize;
1914 	len_t		mediasize;
1915 	uint64_t	validflags = 0;
1916 
1917 	if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1918 		validflags |= BPB_BPBSIG_OK;
1919 
1920 	rec = bpb_get_RootEntCnt(bpb);
1921 	reserved = bpb_get_RsvdSecCnt(bpb);
1922 	fsisec = bpb_get_FSInfo32(bpb);
1923 	bkbootsec = bpb_get_BkBootSec32(bpb);
1924 	totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1925 	totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1926 	fatsec16 = bpb_get_FatSz16(bpb);
1927 	fatsec32 = bpb_get_FatSz32(bpb);
1928 
1929 	totsec = totsec16 ? totsec16 : totsec32;
1930 	fatsec = fatsec16 ? fatsec16 : fatsec32;
1931 
1932 	secsize = bpb_get_BytesPerSec(bpb);
1933 	if (!VALID_SECSIZE(secsize))
1934 		secsize = fsp->pcfs_secsize;
1935 	if (secsize != fsp->pcfs_secsize) {
1936 		PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1937 		    getmajor(fsp->pcfs_xdev),
1938 		    getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1939 		PC_DPRINTF2(3, "!BPB secsize %d != "
1940 		    "autodetected media block size %d\n",
1941 		    (int)secsize, (int)fsp->pcfs_secsize);
1942 		if (fsp->pcfs_ldrive) {
1943 			/*
1944 			 * We've already attempted to parse the partition
1945 			 * table. If the block size used for that don't match
1946 			 * the PCFS sector size, we're hosed one way or the
1947 			 * other. Just try what happens.
1948 			 */
1949 			secsize = fsp->pcfs_secsize;
1950 			PC_DPRINTF1(3,
1951 			    "!pcfs: Using autodetected secsize %d\n",
1952 			    (int)secsize);
1953 		} else {
1954 			/*
1955 			 * This allows mounting lofi images of PCFS partitions
1956 			 * with sectorsize != DEV_BSIZE. We can't parse the
1957 			 * partition table on whole-disk images unless the
1958 			 * (undocumented) "secsize=..." mount option is used,
1959 			 * but at least this allows us to mount if we have
1960 			 * an image of a partition.
1961 			 */
1962 			PC_DPRINTF1(3,
1963 			    "!pcfs: Using BPB secsize %d\n", (int)secsize);
1964 		}
1965 	}
1966 
1967 	if (fsp->pcfs_mediasize == 0) {
1968 		mediasize = (len_t)totsec * (len_t)secsize;
1969 		/*
1970 		 * This is not an error because not all devices support the
1971 		 * dkio(7i) mediasize queries, and/or not all devices are
1972 		 * partitioned. If we have not been able to figure out the
1973 		 * size of the underlaying medium, we have to trust the BPB.
1974 		 */
1975 		PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1976 		    "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1977 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1978 		    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1979 	} else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1980 		cmn_err(CE_WARN,
1981 		    "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1982 		    "FAT BPB mediasize (%lld Bytes).\n"
1983 		    "truncated filesystem on device (%x.%x):%d, access errors "
1984 		    "possible.\n",
1985 		    (long long)fsp->pcfs_mediasize,
1986 		    (long long)(totsec * (blkcnt_t)secsize),
1987 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1988 		    fsp->pcfs_ldrive);
1989 		mediasize = fsp->pcfs_mediasize;
1990 	} else {
1991 		/*
1992 		 * This is actually ok. A FAT needs not occupy the maximum
1993 		 * space available in its partition, it can be shorter.
1994 		 */
1995 		mediasize = (len_t)totsec * (len_t)secsize;
1996 	}
1997 
1998 	/*
1999 	 * Since we let just about anything pass through this function,
2000 	 * fence against divide-by-zero here.
2001 	 */
2002 	if (secsize)
2003 		rdirsec = roundup(rec * 32, secsize) / secsize;
2004 	else
2005 		rdirsec = 0;
2006 
2007 	/*
2008 	 * This assignment is necessary before pc_dbdaddr() can first be
2009 	 * used. Must initialize the value here.
2010 	 */
2011 	fsp->pcfs_secsize = secsize;
2012 	fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2013 
2014 	fsp->pcfs_mediasize = mediasize;
2015 
2016 	fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2017 	fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2018 	fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2019 	fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2020 	fsp->pcfs_rdirsec = rdirsec;
2021 
2022 	/*
2023 	 * Remember: All PCFS offset calculations in sectors. Before I/O
2024 	 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2025 	 * necessary so that media with > 512Byte sector sizes work correctly.
2026 	 */
2027 	fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2028 	fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2029 	fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2030 	datasec = totsec -
2031 	    (blkcnt_t)fatsec * fsp->pcfs_numfat -
2032 	    (blkcnt_t)rdirsec -
2033 	    (blkcnt_t)reserved;
2034 
2035 	DTRACE_PROBE4(fatgeometry,
2036 	    blkcnt_t, totsec, size_t, fatsec,
2037 	    size_t, rdirsec, blkcnt_t, datasec);
2038 
2039 	/*
2040 	 * 'totsec' is taken directly from the BPB and guaranteed to fit
2041 	 * into a 32bit unsigned integer. The calculation of 'datasec',
2042 	 * on the other hand, could underflow for incorrect values in
2043 	 * rdirsec/reserved/fatsec. Check for that.
2044 	 * We also check that the BPB conforms to the FAT specification's
2045 	 * requirement that either of the 16/32bit total sector counts
2046 	 * must be zero.
2047 	 */
2048 	if (totsec != 0 &&
2049 	    (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2050 	    datasec < totsec && datasec <= UINT32_MAX)
2051 		validflags |= BPB_TOTSEC_OK;
2052 
2053 	if ((len_t)totsec * (len_t)secsize <= mediasize)
2054 		validflags |= BPB_MEDIASZ_OK;
2055 
2056 	if (VALID_SECSIZE(secsize))
2057 		validflags |= BPB_SECSIZE_OK;
2058 	if (VALID_SPCL(fsp->pcfs_spcl))
2059 		validflags |= BPB_SECPERCLUS_OK;
2060 	if (VALID_CLSIZE(fsp->pcfs_clsize))
2061 		validflags |= BPB_CLSIZE_OK;
2062 	if (VALID_NUMFATS(fsp->pcfs_numfat))
2063 		validflags |= BPB_NUMFAT_OK;
2064 	if (VALID_RSVDSEC(reserved) && reserved < totsec)
2065 		validflags |= BPB_RSVDSECCNT_OK;
2066 	if (VALID_MEDIA(fsp->pcfs_mediadesc))
2067 		validflags |= BPB_MEDIADESC_OK;
2068 	if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2069 		validflags |= BPB_BOOTSIG16_OK;
2070 	if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2071 		validflags |= BPB_BOOTSIG32_OK;
2072 	if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2073 		validflags |= BPB_FSTYPSTR16_OK;
2074 	if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2075 		validflags |= BPB_FSTYPSTR32_OK;
2076 	if (VALID_OEMNAME(bpb_OEMName(bpb)))
2077 		validflags |= BPB_OEMNAME_OK;
2078 	if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2079 		validflags |= BPB_BKBOOTSEC_OK;
2080 	if (fsisec > 0 && fsisec <= reserved)
2081 		validflags |= BPB_FSISEC_OK;
2082 	if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2083 		validflags |= BPB_JMPBOOT_OK;
2084 	if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2085 		validflags |= BPB_FSVER_OK;
2086 	if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2087 		validflags |= BPB_VOLLAB16_OK;
2088 	if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2089 		validflags |= BPB_VOLLAB32_OK;
2090 	if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2091 		validflags |= BPB_EXTFLAGS_OK;
2092 
2093 	/*
2094 	 * Try to determine which FAT format to use.
2095 	 *
2096 	 * Calculate the number of clusters in order to determine
2097 	 * the type of FAT we are looking at.  This is the only
2098 	 * recommended way of determining FAT type, though there
2099 	 * are other hints in the data, this is the best way.
2100 	 *
2101 	 * Since we let just about "anything" pass through this function
2102 	 * without early exits, fence against divide-by-zero here.
2103 	 *
2104 	 * datasec was already validated against UINT32_MAX so we know
2105 	 * the result will not overflow the 32bit calculation.
2106 	 */
2107 	if (fsp->pcfs_spcl)
2108 		ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2109 	else
2110 		ncl = 0;
2111 
2112 	fsp->pcfs_ncluster = ncl;
2113 
2114 	/*
2115 	 * From the Microsoft FAT specification:
2116 	 * In the following example, when it says <, it does not mean <=.
2117 	 * Note also that the numbers are correct.  The first number for
2118 	 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2119 	 * and the '<' signs are not wrong.
2120 	 *
2121 	 * We "specialdetect" the corner cases, and use at least one "extra"
2122 	 * criterion to decide whether it's FAT16 or FAT32 if the cluster
2123 	 * count is dangerously close to the boundaries.
2124 	 */
2125 
2126 	if (ncl <= PCF_FIRSTCLUSTER) {
2127 		type = FAT_UNKNOWN;
2128 	} else if (ncl < 4085) {
2129 		type = FAT12;
2130 	} else if (ncl <= 4096) {
2131 		type = FAT_QUESTIONABLE;
2132 	} else if (ncl < 65525) {
2133 		type = FAT16;
2134 	} else if (ncl <= 65536) {
2135 		type = FAT_QUESTIONABLE;
2136 	} else if (ncl < PCF_LASTCLUSTER32) {
2137 		type = FAT32;
2138 	} else {
2139 		type = FAT_UNKNOWN;
2140 	}
2141 
2142 	DTRACE_PROBE4(parseBPB__initial,
2143 	    struct pcfs *, fsp, unsigned char *, bpb,
2144 	    int, validflags, fattype_t, type);
2145 
2146 recheck:
2147 	fsp->pcfs_fatsec = fatsec;
2148 
2149 	/* Do some final sanity checks for each specific type of FAT */
2150 	switch (type) {
2151 		case FAT12:
2152 			if (rec != 0)
2153 				validflags |= BPB_ROOTENTCNT_OK;
2154 			if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2155 			    bpb_get_TotSec16(bpb) == 0)
2156 				validflags |= BPB_TOTSEC16_OK;
2157 			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2158 			    bpb_get_TotSec32(bpb) == 0)
2159 				validflags |= BPB_TOTSEC32_OK;
2160 			if (bpb_get_FatSz16(bpb) == fatsec)
2161 				validflags |= BPB_FATSZ16_OK;
2162 			if (fatsec * secsize >= ncl * 3 / 2)
2163 				validflags |= BPB_FATSZ_OK;
2164 			if (ncl < 4085)
2165 				validflags |= BPB_NCLUSTERS_OK;
2166 
2167 			fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2168 			fsp->pcfs_rootblksize =
2169 			    fsp->pcfs_rdirsec * secsize;
2170 			fsp->pcfs_fsistart = 0;
2171 
2172 			if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2173 				type = FAT_UNKNOWN;
2174 			break;
2175 		case FAT16:
2176 			if (rec != 0)
2177 				validflags |= BPB_ROOTENTCNT_OK;
2178 			if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2179 			    bpb_get_TotSec16(bpb) == 0)
2180 				validflags |= BPB_TOTSEC16_OK;
2181 			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2182 			    bpb_get_TotSec32(bpb) == 0)
2183 				validflags |= BPB_TOTSEC32_OK;
2184 			if (bpb_get_FatSz16(bpb) == fatsec)
2185 				validflags |= BPB_FATSZ16_OK;
2186 			if (fatsec * secsize >= ncl * 2)
2187 				validflags |= BPB_FATSZ_OK;
2188 			if (ncl >= 4085 && ncl < 65525)
2189 				validflags |= BPB_NCLUSTERS_OK;
2190 
2191 			fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2192 			fsp->pcfs_rootblksize =
2193 			    fsp->pcfs_rdirsec * secsize;
2194 			fsp->pcfs_fsistart = 0;
2195 
2196 			if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2197 				type = FAT_UNKNOWN;
2198 			break;
2199 		case FAT32:
2200 			if (rec == 0)
2201 				validflags |= BPB_ROOTENTCNT_OK;
2202 			if (bpb_get_TotSec16(bpb) == 0)
2203 				validflags |= BPB_TOTSEC16_OK;
2204 			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2205 				validflags |= BPB_TOTSEC32_OK;
2206 			if (bpb_get_FatSz16(bpb) == 0)
2207 				validflags |= BPB_FATSZ16_OK;
2208 			if (bpb_get_FatSz32(bpb) == fatsec)
2209 				validflags |= BPB_FATSZ32_OK;
2210 			if (fatsec * secsize >= ncl * 4)
2211 				validflags |= BPB_FATSZ_OK;
2212 			if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2213 				validflags |= BPB_NCLUSTERS_OK;
2214 
2215 			fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2216 			fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2217 			fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2218 			if (validflags & BPB_FSISEC_OK)
2219 				fsp->pcfs_flags |= PCFS_FSINFO_OK;
2220 			fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2221 			if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2222 				validflags |= BPB_ROOTCLUSTER_OK;
2223 
2224 			/*
2225 			 * Current PCFS code only works if 'pcfs_rdirstart'
2226 			 * contains the root cluster number on FAT32.
2227 			 * That's a mis-use and would better be changed.
2228 			 */
2229 			fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2230 
2231 			if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2232 				type = FAT_UNKNOWN;
2233 			break;
2234 		case FAT_QUESTIONABLE:
2235 			type = secondaryBPBChecks(fsp, bpb, secsize);
2236 			goto recheck;
2237 		default:
2238 			ASSERT(type == FAT_UNKNOWN);
2239 			break;
2240 	}
2241 
2242 	ASSERT(type != FAT_QUESTIONABLE);
2243 
2244 	fsp->pcfs_fattype = type;
2245 
2246 	if (valid)
2247 		*valid = validflags;
2248 
2249 	DTRACE_PROBE4(parseBPB__final,
2250 	    struct pcfs *, fsp, unsigned char *, bpb,
2251 	    int, validflags, fattype_t, type);
2252 
2253 	if (type != FAT_UNKNOWN) {
2254 		ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2255 		ASSERT(ISP2(secsize / DEV_BSIZE));
2256 		return (1);
2257 	}
2258 
2259 	return (0);
2260 }
2261 
2262 
2263 /*
2264  * Detect the device's native block size (sector size).
2265  *
2266  * Test whether the device is:
2267  *	- a floppy device from a known controller type via DKIOCINFO
2268  *	- a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2269  *	- a PCMCIA sram memory card (pseudofloppy) using pcram(7d)
2270  *	- a USB floppy drive (identified by drive geometry)
2271  *
2272  * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2273  * to minimize risks due to slow I/O and user hotplugging / device ejection.
2274  *
2275  * This might be a bit wasteful on kernel stack space; if anyone's
2276  * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2277  */
2278 static void
2279 pcfs_device_getinfo(struct pcfs *fsp)
2280 {
2281 	dev_t			rdev = fsp->pcfs_xdev;
2282 	int			error;
2283 	union {
2284 		struct dk_minfo		mi;
2285 		struct dk_cinfo		ci;
2286 		struct dk_geom		gi;
2287 		struct fd_char		fc;
2288 	} arg;				/* save stackspace ... */
2289 	intptr_t argp = (intptr_t)&arg;
2290 	ldi_handle_t		lh;
2291 	ldi_ident_t		li;
2292 	int isfloppy, isremoveable, ishotpluggable;
2293 	cred_t			*cr = CRED();
2294 
2295 	if (ldi_ident_from_dev(rdev, &li))
2296 		goto out;
2297 
2298 	error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2299 	ldi_ident_release(li);
2300 	if (error)
2301 		goto out;
2302 
2303 	/*
2304 	 * Not sure if this could possibly happen. It'd be a bit like
2305 	 * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2306 	 * expecting it, needs some thought if triggered ...
2307 	 */
2308 	ASSERT(fsp->pcfs_xdev == rdev);
2309 
2310 	/*
2311 	 * Check for removeable/hotpluggable media.
2312 	 */
2313 	if (ldi_ioctl(lh, DKIOCREMOVABLE,
2314 	    (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2315 		isremoveable = 0;
2316 	}
2317 	if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2318 	    (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2319 		ishotpluggable = 0;
2320 	}
2321 
2322 	/*
2323 	 * Make sure we don't use "half-initialized" values if the ioctls fail.
2324 	 */
2325 	if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2326 		bzero(&arg, sizeof (arg));
2327 		fsp->pcfs_mediasize = 0;
2328 	} else {
2329 		fsp->pcfs_mediasize =
2330 		    (len_t)arg.mi.dki_lbsize *
2331 		    (len_t)arg.mi.dki_capacity;
2332 	}
2333 
2334 	if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2335 		if (fsp->pcfs_secsize == 0) {
2336 			fsp->pcfs_secsize = arg.mi.dki_lbsize;
2337 			fsp->pcfs_sdshift =
2338 			    ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2339 		} else {
2340 			PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2341 			    "%d, device (%x.%x), different from user-provided "
2342 			    "%d. User override - ignoring autodetect result.\n",
2343 			    arg.mi.dki_lbsize,
2344 			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2345 			    fsp->pcfs_secsize);
2346 		}
2347 	} else if (arg.mi.dki_lbsize) {
2348 		PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2349 		    "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2350 		    "Ignoring autodetect result.\n",
2351 		    arg.mi.dki_lbsize,
2352 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2353 	}
2354 
2355 	/*
2356 	 * We treat the following media types as a floppy by default.
2357 	 */
2358 	isfloppy =
2359 	    (arg.mi.dki_media_type == DK_FLOPPY ||
2360 	    arg.mi.dki_media_type == DK_ZIP ||
2361 	    arg.mi.dki_media_type == DK_JAZ);
2362 
2363 	/*
2364 	 * if this device understands fdio(7I) requests it's
2365 	 * obviously a floppy drive.
2366 	 */
2367 	if (!isfloppy &&
2368 	    !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2369 		isfloppy = 1;
2370 
2371 	/*
2372 	 * some devices (PCMCIA pseudofloppies) we like to treat
2373 	 * as floppies, but they don't understand fdio(7I) requests.
2374 	 */
2375 	if (!isfloppy &&
2376 	    !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2377 	    (arg.ci.dki_ctype == DKC_WDC2880 ||
2378 	    arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2379 	    arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2380 	    arg.ci.dki_ctype == DKC_INTEL82077 ||
2381 	    (arg.ci.dki_ctype == DKC_PCMCIA_MEM &&
2382 	    arg.ci.dki_flags & DKI_PCMCIA_PFD)))
2383 		isfloppy = 1;
2384 
2385 	/*
2386 	 * This is the "final fallback" test - media with
2387 	 * 2 heads and 80 cylinders are assumed to be floppies.
2388 	 * This is normally true for USB floppy drives ...
2389 	 */
2390 	if (!isfloppy &&
2391 	    !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2392 	    (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2393 		isfloppy = 1;
2394 
2395 	/*
2396 	 * This is similar to the "old" PCFS code that sets this flag
2397 	 * just based on the media descriptor being 0xf8 (MD_FIXED).
2398 	 * Should be re-worked. We really need some specialcasing for
2399 	 * removeable media.
2400 	 */
2401 	if (!isfloppy) {
2402 		fsp->pcfs_flags |= PCFS_NOCHK;
2403 	}
2404 
2405 	/*
2406 	 * We automatically disable access time updates if the medium is
2407 	 * removeable and/or hotpluggable, and the admin did not explicitly
2408 	 * request access time updates (via the "atime" mount option).
2409 	 * The majority of flash-based media should fit this category.
2410 	 * Minimizing write access extends the lifetime of your memory stick !
2411 	 */
2412 	if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2413 	    (isremoveable || ishotpluggable | isfloppy)) {
2414 		fsp->pcfs_flags |= PCFS_NOATIME;
2415 	}
2416 
2417 	(void) ldi_close(lh, FREAD, cr);
2418 out:
2419 	if (fsp->pcfs_secsize == 0) {
2420 		PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2421 		    "device (%x.%x) failed, no user-provided fallback. "
2422 		    "Using %d bytes.\n",
2423 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2424 		    DEV_BSIZE);
2425 		fsp->pcfs_secsize = DEV_BSIZE;
2426 		fsp->pcfs_sdshift = 0;
2427 	}
2428 	ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2429 	ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2430 }
2431 
2432 /*
2433  * Get the FAT type for the DOS medium.
2434  *
2435  * -------------------------
2436  * According to Microsoft:
2437  *   The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2438  * count of clusters on the volume and nothing else.
2439  * -------------------------
2440  *
2441  */
2442 static int
2443 pc_getfattype(struct pcfs *fsp)
2444 {
2445 	int error = 0;
2446 	buf_t *bp = NULL;
2447 	struct vnode *devvp = fsp->pcfs_devvp;
2448 	dev_t	dev = devvp->v_rdev;
2449 
2450 	/*
2451 	 * Detect the native block size of the medium, and attempt to
2452 	 * detect whether the medium is removeable.
2453 	 * We do treat removeable media (floppies, PCMCIA memory cards,
2454 	 * USB and FireWire disks) differently wrt. to the frequency
2455 	 * and synchronicity of FAT updates.
2456 	 * We need to know the media block size in order to be able to
2457 	 * parse the partition table.
2458 	 */
2459 	pcfs_device_getinfo(fsp);
2460 
2461 	/*
2462 	 * Unpartitioned media (floppies and some removeable devices)
2463 	 * don't have a partition table, the FAT BPB is at disk block 0.
2464 	 * Start out by reading block 0.
2465 	 */
2466 	fsp->pcfs_dosstart = 0;
2467 	bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2468 
2469 	if (error = geterror(bp))
2470 		goto out;
2471 
2472 	/*
2473 	 * If a logical drive number is requested, parse the partition table
2474 	 * and attempt to locate it. Otherwise, proceed immediately to the
2475 	 * BPB check. findTheDrive(), if successful, returns the disk block
2476 	 * number where the requested partition starts in "startsec".
2477 	 */
2478 	if (fsp->pcfs_ldrive != 0) {
2479 		PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2480 		    "device (%x,%x):%d to find BPB\n",
2481 		    getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2482 
2483 		if (error = findTheDrive(fsp, &bp))
2484 			goto out;
2485 
2486 		ASSERT(fsp->pcfs_dosstart != 0);
2487 
2488 		brelse(bp);
2489 		bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2490 		    fsp->pcfs_secsize);
2491 		if (error = geterror(bp))
2492 			goto out;
2493 	}
2494 
2495 	/*
2496 	 * Validate the BPB and fill in the instance structure.
2497 	 */
2498 	if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2499 		PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2500 		    "device (%x.%x):%d, disk LBA %u\n",
2501 		    getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2502 		    (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2503 		error = EINVAL;
2504 		goto out;
2505 	}
2506 
2507 	ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2508 
2509 out:
2510 	/*
2511 	 * Release the buffer used
2512 	 */
2513 	if (bp != NULL)
2514 		brelse(bp);
2515 	return (error);
2516 }
2517 
2518 
2519 /*
2520  * Get the file allocation table.
2521  * If there is an old FAT, invalidate it.
2522  */
2523 int
2524 pc_getfat(struct pcfs *fsp)
2525 {
2526 	struct buf *bp = NULL;
2527 	uchar_t *fatp = NULL;
2528 	uchar_t *fat_changemap = NULL;
2529 	int error;
2530 	int fat_changemapsize;
2531 	int flags = 0;
2532 	int nfat;
2533 	int altfat_mustmatch = 0;
2534 	int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2535 
2536 	if (fsp->pcfs_fatp) {
2537 		/*
2538 		 * There is a FAT in core.
2539 		 * If there are open file pcnodes or we have modified it or
2540 		 * it hasn't timed out yet use the in core FAT.
2541 		 * Otherwise invalidate it and get a new one
2542 		 */
2543 #ifdef notdef
2544 		if (fsp->pcfs_frefs ||
2545 		    (fsp->pcfs_flags & PCFS_FATMOD) ||
2546 		    (gethrestime_sec() < fsp->pcfs_fattime)) {
2547 			return (0);
2548 		} else {
2549 			mutex_enter(&pcfslock);
2550 			pc_invalfat(fsp);
2551 			mutex_exit(&pcfslock);
2552 		}
2553 #endif /* notdef */
2554 		return (0);
2555 	}
2556 
2557 	/*
2558 	 * Get FAT and check it for validity
2559 	 */
2560 	fatp = kmem_alloc(fatsize, KM_SLEEP);
2561 	error = pc_readfat(fsp, fatp);
2562 	if (error) {
2563 		flags = B_ERROR;
2564 		goto out;
2565 	}
2566 	fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2567 	fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2568 	fsp->pcfs_fatp = fatp;
2569 	fsp->pcfs_fat_changemapsize = fat_changemapsize;
2570 	fsp->pcfs_fat_changemap = fat_changemap;
2571 
2572 	/*
2573 	 * The only definite signature check is that the
2574 	 * media descriptor byte should match the first byte
2575 	 * of the FAT block.
2576 	 */
2577 	if (fatp[0] != fsp->pcfs_mediadesc) {
2578 		cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2579 		    "media descriptor %x, FAT[0] lowbyte %x\n",
2580 		    (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2581 		cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2582 		altfat_mustmatch = 1;
2583 	}
2584 
2585 	/*
2586 	 * Get alternate FATs and check for consistency
2587 	 * This is an inlined version of pc_readfat().
2588 	 * Since we're only comparing FAT and alternate FAT,
2589 	 * there's no reason to let pc_readfat() copy data out
2590 	 * of the buf. Instead, compare in-situ, one cluster
2591 	 * at a time.
2592 	 */
2593 	for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2594 		size_t startsec;
2595 		size_t off;
2596 
2597 		startsec = pc_dbdaddr(fsp,
2598 		    fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2599 
2600 		for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2601 			daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2602 			    pc_cltodb(fsp, pc_lblkno(fsp, off)));
2603 
2604 			bp = bread(fsp->pcfs_xdev, fatblk,
2605 			    MIN(fsp->pcfs_clsize, fatsize - off));
2606 			if (bp->b_flags & (B_ERROR | B_STALE)) {
2607 				cmn_err(CE_NOTE,
2608 				    "!pcfs: alternate FAT #%d (start LBA %p)"
2609 				    " read error at offset %ld on device"
2610 				    " (%x.%x):%d",
2611 				    nfat, (void *)(uintptr_t)startsec, off,
2612 				    getmajor(fsp->pcfs_xdev),
2613 				    getminor(fsp->pcfs_xdev),
2614 				    fsp->pcfs_ldrive);
2615 				flags = B_ERROR;
2616 				error = EIO;
2617 				goto out;
2618 			}
2619 			bp->b_flags |= B_STALE | B_AGE;
2620 			if (bcmp(bp->b_un.b_addr, fatp + off,
2621 			    MIN(fsp->pcfs_clsize, fatsize - off))) {
2622 				cmn_err(CE_NOTE,
2623 				    "!pcfs: alternate FAT #%d (start LBA %p)"
2624 				    " corrupted at offset %ld on device"
2625 				    " (%x.%x):%d",
2626 				    nfat, (void *)(uintptr_t)startsec, off,
2627 				    getmajor(fsp->pcfs_xdev),
2628 				    getminor(fsp->pcfs_xdev),
2629 				    fsp->pcfs_ldrive);
2630 				if (altfat_mustmatch) {
2631 					flags = B_ERROR;
2632 					error = EIO;
2633 					goto out;
2634 				}
2635 			}
2636 			brelse(bp);
2637 			bp = NULL;	/* prevent double release */
2638 		}
2639 	}
2640 
2641 	fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2642 	fsp->pcfs_fatjustread = 1;
2643 
2644 	/*
2645 	 * Retrieve FAT32 fsinfo sector.
2646 	 * A failure to read this is not fatal to accessing the volume.
2647 	 * It simply means operations that count or search free blocks
2648 	 * will have to do a full FAT walk, vs. a possibly quicker lookup
2649 	 * of the summary information.
2650 	 * Hence, we log a message but return success overall after this point.
2651 	 */
2652 	if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2653 		struct fat_od_fsi *fsinfo_disk;
2654 
2655 		bp = bread(fsp->pcfs_xdev,
2656 		    pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2657 		fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2658 		if (bp->b_flags & (B_ERROR | B_STALE) ||
2659 		    !FSISIG_OK(fsinfo_disk)) {
2660 			cmn_err(CE_NOTE,
2661 			    "!pcfs: error reading fat32 fsinfo from "
2662 			    "device (%x.%x):%d, block %lld",
2663 			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2664 			    fsp->pcfs_ldrive,
2665 			    (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2666 			fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2667 			fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2668 			fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2669 		} else {
2670 			bp->b_flags |= B_STALE | B_AGE;
2671 			fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2672 			fsp->pcfs_fsinfo.fs_free_clusters =
2673 			    LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2674 			fsp->pcfs_fsinfo.fs_next_free =
2675 			    LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2676 		}
2677 		brelse(bp);
2678 		bp = NULL;
2679 	}
2680 
2681 	if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2682 		fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2683 	else
2684 		fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2685 
2686 	return (0);
2687 
2688 out:
2689 	cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2690 	if (bp)
2691 		brelse(bp);
2692 	if (fatp)
2693 		kmem_free(fatp, fatsize);
2694 	if (fat_changemap)
2695 		kmem_free(fat_changemap, fat_changemapsize);
2696 
2697 	if (flags) {
2698 		pc_mark_irrecov(fsp);
2699 	}
2700 	return (error);
2701 }
2702