xref: /illumos-gate/usr/src/uts/common/fs/pcfs/pc_vfsops.c (revision c3377ee9a5b3bff76dbf51347a8de3d215eb6cca)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright (c) 2017 by Delphix. All rights reserved.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kmem.h>
33 #include <sys/user.h>
34 #include <sys/proc.h>
35 #include <sys/cred.h>
36 #include <sys/disp.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/fdio.h>
42 #include <sys/file.h>
43 #include <sys/uio.h>
44 #include <sys/conf.h>
45 #include <sys/statvfs.h>
46 #include <sys/mount.h>
47 #include <sys/pathname.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/sysmacros.h>
51 #include <sys/conf.h>
52 #include <sys/mkdev.h>
53 #include <sys/swap.h>
54 #include <sys/sunddi.h>
55 #include <sys/sunldi.h>
56 #include <sys/dktp/fdisk.h>
57 #include <sys/fs/pc_label.h>
58 #include <sys/fs/pc_fs.h>
59 #include <sys/fs/pc_dir.h>
60 #include <sys/fs/pc_node.h>
61 #include <fs/fs_subr.h>
62 #include <sys/modctl.h>
63 #include <sys/dkio.h>
64 #include <sys/open.h>
65 #include <sys/mntent.h>
66 #include <sys/policy.h>
67 #include <sys/atomic.h>
68 #include <sys/sdt.h>
69 
/*
 * The majority of PC media use a 512-byte sector size, but
 * occasionally you will run across a 1k sector size.
 * For media with a 1k sector size, fd_strategy() requires
 * the I/O size to be a 1k multiple; so when the sector size
 * is not yet known, always read 1k (that is, twice PC_SECSIZE).
 */
#define	PC_SAFESECSIZE	(PC_SECSIZE * 2)
78 
/* Helper taking a device number; its definition is not in this section. */
static int pcfs_pseudo_floppy(dev_t);

/*
 * Filesystem type initialization routine (referenced from the vfsdef
 * below) and the VFS operations it registers via pcfs_vfsops_template.
 */
static int pcfsinit(int, char *);
static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
	struct cred *);
static int pcfs_unmount(struct vfs *, int, struct cred *);
static int pcfs_root(struct vfs *, struct vnode **);
static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
static int pc_syncfsnodes(struct pcfs *);
static int pcfs_sync(struct vfs *, short, struct cred *);
static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
static void pcfs_freevfs(vfs_t *vfsp);

/* FAT read/write helpers; pc_writefat() is used by pc_syncfat(). */
static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
static int pc_writefat(struct pcfs *fsp, daddr_t start);

/* Mount-time helpers called from pcfs_mount(). */
static int pc_getfattype(struct pcfs *fsp);
static void pcfs_parse_mntopts(struct pcfs *fsp);
97 
98 
/*
 * pcfs mount options table
 */

/*
 * Cancellation lists. Each "<opt>_cancel" array is a NULL-terminated list
 * naming the opposite option, and is referenced from the <opt> entry of
 * the mntopts[] table below so that mutually exclusive options displace
 * each other.
 */
static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };

/*
 * The options PCFS understands. Entries flagged MO_DEFAULT are "hidden",
 * "nofoldcase", "clamptime" and "timezone" (default argument "+0").
 * Neither "atime" nor "noatime" is flagged as a default here; pcfs_mount()
 * sets one of them explicitly once the mount has succeeded.
 * "timezone" and "secsize" carry a value (MO_HASVALUE) that is parsed in
 * pcfs_parse_mntopts().
 */
static mntopt_t mntopts[] = {
/*
 *	option name	cancel option	default arg	flags	opt data
 */
	{ MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
	{ MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
	{ MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
	{ MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
	{ MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
	{ MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, 0, NULL },
	{ MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL },
	{ MNTOPT_ATIME, atime_cancel, NULL, 0, NULL },
	{ MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
	{ MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
};

/* Option table descriptor (entry count, table) referenced from vfw. */
static mntopts_t pcfs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	mntopts
};
132 
/* Debug verbosity knob; initialized to 0. Not read in this section. */
int pcfsdebuglevel = 0;

/*
 * pcfslock:	protects the list of mounted pc filesystems "pc_mounttab".
 * pcfs_lock:	(inside per filesystem structure "pcfs")
 *		per filesystem lock. Most of the vfsops and vnodeops are
 *		protected by this lock.
 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
 *
 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
 *
 * pcfs_mountcount:	used to prevent module unloads while there is still
 *			pcfs state from a former mount hanging around. With
 *			forced umount support, the filesystem module must not
 *			be allowed to go away before the last VFS_FREEVFS()
 *			call has been made.
 *			Since this is just an atomic counter, there's no need
 *			for locking.
 */
kmutex_t	pcfslock;
krwlock_t	pcnodes_lock;
uint32_t	pcfs_mountcount;
155 
/*
 * Filesystem type index passed to pcfsinit(); used when initializing a
 * vfs in pcfs_mount() and when freeing the vfsops in _fini().
 */
static int pcfstype;

/*
 * Filesystem definition: name "pcfs", init routine pcfsinit(), the
 * VSW_* capability flags and the mount option table.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	"pcfs",
	pcfsinit,
	VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI|VSW_MOUNTDEV,
	&pcfs_mntopts
};

extern struct mod_ops mod_fsops;

/* Module linkage element describing this filesystem module. */
static struct modlfs modlfs = {
	&mod_fsops,
	"PC filesystem",
	&vfw
};

/* Linkage handed to mod_install() / mod_remove() / mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlfs,
	NULL
};
179 
180 int
181 _init(void)
182 {
183 	int	error;
184 
185 #if !defined(lint)
186 	/* make sure the on-disk structures are sane */
187 	ASSERT(sizeof (struct pcdir) == 32);
188 	ASSERT(sizeof (struct pcdir_lfn) == 32);
189 #endif
190 	mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
191 	rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
192 	error = mod_install(&modlinkage);
193 	if (error) {
194 		mutex_destroy(&pcfslock);
195 		rw_destroy(&pcnodes_lock);
196 	}
197 	return (error);
198 }
199 
200 int
201 _fini(void)
202 {
203 	int	error;
204 
205 	/*
206 	 * If a forcedly unmounted instance is still hanging around,
207 	 * we cannot allow the module to be unloaded because that would
208 	 * cause panics once the VFS framework decides it's time to call
209 	 * into VFS_FREEVFS().
210 	 */
211 	if (pcfs_mountcount)
212 		return (EBUSY);
213 
214 	error = mod_remove(&modlinkage);
215 	if (error)
216 		return (error);
217 	mutex_destroy(&pcfslock);
218 	rw_destroy(&pcnodes_lock);
219 	/*
220 	 * Tear down the operations vectors
221 	 */
222 	(void) vfs_freevfsops_by_type(pcfstype);
223 	vn_freevnodeops(pcfs_fvnodeops);
224 	vn_freevnodeops(pcfs_dvnodeops);
225 	return (0);
226 }
227 
228 int
229 _info(struct modinfo *modinfop)
230 {
231 	return (mod_info(&modlinkage, modinfop));
232 }
233 
234 /* ARGSUSED1 */
235 static int
236 pcfsinit(int fstype, char *name)
237 {
238 	static const fs_operation_def_t pcfs_vfsops_template[] = {
239 		VFSNAME_MOUNT,		{ .vfs_mount = pcfs_mount },
240 		VFSNAME_UNMOUNT,	{ .vfs_unmount = pcfs_unmount },
241 		VFSNAME_ROOT,		{ .vfs_root = pcfs_root },
242 		VFSNAME_STATVFS,	{ .vfs_statvfs = pcfs_statvfs },
243 		VFSNAME_SYNC,		{ .vfs_sync = pcfs_sync },
244 		VFSNAME_VGET,		{ .vfs_vget = pcfs_vget },
245 		VFSNAME_FREEVFS,	{ .vfs_freevfs = pcfs_freevfs },
246 		NULL,			NULL
247 	};
248 	int error;
249 
250 	error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
251 	if (error != 0) {
252 		cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
253 		return (error);
254 	}
255 
256 	error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
257 	if (error != 0) {
258 		(void) vfs_freevfsops_by_type(fstype);
259 		cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
260 		return (error);
261 	}
262 
263 	error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
264 	if (error != 0) {
265 		(void) vfs_freevfsops_by_type(fstype);
266 		vn_freevnodeops(pcfs_fvnodeops);
267 		cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
268 		return (error);
269 	}
270 
271 	pcfstype = fstype;
272 	(void) pc_init();
273 	pcfs_mountcount = 0;
274 	return (0);
275 }
276 
/*
 * Head of the list of mounted PCFS instances, linked through pcfs_nxt.
 * pcfs_mount() inserts at the head and pcfs_unmount() unlinks; both do
 * so while holding pcfslock.
 */
static struct pcfs *pc_mounttab = NULL;

/* Defined elsewhere; not referenced in this section of the file. */
extern struct pcfs_args pc_tz;

/*
 *  Define some special logical drives we use internal to this file.
 *
 *  BOOT_PARTITION_DRIVE is selected by the ":boot" device name suffix and
 *  UNPARTITIONED_DRIVE is used when the device node itself is mounted (or
 *  a floppy-style ":a" / ":b" suffix was given); see
 *  pcfs_device_identify().
 */
#define	BOOT_PARTITION_DRIVE	99
#define	PRIMARY_DOS_DRIVE	1
#define	UNPARTITIONED_DRIVE	0
287 
288 static int
289 pcfs_device_identify(
290 	struct vfs *vfsp,
291 	struct mounta *uap,
292 	struct cred *cr,
293 	int *dos_ldrive,
294 	dev_t *xdev)
295 {
296 	struct pathname special;
297 	char *c;
298 	struct vnode *svp = NULL;
299 	struct vnode *lvp = NULL;
300 	int oflag, aflag;
301 	int error;
302 
303 	/*
304 	 * Resolve path name of special file being mounted.
305 	 */
306 	if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
307 		return (error);
308 	}
309 
310 	*dos_ldrive = -1;
311 
312 	if (error =
313 	    lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
314 		/*
315 		 * If there's no device node, the name specified most likely
316 		 * maps to a PCFS-style "partition specifier" to select a
317 		 * harddisk primary/logical partition. Disable floppy-specific
318 		 * checks in such cases unless an explicit :A or :B is
319 		 * requested.
320 		 */
321 
322 		/*
323 		 * Split the pathname string at the last ':' separator.
324 		 * If there's no ':' in the device name, or the ':' is the
325 		 * last character in the string, the name is invalid and
326 		 * the error from the previous lookup will be returned.
327 		 */
328 		c = strrchr(special.pn_path, ':');
329 		if (c == NULL || strlen(c) == 0)
330 			goto devlookup_done;
331 
332 		*c++ = '\0';
333 
334 		/*
335 		 * PCFS partition name suffixes can be:
336 		 *	- "boot" to indicate the X86BOOT partition
337 		 *	- a drive letter [c-z] for the "DOS logical drive"
338 		 *	- a drive number 1..24 for the "DOS logical drive"
339 		 *	- a "floppy name letter", 'a' or 'b' (just strip this)
340 		 */
341 		if (strcasecmp(c, "boot") == 0) {
342 			/*
343 			 * The Solaris boot partition is requested.
344 			 */
345 			*dos_ldrive = BOOT_PARTITION_DRIVE;
346 		} else if (strspn(c, "0123456789") == strlen(c)) {
347 			/*
348 			 * All digits - parse the partition number.
349 			 */
350 			long drvnum = 0;
351 
352 			if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
353 				/*
354 				 * A number alright - in the allowed range ?
355 				 */
356 				if (drvnum > 24 || drvnum == 0)
357 					error = ENXIO;
358 			}
359 			if (error)
360 				goto devlookup_done;
361 			*dos_ldrive = (int)drvnum;
362 		} else if (strlen(c) == 1) {
363 			/*
364 			 * A single trailing character was specified.
365 			 *	- [c-zC-Z] means a harddisk partition, and
366 			 *	  we retrieve the partition number.
367 			 *	- [abAB] means a floppy drive, so we swallow
368 			 *	  the "drive specifier" and test later
369 			 *	  whether the physical device is a floppy.
370 			 */
371 			*c = tolower(*c);
372 			if (*c == 'a' || *c == 'b') {
373 				*dos_ldrive = UNPARTITIONED_DRIVE;
374 			} else if (*c < 'c' || *c > 'z') {
375 				error = ENXIO;
376 				goto devlookup_done;
377 			} else {
378 				*dos_ldrive = 1 + *c - 'c';
379 			}
380 		} else {
381 			/*
382 			 * Can't parse this - pass through previous error.
383 			 */
384 			goto devlookup_done;
385 		}
386 
387 
388 		error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
389 		    NULLVPP, &svp);
390 	} else {
391 		*dos_ldrive = UNPARTITIONED_DRIVE;
392 	}
393 devlookup_done:
394 	pn_free(&special);
395 	if (error)
396 		return (error);
397 
398 	ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
399 
400 	/*
401 	 * Verify caller's permission to open the device special file.
402 	 */
403 	if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
404 	    ((uap->flags & MS_RDONLY) != 0)) {
405 		oflag = FREAD;
406 		aflag = VREAD;
407 	} else {
408 		oflag = FREAD | FWRITE;
409 		aflag = VREAD | VWRITE;
410 	}
411 
412 	error = vfs_get_lofi(vfsp, &lvp);
413 
414 	if (error > 0) {
415 		if (error == ENOENT)
416 			error = ENODEV;
417 		goto out;
418 	} else if (error == 0) {
419 		*xdev = lvp->v_rdev;
420 	} else {
421 		*xdev = svp->v_rdev;
422 
423 		if (svp->v_type != VBLK) {
424 			error = ENOTBLK;
425 			goto out;
426 		}
427 
428 		if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
429 			goto out;
430 	}
431 
432 	if (getmajor(*xdev) >= devcnt) {
433 		error = ENXIO;
434 		goto out;
435 	}
436 
437 	if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
438 		goto out;
439 
440 out:
441 	if (svp != NULL)
442 		VN_RELE(svp);
443 	if (lvp != NULL)
444 		VN_RELE(lvp);
445 	return (error);
446 }
447 
448 static int
449 pcfs_device_ismounted(
450 	struct vfs *vfsp,
451 	int dos_ldrive,
452 	dev_t xdev,
453 	int *remounting,
454 	dev_t *pseudodev)
455 {
456 	struct pcfs *fsp;
457 	int remount = *remounting;
458 
459 	/*
460 	 * Ensure that this logical drive isn't already mounted, unless
461 	 * this is a REMOUNT request.
462 	 * Note: The framework will perform this check if the "...:c"
463 	 * PCFS-style "logical drive" syntax has not been used and an
464 	 * actually existing physical device is backing this filesystem.
465 	 * Once all block device drivers support PC-style partitioning,
466 	 * this codeblock can be dropped.
467 	 */
468 	*pseudodev = xdev;
469 
470 	if (dos_ldrive) {
471 		mutex_enter(&pcfslock);
472 		for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
473 			if (fsp->pcfs_xdev == xdev &&
474 			    fsp->pcfs_ldrive == dos_ldrive) {
475 				mutex_exit(&pcfslock);
476 				if (remount) {
477 					return (0);
478 				} else {
479 					return (EBUSY);
480 				}
481 			}
482 		/*
483 		 * Assign a unique device number for the vfs
484 		 * The old way (getudev() + a constantly incrementing
485 		 * major number) was wrong because it changes vfs_dev
486 		 * across mounts and reboots, which breaks nfs file handles.
487 		 * UFS just uses the real dev_t. We can't do that because
488 		 * of the way pcfs opens fdisk partitons (the :c and :d
489 		 * partitions are on the same dev_t). Though that _might_
490 		 * actually be ok, since the file handle contains an
491 		 * absolute block number, it's probably better to make them
492 		 * different. So I think we should retain the original
493 		 * dev_t, but come up with a different minor number based
494 		 * on the logical drive that will _always_ come up the same.
495 		 * For now, we steal the upper 6 bits.
496 		 */
497 #ifdef notdef
498 		/* what should we do here? */
499 		if (((getminor(xdev) >> 12) & 0x3F) != 0)
500 			printf("whoops - upper bits used!\n");
501 #endif
502 		*pseudodev = makedevice(getmajor(xdev),
503 		    ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
504 		if (vfs_devmounting(*pseudodev, vfsp)) {
505 			mutex_exit(&pcfslock);
506 			return (EBUSY);
507 		}
508 		if (vfs_devismounted(*pseudodev)) {
509 			mutex_exit(&pcfslock);
510 			if (remount) {
511 				return (0);
512 			} else {
513 				return (EBUSY);
514 			}
515 		}
516 		mutex_exit(&pcfslock);
517 	} else {
518 		*pseudodev = xdev;
519 		if (vfs_devmounting(*pseudodev, vfsp)) {
520 			return (EBUSY);
521 		}
522 		if (vfs_devismounted(*pseudodev))
523 			if (remount) {
524 				return (0);
525 			} else {
526 				return (EBUSY);
527 			}
528 	}
529 
530 	/*
531 	 * This is not a remount. Even if MS_REMOUNT was requested,
532 	 * the caller needs to proceed as it would on an ordinary
533 	 * mount.
534 	 */
535 	*remounting = 0;
536 
537 	ASSERT(*pseudodev);
538 	return (0);
539 }
540 
541 /*
542  * Get the PCFS-specific mount options from the VFS framework.
543  * For "timezone" and "secsize", we need to parse the number
544  * ourselves and ensure its validity.
545  * Note: "secsize" is deliberately undocumented at this time,
546  * it's a workaround for devices (particularly: lofi image files)
547  * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
548  */
549 static void
550 pcfs_parse_mntopts(struct pcfs *fsp)
551 {
552 	char *c;
553 	char *endptr;
554 	long l;
555 	struct vfs *vfsp = fsp->pcfs_vfs;
556 
557 	ASSERT(fsp->pcfs_secondswest == 0);
558 	ASSERT(fsp->pcfs_secsize == 0);
559 
560 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
561 		fsp->pcfs_flags |= PCFS_HIDDEN;
562 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
563 		fsp->pcfs_flags |= PCFS_FOLDCASE;
564 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
565 		fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
566 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
567 		fsp->pcfs_flags |= PCFS_NOATIME;
568 
569 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
570 		if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
571 		    endptr == c + strlen(c)) {
572 			/*
573 			 * A number alright - in the allowed range ?
574 			 */
575 			if (l <= -12*3600 || l >= 12*3600) {
576 				cmn_err(CE_WARN, "!pcfs: invalid use of "
577 				    "'timezone' mount option - %ld "
578 				    "is out of range. Assuming 0.", l);
579 				l = 0;
580 			}
581 		} else {
582 			cmn_err(CE_WARN, "!pcfs: invalid use of "
583 			    "'timezone' mount option - argument %s "
584 			    "is not a valid number. Assuming 0.", c);
585 			l = 0;
586 		}
587 		fsp->pcfs_secondswest = l;
588 	}
589 
590 	/*
591 	 * The "secsize=..." mount option is a workaround for the lack of
592 	 * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
593 	 * partition table of a disk image and it has been partitioned with
594 	 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
595 	 * images.
596 	 * That should really be fixed in lofi ... this is a workaround.
597 	 */
598 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
599 		if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
600 		    endptr == c + strlen(c)) {
601 			/*
602 			 * A number alright - a valid sector size as well ?
603 			 */
604 			if (!VALID_SECSIZE(l)) {
605 				cmn_err(CE_WARN, "!pcfs: invalid use of "
606 				    "'secsize' mount option - %ld is "
607 				    "unsupported. Autodetecting.", l);
608 				l = 0;
609 			}
610 		} else {
611 			cmn_err(CE_WARN, "!pcfs: invalid use of "
612 			    "'secsize' mount option - argument %s "
613 			    "is not a valid number. Autodetecting.", c);
614 			l = 0;
615 		}
616 		fsp->pcfs_secsize = l;
617 		fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
618 	}
619 }
620 
621 /*
622  * vfs operations
623  */
624 
625 /*
626  * pcfs_mount - backend for VFS_MOUNT() on PCFS.
627  */
628 static int
629 pcfs_mount(
630 	struct vfs *vfsp,
631 	struct vnode *mvp,
632 	struct mounta *uap,
633 	struct cred *cr)
634 {
635 	struct pcfs *fsp;
636 	struct vnode *devvp;
637 	dev_t pseudodev;
638 	dev_t xdev;
639 	int dos_ldrive = 0;
640 	int error;
641 	int remounting;
642 
643 	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
644 		return (error);
645 
646 	if (mvp->v_type != VDIR)
647 		return (ENOTDIR);
648 
649 	mutex_enter(&mvp->v_lock);
650 	if ((uap->flags & MS_REMOUNT) == 0 &&
651 	    (uap->flags & MS_OVERLAY) == 0 &&
652 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
653 		mutex_exit(&mvp->v_lock);
654 		return (EBUSY);
655 	}
656 	mutex_exit(&mvp->v_lock);
657 
658 	/*
659 	 * PCFS doesn't do mount arguments anymore - everything's a mount
660 	 * option these days. In order not to break existing callers, we
661 	 * don't reject it yet, just warn that the data (if any) is ignored.
662 	 */
663 	if (uap->datalen != 0)
664 		cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
665 		    "mount argument structures instead of mount options. "
666 		    "Ignoring mount(2) 'dataptr' argument.");
667 
668 	/*
669 	 * This is needed early, to make sure the access / open calls
670 	 * are done using the correct mode. Processing this mount option
671 	 * only when calling pcfs_parse_mntopts() would lead us to attempt
672 	 * a read/write access to a possibly writeprotected device, and
673 	 * a readonly mount attempt might fail because of that.
674 	 */
675 	if (uap->flags & MS_RDONLY) {
676 		vfsp->vfs_flag |= VFS_RDONLY;
677 		vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
678 	}
679 
680 	/*
681 	 * For most filesystems, this is just a lookupname() on the
682 	 * mount pathname string. PCFS historically has to do its own
683 	 * partition table parsing because not all Solaris architectures
684 	 * support all styles of partitioning that PC media can have, and
685 	 * hence PCFS understands "device names" that don't map to actual
686 	 * physical device nodes. Parsing the "PCFS syntax" for device
687 	 * names is done in pcfs_device_identify() - see there.
688 	 *
689 	 * Once all block device drivers that can host FAT filesystems have
690 	 * been enhanced to create device nodes for all PC-style partitions,
691 	 * this code can go away.
692 	 */
693 	if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
694 		return (error);
695 
696 	/*
697 	 * As with looking up the actual device to mount, PCFS cannot rely
698 	 * on just the checks done by vfs_ismounted() whether a given device
699 	 * is mounted already. The additional check against the "PCFS syntax"
700 	 * is done in  pcfs_device_ismounted().
701 	 */
702 	remounting = (uap->flags & MS_REMOUNT);
703 
704 	if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
705 	    &pseudodev))
706 		return (error);
707 
708 	if (remounting)
709 		return (0);
710 
711 	/*
712 	 * Mount the filesystem.
713 	 * An instance structure is required before the attempt to locate
714 	 * and parse the FAT BPB. This is because mount options may change
715 	 * the behaviour of the filesystem type matching code. Precreate
716 	 * it and fill it in to a degree that allows parsing the mount
717 	 * options.
718 	 */
719 	devvp = makespecvp(xdev, VBLK);
720 	if (IS_SWAPVP(devvp)) {
721 		VN_RELE(devvp);
722 		return (EBUSY);
723 	}
724 	error = VOP_OPEN(&devvp,
725 	    (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
726 	if (error) {
727 		VN_RELE(devvp);
728 		return (error);
729 	}
730 
731 	fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
732 	fsp->pcfs_vfs = vfsp;
733 	fsp->pcfs_xdev = xdev;
734 	fsp->pcfs_devvp = devvp;
735 	fsp->pcfs_ldrive = dos_ldrive;
736 	mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
737 
738 	pcfs_parse_mntopts(fsp);
739 
740 	/*
741 	 * This is the actual "mount" - the PCFS superblock check.
742 	 *
743 	 * Find the requested logical drive and the FAT BPB therein.
744 	 * Check device type and flag the instance if media is removeable.
745 	 *
746 	 * Initializes most members of the filesystem instance structure.
747 	 * Returns EINVAL if no valid BPB can be found. Other errors may
748 	 * occur after I/O failures, or when invalid / unparseable partition
749 	 * tables are encountered.
750 	 */
751 	if (error = pc_getfattype(fsp))
752 		goto errout;
753 
754 	/*
755 	 * Now that the BPB has been parsed, this structural information
756 	 * is available and known to be valid. Initialize the VFS.
757 	 */
758 	vfsp->vfs_data = fsp;
759 	vfsp->vfs_dev = pseudodev;
760 	vfsp->vfs_fstype = pcfstype;
761 	vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
762 	vfsp->vfs_bcount = 0;
763 	vfsp->vfs_bsize = fsp->pcfs_clsize;
764 
765 	/*
766 	 * Validate that we can access the FAT and that it is, to the
767 	 * degree we can verify here, self-consistent.
768 	 */
769 	if (error = pc_verify(fsp))
770 		goto errout;
771 
772 	/*
773 	 * Record the time of the mount, to return as an "approximate"
774 	 * timestamp for the FAT root directory. Since FAT roots don't
775 	 * have timestamps, this is less confusing to the user than
776 	 * claiming "zero" / Jan/01/1970.
777 	 */
778 	gethrestime(&fsp->pcfs_mounttime);
779 
780 	/*
781 	 * Fix up the mount options. Because "noatime" is made default on
782 	 * removeable media only, a fixed disk will have neither "atime"
783 	 * nor "noatime" set. We set the options explicitly depending on
784 	 * the PCFS_NOATIME flag, to inform the user of what applies.
785 	 * Mount option cancellation will take care that the mutually
786 	 * exclusive 'other' is cleared.
787 	 */
788 	vfs_setmntopt(vfsp,
789 	    fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
790 	    NULL, 0);
791 
792 	/*
793 	 * All clear - insert the FS instance into PCFS' list.
794 	 */
795 	mutex_enter(&pcfslock);
796 	fsp->pcfs_nxt = pc_mounttab;
797 	pc_mounttab = fsp;
798 	mutex_exit(&pcfslock);
799 	atomic_inc_32(&pcfs_mountcount);
800 	return (0);
801 
802 errout:
803 	(void) VOP_CLOSE(devvp,
804 	    vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
805 	    1, (offset_t)0, cr, NULL);
806 	VN_RELE(devvp);
807 	mutex_destroy(&fsp->pcfs_lock);
808 	kmem_free(fsp, sizeof (*fsp));
809 	return (error);
810 
811 }
812 
813 static int
814 pcfs_unmount(
815 	struct vfs *vfsp,
816 	int flag,
817 	struct cred *cr)
818 {
819 	struct pcfs *fsp, *fsp1;
820 
821 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
822 		return (EPERM);
823 
824 	fsp = VFSTOPCFS(vfsp);
825 
826 	/*
827 	 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
828 	 * prevent lookuppn from crossing the mount point.
829 	 * If this is not a forced umount request and there's ongoing I/O,
830 	 * don't allow the mount to proceed.
831 	 */
832 	if (flag & MS_FORCE)
833 		vfsp->vfs_flag |= VFS_UNMOUNTED;
834 	else if (fsp->pcfs_nrefs)
835 		return (EBUSY);
836 
837 	mutex_enter(&pcfslock);
838 
839 	/*
840 	 * If this is a forced umount request or if the fs instance has
841 	 * been marked as beyond recovery, allow the umount to proceed
842 	 * regardless of state. pc_diskchanged() forcibly releases all
843 	 * inactive vnodes/pcnodes.
844 	 */
845 	if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
846 		rw_enter(&pcnodes_lock, RW_WRITER);
847 		pc_diskchanged(fsp);
848 		rw_exit(&pcnodes_lock);
849 	}
850 
851 	/* now there should be no pcp node on pcfhead or pcdhead. */
852 
853 	if (fsp == pc_mounttab) {
854 		pc_mounttab = fsp->pcfs_nxt;
855 	} else {
856 		for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
857 			if (fsp1->pcfs_nxt == fsp)
858 				fsp1->pcfs_nxt = fsp->pcfs_nxt;
859 	}
860 
861 	mutex_exit(&pcfslock);
862 
863 	/*
864 	 * Since we support VFS_FREEVFS(), there's no need to
865 	 * free the fsp right now. The framework will tell us
866 	 * when the right time to do so has arrived by calling
867 	 * into pcfs_freevfs.
868 	 */
869 	return (0);
870 }
871 
872 /*
873  * find root of pcfs
874  */
875 static int
876 pcfs_root(
877 	struct vfs *vfsp,
878 	struct vnode **vpp)
879 {
880 	struct pcfs *fsp;
881 	struct pcnode *pcp;
882 	int error;
883 
884 	fsp = VFSTOPCFS(vfsp);
885 	if (error = pc_lockfs(fsp, 0, 0))
886 		return (error);
887 
888 	pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
889 	pc_unlockfs(fsp);
890 	*vpp = PCTOV(pcp);
891 	pcp->pc_flags |= PC_EXTERNAL;
892 	return (0);
893 }
894 
895 /*
896  * Get file system statistics.
897  */
898 static int
899 pcfs_statvfs(
900 	struct vfs *vfsp,
901 	struct statvfs64 *sp)
902 {
903 	struct pcfs *fsp;
904 	int error;
905 	dev32_t d32;
906 
907 	fsp = VFSTOPCFS(vfsp);
908 	error = pc_getfat(fsp);
909 	if (error)
910 		return (error);
911 	bzero(sp, sizeof (*sp));
912 	sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
913 	sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
914 	sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
915 	sp->f_files = (fsfilcnt64_t)-1;
916 	sp->f_ffree = (fsfilcnt64_t)-1;
917 	sp->f_favail = (fsfilcnt64_t)-1;
918 #ifdef notdef
919 	(void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
920 #endif /* notdef */
921 	(void) cmpldev(&d32, vfsp->vfs_dev);
922 	sp->f_fsid = d32;
923 	(void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
924 	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
925 	sp->f_namemax = PCMAXNAMLEN;
926 	return (0);
927 }
928 
929 static int
930 pc_syncfsnodes(struct pcfs *fsp)
931 {
932 	struct pchead *hp;
933 	struct pcnode *pcp;
934 	int error;
935 
936 	if (error = pc_lockfs(fsp, 0, 0))
937 		return (error);
938 
939 	if (!(error = pc_syncfat(fsp))) {
940 		hp = pcfhead;
941 		while (hp < & pcfhead [ NPCHASH ]) {
942 			rw_enter(&pcnodes_lock, RW_READER);
943 			pcp = hp->pch_forw;
944 			while (pcp != (struct pcnode *)hp) {
945 				if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
946 					if (error = pc_nodesync(pcp))
947 						break;
948 				pcp = pcp -> pc_forw;
949 			}
950 			rw_exit(&pcnodes_lock);
951 			if (error)
952 				break;
953 			hp++;
954 		}
955 	}
956 	pc_unlockfs(fsp);
957 	return (error);
958 }
959 
960 /*
961  * Flush any pending I/O.
962  */
963 /*ARGSUSED*/
964 static int
965 pcfs_sync(
966 	struct vfs *vfsp,
967 	short flag,
968 	struct cred *cr)
969 {
970 	struct pcfs *fsp;
971 	int error = 0;
972 
973 	/* this prevents the filesystem from being umounted. */
974 	mutex_enter(&pcfslock);
975 	if (vfsp != NULL) {
976 		fsp = VFSTOPCFS(vfsp);
977 		if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
978 			error = pc_syncfsnodes(fsp);
979 		} else {
980 			rw_enter(&pcnodes_lock, RW_WRITER);
981 			pc_diskchanged(fsp);
982 			rw_exit(&pcnodes_lock);
983 			error = EIO;
984 		}
985 	} else {
986 		fsp = pc_mounttab;
987 		while (fsp != NULL) {
988 			if (fsp->pcfs_flags & PCFS_IRRECOV) {
989 				rw_enter(&pcnodes_lock, RW_WRITER);
990 				pc_diskchanged(fsp);
991 				rw_exit(&pcnodes_lock);
992 				error = EIO;
993 				break;
994 			}
995 			error = pc_syncfsnodes(fsp);
996 			if (error) break;
997 			fsp = fsp->pcfs_nxt;
998 		}
999 	}
1000 	mutex_exit(&pcfslock);
1001 	return (error);
1002 }
1003 
1004 int
1005 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
1006 {
1007 	int err;
1008 
1009 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
1010 		return (EIO);
1011 
1012 	if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
1013 		fsp->pcfs_count++;
1014 	} else {
1015 		mutex_enter(&fsp->pcfs_lock);
1016 		if (fsp->pcfs_flags & PCFS_LOCKED)
1017 			panic("pc_lockfs");
1018 		/*
1019 		 * We check the IRRECOV bit again just in case somebody
1020 		 * snuck past the initial check but then got held up before
1021 		 * they could grab the lock.  (And in the meantime someone
1022 		 * had grabbed the lock and set the bit)
1023 		 */
1024 		if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1025 			if ((err = pc_getfat(fsp))) {
1026 				mutex_exit(&fsp->pcfs_lock);
1027 				return (err);
1028 			}
1029 		}
1030 		fsp->pcfs_flags |= PCFS_LOCKED;
1031 		fsp->pcfs_owner = curthread;
1032 		fsp->pcfs_count++;
1033 	}
1034 	return (0);
1035 }
1036 
1037 void
1038 pc_unlockfs(struct pcfs *fsp)
1039 {
1040 
1041 	if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1042 		panic("pc_unlockfs");
1043 	if (--fsp->pcfs_count < 0)
1044 		panic("pc_unlockfs: count");
1045 	if (fsp->pcfs_count == 0) {
1046 		fsp->pcfs_flags &= ~PCFS_LOCKED;
1047 		fsp->pcfs_owner = 0;
1048 		mutex_exit(&fsp->pcfs_lock);
1049 	}
1050 }
1051 
1052 int
1053 pc_syncfat(struct pcfs *fsp)
1054 {
1055 	struct buf *bp;
1056 	int nfat;
1057 	int	error = 0;
1058 	struct fat_od_fsi *fsinfo_disk;
1059 
1060 	if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1061 	    !(fsp->pcfs_flags & PCFS_FATMOD))
1062 		return (0);
1063 	/*
1064 	 * write out all copies of FATs
1065 	 */
1066 	fsp->pcfs_flags &= ~PCFS_FATMOD;
1067 	fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1068 	for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1069 		error = pc_writefat(fsp, pc_dbdaddr(fsp,
1070 		    fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1071 		if (error) {
1072 			pc_mark_irrecov(fsp);
1073 			return (EIO);
1074 		}
1075 	}
1076 	pc_clear_fatchanges(fsp);
1077 
1078 	/*
1079 	 * Write out fsinfo sector.
1080 	 */
1081 	if (IS_FAT32(fsp)) {
1082 		bp = bread(fsp->pcfs_xdev,
1083 		    pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1084 		if (bp->b_flags & (B_ERROR | B_STALE)) {
1085 			error = geterror(bp);
1086 		}
1087 		fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1088 		if (!error && FSISIG_OK(fsinfo_disk)) {
1089 			fsinfo_disk->fsi_incore.fs_free_clusters =
1090 			    LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1091 			fsinfo_disk->fsi_incore.fs_next_free =
1092 			    LE_32(FSINFO_UNKNOWN);
1093 			bwrite2(bp);
1094 			error = geterror(bp);
1095 		}
1096 		brelse(bp);
1097 		if (error) {
1098 			pc_mark_irrecov(fsp);
1099 			return (EIO);
1100 		}
1101 	}
1102 	return (0);
1103 }
1104 
1105 void
1106 pc_invalfat(struct pcfs *fsp)
1107 {
1108 	struct pcfs *xfsp;
1109 	int mount_cnt = 0;
1110 
1111 	if (fsp->pcfs_fatp == (uchar_t *)0)
1112 		panic("pc_invalfat");
1113 	/*
1114 	 * Release FAT
1115 	 */
1116 	kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1117 	fsp->pcfs_fatp = NULL;
1118 	kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1119 	fsp->pcfs_fat_changemap = NULL;
1120 	/*
1121 	 * Invalidate all the blocks associated with the device.
1122 	 * Not needed if stateless.
1123 	 */
1124 	for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1125 		if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1126 			mount_cnt++;
1127 
1128 	if (!mount_cnt)
1129 		binval(fsp->pcfs_xdev);
1130 	/*
1131 	 * close mounted device
1132 	 */
1133 	(void) VOP_CLOSE(fsp->pcfs_devvp,
1134 	    (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1135 	    1, (offset_t)0, CRED(), NULL);
1136 }
1137 
1138 void
1139 pc_badfs(struct pcfs *fsp)
1140 {
1141 	cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1142 	    getmajor(fsp->pcfs_devvp->v_rdev),
1143 	    getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1144 }
1145 
1146 /*
1147  * The problem with supporting NFS on the PCFS filesystem is that there
1148  * is no good place to keep the generation number. The only possible
1149  * place is inside a directory entry. There are a few words that we
1150  * don't use - they store NT & OS/2 attributes, and the creation/last access
1151  * time of the file - but it seems wrong to use them. In addition, directory
1152  * entries come and go. If a directory is removed completely, its directory
1153  * blocks are freed and the generation numbers are lost. Whereas in ufs,
1154  * inode blocks are dedicated for inodes, so the generation numbers are
1155  * permanently kept on the disk.
1156  */
1157 static int
1158 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1159 {
1160 	struct pcnode *pcp;
1161 	struct pc_fid *pcfid;
1162 	struct pcfs *fsp;
1163 	struct pcdir *ep;
1164 	daddr_t eblkno;
1165 	int eoffset;
1166 	struct buf *bp;
1167 	int error;
1168 	pc_cluster32_t	cn;
1169 
1170 	pcfid = (struct pc_fid *)fidp;
1171 	fsp = VFSTOPCFS(vfsp);
1172 
1173 	error = pc_lockfs(fsp, 0, 0);
1174 	if (error) {
1175 		*vpp = NULL;
1176 		return (error);
1177 	}
1178 
1179 	if (pcfid->pcfid_block == 0) {
1180 		pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1181 		pcp->pc_flags |= PC_EXTERNAL;
1182 		*vpp = PCTOV(pcp);
1183 		pc_unlockfs(fsp);
1184 		return (0);
1185 	}
1186 	eblkno = pcfid->pcfid_block;
1187 	eoffset = pcfid->pcfid_offset;
1188 
1189 	if ((pc_dbtocl(fsp,
1190 	    eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1191 	    (eoffset > fsp->pcfs_clsize)) {
1192 		pc_unlockfs(fsp);
1193 		*vpp = NULL;
1194 		return (EINVAL);
1195 	}
1196 
1197 	if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1198 	    < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1199 		bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1200 		    fsp->pcfs_clsize);
1201 	} else {
1202 		/*
1203 		 * This is an access "backwards" into the FAT12/FAT16
1204 		 * root directory. A better code structure would
1205 		 * significantly improve maintainability here ...
1206 		 */
1207 		bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1208 		    (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1209 	}
1210 	if (bp->b_flags & (B_ERROR | B_STALE)) {
1211 		error = geterror(bp);
1212 		brelse(bp);
1213 		if (error)
1214 			pc_mark_irrecov(fsp);
1215 		*vpp = NULL;
1216 		pc_unlockfs(fsp);
1217 		return (error);
1218 	}
1219 	ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1220 	/*
1221 	 * Ok, if this is a valid file handle that we gave out,
1222 	 * then simply ensuring that the creation time matches,
1223 	 * the entry has not been deleted, and it has a valid first
1224 	 * character should be enough.
1225 	 *
1226 	 * Unfortunately, verifying that the <blkno, offset> _still_
1227 	 * refers to a directory entry is not easy, since we'd have
1228 	 * to search _all_ directories starting from root to find it.
1229 	 * That's a high price to pay just in case somebody is forging
1230 	 * file handles. So instead we verify that as much of the
1231 	 * entry is valid as we can:
1232 	 *
1233 	 * 1. The starting cluster is 0 (unallocated) or valid
1234 	 * 2. It is not an LFN entry
1235 	 * 3. It is not hidden (unless mounted as such)
1236 	 * 4. It is not the label
1237 	 */
1238 	cn = pc_getstartcluster(fsp, ep);
1239 	/*
1240 	 * if the starting cluster is valid, but not valid according
1241 	 * to pc_validcl(), force it to be to simplify the following if.
1242 	 */
1243 	if (cn == 0)
1244 		cn = PCF_FIRSTCLUSTER;
1245 	if (IS_FAT32(fsp)) {
1246 		if (cn >= PCF_LASTCLUSTER32)
1247 			cn = PCF_FIRSTCLUSTER;
1248 	} else {
1249 		if (cn >= PCF_LASTCLUSTER)
1250 			cn = PCF_FIRSTCLUSTER;
1251 	}
1252 	if ((!pc_validcl(fsp, cn)) ||
1253 	    (PCDL_IS_LFN(ep)) ||
1254 	    (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1255 	    ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1256 		bp->b_flags |= B_STALE | B_AGE;
1257 		brelse(bp);
1258 		pc_unlockfs(fsp);
1259 		return (EINVAL);
1260 	}
1261 	if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1262 	    (ep->pcd_filename[0] != PCD_ERASED) &&
1263 	    (pc_validchar(ep->pcd_filename[0]) ||
1264 	    (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1265 		pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1266 		pcp->pc_flags |= PC_EXTERNAL;
1267 		*vpp = PCTOV(pcp);
1268 	} else {
1269 		*vpp = NULL;
1270 	}
1271 	bp->b_flags |= B_STALE | B_AGE;
1272 	brelse(bp);
1273 	pc_unlockfs(fsp);
1274 	return (0);
1275 }
1276 
1277 /*
1278  * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1279  * a meg), so we can't bread() it all in at once. This routine reads a
1280  * fat a chunk at a time.
1281  */
1282 static int
1283 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1284 {
1285 	struct buf *bp;
1286 	size_t off;
1287 	size_t readsize;
1288 	daddr_t diskblk;
1289 	size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1290 	daddr_t start = fsp->pcfs_fatstart;
1291 
1292 	readsize = fsp->pcfs_clsize;
1293 	for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1294 		if (readsize > (fatsize - off))
1295 			readsize = fatsize - off;
1296 		diskblk = pc_dbdaddr(fsp, start +
1297 		    pc_cltodb(fsp, pc_lblkno(fsp, off)));
1298 		bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1299 		if (bp->b_flags & (B_ERROR | B_STALE)) {
1300 			brelse(bp);
1301 			return (EIO);
1302 		}
1303 		bp->b_flags |= B_STALE | B_AGE;
1304 		bcopy(bp->b_un.b_addr, fatp, readsize);
1305 		brelse(bp);
1306 	}
1307 	return (0);
1308 }
1309 
1310 /*
1311  * We write the FAT out a _lot_, in order to make sure that it
1312  * is up-to-date. But on a FAT32 system (large drive, small clusters)
1313  * the FAT might be a couple of megabytes, and writing it all out just
1314  * because we created or deleted a small file is painful (especially
1315  * since we do it for each alternate FAT too). So instead, for FAT16 and
1316  * FAT32 we only write out the bit that has changed. We don't clear
1317  * the 'updated' fields here because the caller might be writing out
1318  * several FATs, so the caller must use pc_clear_fatchanges() after
1319  * all FATs have been updated.
1320  * This function doesn't take "start" from fsp->pcfs_dosstart because
1321  * callers can use it to write either the primary or any of the alternate
1322  * FAT tables.
1323  */
1324 static int
1325 pc_writefat(struct pcfs *fsp, daddr_t start)
1326 {
1327 	struct buf *bp;
1328 	size_t off;
1329 	size_t writesize;
1330 	int	error;
1331 	uchar_t *fatp = fsp->pcfs_fatp;
1332 	size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1333 
1334 	writesize = fsp->pcfs_clsize;
1335 	for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1336 		if (writesize > (fatsize - off))
1337 			writesize = fatsize - off;
1338 		if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1339 			continue;
1340 		}
1341 		bp = ngeteblk(writesize);
1342 		bp->b_edev = fsp->pcfs_xdev;
1343 		bp->b_dev = cmpdev(bp->b_edev);
1344 		bp->b_blkno = pc_dbdaddr(fsp, start +
1345 		    pc_cltodb(fsp, pc_lblkno(fsp, off)));
1346 		bcopy(fatp, bp->b_un.b_addr, writesize);
1347 		bwrite2(bp);
1348 		error = geterror(bp);
1349 		brelse(bp);
1350 		if (error) {
1351 			return (error);
1352 		}
1353 	}
1354 	return (0);
1355 }
1356 
1357 /*
1358  * Mark the FAT cluster that 'cn' is stored in as modified.
1359  */
1360 void
1361 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1362 {
1363 	pc_cluster32_t	bn;
1364 	size_t		size;
1365 
1366 	/* which fat block is the cluster number stored in? */
1367 	if (IS_FAT32(fsp)) {
1368 		size = sizeof (pc_cluster32_t);
1369 		bn = pc_lblkno(fsp, cn * size);
1370 		fsp->pcfs_fat_changemap[bn] = 1;
1371 	} else if (IS_FAT16(fsp)) {
1372 		size = sizeof (pc_cluster16_t);
1373 		bn = pc_lblkno(fsp, cn * size);
1374 		fsp->pcfs_fat_changemap[bn] = 1;
1375 	} else {
1376 		offset_t off;
1377 		pc_cluster32_t nbn;
1378 
1379 		ASSERT(IS_FAT12(fsp));
1380 		off = cn + (cn >> 1);
1381 		bn = pc_lblkno(fsp, off);
1382 		fsp->pcfs_fat_changemap[bn] = 1;
1383 		/* does this field wrap into the next fat cluster? */
1384 		nbn = pc_lblkno(fsp, off + 1);
1385 		if (nbn != bn) {
1386 			fsp->pcfs_fat_changemap[nbn] = 1;
1387 		}
1388 	}
1389 }
1390 
1391 /*
1392  * return whether the FAT cluster 'bn' is updated and needs to
1393  * be written out.
1394  */
1395 int
1396 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1397 {
1398 	return (fsp->pcfs_fat_changemap[bn] == 1);
1399 }
1400 
1401 /*
1402  * Implementation of VFS_FREEVFS() to support forced umounts.
1403  * This is called by the vfs framework after umount, to trigger
1404  * the release of any resources still associated with the given
1405  * vfs_t once the need to keep them has gone away.
1406  */
1407 void
1408 pcfs_freevfs(vfs_t *vfsp)
1409 {
1410 	struct pcfs *fsp = VFSTOPCFS(vfsp);
1411 
1412 	mutex_enter(&pcfslock);
1413 	/*
1414 	 * Purging the FAT closes the device - can't do any more
1415 	 * I/O after this.
1416 	 */
1417 	if (fsp->pcfs_fatp != (uchar_t *)0)
1418 		pc_invalfat(fsp);
1419 	mutex_exit(&pcfslock);
1420 
1421 	VN_RELE(fsp->pcfs_devvp);
1422 	mutex_destroy(&fsp->pcfs_lock);
1423 	kmem_free(fsp, sizeof (*fsp));
1424 
1425 	/*
1426 	 * Allow _fini() to succeed now, if so desired.
1427 	 */
1428 	atomic_dec_32(&pcfs_mountcount);
1429 }
1430 
1431 
1432 /*
1433  * PC-style partition parsing and FAT BPB identification/validation code.
1434  * The partition parsers here assume:
1435  *	- a FAT filesystem will be in a partition that has one of a set of
1436  *	  recognized partition IDs
1437  *	- the user wants the 'numbering' (C:, D:, ...) that one would get
1438  *	  on MSDOS 6.x.
1439  *	  That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1440  *	  will not factor in the enumeration.
1441  * These days, such assumptions should be revisited. FAT is no longer the
1442  * only game in 'PC town'.
1443  */
1444 /*
1445  * isDosDrive()
1446  *	Boolean function.  Give it the systid field for an fdisk partition
1447  *	and it decides if that's a systid that describes a DOS drive.  We
1448  *	use systid values defined in sys/dktp/fdisk.h.
1449  */
1450 static int
1451 isDosDrive(uchar_t checkMe)
1452 {
1453 	return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1454 	    (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1455 	    (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1456 	    (checkMe == DIAGPART));
1457 }
1458 
1459 
1460 /*
1461  * isDosExtended()
1462  *	Boolean function.  Give it the systid field for an fdisk partition
1463  *	and it decides if that's a systid that describes an extended DOS
1464  *	partition.
1465  */
1466 static int
1467 isDosExtended(uchar_t checkMe)
1468 {
1469 	return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1470 }
1471 
1472 
1473 /*
1474  * isBootPart()
1475  *	Boolean function.  Give it the systid field for an fdisk partition
1476  *	and it decides if that's a systid that describes a Solaris boot
1477  *	partition.
1478  */
1479 static int
1480 isBootPart(uchar_t checkMe)
1481 {
1482 	return (checkMe == X86BOOT);
1483 }
1484 
1485 
1486 /*
1487  * noLogicalDrive()
1488  *	Display error message about not being able to find a logical
1489  *	drive.
1490  */
1491 static void
1492 noLogicalDrive(int ldrive)
1493 {
1494 	if (ldrive == BOOT_PARTITION_DRIVE) {
1495 		cmn_err(CE_NOTE, "!pcfs: no boot partition");
1496 	} else {
1497 		cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1498 	}
1499 }
1500 
1501 
1502 /*
1503  * findTheDrive()
1504  *	Discover offset of the requested logical drive, and return
1505  *	that offset (startSector), the systid of that drive (sysid),
1506  *	and a buffer pointer (bp), with the buffer contents being
1507  *	the first sector of the logical drive (i.e., the sector that
1508  *	contains the BPB for that drive).
1509  *
1510  * Note: this code is not capable of addressing >2TB disks, as it uses
1511  *       daddr_t not diskaddr_t, some of the calculations would overflow
1512  */
1513 #define	COPY_PTBL(mbr, ptblp)					\
1514 	bcopy(&(((struct mboot *)(mbr))->parts), (ptblp),	\
1515 	    FD_NUMPART * sizeof (struct ipart))
1516 
1517 static int
1518 findTheDrive(struct pcfs *fsp, buf_t **bp)
1519 {
1520 	int ldrive = fsp->pcfs_ldrive;
1521 	dev_t dev = fsp->pcfs_devvp->v_rdev;
1522 
1523 	struct ipart dosp[FD_NUMPART];	/* incore fdisk partition structure */
1524 	daddr_t lastseek = 0;		/* Disk block we sought previously */
1525 	daddr_t diskblk = 0;		/* Disk block to get */
1526 	daddr_t xstartsect;		/* base of Extended DOS partition */
1527 	int logicalDriveCount = 0;	/* Count of logical drives seen */
1528 	int extendedPart = -1;		/* index of extended dos partition */
1529 	int primaryPart = -1;		/* index of primary dos partition */
1530 	int bootPart = -1;		/* index of a Solaris boot partition */
1531 	uint32_t xnumsect = 0;		/* length of extended DOS partition */
1532 	int driveIndex;			/* computed FDISK table index */
1533 	daddr_t startsec;
1534 	len_t mediasize;
1535 	int i;
1536 	/*
1537 	 * Count of drives in the current extended partition's
1538 	 * FDISK table, and indexes of the drives themselves.
1539 	 */
1540 	int extndDrives[FD_NUMPART];
1541 	int numDrives = 0;
1542 
1543 	/*
1544 	 * Count of drives (beyond primary) in master boot record's
1545 	 * FDISK table, and indexes of the drives themselves.
1546 	 */
1547 	int extraDrives[FD_NUMPART];
1548 	int numExtraDrives = 0;
1549 
1550 	/*
1551 	 * "ldrive == 0" should never happen, as this is a request to
1552 	 * mount the physical device (and ignore partitioning). The code
1553 	 * in pcfs_mount() should have made sure that a logical drive number
1554 	 * is at least 1, meaning we're looking for drive "C:". It is not
1555 	 * safe (and a bug in the callers of this function) to request logical
1556 	 * drive number 0; we could ASSERT() but a graceful EIO is a more
1557 	 * polite way.
1558 	 */
1559 	if (ldrive == 0) {
1560 		cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1561 		noLogicalDrive(ldrive);
1562 		return (EIO);
1563 	}
1564 
1565 	/*
1566 	 *  Copy from disk block into memory aligned structure for fdisk usage.
1567 	 */
1568 	COPY_PTBL((*bp)->b_un.b_addr, dosp);
1569 
1570 	/*
1571 	 * This check is ok because a FAT BPB and a master boot record (MBB)
1572 	 * have the same signature, in the same position within the block.
1573 	 */
1574 	if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1575 		cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1576 		    "device (%x.%x):%d\n",
1577 		    getmajor(dev), getminor(dev), ldrive);
1578 		return (EINVAL);
1579 	}
1580 
1581 	/*
1582 	 * Get a summary of what is in the Master FDISK table.
1583 	 * Normally we expect to find one partition marked as a DOS drive.
1584 	 * This partition is the one Windows calls the primary dos partition.
1585 	 * If the machine has any logical drives then we also expect
1586 	 * to find a partition marked as an extended DOS partition.
1587 	 *
1588 	 * Sometimes we'll find multiple partitions marked as DOS drives.
1589 	 * The Solaris fdisk program allows these partitions
1590 	 * to be created, but Windows fdisk no longer does.  We still need
1591 	 * to support these, though, since Windows does.  We also need to fix
1592 	 * our fdisk to behave like the Windows version.
1593 	 *
1594 	 * It turns out that some off-the-shelf media have *only* an
1595 	 * Extended partition, so we need to deal with that case as well.
1596 	 *
1597 	 * Only a single (the first) Extended or Boot Partition will
1598 	 * be recognized.  Any others will be ignored.
1599 	 */
1600 	for (i = 0; i < FD_NUMPART; i++) {
1601 		DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1602 		    uint_t, (uint_t)dosp[i].systid,
1603 		    uint_t, LE_32(dosp[i].relsect),
1604 		    uint_t, LE_32(dosp[i].numsect));
1605 
1606 		if (isDosDrive(dosp[i].systid)) {
1607 			if (primaryPart < 0) {
1608 				logicalDriveCount++;
1609 				primaryPart = i;
1610 			} else {
1611 				extraDrives[numExtraDrives++] = i;
1612 			}
1613 			continue;
1614 		}
1615 		if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1616 			extendedPart = i;
1617 			continue;
1618 		}
1619 		if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1620 			bootPart = i;
1621 			continue;
1622 		}
1623 	}
1624 
1625 	if (ldrive == BOOT_PARTITION_DRIVE) {
1626 		if (bootPart < 0) {
1627 			noLogicalDrive(ldrive);
1628 			return (EINVAL);
1629 		}
1630 		startsec = LE_32(dosp[bootPart].relsect);
1631 		mediasize = LE_32(dosp[bootPart].numsect);
1632 		goto found;
1633 	}
1634 
1635 	if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1636 		startsec = LE_32(dosp[primaryPart].relsect);
1637 		mediasize = LE_32(dosp[primaryPart].numsect);
1638 		goto found;
1639 	}
1640 
1641 	/*
1642 	 * We are not looking for the C: drive (or the primary drive
1643 	 * was not found), so we had better have an extended partition
1644 	 * or extra drives in the Master FDISK table.
1645 	 */
1646 	if ((extendedPart < 0) && (numExtraDrives == 0)) {
1647 		cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1648 		noLogicalDrive(ldrive);
1649 		return (EINVAL);
1650 	}
1651 
1652 	if (extendedPart >= 0) {
1653 		diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1654 		xnumsect = LE_32(dosp[extendedPart].numsect);
1655 		do {
1656 			/*
1657 			 *  If the seek would not cause us to change
1658 			 *  position on the drive, then we're out of
1659 			 *  extended partitions to examine.
1660 			 */
1661 			if (diskblk == lastseek)
1662 				break;
1663 			logicalDriveCount += numDrives;
1664 			/*
1665 			 *  Seek the next extended partition, and find
1666 			 *  logical drives within it.
1667 			 */
1668 			brelse(*bp);
1669 			/*
1670 			 * bread() block numbers are multiples of DEV_BSIZE
1671 			 * but the device sector size (the unit of partitioning)
1672 			 * might be larger than that; pcfs_get_device_info()
1673 			 * has calculated the multiplicator for us.
1674 			 */
1675 			*bp = bread(dev,
1676 			    pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1677 			if ((*bp)->b_flags & B_ERROR) {
1678 				return (EIO);
1679 			}
1680 
1681 			lastseek = diskblk;
1682 			COPY_PTBL((*bp)->b_un.b_addr, dosp);
1683 			if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1684 				cmn_err(CE_NOTE, "!pcfs: "
1685 				    "extended partition table signature err, "
1686 				    "device (%x.%x):%d, LBA %u",
1687 				    getmajor(dev), getminor(dev), ldrive,
1688 				    (uint_t)pc_dbdaddr(fsp, diskblk));
1689 				return (EINVAL);
1690 			}
1691 			/*
1692 			 *  Count up drives, and track where the next
1693 			 *  extended partition is in case we need it.  We
1694 			 *  are expecting only one extended partition.  If
1695 			 *  there is more than one we'll only go to the
1696 			 *  first one we see, but warn about ignoring.
1697 			 */
1698 			numDrives = 0;
1699 			for (i = 0; i < FD_NUMPART; i++) {
1700 				DTRACE_PROBE4(extendedpart,
1701 				    struct pcfs *, fsp,
1702 				    uint_t, (uint_t)dosp[i].systid,
1703 				    uint_t, LE_32(dosp[i].relsect),
1704 				    uint_t, LE_32(dosp[i].numsect));
1705 				if (isDosDrive(dosp[i].systid)) {
1706 					extndDrives[numDrives++] = i;
1707 				} else if (isDosExtended(dosp[i].systid)) {
1708 					if (diskblk != lastseek) {
1709 						/*
1710 						 * Already found an extended
1711 						 * partition in this table.
1712 						 */
1713 						cmn_err(CE_NOTE,
1714 						    "!pcfs: ignoring unexpected"
1715 						    " additional extended"
1716 						    " partition");
1717 					} else {
1718 						diskblk = xstartsect +
1719 						    LE_32(dosp[i].relsect);
1720 					}
1721 				}
1722 			}
1723 		} while (ldrive > logicalDriveCount + numDrives);
1724 
1725 		ASSERT(numDrives <= FD_NUMPART);
1726 
1727 		if (ldrive <= logicalDriveCount + numDrives) {
1728 			/*
1729 			 * The number of logical drives we've found thus
1730 			 * far is enough to get us to the one we were
1731 			 * searching for.
1732 			 */
1733 			driveIndex = logicalDriveCount + numDrives - ldrive;
1734 			mediasize =
1735 			    LE_32(dosp[extndDrives[driveIndex]].numsect);
1736 			startsec =
1737 			    LE_32(dosp[extndDrives[driveIndex]].relsect) +
1738 			    lastseek;
1739 			if (startsec > (xstartsect + xnumsect)) {
1740 				cmn_err(CE_NOTE, "!pcfs: extended partition "
1741 				    "values bad");
1742 				return (EINVAL);
1743 			}
1744 			goto found;
1745 		} else {
1746 			/*
1747 			 * We ran out of extended dos partition
1748 			 * drives.  The only hope now is to go
1749 			 * back to extra drives defined in the master
1750 			 * fdisk table.  But we overwrote that table
1751 			 * already, so we must load it in again.
1752 			 */
1753 			logicalDriveCount += numDrives;
1754 			brelse(*bp);
1755 			ASSERT(fsp->pcfs_dosstart == 0);
1756 			*bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1757 			    fsp->pcfs_secsize);
1758 			if ((*bp)->b_flags & B_ERROR) {
1759 				return (EIO);
1760 			}
1761 			COPY_PTBL((*bp)->b_un.b_addr, dosp);
1762 		}
1763 	}
1764 	/*
1765 	 *  Still haven't found the drive, is it an extra
1766 	 *  drive defined in the main FDISK table?
1767 	 */
1768 	if (ldrive <= logicalDriveCount + numExtraDrives) {
1769 		driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1770 		ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1771 		mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1772 		startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1773 		goto found;
1774 	}
1775 	/*
1776 	 *  Still haven't found the drive, and there is
1777 	 *  nowhere else to look.
1778 	 */
1779 	noLogicalDrive(ldrive);
1780 	return (EINVAL);
1781 
1782 found:
1783 	/*
1784 	 * We need this value in units of sectorsize, because PCFS' internal
1785 	 * offset calculations go haywire for > 512Byte sectors unless all
1786 	 * pcfs_.*start values are in units of sectors.
1787 	 * So, assign before the capacity check (that's done in DEV_BSIZE)
1788 	 */
1789 	fsp->pcfs_dosstart = startsec;
1790 
1791 	/*
1792 	 * convert from device sectors to proper units:
1793 	 *	- starting sector: DEV_BSIZE (as argument to bread())
1794 	 *	- media size: Bytes
1795 	 */
1796 	startsec = pc_dbdaddr(fsp, startsec);
1797 	mediasize *= fsp->pcfs_secsize;
1798 
1799 	/*
1800 	 * some additional validation / warnings in case the partition table
1801 	 * and the actual media capacity are not in accordance ...
1802 	 */
1803 	if (fsp->pcfs_mediasize != 0) {
1804 		diskaddr_t startoff =
1805 		    (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1806 
1807 		if (startoff >= fsp->pcfs_mediasize ||
1808 		    startoff + mediasize > fsp->pcfs_mediasize) {
1809 			cmn_err(CE_WARN,
1810 			    "!pcfs: partition size (LBA start %u, %lld bytes, "
1811 			    "device (%x.%x):%d) smaller than "
1812 			    "mediasize (%lld bytes).\n"
1813 			    "filesystem may be truncated, access errors "
1814 			    "may result.\n",
1815 			    (uint_t)startsec, (long long)mediasize,
1816 			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1817 			    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1818 		}
1819 	} else {
1820 		fsp->pcfs_mediasize = mediasize;
1821 	}
1822 
1823 	return (0);
1824 }
1825 
1826 
1827 static fattype_t
1828 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1829 {
1830 	uint32_t ncl = fsp->pcfs_ncluster;
1831 
1832 	if (ncl <= 4096) {
1833 		if (bpb_get_FatSz16(bpb) == 0)
1834 			return (FAT_UNKNOWN);
1835 
1836 		if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1837 		    bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1838 			return (FAT12);
1839 		if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1840 			return (FAT12);
1841 		if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1842 			return (FAT16);
1843 
1844 		switch (bpb_get_Media(bpb)) {
1845 			case SS8SPT:
1846 			case DS8SPT:
1847 			case SS9SPT:
1848 			case DS9SPT:
1849 			case DS18SPT:
1850 			case DS9_15SPT:
1851 				/*
1852 				 * Is this reliable - all floppies are FAT12 ?
1853 				 */
1854 				return (FAT12);
1855 			case MD_FIXED:
1856 				/*
1857 				 * Is this reliable - disks are always FAT16 ?
1858 				 */
1859 				return (FAT16);
1860 			default:
1861 				break;
1862 		}
1863 	} else if (ncl <= 65536) {
1864 		if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1865 			return (FAT32);
1866 		if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1867 			return (FAT32);
1868 		if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1869 			return (FAT32);
1870 
1871 		if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1872 			return (FAT16);
1873 		if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1874 			return (FAT16);
1875 	}
1876 
1877 	/*
1878 	 * We don't know
1879 	 */
1880 	return (FAT_UNKNOWN);
1881 }
1882 
1883 /*
1884  * Check to see if the BPB we found is correct.
1885  *
1886  * This looks far more complicated than it needs to be for pure structural
1887  * validation. The reason for this is that parseBPB() is also used for
1888  * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1889  * BPB fields (do not) have 'known good' values, even if we (do not) reject
1890  * the BPB when attempting to mount the filesystem.
1891  *
1892  * Real-world usage of FAT shows there are a lot of corner-case situations
1893  * and, following the specification strictly, invalid filesystems out there.
1894  * Known are situations such as:
1895  *	- FAT12/FAT16 filesystems with garbage in either totsec16/32
1896  *	  instead of the zero in one of the fields mandated by the spec
1897  *	- filesystems that claim to be larger than the partition they're in
1898  *	- filesystems without valid media descriptor
1899  *	- FAT32 filesystems with RootEntCnt != 0
1900  *	- FAT32 filesystems with less than 65526 clusters
1901  *	- FAT32 filesystems without valid FSI sector
1902  *	- FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1903  *
1904  * Such filesystems are accessible by PCFS - if it'd know to start with that
1905  * the filesystem should be treated as a specific FAT type. Before S10, it
1906  * relied on the PC/fdisk partition type for the purpose and almost completely
1907  * ignored the BPB; now it ignores the partition type for anything else but
1908  * logical drive enumeration, which can result in rejection of (invalid)
1909  * FAT32 - if the partition ID says FAT32, but the filesystem, for example
1910  * has less than 65526 clusters.
1911  *
1912  * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1913  * not possible to allow all such mostly-compliant filesystems in unless one
1914  * accepts false positives (definitely invalid filesystems that cause problems
1915  * later). This at least allows to pinpoint why the mount failed.
1916  *
1917  * Due to the use of FAT on removable media, all relaxations of the rules
1918  * here need to be carefully evaluated regarding potential effects on PCFS
1919  * resilience. A faulty/"mis-crafted" filesystem must not cause a panic, so
1920  * beware.
1921  */
1922 static int
1923 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1924 {
1925 	fattype_t type;
1926 
1927 	uint32_t	ncl;	/* number of clusters in file area */
1928 	uint32_t	rec;
1929 	uint32_t	reserved;
1930 	uint32_t	fsisec, bkbootsec;
1931 	blkcnt_t	totsec, totsec16, totsec32, datasec;
1932 	size_t		fatsec, fatsec16, fatsec32, rdirsec;
1933 	size_t		secsize;
1934 	len_t		mediasize;
1935 	uint64_t	validflags = 0;
1936 
1937 	if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1938 		validflags |= BPB_BPBSIG_OK;
1939 
1940 	rec = bpb_get_RootEntCnt(bpb);
1941 	reserved = bpb_get_RsvdSecCnt(bpb);
1942 	fsisec = bpb_get_FSInfo32(bpb);
1943 	bkbootsec = bpb_get_BkBootSec32(bpb);
1944 	totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1945 	totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1946 	fatsec16 = bpb_get_FatSz16(bpb);
1947 	fatsec32 = bpb_get_FatSz32(bpb);
1948 
1949 	totsec = totsec16 ? totsec16 : totsec32;
1950 	fatsec = fatsec16 ? fatsec16 : fatsec32;
1951 
1952 	secsize = bpb_get_BytesPerSec(bpb);
1953 	if (!VALID_SECSIZE(secsize))
1954 		secsize = fsp->pcfs_secsize;
1955 	if (secsize != fsp->pcfs_secsize) {
1956 		PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1957 		    getmajor(fsp->pcfs_xdev),
1958 		    getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1959 		PC_DPRINTF2(3, "!BPB secsize %d != "
1960 		    "autodetected media block size %d\n",
1961 		    (int)secsize, (int)fsp->pcfs_secsize);
1962 		if (fsp->pcfs_ldrive) {
1963 			/*
1964 			 * We've already attempted to parse the partition
1965 			 * table. If the block size used for that don't match
1966 			 * the PCFS sector size, we're hosed one way or the
1967 			 * other. Just try what happens.
1968 			 */
1969 			secsize = fsp->pcfs_secsize;
1970 			PC_DPRINTF1(3,
1971 			    "!pcfs: Using autodetected secsize %d\n",
1972 			    (int)secsize);
1973 		} else {
1974 			/*
1975 			 * This allows mounting lofi images of PCFS partitions
1976 			 * with sectorsize != DEV_BSIZE. We can't parse the
1977 			 * partition table on whole-disk images unless the
1978 			 * (undocumented) "secsize=..." mount option is used,
1979 			 * but at least this allows us to mount if we have
1980 			 * an image of a partition.
1981 			 */
1982 			PC_DPRINTF1(3,
1983 			    "!pcfs: Using BPB secsize %d\n", (int)secsize);
1984 		}
1985 	}
1986 
1987 	if (fsp->pcfs_mediasize == 0) {
1988 		mediasize = (len_t)totsec * (len_t)secsize;
1989 		/*
1990 		 * This is not an error because not all devices support the
1991 		 * dkio(7i) mediasize queries, and/or not all devices are
1992 		 * partitioned. If we have not been able to figure out the
1993 		 * size of the underlaying medium, we have to trust the BPB.
1994 		 */
1995 		PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1996 		    "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1997 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1998 		    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1999 	} else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
2000 		cmn_err(CE_WARN,
2001 		    "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
2002 		    "FAT BPB mediasize (%lld Bytes).\n"
2003 		    "truncated filesystem on device (%x.%x):%d, access errors "
2004 		    "possible.\n",
2005 		    (long long)fsp->pcfs_mediasize,
2006 		    (long long)(totsec * (blkcnt_t)secsize),
2007 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2008 		    fsp->pcfs_ldrive);
2009 		mediasize = fsp->pcfs_mediasize;
2010 	} else {
2011 		/*
2012 		 * This is actually ok. A FAT needs not occupy the maximum
2013 		 * space available in its partition, it can be shorter.
2014 		 */
2015 		mediasize = (len_t)totsec * (len_t)secsize;
2016 	}
2017 
2018 	/*
2019 	 * Since we let just about anything pass through this function,
2020 	 * fence against divide-by-zero here.
2021 	 */
2022 	if (secsize)
2023 		rdirsec = roundup(rec * 32, secsize) / secsize;
2024 	else
2025 		rdirsec = 0;
2026 
2027 	/*
2028 	 * This assignment is necessary before pc_dbdaddr() can first be
2029 	 * used. Must initialize the value here.
2030 	 */
2031 	fsp->pcfs_secsize = secsize;
2032 	fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2033 
2034 	fsp->pcfs_mediasize = mediasize;
2035 
2036 	fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2037 	fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2038 	fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2039 	fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2040 	fsp->pcfs_rdirsec = rdirsec;
2041 
2042 	/*
2043 	 * Remember: All PCFS offset calculations in sectors. Before I/O
2044 	 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2045 	 * necessary so that media with > 512Byte sector sizes work correctly.
2046 	 */
2047 	fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2048 	fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2049 	fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2050 	datasec = totsec -
2051 	    (blkcnt_t)fatsec * fsp->pcfs_numfat -
2052 	    (blkcnt_t)rdirsec -
2053 	    (blkcnt_t)reserved;
2054 
2055 	DTRACE_PROBE4(fatgeometry,
2056 	    blkcnt_t, totsec, size_t, fatsec,
2057 	    size_t, rdirsec, blkcnt_t, datasec);
2058 
2059 	/*
2060 	 * 'totsec' is taken directly from the BPB and guaranteed to fit
2061 	 * into a 32bit unsigned integer. The calculation of 'datasec',
2062 	 * on the other hand, could underflow for incorrect values in
2063 	 * rdirsec/reserved/fatsec. Check for that.
2064 	 * We also check that the BPB conforms to the FAT specification's
2065 	 * requirement that either of the 16/32bit total sector counts
2066 	 * must be zero.
2067 	 */
2068 	if (totsec != 0 &&
2069 	    (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2070 	    datasec < totsec && datasec <= UINT32_MAX)
2071 		validflags |= BPB_TOTSEC_OK;
2072 
2073 	if ((len_t)totsec * (len_t)secsize <= mediasize)
2074 		validflags |= BPB_MEDIASZ_OK;
2075 
2076 	if (VALID_SECSIZE(secsize))
2077 		validflags |= BPB_SECSIZE_OK;
2078 	if (VALID_SPCL(fsp->pcfs_spcl))
2079 		validflags |= BPB_SECPERCLUS_OK;
2080 	if (VALID_CLSIZE(fsp->pcfs_clsize))
2081 		validflags |= BPB_CLSIZE_OK;
2082 	if (VALID_NUMFATS(fsp->pcfs_numfat))
2083 		validflags |= BPB_NUMFAT_OK;
2084 	if (VALID_RSVDSEC(reserved) && reserved < totsec)
2085 		validflags |= BPB_RSVDSECCNT_OK;
2086 	if (VALID_MEDIA(fsp->pcfs_mediadesc))
2087 		validflags |= BPB_MEDIADESC_OK;
2088 	if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2089 		validflags |= BPB_BOOTSIG16_OK;
2090 	if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2091 		validflags |= BPB_BOOTSIG32_OK;
2092 	if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2093 		validflags |= BPB_FSTYPSTR16_OK;
2094 	if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2095 		validflags |= BPB_FSTYPSTR32_OK;
2096 	if (VALID_OEMNAME(bpb_OEMName(bpb)))
2097 		validflags |= BPB_OEMNAME_OK;
2098 	if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2099 		validflags |= BPB_BKBOOTSEC_OK;
2100 	if (fsisec > 0 && fsisec <= reserved)
2101 		validflags |= BPB_FSISEC_OK;
2102 	if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2103 		validflags |= BPB_JMPBOOT_OK;
2104 	if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2105 		validflags |= BPB_FSVER_OK;
2106 	if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2107 		validflags |= BPB_VOLLAB16_OK;
2108 	if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2109 		validflags |= BPB_VOLLAB32_OK;
2110 	if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2111 		validflags |= BPB_EXTFLAGS_OK;
2112 
2113 	/*
2114 	 * Try to determine which FAT format to use.
2115 	 *
2116 	 * Calculate the number of clusters in order to determine
2117 	 * the type of FAT we are looking at.  This is the only
2118 	 * recommended way of determining FAT type, though there
2119 	 * are other hints in the data, this is the best way.
2120 	 *
2121 	 * Since we let just about "anything" pass through this function
2122 	 * without early exits, fence against divide-by-zero here.
2123 	 *
2124 	 * datasec was already validated against UINT32_MAX so we know
2125 	 * the result will not overflow the 32bit calculation.
2126 	 */
2127 	if (fsp->pcfs_spcl)
2128 		ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2129 	else
2130 		ncl = 0;
2131 
2132 	fsp->pcfs_ncluster = ncl;
2133 
2134 	/*
2135 	 * From the Microsoft FAT specification:
2136 	 * In the following example, when it says <, it does not mean <=.
2137 	 * Note also that the numbers are correct.  The first number for
2138 	 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2139 	 * and the '<' signs are not wrong.
2140 	 *
2141 	 * We "specialdetect" the corner cases, and use at least one "extra"
2142 	 * criterion to decide whether it's FAT16 or FAT32 if the cluster
2143 	 * count is dangerously close to the boundaries.
2144 	 */
2145 
2146 	if (ncl <= PCF_FIRSTCLUSTER) {
2147 		type = FAT_UNKNOWN;
2148 	} else if (ncl < 4085) {
2149 		type = FAT12;
2150 	} else if (ncl <= 4096) {
2151 		type = FAT_QUESTIONABLE;
2152 	} else if (ncl < 65525) {
2153 		type = FAT16;
2154 	} else if (ncl <= 65536) {
2155 		type = FAT_QUESTIONABLE;
2156 	} else if (ncl < PCF_LASTCLUSTER32) {
2157 		type = FAT32;
2158 	} else {
2159 		type = FAT_UNKNOWN;
2160 	}
2161 
2162 	DTRACE_PROBE4(parseBPB__initial,
2163 	    struct pcfs *, fsp, unsigned char *, bpb,
2164 	    int, validflags, fattype_t, type);
2165 
2166 recheck:
2167 	fsp->pcfs_fatsec = fatsec;
2168 
2169 	/* Do some final sanity checks for each specific type of FAT */
2170 	switch (type) {
2171 		case FAT12:
2172 			if (rec != 0)
2173 				validflags |= BPB_ROOTENTCNT_OK;
2174 			if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2175 			    bpb_get_TotSec16(bpb) == 0)
2176 				validflags |= BPB_TOTSEC16_OK;
2177 			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2178 			    bpb_get_TotSec32(bpb) == 0)
2179 				validflags |= BPB_TOTSEC32_OK;
2180 			if (bpb_get_FatSz16(bpb) == fatsec)
2181 				validflags |= BPB_FATSZ16_OK;
2182 			if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER)
2183 			    * 3 / 2)
2184 				validflags |= BPB_FATSZ_OK;
2185 			if (ncl < 4085)
2186 				validflags |= BPB_NCLUSTERS_OK;
2187 
2188 			fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2189 			fsp->pcfs_rootblksize =
2190 			    fsp->pcfs_rdirsec * secsize;
2191 			fsp->pcfs_fsistart = 0;
2192 
2193 			if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2194 				type = FAT_UNKNOWN;
2195 			break;
2196 		case FAT16:
2197 			if (rec != 0)
2198 				validflags |= BPB_ROOTENTCNT_OK;
2199 			if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2200 			    bpb_get_TotSec16(bpb) == 0)
2201 				validflags |= BPB_TOTSEC16_OK;
2202 			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2203 			    bpb_get_TotSec32(bpb) == 0)
2204 				validflags |= BPB_TOTSEC32_OK;
2205 			if (bpb_get_FatSz16(bpb) == fatsec)
2206 				validflags |= BPB_FATSZ16_OK;
2207 			if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2)
2208 				validflags |= BPB_FATSZ_OK;
2209 			if (ncl >= 4085 && ncl < 65525)
2210 				validflags |= BPB_NCLUSTERS_OK;
2211 
2212 			fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2213 			fsp->pcfs_rootblksize =
2214 			    fsp->pcfs_rdirsec * secsize;
2215 			fsp->pcfs_fsistart = 0;
2216 
2217 			if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2218 				type = FAT_UNKNOWN;
2219 			break;
2220 		case FAT32:
2221 			if (rec == 0)
2222 				validflags |= BPB_ROOTENTCNT_OK;
2223 			if (bpb_get_TotSec16(bpb) == 0)
2224 				validflags |= BPB_TOTSEC16_OK;
2225 			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2226 				validflags |= BPB_TOTSEC32_OK;
2227 			if (bpb_get_FatSz16(bpb) == 0)
2228 				validflags |= BPB_FATSZ16_OK;
2229 			if (bpb_get_FatSz32(bpb) == fatsec)
2230 				validflags |= BPB_FATSZ32_OK;
2231 			if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4)
2232 				validflags |= BPB_FATSZ_OK;
2233 			if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2234 				validflags |= BPB_NCLUSTERS_OK;
2235 
2236 			fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2237 			fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2238 			fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2239 			if (validflags & BPB_FSISEC_OK)
2240 				fsp->pcfs_flags |= PCFS_FSINFO_OK;
2241 			fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2242 			if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2243 				validflags |= BPB_ROOTCLUSTER_OK;
2244 
2245 			/*
2246 			 * Current PCFS code only works if 'pcfs_rdirstart'
2247 			 * contains the root cluster number on FAT32.
2248 			 * That's a mis-use and would better be changed.
2249 			 */
2250 			fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2251 
2252 			if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2253 				type = FAT_UNKNOWN;
2254 			break;
2255 		case FAT_QUESTIONABLE:
2256 			type = secondaryBPBChecks(fsp, bpb, secsize);
2257 			goto recheck;
2258 		default:
2259 			ASSERT(type == FAT_UNKNOWN);
2260 			break;
2261 	}
2262 
2263 	ASSERT(type != FAT_QUESTIONABLE);
2264 
2265 	fsp->pcfs_fattype = type;
2266 
2267 	if (valid)
2268 		*valid = validflags;
2269 
2270 	DTRACE_PROBE4(parseBPB__final,
2271 	    struct pcfs *, fsp, unsigned char *, bpb,
2272 	    int, validflags, fattype_t, type);
2273 
2274 	if (type != FAT_UNKNOWN) {
2275 		ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2276 		ASSERT(ISP2(secsize / DEV_BSIZE));
2277 		return (1);
2278 	}
2279 
2280 	return (0);
2281 }
2282 
2283 
2284 /*
2285  * Detect the device's native block size (sector size).
2286  *
2287  * Test whether the device is:
2288  *	- a floppy device from a known controller type via DKIOCINFO
2289  *	- a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2290  *	- a USB floppy drive (identified by drive geometry)
2291  *
2292  * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2293  * to minimize risks due to slow I/O and user hotplugging / device ejection.
2294  *
2295  * This might be a bit wasteful on kernel stack space; if anyone's
2296  * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2297  */
2298 static void
2299 pcfs_device_getinfo(struct pcfs *fsp)
2300 {
2301 	dev_t			rdev = fsp->pcfs_xdev;
2302 	int			error;
2303 	union {
2304 		struct dk_minfo		mi;
2305 		struct dk_cinfo		ci;
2306 		struct dk_geom		gi;
2307 		struct fd_char		fc;
2308 	} arg;				/* save stackspace ... */
2309 	intptr_t argp = (intptr_t)&arg;
2310 	ldi_handle_t		lh;
2311 	ldi_ident_t		li;
2312 	int isfloppy, isremoveable, ishotpluggable;
2313 	cred_t			*cr = CRED();
2314 
2315 	if (ldi_ident_from_dev(rdev, &li))
2316 		goto out;
2317 
2318 	error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2319 	ldi_ident_release(li);
2320 	if (error)
2321 		goto out;
2322 
2323 	/*
2324 	 * Not sure if this could possibly happen. It'd be a bit like
2325 	 * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2326 	 * expecting it, needs some thought if triggered ...
2327 	 */
2328 	ASSERT(fsp->pcfs_xdev == rdev);
2329 
2330 	/*
2331 	 * Check for removeable/hotpluggable media.
2332 	 */
2333 	if (ldi_ioctl(lh, DKIOCREMOVABLE,
2334 	    (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2335 		isremoveable = 0;
2336 	}
2337 	if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2338 	    (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2339 		ishotpluggable = 0;
2340 	}
2341 
2342 	/*
2343 	 * Make sure we don't use "half-initialized" values if the ioctls fail.
2344 	 */
2345 	if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2346 		bzero(&arg, sizeof (arg));
2347 		fsp->pcfs_mediasize = 0;
2348 	} else {
2349 		fsp->pcfs_mediasize =
2350 		    (len_t)arg.mi.dki_lbsize *
2351 		    (len_t)arg.mi.dki_capacity;
2352 	}
2353 
2354 	if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2355 		if (fsp->pcfs_secsize == 0) {
2356 			fsp->pcfs_secsize = arg.mi.dki_lbsize;
2357 			fsp->pcfs_sdshift =
2358 			    ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2359 		} else {
2360 			PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2361 			    "%d, device (%x.%x), different from user-provided "
2362 			    "%d. User override - ignoring autodetect result.\n",
2363 			    arg.mi.dki_lbsize,
2364 			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2365 			    fsp->pcfs_secsize);
2366 		}
2367 	} else if (arg.mi.dki_lbsize) {
2368 		PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2369 		    "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2370 		    "Ignoring autodetect result.\n",
2371 		    arg.mi.dki_lbsize,
2372 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2373 	}
2374 
2375 	/*
2376 	 * We treat the following media types as a floppy by default.
2377 	 */
2378 	isfloppy =
2379 	    (arg.mi.dki_media_type == DK_FLOPPY ||
2380 	    arg.mi.dki_media_type == DK_ZIP ||
2381 	    arg.mi.dki_media_type == DK_JAZ);
2382 
2383 	/*
2384 	 * if this device understands fdio(7I) requests it's
2385 	 * obviously a floppy drive.
2386 	 */
2387 	if (!isfloppy &&
2388 	    !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2389 		isfloppy = 1;
2390 
2391 	/*
2392 	 * some devices we like to treat as floppies, but they don't
2393 	 * understand fdio(7I) requests.
2394 	 */
2395 	if (!isfloppy &&
2396 	    !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2397 	    (arg.ci.dki_ctype == DKC_WDC2880 ||
2398 	    arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2399 	    arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2400 	    arg.ci.dki_ctype == DKC_INTEL82077))
2401 		isfloppy = 1;
2402 
2403 	/*
2404 	 * This is the "final fallback" test - media with
2405 	 * 2 heads and 80 cylinders are assumed to be floppies.
2406 	 * This is normally true for USB floppy drives ...
2407 	 */
2408 	if (!isfloppy &&
2409 	    !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2410 	    (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2411 		isfloppy = 1;
2412 
2413 	/*
2414 	 * This is similar to the "old" PCFS code that sets this flag
2415 	 * just based on the media descriptor being 0xf8 (MD_FIXED).
2416 	 * Should be re-worked. We really need some specialcasing for
2417 	 * removeable media.
2418 	 */
2419 	if (!isfloppy) {
2420 		fsp->pcfs_flags |= PCFS_NOCHK;
2421 	}
2422 
2423 	/*
2424 	 * We automatically disable access time updates if the medium is
2425 	 * removeable and/or hotpluggable, and the admin did not explicitly
2426 	 * request access time updates (via the "atime" mount option).
2427 	 * The majority of flash-based media should fit this category.
2428 	 * Minimizing write access extends the lifetime of your memory stick !
2429 	 */
2430 	if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2431 	    (isremoveable || ishotpluggable | isfloppy)) {
2432 		fsp->pcfs_flags |= PCFS_NOATIME;
2433 	}
2434 
2435 	(void) ldi_close(lh, FREAD, cr);
2436 out:
2437 	if (fsp->pcfs_secsize == 0) {
2438 		PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2439 		    "device (%x.%x) failed, no user-provided fallback. "
2440 		    "Using %d bytes.\n",
2441 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2442 		    DEV_BSIZE);
2443 		fsp->pcfs_secsize = DEV_BSIZE;
2444 		fsp->pcfs_sdshift = 0;
2445 	}
2446 	ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2447 	ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2448 }
2449 
2450 /*
2451  * Get the FAT type for the DOS medium.
2452  *
2453  * -------------------------
2454  * According to Microsoft:
2455  *   The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2456  * count of clusters on the volume and nothing else.
2457  * -------------------------
2458  *
2459  */
2460 static int
2461 pc_getfattype(struct pcfs *fsp)
2462 {
2463 	int error = 0;
2464 	buf_t *bp = NULL;
2465 	struct vnode *devvp = fsp->pcfs_devvp;
2466 	dev_t	dev = devvp->v_rdev;
2467 
2468 	/*
2469 	 * Detect the native block size of the medium, and attempt to
2470 	 * detect whether the medium is removeable.
2471 	 * We do treat removable media (floppies, USB and FireWire disks)
2472 	 * differently wrt. to the frequency and synchronicity of FAT updates.
2473 	 * We need to know the media block size in order to be able to
2474 	 * parse the partition table.
2475 	 */
2476 	pcfs_device_getinfo(fsp);
2477 
2478 	/*
2479 	 * Unpartitioned media (floppies and some removeable devices)
2480 	 * don't have a partition table, the FAT BPB is at disk block 0.
2481 	 * Start out by reading block 0.
2482 	 */
2483 	fsp->pcfs_dosstart = 0;
2484 	bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2485 
2486 	if (error = geterror(bp))
2487 		goto out;
2488 
2489 	/*
2490 	 * If a logical drive number is requested, parse the partition table
2491 	 * and attempt to locate it. Otherwise, proceed immediately to the
2492 	 * BPB check. findTheDrive(), if successful, returns the disk block
2493 	 * number where the requested partition starts in "startsec".
2494 	 */
2495 	if (fsp->pcfs_ldrive != 0) {
2496 		PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2497 		    "device (%x,%x):%d to find BPB\n",
2498 		    getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2499 
2500 		if (error = findTheDrive(fsp, &bp))
2501 			goto out;
2502 
2503 		ASSERT(fsp->pcfs_dosstart != 0);
2504 
2505 		brelse(bp);
2506 		bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2507 		    fsp->pcfs_secsize);
2508 		if (error = geterror(bp))
2509 			goto out;
2510 	}
2511 
2512 	/*
2513 	 * Validate the BPB and fill in the instance structure.
2514 	 */
2515 	if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2516 		PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2517 		    "device (%x.%x):%d, disk LBA %u\n",
2518 		    getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2519 		    (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2520 		error = EINVAL;
2521 		goto out;
2522 	}
2523 
2524 	ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2525 
2526 out:
2527 	/*
2528 	 * Release the buffer used
2529 	 */
2530 	if (bp != NULL)
2531 		brelse(bp);
2532 	return (error);
2533 }
2534 
2535 
2536 /*
2537  * Get the file allocation table.
2538  * If there is an old FAT, invalidate it.
2539  */
2540 int
2541 pc_getfat(struct pcfs *fsp)
2542 {
2543 	struct buf *bp = NULL;
2544 	uchar_t *fatp = NULL;
2545 	uchar_t *fat_changemap = NULL;
2546 	int error;
2547 	int fat_changemapsize;
2548 	int flags = 0;
2549 	int nfat;
2550 	int altfat_mustmatch = 0;
2551 	int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2552 
2553 	if (fsp->pcfs_fatp) {
2554 		/*
2555 		 * There is a FAT in core.
2556 		 * If there are open file pcnodes or we have modified it or
2557 		 * it hasn't timed out yet use the in core FAT.
2558 		 * Otherwise invalidate it and get a new one
2559 		 */
2560 #ifdef notdef
2561 		if (fsp->pcfs_frefs ||
2562 		    (fsp->pcfs_flags & PCFS_FATMOD) ||
2563 		    (gethrestime_sec() < fsp->pcfs_fattime)) {
2564 			return (0);
2565 		} else {
2566 			mutex_enter(&pcfslock);
2567 			pc_invalfat(fsp);
2568 			mutex_exit(&pcfslock);
2569 		}
2570 #endif /* notdef */
2571 		return (0);
2572 	}
2573 
2574 	/*
2575 	 * Get FAT and check it for validity
2576 	 */
2577 	fatp = kmem_alloc(fatsize, KM_SLEEP);
2578 	error = pc_readfat(fsp, fatp);
2579 	if (error) {
2580 		flags = B_ERROR;
2581 		goto out;
2582 	}
2583 	fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2584 	fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2585 	fsp->pcfs_fatp = fatp;
2586 	fsp->pcfs_fat_changemapsize = fat_changemapsize;
2587 	fsp->pcfs_fat_changemap = fat_changemap;
2588 
2589 	/*
2590 	 * The only definite signature check is that the
2591 	 * media descriptor byte should match the first byte
2592 	 * of the FAT block.
2593 	 */
2594 	if (fatp[0] != fsp->pcfs_mediadesc) {
2595 		cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2596 		    "media descriptor %x, FAT[0] lowbyte %x\n",
2597 		    (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2598 		cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2599 		altfat_mustmatch = 1;
2600 	}
2601 
2602 	/*
2603 	 * Get alternate FATs and check for consistency
2604 	 * This is an inlined version of pc_readfat().
2605 	 * Since we're only comparing FAT and alternate FAT,
2606 	 * there's no reason to let pc_readfat() copy data out
2607 	 * of the buf. Instead, compare in-situ, one cluster
2608 	 * at a time.
2609 	 */
2610 	for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2611 		size_t startsec;
2612 		size_t off;
2613 
2614 		startsec = pc_dbdaddr(fsp,
2615 		    fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2616 
2617 		for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2618 			daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2619 			    pc_cltodb(fsp, pc_lblkno(fsp, off)));
2620 
2621 			bp = bread(fsp->pcfs_xdev, fatblk,
2622 			    MIN(fsp->pcfs_clsize, fatsize - off));
2623 			if (bp->b_flags & (B_ERROR | B_STALE)) {
2624 				cmn_err(CE_NOTE,
2625 				    "!pcfs: alternate FAT #%d (start LBA %p)"
2626 				    " read error at offset %ld on device"
2627 				    " (%x.%x):%d",
2628 				    nfat, (void *)(uintptr_t)startsec, off,
2629 				    getmajor(fsp->pcfs_xdev),
2630 				    getminor(fsp->pcfs_xdev),
2631 				    fsp->pcfs_ldrive);
2632 				flags = B_ERROR;
2633 				error = EIO;
2634 				goto out;
2635 			}
2636 			bp->b_flags |= B_STALE | B_AGE;
2637 			if (bcmp(bp->b_un.b_addr, fatp + off,
2638 			    MIN(fsp->pcfs_clsize, fatsize - off))) {
2639 				cmn_err(CE_NOTE,
2640 				    "!pcfs: alternate FAT #%d (start LBA %p)"
2641 				    " corrupted at offset %ld on device"
2642 				    " (%x.%x):%d",
2643 				    nfat, (void *)(uintptr_t)startsec, off,
2644 				    getmajor(fsp->pcfs_xdev),
2645 				    getminor(fsp->pcfs_xdev),
2646 				    fsp->pcfs_ldrive);
2647 				if (altfat_mustmatch) {
2648 					flags = B_ERROR;
2649 					error = EIO;
2650 					goto out;
2651 				}
2652 			}
2653 			brelse(bp);
2654 			bp = NULL;	/* prevent double release */
2655 		}
2656 	}
2657 
2658 	fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2659 	fsp->pcfs_fatjustread = 1;
2660 
2661 	/*
2662 	 * Retrieve FAT32 fsinfo sector.
2663 	 * A failure to read this is not fatal to accessing the volume.
2664 	 * It simply means operations that count or search free blocks
2665 	 * will have to do a full FAT walk, vs. a possibly quicker lookup
2666 	 * of the summary information.
2667 	 * Hence, we log a message but return success overall after this point.
2668 	 */
2669 	if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2670 		struct fat_od_fsi *fsinfo_disk;
2671 
2672 		bp = bread(fsp->pcfs_xdev,
2673 		    pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2674 		fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2675 		if (bp->b_flags & (B_ERROR | B_STALE) ||
2676 		    !FSISIG_OK(fsinfo_disk)) {
2677 			cmn_err(CE_NOTE,
2678 			    "!pcfs: error reading fat32 fsinfo from "
2679 			    "device (%x.%x):%d, block %lld",
2680 			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2681 			    fsp->pcfs_ldrive,
2682 			    (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2683 			fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2684 			fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2685 			fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2686 		} else {
2687 			bp->b_flags |= B_STALE | B_AGE;
2688 			fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2689 			fsp->pcfs_fsinfo.fs_free_clusters =
2690 			    LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2691 			fsp->pcfs_fsinfo.fs_next_free =
2692 			    LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2693 		}
2694 		brelse(bp);
2695 		bp = NULL;
2696 	}
2697 
2698 	if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2699 		fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2700 	else
2701 		fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2702 
2703 	return (0);
2704 
2705 out:
2706 	cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2707 	if (bp)
2708 		brelse(bp);
2709 	if (fatp)
2710 		kmem_free(fatp, fatsize);
2711 	if (fat_changemap)
2712 		kmem_free(fat_changemap, fat_changemapsize);
2713 
2714 	if (flags) {
2715 		pc_mark_irrecov(fsp);
2716 	}
2717 	return (error);
2718 }
2719