xref: /titanic_51/usr/src/uts/common/fs/pcfs/pc_vfsops.c (revision 8ef10d209f1443865a8ce5b1eb59838b64a64e67)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kmem.h>
31 #include <sys/user.h>
32 #include <sys/proc.h>
33 #include <sys/cred.h>
34 #include <sys/disp.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/vnode.h>
39 #include <sys/fdio.h>
40 #include <sys/file.h>
41 #include <sys/uio.h>
42 #include <sys/conf.h>
43 #include <sys/statvfs.h>
44 #include <sys/mount.h>
45 #include <sys/pathname.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/sysmacros.h>
49 #include <sys/conf.h>
50 #include <sys/mkdev.h>
51 #include <sys/swap.h>
52 #include <sys/sunddi.h>
53 #include <sys/sunldi.h>
54 #include <sys/dktp/fdisk.h>
55 #include <sys/fs/pc_label.h>
56 #include <sys/fs/pc_fs.h>
57 #include <sys/fs/pc_dir.h>
58 #include <sys/fs/pc_node.h>
59 #include <fs/fs_subr.h>
60 #include <sys/modctl.h>
61 #include <sys/dkio.h>
62 #include <sys/open.h>
63 #include <sys/mntent.h>
64 #include <sys/policy.h>
65 #include <sys/atomic.h>
66 #include <sys/sdt.h>
67 
68 /*
69  * The majority of PC media use a 512 sector size, but
70  * occasionally you will run across a 1k sector size.
71  * For media with a 1k sector size, fd_strategy() requires
72  * the I/O size to be a 1k multiple; so when the sector size
73  * is not yet known, always read 1k.
74  */
75 #define	PC_SAFESECSIZE	(PC_SECSIZE * 2)
76 
77 static int pcfs_pseudo_floppy(dev_t);
78 
79 static int pcfsinit(int, char *);
80 static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
81 	struct cred *);
82 static int pcfs_unmount(struct vfs *, int, struct cred *);
83 static int pcfs_root(struct vfs *, struct vnode **);
84 static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
85 static int pc_syncfsnodes(struct pcfs *);
86 static int pcfs_sync(struct vfs *, short, struct cred *);
87 static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
88 static void pcfs_freevfs(vfs_t *vfsp);
89 
90 static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
91 static int pc_writefat(struct pcfs *fsp, daddr_t start);
92 
93 static int pc_getfattype(struct pcfs *fsp);
94 static void pcfs_parse_mntopts(struct pcfs *fsp, struct mounta *uap);
95 
96 
97 /*
98  * pcfs mount options table
99  */
100 
101 static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
102 static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
103 static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
104 static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
105 static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
106 static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
107 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
108 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
109 
110 static mntopt_t mntopts[] = {
111 /*
112  *	option name	cancel option	default arg	flags	opt data
113  */
114 	{ MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
115 	{ MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
116 	{ MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
117 	{ MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
118 	{ MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
119 	{ MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
120 	{ MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
121 	{ MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
122 	{ MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
123 	{ MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
124 };
125 
126 static mntopts_t pcfs_mntopts = {
127 	sizeof (mntopts) / sizeof (mntopt_t),
128 	mntopts
129 };
130 
131 int pcfsdebuglevel = 0;
132 
133 /*
134  * pcfslock:	protects the list of mounted pc filesystems "pc_mounttab.
135  * pcfs_lock:	(inside per filesystem structure "pcfs")
136  *		per filesystem lock. Most of the vfsops and vnodeops are
137  *		protected by this lock.
138  * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
139  *
140  * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
141  *
142  * pcfs_mountcount:	used to prevent module unloads while there is still
143  *			pcfs state from a former mount hanging around. With
144  *			forced umount support, the filesystem module must not
145  *			be allowed to go away before the last VFS_FREEVFS()
146  *			call has been made.
147  *			Since this is just an atomic counter, there's no need
148  *			for locking.
149  */
150 kmutex_t	pcfslock;
151 krwlock_t	pcnodes_lock;
152 uint32_t	pcfs_mountcount;
153 
154 static int pcfstype;
155 
156 static vfsdef_t vfw = {
157 	VFSDEF_VERSION,
158 	"pcfs",
159 	pcfsinit,
160 	VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS,
161 	&pcfs_mntopts
162 };
163 
164 extern struct mod_ops mod_fsops;
165 
166 static struct modlfs modlfs = {
167 	&mod_fsops,
168 	"PC filesystem v1.2",
169 	&vfw
170 };
171 
172 static struct modlinkage modlinkage = {
173 	MODREV_1,
174 	&modlfs,
175 	NULL
176 };
177 
178 int
179 _init(void)
180 {
181 	int	error;
182 
183 #if !defined(lint)
184 	/* make sure the on-disk structures are sane */
185 	ASSERT(sizeof (struct pcdir) == 32);
186 	ASSERT(sizeof (struct pcdir_lfn) == 32);
187 #endif
188 	mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
189 	rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
190 	error = mod_install(&modlinkage);
191 	if (error) {
192 		mutex_destroy(&pcfslock);
193 		rw_destroy(&pcnodes_lock);
194 	}
195 	return (error);
196 }
197 
198 int
199 _fini(void)
200 {
201 	int	error;
202 
203 	/*
204 	 * If a forcedly unmounted instance is still hanging around,
205 	 * we cannot allow the module to be unloaded because that would
206 	 * cause panics once the VFS framework decides it's time to call
207 	 * into VFS_FREEVFS().
208 	 */
209 	if (pcfs_mountcount)
210 		return (EBUSY);
211 
212 	error = mod_remove(&modlinkage);
213 	if (error)
214 		return (error);
215 	mutex_destroy(&pcfslock);
216 	rw_destroy(&pcnodes_lock);
217 	/*
218 	 * Tear down the operations vectors
219 	 */
220 	(void) vfs_freevfsops_by_type(pcfstype);
221 	vn_freevnodeops(pcfs_fvnodeops);
222 	vn_freevnodeops(pcfs_dvnodeops);
223 	return (0);
224 }
225 
226 int
227 _info(struct modinfo *modinfop)
228 {
229 	return (mod_info(&modlinkage, modinfop));
230 }
231 
232 /* ARGSUSED1 */
233 static int
234 pcfsinit(int fstype, char *name)
235 {
236 	static const fs_operation_def_t pcfs_vfsops_template[] = {
237 		VFSNAME_MOUNT,		{ .vfs_mount = pcfs_mount },
238 		VFSNAME_UNMOUNT,	{ .vfs_unmount = pcfs_unmount },
239 		VFSNAME_ROOT,		{ .vfs_root = pcfs_root },
240 		VFSNAME_STATVFS,	{ .vfs_statvfs = pcfs_statvfs },
241 		VFSNAME_SYNC,		{ .vfs_sync = pcfs_sync },
242 		VFSNAME_VGET,		{ .vfs_vget = pcfs_vget },
243 		VFSNAME_FREEVFS,	{ .vfs_freevfs = pcfs_freevfs },
244 		NULL,			NULL
245 	};
246 	int error;
247 
248 	error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
249 	if (error != 0) {
250 		cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
251 		return (error);
252 	}
253 
254 	error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
255 	if (error != 0) {
256 		(void) vfs_freevfsops_by_type(fstype);
257 		cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
258 		return (error);
259 	}
260 
261 	error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
262 	if (error != 0) {
263 		(void) vfs_freevfsops_by_type(fstype);
264 		vn_freevnodeops(pcfs_fvnodeops);
265 		cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
266 		return (error);
267 	}
268 
269 	pcfstype = fstype;
270 	(void) pc_init();
271 	pcfs_mountcount = 0;
272 	return (0);
273 }
274 
275 static struct pcfs *pc_mounttab = NULL;
276 
277 extern struct pcfs_args pc_tz;
278 
279 /*
280  *  Define some special logical drives we use internal to this file.
281  */
282 #define	BOOT_PARTITION_DRIVE	99
283 #define	PRIMARY_DOS_DRIVE	1
284 #define	UNPARTITIONED_DRIVE	0
285 
286 static int
287 pcfs_device_identify(
288 	struct vfs *vfsp,
289 	struct mounta *uap,
290 	struct cred *cr,
291 	int *dos_ldrive,
292 	dev_t *xdev)
293 {
294 	struct pathname special;
295 	char *c;
296 	struct vnode *bvp;
297 	int oflag, aflag;
298 	int error;
299 
300 	/*
301 	 * Resolve path name of special file being mounted.
302 	 */
303 	if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
304 		return (error);
305 	}
306 
307 	*dos_ldrive = -1;
308 
309 	if (error =
310 	    lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &bvp)) {
311 		/*
312 		 * If there's no device node, the name specified most likely
313 		 * maps to a PCFS-style "partition specifier" to select a
314 		 * harddisk primary/logical partition. Disable floppy-specific
315 		 * checks in such cases unless an explicit :A or :B is
316 		 * requested.
317 		 */
318 
319 		/*
320 		 * Split the pathname string at the last ':' separator.
321 		 * If there's no ':' in the device name, or the ':' is the
322 		 * last character in the string, the name is invalid and
323 		 * the error from the previous lookup will be returned.
324 		 */
325 		c = strrchr(special.pn_path, ':');
326 		if (c == NULL || strlen(c) == 0)
327 			goto devlookup_done;
328 
329 		*c++ = '\0';
330 
331 		/*
332 		 * PCFS partition name suffixes can be:
333 		 *	- "boot" to indicate the X86BOOT partition
334 		 *	- a drive letter [c-z] for the "DOS logical drive"
335 		 *	- a drive number 1..24 for the "DOS logical drive"
336 		 *	- a "floppy name letter", 'a' or 'b' (just strip this)
337 		 */
338 		if (strcasecmp(c, "boot") == 0) {
339 			/*
340 			 * The Solaris boot partition is requested.
341 			 */
342 			*dos_ldrive = BOOT_PARTITION_DRIVE;
343 		} else if (strspn(c, "0123456789") == strlen(c)) {
344 			/*
345 			 * All digits - parse the partition number.
346 			 */
347 			long drvnum = 0;
348 
349 			if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
350 				/*
351 				 * A number alright - in the allowed range ?
352 				 */
353 				if (drvnum > 24 || drvnum == 0)
354 					error = ENXIO;
355 			}
356 			if (error)
357 				goto devlookup_done;
358 			*dos_ldrive = (int)drvnum;
359 		} else if (strlen(c) == 1) {
360 			/*
361 			 * A single trailing character was specified.
362 			 *	- [c-zC-Z] means a harddisk partition, and
363 			 *	  we retrieve the partition number.
364 			 *	- [abAB] means a floppy drive, so we swallow
365 			 *	  the "drive specifier" and test later
366 			 *	  whether the physical device is a floppy or
367 			 *	  PCMCIA pseudofloppy (sram card).
368 			 */
369 			*c = tolower(*c);
370 			if (*c == 'a' || *c == 'b') {
371 				*dos_ldrive = UNPARTITIONED_DRIVE;
372 			} else if (*c < 'c' || *c > 'z') {
373 				error = ENXIO;
374 				goto devlookup_done;
375 			} else {
376 				*dos_ldrive = 1 + *c - 'c';
377 			}
378 		} else {
379 			/*
380 			 * Can't parse this - pass through previous error.
381 			 */
382 			goto devlookup_done;
383 		}
384 
385 
386 		error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
387 		    NULLVPP, &bvp);
388 	} else {
389 		*dos_ldrive = UNPARTITIONED_DRIVE;
390 	}
391 devlookup_done:
392 	pn_free(&special);
393 	if (error)
394 		return (error);
395 
396 	ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
397 
398 	*xdev = bvp->v_rdev;
399 
400 	/*
401 	 * Verify caller's permission to open the device special file.
402 	 */
403 	if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
404 	    ((uap->flags & MS_RDONLY) != 0)) {
405 		oflag = FREAD;
406 		aflag = VREAD;
407 	} else {
408 		oflag = FREAD | FWRITE;
409 		aflag = VREAD | VWRITE;
410 	}
411 
412 	if (bvp->v_type != VBLK)
413 		error = ENOTBLK;
414 	else if (getmajor(*xdev) >= devcnt)
415 		error = ENXIO;
416 
417 	if ((error != 0) ||
418 	    (error = VOP_ACCESS(bvp, aflag, 0, cr, NULL)) != 0 ||
419 	    (error = secpolicy_spec_open(cr, bvp, oflag)) != 0) {
420 		VN_RELE(bvp);
421 		return (error);
422 	}
423 
424 	VN_RELE(bvp);
425 	return (0);
426 }
427 
428 static int
429 pcfs_device_ismounted(
430 	struct vfs *vfsp,
431 	int dos_ldrive,
432 	dev_t xdev,
433 	int *remounting,
434 	dev_t *pseudodev)
435 {
436 	struct pcfs *fsp;
437 	int remount = *remounting;
438 
439 	/*
440 	 * Ensure that this logical drive isn't already mounted, unless
441 	 * this is a REMOUNT request.
442 	 * Note: The framework will perform this check if the "...:c"
443 	 * PCFS-style "logical drive" syntax has not been used and an
444 	 * actually existing physical device is backing this filesystem.
445 	 * Once all block device drivers support PC-style partitioning,
446 	 * this codeblock can be dropped.
447 	 */
448 	*pseudodev = xdev;
449 
450 	if (dos_ldrive) {
451 		mutex_enter(&pcfslock);
452 		for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
453 			if (fsp->pcfs_xdev == xdev &&
454 			    fsp->pcfs_ldrive == dos_ldrive) {
455 				mutex_exit(&pcfslock);
456 				if (remount) {
457 					return (0);
458 				} else {
459 					return (EBUSY);
460 				}
461 			}
462 		/*
463 		 * Assign a unique device number for the vfs
464 		 * The old way (getudev() + a constantly incrementing
465 		 * major number) was wrong because it changes vfs_dev
466 		 * across mounts and reboots, which breaks nfs file handles.
467 		 * UFS just uses the real dev_t. We can't do that because
468 		 * of the way pcfs opens fdisk partitons (the :c and :d
469 		 * partitions are on the same dev_t). Though that _might_
470 		 * actually be ok, since the file handle contains an
471 		 * absolute block number, it's probably better to make them
472 		 * different. So I think we should retain the original
473 		 * dev_t, but come up with a different minor number based
474 		 * on the logical drive that will _always_ come up the same.
475 		 * For now, we steal the upper 6 bits.
476 		 */
477 #ifdef notdef
478 		/* what should we do here? */
479 		if (((getminor(xdev) >> 12) & 0x3F) != 0)
480 			printf("whoops - upper bits used!\n");
481 #endif
482 		*pseudodev = makedevice(getmajor(xdev),
483 		    ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
484 		if (vfs_devmounting(*pseudodev, vfsp)) {
485 			mutex_exit(&pcfslock);
486 			return (EBUSY);
487 		}
488 		if (vfs_devismounted(*pseudodev)) {
489 			mutex_exit(&pcfslock);
490 			if (remount) {
491 				return (0);
492 			} else {
493 				return (EBUSY);
494 			}
495 		}
496 		mutex_exit(&pcfslock);
497 	} else {
498 		*pseudodev = xdev;
499 		if (vfs_devmounting(*pseudodev, vfsp)) {
500 			return (EBUSY);
501 		}
502 		if (vfs_devismounted(*pseudodev))
503 			if (remount) {
504 				return (0);
505 			} else {
506 				return (EBUSY);
507 			}
508 	}
509 
510 	/*
511 	 * This is not a remount. Even if MS_REMOUNT was requested,
512 	 * the caller needs to proceed as it would on an ordinary
513 	 * mount.
514 	 */
515 	*remounting = 0;
516 
517 	ASSERT(*pseudodev);
518 	return (0);
519 }
520 
521 /*
522  * Get the PCFS-specific mount options from the VFS framework.
523  * For "timezone" and "secsize", we need to parse the number
524  * ourselves and ensure its validity.
525  * Note: "secsize" is deliberately undocumented at this time,
526  * it's a workaround for devices (particularly: lofi image files)
527  * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
528  */
529 static void
530 pcfs_parse_mntopts(struct pcfs *fsp, struct mounta *uap)
531 {
532 	char *c;
533 	char *endptr;
534 	long l;
535 	struct vfs *vfsp = fsp->pcfs_vfs;
536 
537 	ASSERT(fsp->pcfs_secondswest == 0);
538 	ASSERT(fsp->pcfs_secsize == 0);
539 
540 	if (uap->flags & MS_RDONLY) {
541 		vfsp->vfs_flag |= VFS_RDONLY;
542 		vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
543 	}
544 
545 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
546 		fsp->pcfs_flags |= PCFS_HIDDEN;
547 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
548 		fsp->pcfs_flags |= PCFS_FOLDCASE;
549 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
550 		fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
551 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
552 		fsp->pcfs_flags |= PCFS_NOATIME;
553 
554 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
555 		if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
556 		    endptr == c + strlen(c)) {
557 			/*
558 			 * A number alright - in the allowed range ?
559 			 */
560 			if (l <= -12*3600 || l >= 12*3600) {
561 				cmn_err(CE_WARN, "!pcfs: invalid use of "
562 				    "'timezone' mount option - %ld "
563 				    "is out of range. Assuming 0.", l);
564 				l = 0;
565 			}
566 		} else {
567 			cmn_err(CE_WARN, "!pcfs: invalid use of "
568 			    "'timezone' mount option - argument %s "
569 			    "is not a valid number. Assuming 0.", c);
570 			l = 0;
571 		}
572 		fsp->pcfs_secondswest = l;
573 	}
574 
575 	/*
576 	 * The "secsize=..." mount option is a workaround for the lack of
577 	 * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
578 	 * partition table of a disk image and it has been partitioned with
579 	 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
580 	 * images.
581 	 * That should really be fixed in lofi ... this is a workaround.
582 	 */
583 	if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
584 		if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
585 		    endptr == c + strlen(c)) {
586 			/*
587 			 * A number alright - a valid sector size as well ?
588 			 */
589 			if (!VALID_SECSIZE(l)) {
590 				cmn_err(CE_WARN, "!pcfs: invalid use of "
591 				    "'secsize' mount option - %ld is "
592 				    "unsupported. Autodetecting.", l);
593 				l = 0;
594 			}
595 		} else {
596 			cmn_err(CE_WARN, "!pcfs: invalid use of "
597 			    "'secsize' mount option - argument %s "
598 			    "is not a valid number. Autodetecting.", c);
599 			l = 0;
600 		}
601 		fsp->pcfs_secsize = l;
602 		fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
603 	}
604 }
605 
606 /*
607  * vfs operations
608  */
609 
610 /*
611  * pcfs_mount - backend for VFS_MOUNT() on PCFS.
612  */
613 static int
614 pcfs_mount(
615 	struct vfs *vfsp,
616 	struct vnode *mvp,
617 	struct mounta *uap,
618 	struct cred *cr)
619 {
620 	struct pcfs *fsp;
621 	struct vnode *devvp;
622 	dev_t pseudodev;
623 	dev_t xdev;
624 	int dos_ldrive = 0;
625 	int error;
626 	int remounting;
627 
628 	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
629 		return (error);
630 
631 	if (mvp->v_type != VDIR)
632 		return (ENOTDIR);
633 
634 	mutex_enter(&mvp->v_lock);
635 	if ((uap->flags & MS_REMOUNT) == 0 &&
636 	    (uap->flags & MS_OVERLAY) == 0 &&
637 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
638 		mutex_exit(&mvp->v_lock);
639 		return (EBUSY);
640 	}
641 	mutex_exit(&mvp->v_lock);
642 
643 	/*
644 	 * PCFS doesn't do mount arguments anymore - everything's a mount
645 	 * option these days. In order not to break existing callers, we
646 	 * don't reject it yet, just warn that the data (if any) is ignored.
647 	 */
648 	if (uap->datalen != 0)
649 		cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
650 		    "mount argument structures instead of mount options. "
651 		    "Ignoring mount(2) 'dataptr' argument.");
652 
653 	/*
654 	 * For most filesystems, this is just a lookupname() on the
655 	 * mount pathname string. PCFS historically has to do its own
656 	 * partition table parsing because not all Solaris architectures
657 	 * support all styles of partitioning that PC media can have, and
658 	 * hence PCFS understands "device names" that don't map to actual
659 	 * physical device nodes. Parsing the "PCFS syntax" for device
660 	 * names is done in pcfs_device_identify() - see there.
661 	 *
662 	 * Once all block device drivers that can host FAT filesystems have
663 	 * been enhanced to create device nodes for all PC-style partitions,
664 	 * this code can go away.
665 	 */
666 	if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
667 		return (error);
668 
669 	/*
670 	 * As with looking up the actual device to mount, PCFS cannot rely
671 	 * on just the checks done by vfs_ismounted() whether a given device
672 	 * is mounted already. The additional check against the "PCFS syntax"
673 	 * is done in  pcfs_device_ismounted().
674 	 */
675 	remounting = (uap->flags & MS_REMOUNT);
676 
677 	if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
678 	    &pseudodev))
679 		return (error);
680 
681 	if (remounting)
682 		return (0);
683 
684 	/*
685 	 * Mount the filesystem.
686 	 * An instance structure is required before the attempt to locate
687 	 * and parse the FAT BPB. This is because mount options may change
688 	 * the behaviour of the filesystem type matching code. Precreate
689 	 * it and fill it in to a degree that allows parsing the mount
690 	 * options.
691 	 */
692 	devvp = makespecvp(xdev, VBLK);
693 	if (IS_SWAPVP(devvp)) {
694 		VN_RELE(devvp);
695 		return (EBUSY);
696 	}
697 	error = VOP_OPEN(&devvp,
698 	    (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
699 	if (error) {
700 		VN_RELE(devvp);
701 		return (error);
702 	}
703 
704 	fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
705 	fsp->pcfs_vfs = vfsp;
706 	fsp->pcfs_xdev = xdev;
707 	fsp->pcfs_devvp = devvp;
708 	fsp->pcfs_ldrive = dos_ldrive;
709 	mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
710 	vfsp->vfs_data = fsp;
711 	vfsp->vfs_dev = pseudodev;
712 	vfsp->vfs_fstype = pcfstype;
713 	vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
714 	vfsp->vfs_bcount = 0;
715 	vfsp->vfs_bsize = fsp->pcfs_clsize;
716 
717 	pcfs_parse_mntopts(fsp, uap);
718 
719 	/*
720 	 * This is the actual "mount" - the PCFS superblock check.
721 	 *
722 	 * Find the requested logical drive and the FAT BPB therein.
723 	 * Check device type and flag the instance if media is removeable.
724 	 *
725 	 * Initializes most members of the filesystem instance structure.
726 	 * Returns EINVAL if no valid BPB can be found. Other errors may
727 	 * occur after I/O failures, or when invalid / unparseable partition
728 	 * tables are encountered.
729 	 */
730 	if (error = pc_getfattype(fsp))
731 		goto errout;
732 
733 	/*
734 	 * Validate that we can access the FAT and that it is, to the
735 	 * degree we can verify here, self-consistent.
736 	 */
737 	if (error = pc_verify(fsp))
738 		goto errout;
739 
740 	/*
741 	 * Record the time of the mount, to return as an "approximate"
742 	 * timestamp for the FAT root directory. Since FAT roots don't
743 	 * have timestamps, this is less confusing to the user than
744 	 * claiming "zero" / Jan/01/1970.
745 	 */
746 	gethrestime(&fsp->pcfs_mounttime);
747 
748 	/*
749 	 * Fix up the mount options. Because "noatime" is made default on
750 	 * removeable media only, a fixed disk will have neither "atime"
751 	 * nor "noatime" set. We set the options explicitly depending on
752 	 * the PCFS_NOATIME flag, to inform the user of what applies.
753 	 * Mount option cancellation will take care that the mutually
754 	 * exclusive 'other' is cleared.
755 	 */
756 	vfs_setmntopt(vfsp,
757 	    fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
758 	    NULL, 0);
759 
760 	/*
761 	 * All clear - insert the FS instance into PCFS' list.
762 	 */
763 	mutex_enter(&pcfslock);
764 	fsp->pcfs_nxt = pc_mounttab;
765 	pc_mounttab = fsp;
766 	mutex_exit(&pcfslock);
767 	atomic_inc_32(&pcfs_mountcount);
768 	return (0);
769 
770 errout:
771 	(void) VOP_CLOSE(devvp,
772 	    vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
773 	    1, (offset_t)0, cr, NULL);
774 	VN_RELE(devvp);
775 	mutex_destroy(&fsp->pcfs_lock);
776 	kmem_free(fsp, sizeof (*fsp));
777 	return (error);
778 
779 }
780 
781 static int
782 pcfs_unmount(
783 	struct vfs *vfsp,
784 	int flag,
785 	struct cred *cr)
786 {
787 	struct pcfs *fsp, *fsp1;
788 
789 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
790 		return (EPERM);
791 
792 	fsp = VFSTOPCFS(vfsp);
793 
794 	/*
795 	 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
796 	 * prevent lookuppn from crossing the mount point.
797 	 * If this is not a forced umount request and there's ongoing I/O,
798 	 * don't allow the mount to proceed.
799 	 */
800 	if (flag & MS_FORCE)
801 		vfsp->vfs_flag |= VFS_UNMOUNTED;
802 	else if (fsp->pcfs_nrefs)
803 		return (EBUSY);
804 
805 	mutex_enter(&pcfslock);
806 
807 	/*
808 	 * If this is a forced umount request or if the fs instance has
809 	 * been marked as beyond recovery, allow the umount to proceed
810 	 * regardless of state. pc_diskchanged() forcibly releases all
811 	 * inactive vnodes/pcnodes.
812 	 */
813 	if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
814 		rw_enter(&pcnodes_lock, RW_WRITER);
815 		pc_diskchanged(fsp);
816 		rw_exit(&pcnodes_lock);
817 	}
818 
819 	/* now there should be no pcp node on pcfhead or pcdhead. */
820 
821 	if (fsp == pc_mounttab) {
822 		pc_mounttab = fsp->pcfs_nxt;
823 	} else {
824 		for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
825 			if (fsp1->pcfs_nxt == fsp)
826 				fsp1->pcfs_nxt = fsp->pcfs_nxt;
827 	}
828 
829 	mutex_exit(&pcfslock);
830 
831 	/*
832 	 * Since we support VFS_FREEVFS(), there's no need to
833 	 * free the fsp right now. The framework will tell us
834 	 * when the right time to do so has arrived by calling
835 	 * into pcfs_freevfs.
836 	 */
837 	return (0);
838 }
839 
840 /*
841  * find root of pcfs
842  */
843 static int
844 pcfs_root(
845 	struct vfs *vfsp,
846 	struct vnode **vpp)
847 {
848 	struct pcfs *fsp;
849 	struct pcnode *pcp;
850 	int error;
851 
852 	fsp = VFSTOPCFS(vfsp);
853 	if (error = pc_lockfs(fsp, 0, 0))
854 		return (error);
855 
856 	pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
857 	pc_unlockfs(fsp);
858 	*vpp = PCTOV(pcp);
859 	pcp->pc_flags |= PC_EXTERNAL;
860 	return (0);
861 }
862 
863 /*
864  * Get file system statistics.
865  */
866 static int
867 pcfs_statvfs(
868 	struct vfs *vfsp,
869 	struct statvfs64 *sp)
870 {
871 	struct pcfs *fsp;
872 	int error;
873 	dev32_t d32;
874 
875 	fsp = VFSTOPCFS(vfsp);
876 	error = pc_getfat(fsp);
877 	if (error)
878 		return (error);
879 	bzero(sp, sizeof (*sp));
880 	sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
881 	sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
882 	sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
883 	sp->f_files = (fsfilcnt64_t)-1;
884 	sp->f_ffree = (fsfilcnt64_t)-1;
885 	sp->f_favail = (fsfilcnt64_t)-1;
886 #ifdef notdef
887 	(void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
888 #endif /* notdef */
889 	(void) cmpldev(&d32, vfsp->vfs_dev);
890 	sp->f_fsid = d32;
891 	(void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
892 	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
893 	sp->f_namemax = PCFNAMESIZE;
894 	return (0);
895 }
896 
897 static int
898 pc_syncfsnodes(struct pcfs *fsp)
899 {
900 	struct pchead *hp;
901 	struct pcnode *pcp;
902 	int error;
903 
904 	if (error = pc_lockfs(fsp, 0, 0))
905 		return (error);
906 
907 	if (!(error = pc_syncfat(fsp))) {
908 		hp = pcfhead;
909 		while (hp < & pcfhead [ NPCHASH ]) {
910 			rw_enter(&pcnodes_lock, RW_READER);
911 			pcp = hp->pch_forw;
912 			while (pcp != (struct pcnode *)hp) {
913 				if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
914 					if (error = pc_nodesync(pcp))
915 						break;
916 				pcp = pcp -> pc_forw;
917 			}
918 			rw_exit(&pcnodes_lock);
919 			if (error)
920 				break;
921 			hp++;
922 		}
923 	}
924 	pc_unlockfs(fsp);
925 	return (error);
926 }
927 
928 /*
929  * Flush any pending I/O.
930  */
931 /*ARGSUSED*/
932 static int
933 pcfs_sync(
934 	struct vfs *vfsp,
935 	short flag,
936 	struct cred *cr)
937 {
938 	struct pcfs *fsp;
939 	int error = 0;
940 
941 	/* this prevents the filesystem from being umounted. */
942 	mutex_enter(&pcfslock);
943 	if (vfsp != NULL) {
944 		fsp = VFSTOPCFS(vfsp);
945 		if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
946 			error = pc_syncfsnodes(fsp);
947 		} else {
948 			rw_enter(&pcnodes_lock, RW_WRITER);
949 			pc_diskchanged(fsp);
950 			rw_exit(&pcnodes_lock);
951 			error = EIO;
952 		}
953 	} else {
954 		fsp = pc_mounttab;
955 		while (fsp != NULL) {
956 			if (fsp->pcfs_flags & PCFS_IRRECOV) {
957 				rw_enter(&pcnodes_lock, RW_WRITER);
958 				pc_diskchanged(fsp);
959 				rw_exit(&pcnodes_lock);
960 				error = EIO;
961 				break;
962 			}
963 			error = pc_syncfsnodes(fsp);
964 			if (error) break;
965 			fsp = fsp->pcfs_nxt;
966 		}
967 	}
968 	mutex_exit(&pcfslock);
969 	return (error);
970 }
971 
972 int
973 pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
974 {
975 	int err;
976 
977 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
978 		return (EIO);
979 
980 	if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
981 		fsp->pcfs_count++;
982 	} else {
983 		mutex_enter(&fsp->pcfs_lock);
984 		if (fsp->pcfs_flags & PCFS_LOCKED)
985 			panic("pc_lockfs");
986 		/*
987 		 * We check the IRRECOV bit again just in case somebody
988 		 * snuck past the initial check but then got held up before
989 		 * they could grab the lock.  (And in the meantime someone
990 		 * had grabbed the lock and set the bit)
991 		 */
992 		if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
993 			if ((err = pc_getfat(fsp))) {
994 				mutex_exit(&fsp->pcfs_lock);
995 				return (err);
996 			}
997 		}
998 		fsp->pcfs_flags |= PCFS_LOCKED;
999 		fsp->pcfs_owner = curthread;
1000 		fsp->pcfs_count++;
1001 	}
1002 	return (0);
1003 }
1004 
1005 void
1006 pc_unlockfs(struct pcfs *fsp)
1007 {
1008 
1009 	if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1010 		panic("pc_unlockfs");
1011 	if (--fsp->pcfs_count < 0)
1012 		panic("pc_unlockfs: count");
1013 	if (fsp->pcfs_count == 0) {
1014 		fsp->pcfs_flags &= ~PCFS_LOCKED;
1015 		fsp->pcfs_owner = 0;
1016 		mutex_exit(&fsp->pcfs_lock);
1017 	}
1018 }
1019 
1020 int
1021 pc_syncfat(struct pcfs *fsp)
1022 {
1023 	struct buf *bp;
1024 	int nfat;
1025 	int	error = 0;
1026 	struct fat_od_fsi *fsinfo_disk;
1027 
1028 	if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1029 	    !(fsp->pcfs_flags & PCFS_FATMOD))
1030 		return (0);
1031 	/*
1032 	 * write out all copies of FATs
1033 	 */
1034 	fsp->pcfs_flags &= ~PCFS_FATMOD;
1035 	fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1036 	for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1037 		error = pc_writefat(fsp, pc_dbdaddr(fsp,
1038 		    fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1039 		if (error) {
1040 			pc_mark_irrecov(fsp);
1041 			return (EIO);
1042 		}
1043 	}
1044 	pc_clear_fatchanges(fsp);
1045 
1046 	/*
1047 	 * Write out fsinfo sector.
1048 	 */
1049 	if (IS_FAT32(fsp)) {
1050 		bp = bread(fsp->pcfs_xdev,
1051 		    pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1052 		if (bp->b_flags & (B_ERROR | B_STALE)) {
1053 			error = geterror(bp);
1054 		}
1055 		fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1056 		if (!error && FSISIG_OK(fsinfo_disk)) {
1057 			fsinfo_disk->fsi_incore.fs_free_clusters =
1058 			    LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1059 			fsinfo_disk->fsi_incore.fs_next_free =
1060 			    LE_32(FSINFO_UNKNOWN);
1061 			bwrite2(bp);
1062 			error = geterror(bp);
1063 		}
1064 		brelse(bp);
1065 		if (error) {
1066 			pc_mark_irrecov(fsp);
1067 			return (EIO);
1068 		}
1069 	}
1070 	return (0);
1071 }
1072 
1073 void
1074 pc_invalfat(struct pcfs *fsp)
1075 {
1076 	struct pcfs *xfsp;
1077 	int mount_cnt = 0;
1078 
1079 	if (fsp->pcfs_fatp == (uchar_t *)0)
1080 		panic("pc_invalfat");
1081 	/*
1082 	 * Release FAT
1083 	 */
1084 	kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1085 	fsp->pcfs_fatp = NULL;
1086 	kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1087 	fsp->pcfs_fat_changemap = NULL;
1088 	/*
1089 	 * Invalidate all the blocks associated with the device.
1090 	 * Not needed if stateless.
1091 	 */
1092 	for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1093 		if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1094 			mount_cnt++;
1095 
1096 	if (!mount_cnt)
1097 		binval(fsp->pcfs_xdev);
1098 	/*
1099 	 * close mounted device
1100 	 */
1101 	(void) VOP_CLOSE(fsp->pcfs_devvp,
1102 	    (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1103 	    1, (offset_t)0, CRED(), NULL);
1104 }
1105 
1106 void
1107 pc_badfs(struct pcfs *fsp)
1108 {
1109 	cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1110 	    getmajor(fsp->pcfs_devvp->v_rdev),
1111 	    getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1112 }
1113 
1114 /*
1115  * The problem with supporting NFS on the PCFS filesystem is that there
1116  * is no good place to keep the generation number. The only possible
1117  * place is inside a directory entry. There are a few words that we
1118  * don't use - they store NT & OS/2 attributes, and the creation/last access
1119  * time of the file - but it seems wrong to use them. In addition, directory
1120  * entries come and go. If a directory is removed completely, its directory
1121  * blocks are freed and the generation numbers are lost. Whereas in ufs,
1122  * inode blocks are dedicated for inodes, so the generation numbers are
1123  * permanently kept on the disk.
1124  */
1125 static int
1126 pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1127 {
1128 	struct pcnode *pcp;
1129 	struct pc_fid *pcfid;
1130 	struct pcfs *fsp;
1131 	struct pcdir *ep;
1132 	daddr_t eblkno;
1133 	int eoffset;
1134 	struct buf *bp;
1135 	int error;
1136 	pc_cluster32_t	cn;
1137 
1138 	pcfid = (struct pc_fid *)fidp;
1139 	fsp = VFSTOPCFS(vfsp);
1140 
1141 	error = pc_lockfs(fsp, 0, 0);
1142 	if (error) {
1143 		*vpp = NULL;
1144 		return (error);
1145 	}
1146 
1147 	if (pcfid->pcfid_block == 0) {
1148 		pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1149 		pcp->pc_flags |= PC_EXTERNAL;
1150 		*vpp = PCTOV(pcp);
1151 		pc_unlockfs(fsp);
1152 		return (0);
1153 	}
1154 	eblkno = pcfid->pcfid_block;
1155 	eoffset = pcfid->pcfid_offset;
1156 
1157 	if ((pc_dbtocl(fsp,
1158 	    eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1159 	    (eoffset > fsp->pcfs_clsize)) {
1160 		pc_unlockfs(fsp);
1161 		*vpp = NULL;
1162 		return (EINVAL);
1163 	}
1164 
1165 	if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1166 	    < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1167 		bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1168 		    fsp->pcfs_clsize);
1169 	} else {
1170 		/*
1171 		 * This is an access "backwards" into the FAT12/FAT16
1172 		 * root directory. A better code structure would
1173 		 * significantly improve maintainability here ...
1174 		 */
1175 		bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1176 		    (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1177 	}
1178 	if (bp->b_flags & (B_ERROR | B_STALE)) {
1179 		error = geterror(bp);
1180 		brelse(bp);
1181 		if (error)
1182 			pc_mark_irrecov(fsp);
1183 		*vpp = NULL;
1184 		pc_unlockfs(fsp);
1185 		return (error);
1186 	}
1187 	ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1188 	/*
1189 	 * Ok, if this is a valid file handle that we gave out,
1190 	 * then simply ensuring that the creation time matches,
1191 	 * the entry has not been deleted, and it has a valid first
1192 	 * character should be enough.
1193 	 *
1194 	 * Unfortunately, verifying that the <blkno, offset> _still_
1195 	 * refers to a directory entry is not easy, since we'd have
1196 	 * to search _all_ directories starting from root to find it.
1197 	 * That's a high price to pay just in case somebody is forging
1198 	 * file handles. So instead we verify that as much of the
1199 	 * entry is valid as we can:
1200 	 *
1201 	 * 1. The starting cluster is 0 (unallocated) or valid
1202 	 * 2. It is not an LFN entry
1203 	 * 3. It is not hidden (unless mounted as such)
1204 	 * 4. It is not the label
1205 	 */
1206 	cn = pc_getstartcluster(fsp, ep);
1207 	/*
1208 	 * if the starting cluster is valid, but not valid according
1209 	 * to pc_validcl(), force it to be to simplify the following if.
1210 	 */
1211 	if (cn == 0)
1212 		cn = PCF_FIRSTCLUSTER;
1213 	if (IS_FAT32(fsp)) {
1214 		if (cn >= PCF_LASTCLUSTER32)
1215 			cn = PCF_FIRSTCLUSTER;
1216 	} else {
1217 		if (cn >= PCF_LASTCLUSTER)
1218 			cn = PCF_FIRSTCLUSTER;
1219 	}
1220 	if ((!pc_validcl(fsp, cn)) ||
1221 	    (PCDL_IS_LFN(ep)) ||
1222 	    (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1223 	    ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1224 		bp->b_flags |= B_STALE | B_AGE;
1225 		brelse(bp);
1226 		pc_unlockfs(fsp);
1227 		return (EINVAL);
1228 	}
1229 	if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1230 	    (ep->pcd_filename[0] != PCD_ERASED) &&
1231 	    (pc_validchar(ep->pcd_filename[0]) ||
1232 	    (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1233 		pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1234 		pcp->pc_flags |= PC_EXTERNAL;
1235 		*vpp = PCTOV(pcp);
1236 	} else {
1237 		*vpp = NULL;
1238 	}
1239 	bp->b_flags |= B_STALE | B_AGE;
1240 	brelse(bp);
1241 	pc_unlockfs(fsp);
1242 	return (0);
1243 }
1244 
1245 /*
1246  * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1247  * a meg), so we can't bread() it all in at once. This routine reads a
1248  * fat a chunk at a time.
1249  */
1250 static int
1251 pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1252 {
1253 	struct buf *bp;
1254 	size_t off;
1255 	size_t readsize;
1256 	daddr_t diskblk;
1257 	size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1258 	daddr_t start = fsp->pcfs_fatstart;
1259 
1260 	readsize = fsp->pcfs_clsize;
1261 	for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1262 		if (readsize > (fatsize - off))
1263 			readsize = fatsize - off;
1264 		diskblk = pc_dbdaddr(fsp, start +
1265 		    pc_cltodb(fsp, pc_lblkno(fsp, off)));
1266 		bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1267 		if (bp->b_flags & (B_ERROR | B_STALE)) {
1268 			brelse(bp);
1269 			return (EIO);
1270 		}
1271 		bp->b_flags |= B_STALE | B_AGE;
1272 		bcopy(bp->b_un.b_addr, fatp, readsize);
1273 		brelse(bp);
1274 	}
1275 	return (0);
1276 }
1277 
1278 /*
1279  * We write the FAT out a _lot_, in order to make sure that it
1280  * is up-to-date. But on a FAT32 system (large drive, small clusters)
1281  * the FAT might be a couple of megabytes, and writing it all out just
1282  * because we created or deleted a small file is painful (especially
1283  * since we do it for each alternate FAT too). So instead, for FAT16 and
1284  * FAT32 we only write out the bit that has changed. We don't clear
1285  * the 'updated' fields here because the caller might be writing out
1286  * several FATs, so the caller must use pc_clear_fatchanges() after
1287  * all FATs have been updated.
1288  * This function doesn't take "start" from fsp->pcfs_dosstart because
1289  * callers can use it to write either the primary or any of the alternate
1290  * FAT tables.
1291  */
1292 static int
1293 pc_writefat(struct pcfs *fsp, daddr_t start)
1294 {
1295 	struct buf *bp;
1296 	size_t off;
1297 	size_t writesize;
1298 	int	error;
1299 	uchar_t *fatp = fsp->pcfs_fatp;
1300 	size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1301 
1302 	writesize = fsp->pcfs_clsize;
1303 	for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1304 		if (writesize > (fatsize - off))
1305 			writesize = fatsize - off;
1306 		if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1307 			continue;
1308 		}
1309 		bp = ngeteblk(writesize);
1310 		bp->b_edev = fsp->pcfs_xdev;
1311 		bp->b_dev = cmpdev(bp->b_edev);
1312 		bp->b_blkno = pc_dbdaddr(fsp, start +
1313 		    pc_cltodb(fsp, pc_lblkno(fsp, off)));
1314 		bcopy(fatp, bp->b_un.b_addr, writesize);
1315 		bwrite2(bp);
1316 		error = geterror(bp);
1317 		brelse(bp);
1318 		if (error) {
1319 			return (error);
1320 		}
1321 	}
1322 	return (0);
1323 }
1324 
1325 /*
1326  * Mark the FAT cluster that 'cn' is stored in as modified.
1327  */
1328 void
1329 pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1330 {
1331 	pc_cluster32_t	bn;
1332 	size_t		size;
1333 
1334 	/* which fat block is the cluster number stored in? */
1335 	if (IS_FAT32(fsp)) {
1336 		size = sizeof (pc_cluster32_t);
1337 		bn = pc_lblkno(fsp, cn * size);
1338 		fsp->pcfs_fat_changemap[bn] = 1;
1339 	} else if (IS_FAT16(fsp)) {
1340 		size = sizeof (pc_cluster16_t);
1341 		bn = pc_lblkno(fsp, cn * size);
1342 		fsp->pcfs_fat_changemap[bn] = 1;
1343 	} else {
1344 		offset_t off;
1345 		pc_cluster32_t nbn;
1346 
1347 		ASSERT(IS_FAT12(fsp));
1348 		off = cn + (cn >> 1);
1349 		bn = pc_lblkno(fsp, off);
1350 		fsp->pcfs_fat_changemap[bn] = 1;
1351 		/* does this field wrap into the next fat cluster? */
1352 		nbn = pc_lblkno(fsp, off + 1);
1353 		if (nbn != bn) {
1354 			fsp->pcfs_fat_changemap[nbn] = 1;
1355 		}
1356 	}
1357 }
1358 
1359 /*
1360  * return whether the FAT cluster 'bn' is updated and needs to
1361  * be written out.
1362  */
1363 int
1364 pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1365 {
1366 	return (fsp->pcfs_fat_changemap[bn] == 1);
1367 }
1368 
1369 /*
1370  * Implementation of VFS_FREEVFS() to support forced umounts.
1371  * This is called by the vfs framework after umount, to trigger
1372  * the release of any resources still associated with the given
1373  * vfs_t once the need to keep them has gone away.
1374  */
1375 void
1376 pcfs_freevfs(vfs_t *vfsp)
1377 {
1378 	struct pcfs *fsp = VFSTOPCFS(vfsp);
1379 
1380 	mutex_enter(&pcfslock);
1381 	/*
1382 	 * Purging the FAT closes the device - can't do any more
1383 	 * I/O after this.
1384 	 */
1385 	if (fsp->pcfs_fatp != (uchar_t *)0)
1386 		pc_invalfat(fsp);
1387 	mutex_exit(&pcfslock);
1388 
1389 	VN_RELE(fsp->pcfs_devvp);
1390 	mutex_destroy(&fsp->pcfs_lock);
1391 	kmem_free(fsp, sizeof (*fsp));
1392 
1393 	/*
1394 	 * Allow _fini() to succeed now, if so desired.
1395 	 */
1396 	atomic_dec_32(&pcfs_mountcount);
1397 }
1398 
1399 
1400 /*
1401  * PC-style partition parsing and FAT BPB identification/validation code.
1402  * The partition parsers here assume:
1403  *	- a FAT filesystem will be in a partition that has one of a set of
1404  *	  recognized partition IDs
1405  *	- the user wants the 'numbering' (C:, D:, ...) that one would get
1406  *	  on MSDOS 6.x.
1407  *	  That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1408  *	  will not factor in the enumeration.
1409  * These days, such assumptions should be revisited. FAT is no longer the
1410  * only game in 'PC town'.
1411  */
1412 /*
1413  * isDosDrive()
1414  *	Boolean function.  Give it the systid field for an fdisk partition
1415  *	and it decides if that's a systid that describes a DOS drive.  We
1416  *	use systid values defined in sys/dktp/fdisk.h.
1417  */
1418 static int
1419 isDosDrive(uchar_t checkMe)
1420 {
1421 	return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1422 	    (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1423 	    (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1424 	    (checkMe == DIAGPART));
1425 }
1426 
1427 
1428 /*
1429  * isDosExtended()
1430  *	Boolean function.  Give it the systid field for an fdisk partition
1431  *	and it decides if that's a systid that describes an extended DOS
1432  *	partition.
1433  */
1434 static int
1435 isDosExtended(uchar_t checkMe)
1436 {
1437 	return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1438 }
1439 
1440 
1441 /*
1442  * isBootPart()
1443  *	Boolean function.  Give it the systid field for an fdisk partition
1444  *	and it decides if that's a systid that describes a Solaris boot
1445  *	partition.
1446  */
1447 static int
1448 isBootPart(uchar_t checkMe)
1449 {
1450 	return (checkMe == X86BOOT);
1451 }
1452 
1453 
1454 /*
1455  * noLogicalDrive()
1456  *	Display error message about not being able to find a logical
1457  *	drive.
1458  */
1459 static void
1460 noLogicalDrive(int ldrive)
1461 {
1462 	if (ldrive == BOOT_PARTITION_DRIVE) {
1463 		cmn_err(CE_NOTE, "!pcfs: no boot partition");
1464 	} else {
1465 		cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1466 	}
1467 }
1468 
1469 
1470 /*
1471  * findTheDrive()
1472  *	Discover offset of the requested logical drive, and return
1473  *	that offset (startSector), the systid of that drive (sysid),
1474  *	and a buffer pointer (bp), with the buffer contents being
1475  *	the first sector of the logical drive (i.e., the sector that
1476  *	contains the BPB for that drive).
1477  *
1478  * Note: this code is not capable of addressing >2TB disks, as it uses
1479  *       daddr_t not diskaddr_t, some of the calculations would overflow
1480  */
1481 #define	COPY_PTBL(mbr, ptblp)					\
1482 	bcopy(&(((struct mboot *)(mbr))->parts), (ptblp),	\
1483 	    FD_NUMPART * sizeof (struct ipart))
1484 
1485 static int
1486 findTheDrive(struct pcfs *fsp, buf_t **bp)
1487 {
1488 	int ldrive = fsp->pcfs_ldrive;
1489 	dev_t dev = fsp->pcfs_devvp->v_rdev;
1490 
1491 	struct ipart dosp[FD_NUMPART];	/* incore fdisk partition structure */
1492 	daddr_t lastseek = 0;		/* Disk block we sought previously */
1493 	daddr_t diskblk = 0;		/* Disk block to get */
1494 	daddr_t xstartsect;		/* base of Extended DOS partition */
1495 	int logicalDriveCount = 0;	/* Count of logical drives seen */
1496 	int extendedPart = -1;		/* index of extended dos partition */
1497 	int primaryPart = -1;		/* index of primary dos partition */
1498 	int bootPart = -1;		/* index of a Solaris boot partition */
1499 	int xnumsect = -1;		/* length of extended DOS partition */
1500 	int driveIndex;			/* computed FDISK table index */
1501 	daddr_t startsec;
1502 	len_t mediasize;
1503 	int i;
1504 	/*
1505 	 * Count of drives in the current extended partition's
1506 	 * FDISK table, and indexes of the drives themselves.
1507 	 */
1508 	int extndDrives[FD_NUMPART];
1509 	int numDrives = 0;
1510 
1511 	/*
1512 	 * Count of drives (beyond primary) in master boot record's
1513 	 * FDISK table, and indexes of the drives themselves.
1514 	 */
1515 	int extraDrives[FD_NUMPART];
1516 	int numExtraDrives = 0;
1517 
1518 	/*
1519 	 * "ldrive == 0" should never happen, as this is a request to
1520 	 * mount the physical device (and ignore partitioning). The code
1521 	 * in pcfs_mount() should have made sure that a logical drive number
1522 	 * is at least 1, meaning we're looking for drive "C:". It is not
1523 	 * safe (and a bug in the callers of this function) to request logical
1524 	 * drive number 0; we could ASSERT() but a graceful EIO is a more
1525 	 * polite way.
1526 	 */
1527 	if (ldrive == 0) {
1528 		cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1529 		noLogicalDrive(ldrive);
1530 		return (EIO);
1531 	}
1532 
1533 	/*
1534 	 *  Copy from disk block into memory aligned structure for fdisk usage.
1535 	 */
1536 	COPY_PTBL((*bp)->b_un.b_addr, dosp);
1537 
1538 	/*
1539 	 * This check is ok because a FAT BPB and a master boot record (MBB)
1540 	 * have the same signature, in the same position within the block.
1541 	 */
1542 	if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1543 		cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1544 		    "device (%x.%x):%d\n",
1545 		    getmajor(dev), getminor(dev), ldrive);
1546 		return (EINVAL);
1547 	}
1548 
1549 	/*
1550 	 * Get a summary of what is in the Master FDISK table.
1551 	 * Normally we expect to find one partition marked as a DOS drive.
1552 	 * This partition is the one Windows calls the primary dos partition.
1553 	 * If the machine has any logical drives then we also expect
1554 	 * to find a partition marked as an extended DOS partition.
1555 	 *
1556 	 * Sometimes we'll find multiple partitions marked as DOS drives.
1557 	 * The Solaris fdisk program allows these partitions
1558 	 * to be created, but Windows fdisk no longer does.  We still need
1559 	 * to support these, though, since Windows does.  We also need to fix
1560 	 * our fdisk to behave like the Windows version.
1561 	 *
1562 	 * It turns out that some off-the-shelf media have *only* an
1563 	 * Extended partition, so we need to deal with that case as well.
1564 	 *
1565 	 * Only a single (the first) Extended or Boot Partition will
1566 	 * be recognized.  Any others will be ignored.
1567 	 */
1568 	for (i = 0; i < FD_NUMPART; i++) {
1569 		DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1570 		    uint_t, (uint_t)dosp[i].systid,
1571 		    uint_t, LE_32(dosp[i].relsect),
1572 		    uint_t, LE_32(dosp[i].numsect));
1573 
1574 		if (isDosDrive(dosp[i].systid)) {
1575 			if (primaryPart < 0) {
1576 				logicalDriveCount++;
1577 				primaryPart = i;
1578 			} else {
1579 				extraDrives[numExtraDrives++] = i;
1580 			}
1581 			continue;
1582 		}
1583 		if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1584 			extendedPart = i;
1585 			continue;
1586 		}
1587 		if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1588 			bootPart = i;
1589 			continue;
1590 		}
1591 	}
1592 
1593 	if (ldrive == BOOT_PARTITION_DRIVE) {
1594 		if (bootPart < 0) {
1595 			noLogicalDrive(ldrive);
1596 			return (EINVAL);
1597 		}
1598 		startsec = LE_32(dosp[bootPart].relsect);
1599 		mediasize = LE_32(dosp[bootPart].numsect);
1600 		goto found;
1601 	}
1602 
1603 	if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1604 		startsec = LE_32(dosp[primaryPart].relsect);
1605 		mediasize = LE_32(dosp[primaryPart].numsect);
1606 		goto found;
1607 	}
1608 
1609 	/*
1610 	 * We are not looking for the C: drive (or the primary drive
1611 	 * was not found), so we had better have an extended partition
1612 	 * or extra drives in the Master FDISK table.
1613 	 */
1614 	if ((extendedPart < 0) && (numExtraDrives == 0)) {
1615 		cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1616 		noLogicalDrive(ldrive);
1617 		return (EINVAL);
1618 	}
1619 
1620 	if (extendedPart >= 0) {
1621 		diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1622 		xnumsect = LE_32(dosp[extendedPart].numsect);
1623 		do {
1624 			/*
1625 			 *  If the seek would not cause us to change
1626 			 *  position on the drive, then we're out of
1627 			 *  extended partitions to examine.
1628 			 */
1629 			if (diskblk == lastseek)
1630 				break;
1631 			logicalDriveCount += numDrives;
1632 			/*
1633 			 *  Seek the next extended partition, and find
1634 			 *  logical drives within it.
1635 			 */
1636 			brelse(*bp);
1637 			/*
1638 			 * bread() block numbers are multiples of DEV_BSIZE
1639 			 * but the device sector size (the unit of partitioning)
1640 			 * might be larger than that; pcfs_get_device_info()
1641 			 * has calculated the multiplicator for us.
1642 			 */
1643 			*bp = bread(dev,
1644 			    pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1645 			if ((*bp)->b_flags & B_ERROR) {
1646 				return (EIO);
1647 			}
1648 
1649 			lastseek = diskblk;
1650 			COPY_PTBL((*bp)->b_un.b_addr, dosp);
1651 			if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1652 				cmn_err(CE_NOTE, "!pcfs: "
1653 				    "extended partition table signature err, "
1654 				    "device (%x.%x):%d, LBA %u",
1655 				    getmajor(dev), getminor(dev), ldrive,
1656 				    (uint_t)pc_dbdaddr(fsp, diskblk));
1657 				return (EINVAL);
1658 			}
1659 			/*
1660 			 *  Count up drives, and track where the next
1661 			 *  extended partition is in case we need it.  We
1662 			 *  are expecting only one extended partition.  If
1663 			 *  there is more than one we'll only go to the
1664 			 *  first one we see, but warn about ignoring.
1665 			 */
1666 			numDrives = 0;
1667 			for (i = 0; i < FD_NUMPART; i++) {
1668 				DTRACE_PROBE4(extendedpart,
1669 				    struct pcfs *, fsp,
1670 				    uint_t, (uint_t)dosp[i].systid,
1671 				    uint_t, LE_32(dosp[i].relsect),
1672 				    uint_t, LE_32(dosp[i].numsect));
1673 				if (isDosDrive(dosp[i].systid)) {
1674 					extndDrives[numDrives++] = i;
1675 				} else if (isDosExtended(dosp[i].systid)) {
1676 					if (diskblk != lastseek) {
1677 						/*
1678 						 * Already found an extended
1679 						 * partition in this table.
1680 						 */
1681 						cmn_err(CE_NOTE,
1682 						    "!pcfs: ignoring unexpected"
1683 						    " additional extended"
1684 						    " partition");
1685 					} else {
1686 						diskblk = xstartsect +
1687 						    LE_32(dosp[i].relsect);
1688 					}
1689 				}
1690 			}
1691 		} while (ldrive > logicalDriveCount + numDrives);
1692 
1693 		ASSERT(numDrives <= FD_NUMPART);
1694 
1695 		if (ldrive <= logicalDriveCount + numDrives) {
1696 			/*
1697 			 * The number of logical drives we've found thus
1698 			 * far is enough to get us to the one we were
1699 			 * searching for.
1700 			 */
1701 			driveIndex = logicalDriveCount + numDrives - ldrive;
1702 			mediasize =
1703 			    LE_32(dosp[extndDrives[driveIndex]].numsect);
1704 			startsec =
1705 			    LE_32(dosp[extndDrives[driveIndex]].relsect) +
1706 			    lastseek;
1707 			if (startsec > (xstartsect + xnumsect)) {
1708 				cmn_err(CE_NOTE, "!pcfs: extended partition "
1709 				    "values bad");
1710 				return (EINVAL);
1711 			}
1712 			goto found;
1713 		} else {
1714 			/*
1715 			 * We ran out of extended dos partition
1716 			 * drives.  The only hope now is to go
1717 			 * back to extra drives defined in the master
1718 			 * fdisk table.  But we overwrote that table
1719 			 * already, so we must load it in again.
1720 			 */
1721 			logicalDriveCount += numDrives;
1722 			brelse(*bp);
1723 			ASSERT(fsp->pcfs_dosstart == 0);
1724 			*bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1725 			    fsp->pcfs_secsize);
1726 			if ((*bp)->b_flags & B_ERROR) {
1727 				return (EIO);
1728 			}
1729 			COPY_PTBL((*bp)->b_un.b_addr, dosp);
1730 		}
1731 	}
1732 	/*
1733 	 *  Still haven't found the drive, is it an extra
1734 	 *  drive defined in the main FDISK table?
1735 	 */
1736 	if (ldrive <= logicalDriveCount + numExtraDrives) {
1737 		driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1738 		ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1739 		mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1740 		startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1741 		goto found;
1742 	}
1743 	/*
1744 	 *  Still haven't found the drive, and there is
1745 	 *  nowhere else to look.
1746 	 */
1747 	noLogicalDrive(ldrive);
1748 	return (EINVAL);
1749 
1750 found:
1751 	/*
1752 	 * We need this value in units of sectorsize, because PCFS' internal
1753 	 * offset calculations go haywire for > 512Byte sectors unless all
1754 	 * pcfs_.*start values are in units of sectors.
1755 	 * So, assign before the capacity check (that's done in DEV_BSIZE)
1756 	 */
1757 	fsp->pcfs_dosstart = startsec;
1758 
1759 	/*
1760 	 * convert from device sectors to proper units:
1761 	 *	- starting sector: DEV_BSIZE (as argument to bread())
1762 	 *	- media size: Bytes
1763 	 */
1764 	startsec = pc_dbdaddr(fsp, startsec);
1765 	mediasize *= fsp->pcfs_secsize;
1766 
1767 	/*
1768 	 * some additional validation / warnings in case the partition table
1769 	 * and the actual media capacity are not in accordance ...
1770 	 */
1771 	if (fsp->pcfs_mediasize != 0) {
1772 		diskaddr_t startoff =
1773 		    (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1774 
1775 		if (startoff >= fsp->pcfs_mediasize ||
1776 		    startoff + mediasize > fsp->pcfs_mediasize) {
1777 			cmn_err(CE_WARN,
1778 			    "!pcfs: partition size (LBA start %u, %lld bytes, "
1779 			    "device (%x.%x):%d) smaller than "
1780 			    "mediasize (%lld bytes).\n"
1781 			    "filesystem may be truncated, access errors "
1782 			    "may result.\n",
1783 			    (uint_t)startsec, (long long)mediasize,
1784 			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1785 			    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1786 		}
1787 	} else {
1788 		fsp->pcfs_mediasize = mediasize;
1789 	}
1790 
1791 	return (0);
1792 }
1793 
1794 
1795 static fattype_t
1796 secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1797 {
1798 	uint32_t ncl = fsp->pcfs_ncluster;
1799 
1800 	if (ncl <= 4096) {
1801 		if (bpb_get_FatSz16(bpb) == 0)
1802 			return (FAT_UNKNOWN);
1803 
1804 		if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1805 		    bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1806 			return (FAT12);
1807 		if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1808 			return (FAT12);
1809 		if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1810 			return (FAT16);
1811 
1812 		switch (bpb_get_Media(bpb)) {
1813 			case SS8SPT:
1814 			case DS8SPT:
1815 			case SS9SPT:
1816 			case DS9SPT:
1817 			case DS18SPT:
1818 			case DS9_15SPT:
1819 				/*
1820 				 * Is this reliable - all floppies are FAT12 ?
1821 				 */
1822 				return (FAT12);
1823 			case MD_FIXED:
1824 				/*
1825 				 * Is this reliable - disks are always FAT16 ?
1826 				 */
1827 				return (FAT16);
1828 			default:
1829 				break;
1830 		}
1831 	} else if (ncl <= 65536) {
1832 		if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1833 			return (FAT32);
1834 		if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1835 			return (FAT32);
1836 		if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1837 			return (FAT32);
1838 
1839 		if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1840 			return (FAT16);
1841 		if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1842 			return (FAT16);
1843 	}
1844 
1845 	/*
1846 	 * We don't know
1847 	 */
1848 	return (FAT_UNKNOWN);
1849 }
1850 
1851 /*
1852  * Check to see if the BPB we found is correct.
1853  *
1854  * This looks far more complicated than it needs to be for pure structural
1855  * validation. The reason for this is that parseBPB() is also used for
1856  * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1857  * BPB fields have 'known good' values, even if we do not reject the BPB
1858  * when attempting to mount the filesystem.
1859  */
1860 static int
1861 parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1862 {
1863 	fattype_t type;
1864 
1865 	uint32_t	ncl;	/* number of clusters in file area */
1866 	uint32_t	rec;
1867 	uint32_t	reserved;
1868 	uint32_t	fsisec, bkbootsec;
1869 	blkcnt_t	totsec, totsec16, totsec32, datasec;
1870 	size_t		fatsec, fatsec16, fatsec32, rdirsec;
1871 	size_t		secsize;
1872 	len_t		mediasize;
1873 	uint64_t	validflags = 0;
1874 
1875 	if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1876 		validflags |= BPB_BPBSIG_OK;
1877 
1878 	rec = bpb_get_RootEntCnt(bpb);
1879 	reserved = bpb_get_RsvdSecCnt(bpb);
1880 	fsisec = bpb_get_FSInfo32(bpb);
1881 	bkbootsec = bpb_get_BkBootSec32(bpb);
1882 	totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1883 	totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1884 	fatsec16 = bpb_get_FatSz16(bpb);
1885 	fatsec32 = bpb_get_FatSz32(bpb);
1886 
1887 	totsec = totsec16 ? totsec16 : totsec32;
1888 	fatsec = fatsec16 ? fatsec16 : fatsec32;
1889 
1890 	secsize = bpb_get_BytesPerSec(bpb);
1891 	if (!VALID_SECSIZE(secsize))
1892 		secsize = fsp->pcfs_secsize;
1893 	if (secsize != fsp->pcfs_secsize) {
1894 		PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1895 		    getmajor(fsp->pcfs_xdev),
1896 		    getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1897 		PC_DPRINTF2(3, "!BPB secsize %d != "
1898 		    "autodetected media block size %d\n",
1899 		    (int)secsize, (int)fsp->pcfs_secsize);
1900 		if (fsp->pcfs_ldrive) {
1901 			/*
1902 			 * We've already attempted to parse the partition
1903 			 * table. If the block size used for that don't match
1904 			 * the PCFS sector size, we're hosed one way or the
1905 			 * other. Just try what happens.
1906 			 */
1907 			secsize = fsp->pcfs_secsize;
1908 			PC_DPRINTF1(3,
1909 			    "!pcfs: Using autodetected secsize %d\n",
1910 			    (int)secsize);
1911 		} else {
1912 			/*
1913 			 * This allows mounting lofi images of PCFS partitions
1914 			 * with sectorsize != DEV_BSIZE. We can't parse the
1915 			 * partition table on whole-disk images unless the
1916 			 * (undocumented) "secsize=..." mount option is used,
1917 			 * but at least this allows us to mount if we have
1918 			 * an image of a partition.
1919 			 */
1920 			PC_DPRINTF1(3,
1921 			    "!pcfs: Using BPB secsize %d\n", (int)secsize);
1922 		}
1923 	}
1924 
1925 	if (fsp->pcfs_mediasize == 0) {
1926 		mediasize = (len_t)totsec * (len_t)secsize;
1927 		PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1928 		    "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1929 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1930 		    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1931 	} else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1932 		cmn_err(CE_WARN,
1933 		    "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1934 		    "FAT BPB mediasize (%lld Bytes).\n"
1935 		    "truncated filesystem on device (%x.%x):%d, access errors "
1936 		    "possible.\n",
1937 		    (long long)fsp->pcfs_mediasize,
1938 		    (long long)(totsec * (blkcnt_t)secsize),
1939 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1940 		    fsp->pcfs_ldrive);
1941 		mediasize = fsp->pcfs_mediasize;
1942 	} else {
1943 		/*
1944 		 * This is actually ok. A FAT needs not occupy the maximum
1945 		 * space available in its partition, it can be shorter.
1946 		 */
1947 		mediasize = (len_t)totsec * (len_t)secsize;
1948 	}
1949 
1950 	/*
1951 	 * Since we let just about anything pass through this function,
1952 	 * fence against divide-by-zero here.
1953 	 */
1954 	if (secsize)
1955 		rdirsec = roundup(rec * 32, secsize) / secsize;
1956 	else
1957 		rdirsec = 0;
1958 
1959 	/*
1960 	 * This assignment is necessary before pc_dbdaddr() can first be
1961 	 * used. Must initialize the value here.
1962 	 */
1963 	fsp->pcfs_secsize = secsize;
1964 	fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
1965 
1966 	fsp->pcfs_mediasize = mediasize;
1967 
1968 	fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
1969 	fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
1970 	fsp->pcfs_mediadesc = bpb_get_Media(bpb);
1971 	fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
1972 	fsp->pcfs_rdirsec = rdirsec;
1973 
1974 	/*
1975 	 * Remember: All PCFS offset calculations in sectors. Before I/O
1976 	 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
1977 	 * necessary so that media with > 512Byte sector sizes work correctly.
1978 	 */
1979 	fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
1980 	fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
1981 	fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
1982 	datasec = totsec -
1983 	    (blkcnt_t)fatsec * fsp->pcfs_numfat -
1984 	    (blkcnt_t)rdirsec -
1985 	    (blkcnt_t)reserved;
1986 
1987 	DTRACE_PROBE4(fatgeometry,
1988 	    blkcnt_t, totsec, size_t, fatsec,
1989 	    size_t, rdirsec, blkcnt_t, datasec);
1990 
1991 	/*
1992 	 * UINT32_MAX is an underflow check - we calculate in "blkcnt_t" which
1993 	 * is 64bit in order to be able to catch "impossible" sector counts.
1994 	 * A sector count in FAT must fit 32bit unsigned int.
1995 	 */
1996 	if (totsec != 0 &&
1997 	    (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
1998 	    (len_t)totsec * (len_t)secsize <= mediasize &&
1999 	    datasec < totsec && datasec <= UINT32_MAX)
2000 		validflags |= BPB_TOTSEC_OK;
2001 
2002 	if (mediasize >= (len_t)datasec * (len_t)secsize)
2003 		validflags |= BPB_MEDIASZ_OK;
2004 
2005 	if (VALID_SECSIZE(secsize))
2006 		validflags |= BPB_SECSIZE_OK;
2007 	if (VALID_SPCL(fsp->pcfs_spcl))
2008 		validflags |= BPB_SECPERCLUS_OK;
2009 	if (VALID_CLSIZE(fsp->pcfs_clsize))
2010 		validflags |= BPB_CLSIZE_OK;
2011 	if (VALID_NUMFATS(fsp->pcfs_numfat))
2012 		validflags |= BPB_NUMFAT_OK;
2013 	if (VALID_RSVDSEC(reserved) && reserved < totsec)
2014 		validflags |= BPB_RSVDSECCNT_OK;
2015 	if (VALID_MEDIA(fsp->pcfs_mediadesc))
2016 		validflags |= BPB_MEDIADESC_OK;
2017 	if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2018 		validflags |= BPB_BOOTSIG16_OK;
2019 	if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2020 		validflags |= BPB_BOOTSIG32_OK;
2021 	if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2022 		validflags |= BPB_FSTYPSTR16_OK;
2023 	if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2024 		validflags |= BPB_FSTYPSTR32_OK;
2025 	if (VALID_OEMNAME(bpb_OEMName(bpb)))
2026 		validflags |= BPB_OEMNAME_OK;
2027 	if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2028 		validflags |= BPB_BKBOOTSEC_OK;
2029 	if (fsisec > 0 && fsisec <= reserved)
2030 		validflags |= BPB_FSISEC_OK;
2031 	if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2032 		validflags |= BPB_JMPBOOT_OK;
2033 	if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2034 		validflags |= BPB_FSVER_OK;
2035 	if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2036 		validflags |= BPB_VOLLAB16_OK;
2037 	if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2038 		validflags |= BPB_VOLLAB32_OK;
2039 	if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2040 		validflags |= BPB_EXTFLAGS_OK;
2041 
2042 	/*
2043 	 * Try to determine which FAT format to use.
2044 	 *
2045 	 * Calculate the number of clusters in order to determine
2046 	 * the type of FAT we are looking at.  This is the only
2047 	 * recommended way of determining FAT type, though there
2048 	 * are other hints in the data, this is the best way.
2049 	 *
2050 	 * Since we let just about "anything" pass through this function
2051 	 * without early exits, fence against divide-by-zero here.
2052 	 *
2053 	 * datasec was already validated against UINT32_MAX so we know
2054 	 * the result will not overflow the 32bit calculation.
2055 	 */
2056 	if (fsp->pcfs_spcl)
2057 		ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2058 	else
2059 		ncl = 0;
2060 
2061 	fsp->pcfs_ncluster = ncl;
2062 
2063 	/*
2064 	 * From the Microsoft FAT specification:
2065 	 * In the following example, when it says <, it does not mean <=.
2066 	 * Note also that the numbers are correct.  The first number for
2067 	 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2068 	 * and the '<' signs are not wrong.
2069 	 *
2070 	 * We "specialdetect" the corner cases, and use at least one "extra"
2071 	 * criterion to decide whether it's FAT16 or FAT32 if the cluster
2072 	 * count is dangerously close to the boundaries.
2073 	 */
2074 
2075 	if (ncl <= PCF_FIRSTCLUSTER) {
2076 		type = FAT_UNKNOWN;
2077 	} else if (ncl < 4085) {
2078 		type = FAT12;
2079 	} else if (ncl <= 4096) {
2080 		type = FAT_QUESTIONABLE;
2081 	} else if (ncl < 65525) {
2082 		type = FAT16;
2083 	} else if (ncl <= 65536) {
2084 		type = FAT_QUESTIONABLE;
2085 	} else if (ncl < PCF_LASTCLUSTER32) {
2086 		type = FAT32;
2087 	} else {
2088 		type = FAT_UNKNOWN;
2089 	}
2090 
2091 	DTRACE_PROBE4(parseBPB__initial,
2092 	    struct pcfs *, fsp, unsigned char *, bpb,
2093 	    int, validflags, fattype_t, type);
2094 
2095 recheck:
2096 	fsp->pcfs_fatsec = fatsec;
2097 
2098 	/* Do some final sanity checks for each specific type of FAT */
2099 	switch (type) {
2100 		case FAT12:
2101 			if (rec != 0)
2102 				validflags |= BPB_ROOTENTCNT_OK;
2103 			if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2104 			    bpb_get_TotSec16(bpb) == 0)
2105 				validflags |= BPB_TOTSEC16_OK;
2106 			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2107 			    bpb_get_TotSec32(bpb) == 0)
2108 				validflags |= BPB_TOTSEC32_OK;
2109 			if (bpb_get_FatSz16(bpb) == fatsec)
2110 				validflags |= BPB_FATSZ16_OK;
2111 			if (fatsec * secsize >= ncl * 3 / 2)
2112 				validflags |= BPB_FATSZ_OK;
2113 			if (ncl < 4085)
2114 				validflags |= BPB_NCLUSTERS_OK;
2115 
2116 			fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2117 			fsp->pcfs_rootblksize =
2118 			    fsp->pcfs_rdirsec * secsize;
2119 			fsp->pcfs_fsistart = 0;
2120 
2121 			if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2122 				type = FAT_UNKNOWN;
2123 			break;
2124 		case FAT16:
2125 			if (rec != 0)
2126 				validflags |= BPB_ROOTENTCNT_OK;
2127 			if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2128 			    bpb_get_TotSec16(bpb) == 0)
2129 				validflags |= BPB_TOTSEC16_OK;
2130 			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2131 			    bpb_get_TotSec32(bpb) == 0)
2132 				validflags |= BPB_TOTSEC32_OK;
2133 			if (bpb_get_FatSz16(bpb) == fatsec)
2134 				validflags |= BPB_FATSZ16_OK;
2135 			if (fatsec * secsize >= ncl * 2)
2136 				validflags |= BPB_FATSZ_OK;
2137 			if (ncl >= 4085 && ncl < 65525)
2138 				validflags |= BPB_NCLUSTERS_OK;
2139 
2140 			fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2141 			fsp->pcfs_rootblksize =
2142 			    fsp->pcfs_rdirsec * secsize;
2143 			fsp->pcfs_fsistart = 0;
2144 
2145 			if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2146 				type = FAT_UNKNOWN;
2147 			break;
2148 		case FAT32:
2149 			if (rec == 0)
2150 				validflags |= BPB_ROOTENTCNT_OK;
2151 			if (bpb_get_TotSec16(bpb) == 0)
2152 				validflags |= BPB_TOTSEC16_OK;
2153 			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2154 				validflags |= BPB_TOTSEC32_OK;
2155 			if (bpb_get_FatSz16(bpb) == 0)
2156 				validflags |= BPB_FATSZ16_OK;
2157 			if (bpb_get_FatSz32(bpb) == fatsec)
2158 				validflags |= BPB_FATSZ32_OK;
2159 			if (fatsec * secsize >= ncl * 4)
2160 				validflags |= BPB_FATSZ_OK;
2161 			if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2162 				validflags |= BPB_NCLUSTERS_OK;
2163 
2164 			fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2165 			fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2166 			fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2167 			if (validflags & BPB_FSISEC_OK)
2168 				fsp->pcfs_flags |= PCFS_FSINFO_OK;
2169 			fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2170 			if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2171 				validflags |= BPB_ROOTCLUSTER_OK;
2172 
2173 			/*
2174 			 * Current PCFS code only works if 'pcfs_rdirstart'
2175 			 * contains the root cluster number on FAT32.
2176 			 * That's a mis-use and would better be changed.
2177 			 */
2178 			fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2179 
2180 			if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2181 				type = FAT_UNKNOWN;
2182 			break;
2183 		case FAT_QUESTIONABLE:
2184 			type = secondaryBPBChecks(fsp, bpb, secsize);
2185 			goto recheck;
2186 		default:
2187 			ASSERT(type == FAT_UNKNOWN);
2188 			break;
2189 	}
2190 
2191 	ASSERT(type != FAT_QUESTIONABLE);
2192 
2193 	fsp->pcfs_fattype = type;
2194 
2195 	if (valid)
2196 		*valid = validflags;
2197 
2198 	DTRACE_PROBE4(parseBPB__final,
2199 	    struct pcfs *, fsp, unsigned char *, bpb,
2200 	    int, validflags, fattype_t, type);
2201 
2202 	if (type != FAT_UNKNOWN) {
2203 		ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2204 		ASSERT(ISP2(secsize / DEV_BSIZE));
2205 		return (1);
2206 	}
2207 
2208 	return (0);
2209 }
2210 
2211 
2212 /*
2213  * Detect the device's native block size (sector size).
2214  *
2215  * Test whether the device is:
2216  *	- a floppy device from a known controller type via DKIOCINFO
2217  *	- a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2218  *	- a PCMCIA sram memory card (pseudofloppy) using pcram(7d)
2219  *	- a USB floppy drive (identified by drive geometry)
2220  *
2221  * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2222  * to minimize risks due to slow I/O and user hotplugging / device ejection.
2223  *
2224  * This might be a bit wasteful on kernel stack space; if anyone's
2225  * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2226  */
2227 static void
2228 pcfs_device_getinfo(struct pcfs *fsp)
2229 {
2230 	dev_t			rdev = fsp->pcfs_xdev;
2231 	int			error;
2232 	union {
2233 		struct dk_minfo		mi;
2234 		struct dk_cinfo		ci;
2235 		struct dk_geom		gi;
2236 		struct fd_char		fc;
2237 	} arg;				/* save stackspace ... */
2238 	intptr_t argp = (intptr_t)&arg;
2239 	ldi_handle_t		lh;
2240 	ldi_ident_t		li;
2241 	int isfloppy, isremoveable, ishotpluggable;
2242 	cred_t			*cr = CRED();
2243 
2244 	if (ldi_ident_from_dev(rdev, &li))
2245 		goto out;
2246 
2247 	error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2248 	ldi_ident_release(li);
2249 	if (error)
2250 		goto out;
2251 
2252 	/*
2253 	 * Not sure if this could possibly happen. It'd be a bit like
2254 	 * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2255 	 * expecting it, needs some thought if triggered ...
2256 	 */
2257 	ASSERT(fsp->pcfs_xdev == rdev);
2258 
2259 	/*
2260 	 * Check for removeable/hotpluggable media.
2261 	 */
2262 	if (ldi_ioctl(lh, DKIOCREMOVABLE,
2263 	    (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2264 		isremoveable = 0;
2265 	}
2266 	if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2267 	    (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2268 		ishotpluggable = 0;
2269 	}
2270 
2271 	/*
2272 	 * Make sure we don't use "half-initialized" values if the ioctls fail.
2273 	 */
2274 	if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2275 		bzero(&arg, sizeof (arg));
2276 		fsp->pcfs_mediasize = 0;
2277 	} else {
2278 		fsp->pcfs_mediasize =
2279 		    (len_t)arg.mi.dki_lbsize *
2280 		    (len_t)arg.mi.dki_capacity;
2281 	}
2282 
2283 	if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2284 		if (fsp->pcfs_secsize == 0) {
2285 			fsp->pcfs_secsize = arg.mi.dki_lbsize;
2286 			fsp->pcfs_sdshift =
2287 			    ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2288 		} else {
2289 			PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2290 			    "%d, device (%x.%x), different from user-provided "
2291 			    "%d. User override - ignoring autodetect result.\n",
2292 			    arg.mi.dki_lbsize,
2293 			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2294 			    fsp->pcfs_secsize);
2295 		}
2296 	} else if (arg.mi.dki_lbsize) {
2297 		PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2298 		    "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2299 		    "Ignoring autodetect result.\n",
2300 		    arg.mi.dki_lbsize,
2301 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2302 	}
2303 
2304 	/*
2305 	 * We treat the following media types as a floppy by default.
2306 	 */
2307 	isfloppy =
2308 	    (arg.mi.dki_media_type == DK_FLOPPY ||
2309 	    arg.mi.dki_media_type == DK_ZIP ||
2310 	    arg.mi.dki_media_type == DK_JAZ);
2311 
2312 	/*
2313 	 * if this device understands fdio(7I) requests it's
2314 	 * obviously a floppy drive.
2315 	 */
2316 	if (!isfloppy &&
2317 	    !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2318 		isfloppy = 1;
2319 
2320 	/*
2321 	 * some devices (PCMCIA pseudofloppies) we like to treat
2322 	 * as floppies, but they don't understand fdio(7I) requests.
2323 	 */
2324 	if (!isfloppy &&
2325 	    !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2326 	    (arg.ci.dki_ctype == DKC_WDC2880 ||
2327 	    arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2328 	    arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2329 	    arg.ci.dki_ctype == DKC_INTEL82077 ||
2330 	    (arg.ci.dki_ctype == DKC_PCMCIA_MEM &&
2331 	    arg.ci.dki_flags & DKI_PCMCIA_PFD)))
2332 		isfloppy = 1;
2333 
2334 	/*
2335 	 * This is the "final fallback" test - media with
2336 	 * 2 heads and 80 cylinders are assumed to be floppies.
2337 	 * This is normally true for USB floppy drives ...
2338 	 */
2339 	if (!isfloppy &&
2340 	    !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2341 	    (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2342 		isfloppy = 1;
2343 
2344 	/*
2345 	 * This is similar to the "old" PCFS code that sets this flag
2346 	 * just based on the media descriptor being 0xf8 (MD_FIXED).
2347 	 * Should be re-worked. We really need some specialcasing for
2348 	 * removeable media.
2349 	 */
2350 	if (!isfloppy) {
2351 		fsp->pcfs_flags |= PCFS_NOCHK;
2352 	}
2353 
2354 	/*
2355 	 * We automatically disable access time updates if the medium is
2356 	 * removeable and/or hotpluggable, and the admin did not explicitly
2357 	 * request access time updates (via the "atime" mount option).
2358 	 * The majority of flash-based media should fit this category.
2359 	 * Minimizing write access extends the lifetime of your memory stick !
2360 	 */
2361 	if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2362 	    (isremoveable || ishotpluggable | isfloppy)) {
2363 		fsp->pcfs_flags |= PCFS_NOATIME;
2364 	}
2365 
2366 	(void) ldi_close(lh, FREAD, cr);
2367 out:
2368 	if (fsp->pcfs_secsize == 0) {
2369 		PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2370 		    "device (%x.%x) failed, no user-provided fallback. "
2371 		    "Using %d bytes.\n",
2372 		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2373 		    DEV_BSIZE);
2374 		fsp->pcfs_secsize = DEV_BSIZE;
2375 		fsp->pcfs_sdshift = 0;
2376 	}
2377 	ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2378 	ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2379 }
2380 
2381 /*
2382  * Get the FAT type for the DOS medium.
2383  *
2384  * -------------------------
2385  * According to Microsoft:
2386  *   The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2387  * count of clusters on the volume and nothing else.
2388  * -------------------------
2389  *
2390  */
2391 static int
2392 pc_getfattype(struct pcfs *fsp)
2393 {
2394 	int error = 0;
2395 	buf_t *bp = NULL;
2396 	struct vnode *devvp = fsp->pcfs_devvp;
2397 	dev_t	dev = devvp->v_rdev;
2398 
2399 	/*
2400 	 * Detect the native block size of the medium, and attempt to
2401 	 * detect whether the medium is removeable.
2402 	 * We do treat removeable media (floppies, PCMCIA memory cards,
2403 	 * USB and FireWire disks) differently wrt. to the frequency
2404 	 * and synchronicity of FAT updates.
2405 	 * We need to know the media block size in order to be able to
2406 	 * parse the partition table.
2407 	 */
2408 	pcfs_device_getinfo(fsp);
2409 
2410 	/*
2411 	 * Unpartitioned media (floppies and some removeable devices)
2412 	 * don't have a partition table, the FAT BPB is at disk block 0.
2413 	 * Start out by reading block 0.
2414 	 */
2415 	fsp->pcfs_dosstart = 0;
2416 	bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2417 
2418 	if (error = geterror(bp))
2419 		goto out;
2420 
2421 	/*
2422 	 * If a logical drive number is requested, parse the partition table
2423 	 * and attempt to locate it. Otherwise, proceed immediately to the
2424 	 * BPB check. findTheDrive(), if successful, returns the disk block
2425 	 * number where the requested partition starts in "startsec".
2426 	 */
2427 	if (fsp->pcfs_ldrive != 0) {
2428 		PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2429 		    "device (%x,%x):%d to find BPB\n",
2430 		    getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2431 
2432 		if (error = findTheDrive(fsp, &bp))
2433 			goto out;
2434 
2435 		ASSERT(fsp->pcfs_dosstart != 0);
2436 
2437 		brelse(bp);
2438 		bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2439 		    fsp->pcfs_secsize);
2440 		if (error = geterror(bp))
2441 			goto out;
2442 	}
2443 
2444 	/*
2445 	 * Validate the BPB and fill in the instance structure.
2446 	 */
2447 	if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2448 		PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2449 		    "device (%x.%x):%d, disk LBA %u\n",
2450 		    getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2451 		    (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2452 		error = EINVAL;
2453 		goto out;
2454 	}
2455 
2456 	ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2457 
2458 out:
2459 	/*
2460 	 * Release the buffer used
2461 	 */
2462 	if (bp != NULL)
2463 		brelse(bp);
2464 	return (error);
2465 }
2466 
2467 
2468 /*
2469  * Get the file allocation table.
2470  * If there is an old FAT, invalidate it.
2471  */
2472 int
2473 pc_getfat(struct pcfs *fsp)
2474 {
2475 	struct buf *bp = NULL;
2476 	uchar_t *fatp = NULL;
2477 	uchar_t *fat_changemap = NULL;
2478 	int error;
2479 	int fat_changemapsize;
2480 	int flags = 0;
2481 	int nfat;
2482 	int altfat_mustmatch = 0;
2483 	int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2484 
2485 	if (fsp->pcfs_fatp) {
2486 		/*
2487 		 * There is a FAT in core.
2488 		 * If there are open file pcnodes or we have modified it or
2489 		 * it hasn't timed out yet use the in core FAT.
2490 		 * Otherwise invalidate it and get a new one
2491 		 */
2492 #ifdef notdef
2493 		if (fsp->pcfs_frefs ||
2494 		    (fsp->pcfs_flags & PCFS_FATMOD) ||
2495 		    (gethrestime_sec() < fsp->pcfs_fattime)) {
2496 			return (0);
2497 		} else {
2498 			mutex_enter(&pcfslock);
2499 			pc_invalfat(fsp);
2500 			mutex_exit(&pcfslock);
2501 		}
2502 #endif /* notdef */
2503 		return (0);
2504 	}
2505 
2506 	/*
2507 	 * Get FAT and check it for validity
2508 	 */
2509 	fatp = kmem_alloc(fatsize, KM_SLEEP);
2510 	error = pc_readfat(fsp, fatp);
2511 	if (error) {
2512 		flags = B_ERROR;
2513 		goto out;
2514 	}
2515 	fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2516 	fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2517 	fsp->pcfs_fatp = fatp;
2518 	fsp->pcfs_fat_changemapsize = fat_changemapsize;
2519 	fsp->pcfs_fat_changemap = fat_changemap;
2520 
2521 	/*
2522 	 * The only definite signature check is that the
2523 	 * media descriptor byte should match the first byte
2524 	 * of the FAT block.
2525 	 */
2526 	if (fatp[0] != fsp->pcfs_mediadesc) {
2527 		cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2528 		    "media descriptor %x, FAT[0] lowbyte %x\n",
2529 		    (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2530 		cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2531 		altfat_mustmatch = 1;
2532 	}
2533 
2534 	/*
2535 	 * Get alternate FATs and check for consistency
2536 	 * This is an inlined version of pc_readfat().
2537 	 * Since we're only comparing FAT and alternate FAT,
2538 	 * there's no reason to let pc_readfat() copy data out
2539 	 * of the buf. Instead, compare in-situ, one cluster
2540 	 * at a time.
2541 	 */
2542 	for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2543 		size_t startsec;
2544 		size_t off;
2545 
2546 		startsec = pc_dbdaddr(fsp,
2547 		    fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2548 
2549 		for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2550 			daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2551 			    pc_cltodb(fsp, pc_lblkno(fsp, off)));
2552 
2553 			bp = bread(fsp->pcfs_xdev, fatblk,
2554 			    MIN(fsp->pcfs_clsize, fatsize - off));
2555 			if (bp->b_flags & (B_ERROR | B_STALE)) {
2556 				cmn_err(CE_NOTE,
2557 				    "!pcfs: alternate FAT #%d (start LBA %p)"
2558 				    " read error at offset %ld on device"
2559 				    " (%x.%x):%d",
2560 				    nfat, (void *)(uintptr_t)startsec, off,
2561 				    getmajor(fsp->pcfs_xdev),
2562 				    getminor(fsp->pcfs_xdev),
2563 				    fsp->pcfs_ldrive);
2564 				flags = B_ERROR;
2565 				error = EIO;
2566 				goto out;
2567 			}
2568 			bp->b_flags |= B_STALE | B_AGE;
2569 			if (bcmp(bp->b_un.b_addr, fatp + off,
2570 			    MIN(fsp->pcfs_clsize, fatsize - off))) {
2571 				cmn_err(CE_NOTE,
2572 				    "!pcfs: alternate FAT #%d (start LBA %p)"
2573 				    " corrupted at offset %ld on device"
2574 				    " (%x.%x):%d",
2575 				    nfat, (void *)(uintptr_t)startsec, off,
2576 				    getmajor(fsp->pcfs_xdev),
2577 				    getminor(fsp->pcfs_xdev),
2578 				    fsp->pcfs_ldrive);
2579 				if (altfat_mustmatch) {
2580 					flags = B_ERROR;
2581 					error = EIO;
2582 					goto out;
2583 				}
2584 			}
2585 			brelse(bp);
2586 			bp = NULL;	/* prevent double release */
2587 		}
2588 	}
2589 
2590 	fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2591 	fsp->pcfs_fatjustread = 1;
2592 
2593 	/*
2594 	 * Retrieve FAT32 fsinfo sector.
2595 	 * A failure to read this is not fatal to accessing the volume.
2596 	 * It simply means operations that count or search free blocks
2597 	 * will have to do a full FAT walk, vs. a possibly quicker lookup
2598 	 * of the summary information.
2599 	 * Hence, we log a message but return success overall after this point.
2600 	 */
2601 	if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2602 		struct fat_od_fsi *fsinfo_disk;
2603 
2604 		bp = bread(fsp->pcfs_xdev,
2605 		    pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2606 		fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2607 		if (bp->b_flags & (B_ERROR | B_STALE) ||
2608 		    !FSISIG_OK(fsinfo_disk)) {
2609 			cmn_err(CE_NOTE,
2610 			    "!pcfs: error reading fat32 fsinfo from "
2611 			    "device (%x.%x):%d, block %lld",
2612 			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2613 			    fsp->pcfs_ldrive,
2614 			    (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2615 			fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2616 			fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2617 			fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2618 		} else {
2619 			bp->b_flags |= B_STALE | B_AGE;
2620 			fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2621 			fsp->pcfs_fsinfo.fs_free_clusters =
2622 			    LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2623 			fsp->pcfs_fsinfo.fs_next_free =
2624 			    LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2625 		}
2626 		brelse(bp);
2627 		bp = NULL;
2628 	}
2629 
2630 	if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2631 		fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2632 	else
2633 		fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2634 
2635 	return (0);
2636 
2637 out:
2638 	cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2639 	if (bp)
2640 		brelse(bp);
2641 	if (fatp)
2642 		kmem_free(fatp, fatsize);
2643 	if (fat_changemap)
2644 		kmem_free(fat_changemap, fat_changemapsize);
2645 
2646 	if (flags) {
2647 		pc_mark_irrecov(fsp);
2648 	}
2649 	return (error);
2650 }
2651