xref: /titanic_51/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 14ea4bb737263733ad80a36b4f73f681c30a6b45)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/t_lock.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/user.h>
33 #include <sys/buf.h>
34 #include <sys/stat.h>
35 #include <sys/vfs.h>
36 #include <sys/dirent.h>
37 #include <sys/vnode.h>
38 #include <sys/proc.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/uio.h>
42 #include <sys/fs/pc_label.h>
43 #include <sys/fs/pc_fs.h>
44 #include <sys/fs/pc_dir.h>
45 #include <sys/fs/pc_node.h>
46 #include <sys/mman.h>
47 #include <sys/pathname.h>
48 #include <sys/vmsystm.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/statvfs.h>
52 #include <sys/unistd.h>
53 #include <sys/kmem.h>
54 #include <sys/conf.h>
55 #include <sys/flock.h>
56 #include <sys/policy.h>
57 #include <sys/sdt.h>
58 
59 #include <vm/seg.h>
60 #include <vm/page.h>
61 #include <vm/pvn.h>
62 #include <vm/seg_map.h>
63 #include <vm/seg_vn.h>
64 #include <vm/hat.h>
65 #include <vm/as.h>
66 #include <vm/seg_kmem.h>
67 
68 #include <fs/fs_subr.h>
69 
70 static int pcfs_open(struct vnode **, int, struct cred *);
71 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *);
72 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
73 	struct caller_context *);
74 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
75 	struct caller_context *);
76 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *);
77 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
78 	caller_context_t *);
79 static int pcfs_access(struct vnode *, int, int, struct cred *);
80 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
81 	struct pathname *, int, struct vnode *, struct cred *);
82 static int pcfs_create(struct vnode *, char *, struct vattr *,
83 	enum vcexcl, int mode, struct vnode **, struct cred *, int);
84 static int pcfs_remove(struct vnode *, char *, struct cred *);
85 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
86 	struct cred *);
87 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
88 	struct cred *);
89 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *);
90 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *);
91 static int pcfs_fsync(struct vnode *, int, struct cred *);
92 static void pcfs_inactive(struct vnode *, struct cred *);
93 static int pcfs_fid(struct vnode *vp, struct fid *fidp);
94 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
95 	offset_t, cred_t *, caller_context_t *);
96 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
97 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
98 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
99 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
100 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *);
101 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
102 	uchar_t, uchar_t, uint_t, struct cred *);
103 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
104 	size_t, uchar_t, uchar_t, uint_t, struct cred *);
105 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
106 	size_t, uint_t, uint_t, uint_t, struct cred *);
107 static int pcfs_seek(struct vnode *, offset_t, offset_t *);
108 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *);
109 
110 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
111 	struct cred *);
112 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
113 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase);
114 
115 extern krwlock_t pcnodes_lock;
116 
/* Round r up to the next multiple of sizeof (long long). */
#define	lround(r)	\
	(((r) + sizeof (long long) - 1) & (~(sizeof (long long) - 1)))
118 
/*
 * vnode operation vectors: one for files, one for directories.
 */
struct vnodeops *pcfs_fvnodeops;	/* files */
struct vnodeops *pcfs_dvnodeops;	/* directories */
124 
125 const fs_operation_def_t pcfs_fvnodeops_template[] = {
126 	VOPNAME_OPEN, pcfs_open,
127 	VOPNAME_CLOSE, pcfs_close,
128 	VOPNAME_READ, pcfs_read,
129 	VOPNAME_WRITE, pcfs_write,
130 	VOPNAME_GETATTR, pcfs_getattr,
131 	VOPNAME_SETATTR, pcfs_setattr,
132 	VOPNAME_ACCESS, pcfs_access,
133 	VOPNAME_FSYNC, pcfs_fsync,
134 	VOPNAME_INACTIVE, (fs_generic_func_p) pcfs_inactive,
135 	VOPNAME_FID, pcfs_fid,
136 	VOPNAME_SEEK, pcfs_seek,
137 	VOPNAME_SPACE, pcfs_space,
138 	VOPNAME_GETPAGE, pcfs_getpage,
139 	VOPNAME_PUTPAGE, pcfs_putpage,
140 	VOPNAME_MAP, (fs_generic_func_p) pcfs_map,
141 	VOPNAME_ADDMAP, (fs_generic_func_p) pcfs_addmap,
142 	VOPNAME_DELMAP, pcfs_delmap,
143 	VOPNAME_PATHCONF, pcfs_pathconf,
144 	VOPNAME_VNEVENT, fs_vnevent_support,
145 	NULL, NULL
146 };
147 
148 const fs_operation_def_t pcfs_dvnodeops_template[] = {
149 	VOPNAME_OPEN, pcfs_open,
150 	VOPNAME_CLOSE, pcfs_close,
151 	VOPNAME_GETATTR, pcfs_getattr,
152 	VOPNAME_SETATTR, pcfs_setattr,
153 	VOPNAME_ACCESS, pcfs_access,
154 	VOPNAME_LOOKUP, pcfs_lookup,
155 	VOPNAME_CREATE, pcfs_create,
156 	VOPNAME_REMOVE, pcfs_remove,
157 	VOPNAME_RENAME, pcfs_rename,
158 	VOPNAME_MKDIR, pcfs_mkdir,
159 	VOPNAME_RMDIR, pcfs_rmdir,
160 	VOPNAME_READDIR, pcfs_readdir,
161 	VOPNAME_FSYNC, pcfs_fsync,
162 	VOPNAME_INACTIVE, (fs_generic_func_p) pcfs_inactive,
163 	VOPNAME_FID, pcfs_fid,
164 	VOPNAME_SEEK, pcfs_seek,
165 	VOPNAME_PATHCONF, pcfs_pathconf,
166 	VOPNAME_VNEVENT, fs_vnevent_support,
167 	NULL, NULL
168 };
169 
170 
/*
 * Open entry point.  Nothing is done here; every argument is ignored
 * and the call always succeeds.
 */
/*ARGSUSED*/
static int
pcfs_open(struct vnode **vpp, int flag, struct cred *cr)
{
	return (0);
}
180 
181 /*
182  * files are sync'ed on close to keep floppy up to date
183  */
184 
185 /*ARGSUSED*/
186 static int
187 pcfs_close(
188 	struct vnode *vp,
189 	int flag,
190 	int count,
191 	offset_t offset,
192 	struct cred *cr)
193 {
194 	return (0);
195 }
196 
197 /*ARGSUSED*/
198 static int
199 pcfs_read(
200 	struct vnode *vp,
201 	struct uio *uiop,
202 	int ioflag,
203 	struct cred *cr,
204 	struct caller_context *ct)
205 {
206 	struct pcfs *fsp;
207 	struct pcnode *pcp;
208 	int error;
209 
210 	fsp = VFSTOPCFS(vp->v_vfsp);
211 	if (error = pc_verify(fsp))
212 		return (error);
213 	error = pc_lockfs(fsp, 0, 0);
214 	if (error)
215 		return (error);
216 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
217 		pc_unlockfs(fsp);
218 		return (EIO);
219 	}
220 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
221 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
222 		pcp->pc_flags |= PC_ACC;
223 		pc_mark_acc(pcp);
224 	}
225 	pc_unlockfs(fsp);
226 	if (error) {
227 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
228 	}
229 	return (error);
230 }
231 
232 /*ARGSUSED*/
233 static int
234 pcfs_write(
235 	struct vnode *vp,
236 	struct uio *uiop,
237 	int ioflag,
238 	struct cred *cr,
239 	struct caller_context *ct)
240 {
241 	struct pcfs *fsp;
242 	struct pcnode *pcp;
243 	int error;
244 
245 	fsp = VFSTOPCFS(vp->v_vfsp);
246 	if (error = pc_verify(fsp))
247 		return (error);
248 	error = pc_lockfs(fsp, 0, 0);
249 	if (error)
250 		return (error);
251 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
252 		pc_unlockfs(fsp);
253 		return (EIO);
254 	}
255 	if (ioflag & FAPPEND) {
256 		/*
257 		 * in append mode start at end of file.
258 		 */
259 		uiop->uio_loffset = pcp->pc_size;
260 	}
261 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
262 	pcp->pc_flags |= PC_MOD;
263 	pc_mark_mod(pcp);
264 	if (ioflag & (FSYNC|FDSYNC))
265 		(void) pc_nodeupdate(pcp);
266 
267 	pc_unlockfs(fsp);
268 	if (error) {
269 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
270 	}
271 	return (error);
272 }
273 
274 /*
275  * read or write a vnode
276  */
277 static int
278 rwpcp(
279 	struct pcnode *pcp,
280 	struct uio *uio,
281 	enum uio_rw rw,
282 	int ioflag)
283 {
284 	struct vnode *vp = PCTOV(pcp);
285 	struct pcfs *fsp;
286 	daddr_t bn;			/* phys block number */
287 	int n;
288 	offset_t off;
289 	caddr_t base;
290 	int mapon, pagecreate;
291 	int newpage;
292 	int error = 0;
293 	rlim64_t limit = uio->uio_llimit;
294 	int oresid = uio->uio_resid;
295 
296 	/*
297 	 * If the filesystem was umounted by force, return immediately.
298 	 */
299 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
300 		return (EIO);
301 
302 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
303 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
304 
305 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
306 	ASSERT(vp->v_type == VREG);
307 
308 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
309 		return (0);
310 	}
311 
312 	if (uio->uio_loffset < 0)
313 		return (EINVAL);
314 
315 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
316 		limit = MAXOFFSET_T;
317 
318 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
319 		proc_t *p = ttoproc(curthread);
320 
321 		mutex_enter(&p->p_lock);
322 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
323 		    p, RCA_UNSAFE_SIGINFO);
324 		mutex_exit(&p->p_lock);
325 		return (EFBIG);
326 	}
327 
328 	/* the following condition will occur only for write */
329 
330 	if (uio->uio_loffset >= UINT32_MAX)
331 		return (EFBIG);
332 
333 	if (uio->uio_resid == 0)
334 		return (0);
335 
336 	if (limit > UINT32_MAX)
337 		limit = UINT32_MAX;
338 
339 	fsp = VFSTOPCFS(vp->v_vfsp);
340 	if (fsp->pcfs_flags & PCFS_IRRECOV)
341 		return (EIO);
342 
343 	do {
344 		/*
345 		 * Assignments to "n" in this block may appear
346 		 * to overflow in some cases.  However, after careful
347 		 * analysis it was determined that all assignments to
348 		 * "n" serve only to make "n" smaller.  Since "n"
349 		 * starts out as no larger than MAXBSIZE, "int" is
350 		 * safe.
351 		 */
352 		off = uio->uio_loffset & MAXBMASK;
353 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
354 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
355 		if (rw == UIO_READ) {
356 			offset_t diff;
357 
358 			diff = pcp->pc_size - uio->uio_loffset;
359 			if (diff <= 0)
360 				return (0);
361 			if (diff < n)
362 				n = (int)diff;
363 		}
364 		/*
365 		 * Compare limit with the actual offset + n, not the
366 		 * rounded down offset "off" or we will overflow
367 		 * the maximum file size after all.
368 		 */
369 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
370 			if (uio->uio_loffset >= limit) {
371 				error = EFBIG;
372 				break;
373 			}
374 			n = (int)(limit - uio->uio_loffset);
375 		}
376 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
377 		pagecreate = 0;
378 		newpage = 0;
379 		if (rw == UIO_WRITE) {
380 			/*
381 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
382 			 * with one page at a time, instead of one MAXBSIZE
383 			 * at a time, so we can fully explore pagecreate
384 			 * optimization??
385 			 */
386 			if (uio->uio_loffset + n > pcp->pc_size) {
387 				uint_t ncl, lcn;
388 
389 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
390 					fsp->pcfs_clsize);
391 				if (uio->uio_loffset > pcp->pc_size &&
392 				    ncl < (uint_t)howmany(uio->uio_loffset,
393 							fsp->pcfs_clsize)) {
394 					/*
395 					 * Allocate and zerofill skipped
396 					 * clusters. This may not be worth the
397 					 * effort since a small lseek beyond
398 					 * eof but still within the cluster
399 					 * will not be zeroed out.
400 					 */
401 					lcn = pc_lblkno(fsp, uio->uio_loffset);
402 					error = pc_balloc(pcp, (daddr_t)lcn,
403 					    1, &bn);
404 					ncl = lcn + 1;
405 				}
406 				if (!error &&
407 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
408 							fsp->pcfs_clsize))
409 					/*
410 					 * allocate clusters w/o zerofill
411 					 */
412 					error = pc_balloc(pcp,
413 					    (daddr_t)pc_lblkno(fsp,
414 					    uio->uio_loffset + n - 1),
415 					    0, &bn);
416 
417 				pcp->pc_flags |= PC_CHG;
418 
419 				if (error) {
420 					pc_cluster32_t ncl;
421 					int nerror;
422 
423 					/*
424 					 * figure out new file size from
425 					 * cluster chain length. If this
426 					 * is detected to loop, the chain
427 					 * is corrupted and we'd better
428 					 * keep our fingers off that file.
429 					 */
430 					nerror = pc_fileclsize(fsp,
431 					    pcp->pc_scluster, &ncl);
432 					if (nerror) {
433 						PC_DPRINTF1(2,
434 						    "cluster chain "
435 						    "corruption, "
436 						    "scluster=%d\n",
437 						    pcp->pc_scluster);
438 						pcp->pc_size = 0;
439 						pcp->pc_flags |= PC_INVAL;
440 						error = nerror;
441 						(void) segmap_release(segkmap,
442 						    base, 0);
443 						break;
444 					}
445 					pcp->pc_size = fsp->pcfs_clsize * ncl;
446 
447 					if (error == ENOSPC &&
448 					    (pcp->pc_size - uio->uio_loffset)
449 						> 0) {
450 						PC_DPRINTF3(2, "rwpcp ENOSPC "
451 						    "off=%lld n=%d size=%d\n",
452 						    uio->uio_loffset,
453 						    n, pcp->pc_size);
454 						n = (int)(pcp->pc_size -
455 							uio->uio_loffset);
456 					} else {
457 						PC_DPRINTF1(1,
458 						    "rwpcp error1=%d\n", error);
459 						(void) segmap_release(segkmap,
460 						    base, 0);
461 						break;
462 					}
463 				} else {
464 					pcp->pc_size =
465 					    (uint_t)(uio->uio_loffset + n);
466 				}
467 				if (mapon == 0) {
468 					newpage = segmap_pagecreate(segkmap,
469 						base, (size_t)n, 0);
470 					pagecreate = 1;
471 				}
472 			} else if (n == MAXBSIZE) {
473 				newpage = segmap_pagecreate(segkmap, base,
474 						(size_t)n, 0);
475 				pagecreate = 1;
476 			}
477 		}
478 		error = uiomove(base + mapon, (size_t)n, rw, uio);
479 
480 		if (pagecreate && uio->uio_loffset <
481 			roundup(off + mapon + n, PAGESIZE)) {
482 			offset_t nzero, nmoved;
483 
484 			nmoved = uio->uio_loffset - (off + mapon);
485 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
486 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
487 		}
488 
489 		/*
490 		 * Unlock the pages which have been allocated by
491 		 * page_create_va() in segmap_pagecreate().
492 		 */
493 		if (newpage)
494 			segmap_pageunlock(segkmap, base, (size_t)n,
495 				rw == UIO_WRITE ? S_WRITE : S_READ);
496 
497 		if (error) {
498 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
499 			/*
500 			 * If we failed on a write, we may have already
501 			 * allocated file blocks as well as pages.  It's hard
502 			 * to undo the block allocation, but we must be sure
503 			 * to invalidate any pages that may have been
504 			 * allocated.
505 			 */
506 			if (rw == UIO_WRITE)
507 				(void) segmap_release(segkmap, base, SM_INVAL);
508 			else
509 				(void) segmap_release(segkmap, base, 0);
510 		} else {
511 			uint_t flags = 0;
512 
513 			if (rw == UIO_READ) {
514 				if (n + mapon == MAXBSIZE ||
515 				    uio->uio_loffset == pcp->pc_size)
516 					flags = SM_DONTNEED;
517 			} else if (ioflag & (FSYNC|FDSYNC)) {
518 				flags = SM_WRITE;
519 			} else if (n + mapon == MAXBSIZE) {
520 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
521 			}
522 			error = segmap_release(segkmap, base, flags);
523 		}
524 
525 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
526 
527 	if (oresid != uio->uio_resid)
528 		error = 0;
529 	return (error);
530 }
531 
532 /*ARGSUSED*/
533 static int
534 pcfs_getattr(
535 	struct vnode *vp,
536 	struct vattr *vap,
537 	int flags,
538 	struct cred *cr)
539 {
540 	struct pcnode *pcp;
541 	struct pcfs *fsp;
542 	int error;
543 	char attr;
544 	struct pctime atime;
545 	int64_t unixtime;
546 
547 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
548 
549 	fsp = VFSTOPCFS(vp->v_vfsp);
550 	error = pc_lockfs(fsp, 0, 0);
551 	if (error)
552 		return (error);
553 
554 	/*
555 	 * Note that we don't check for "invalid node" (PC_INVAL) here
556 	 * only in order to make stat() succeed. We allow no I/O on such
557 	 * a node, but do allow to check for its existance.
558 	 */
559 	if ((pcp = VTOPC(vp)) == NULL) {
560 		pc_unlockfs(fsp);
561 		return (EIO);
562 	}
563 	/*
564 	 * Copy from pcnode.
565 	 */
566 	vap->va_type = vp->v_type;
567 	attr = pcp->pc_entry.pcd_attr;
568 	if (PCA_IS_HIDDEN(fsp, attr))
569 		vap->va_mode = 0;
570 	else if (attr & PCA_LABEL)
571 		vap->va_mode = 0444;
572 	else if (attr & PCA_RDONLY)
573 		vap->va_mode = 0555;
574 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
575 		vap->va_mode = 0755;
576 	} else {
577 		vap->va_mode = 0777;
578 	}
579 
580 	if (attr & PCA_DIR)
581 		vap->va_mode |= S_IFDIR;
582 	else
583 		vap->va_mode |= S_IFREG;
584 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
585 		vap->va_uid = 0;
586 		vap->va_gid = 0;
587 	} else {
588 		vap->va_uid = crgetuid(cr);
589 		vap->va_gid = crgetgid(cr);
590 	}
591 	vap->va_fsid = vp->v_vfsp->vfs_dev;
592 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
593 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
594 	    pc_getstartcluster(fsp, &pcp->pc_entry), fsp->pcfs_entps);
595 	vap->va_nlink = 1;
596 	vap->va_size = (u_offset_t)pcp->pc_size;
597 
598 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
599 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
600 		if (unixtime > INT32_MAX)
601 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
602 		unixtime = MIN(unixtime, INT32_MAX);
603 	} else if (unixtime > INT32_MAX &&
604 	    get_udatamodel() == DATAMODEL_ILP32) {
605 		pc_unlockfs(fsp);
606 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
607 		return (EOVERFLOW);
608 	}
609 
610 	vap->va_mtime.tv_sec = (time_t)unixtime;
611 	vap->va_mtime.tv_nsec = 0;
612 
613 	/*
614 	 * FAT doesn't know about POSIX ctime.
615 	 * Best approximation is to always set it to mtime.
616 	 */
617 	vap->va_ctime = vap->va_mtime;
618 
619 	/*
620 	 * FAT only stores "last access date". If that's the
621 	 * same as the date of last modification then the time
622 	 * of last access is known. Otherwise, use midnight.
623 	 */
624 	atime.pct_date = pcp->pc_entry.pcd_ladate;
625 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
626 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
627 	else
628 		atime.pct_time = 0;
629 	pc_pcttotv(&atime, &unixtime);
630 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
631 		if (unixtime > INT32_MAX)
632 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
633 		unixtime = MIN(unixtime, INT32_MAX);
634 	} else if (unixtime > INT32_MAX &&
635 	    get_udatamodel() == DATAMODEL_ILP32) {
636 		pc_unlockfs(fsp);
637 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
638 		return (EOVERFLOW);
639 	}
640 
641 	vap->va_atime.tv_sec = (time_t)unixtime;
642 	vap->va_atime.tv_nsec = 0;
643 
644 	vap->va_rdev = 0;
645 	vap->va_nblocks = (fsblkcnt64_t)howmany((offset_t)pcp->pc_size,
646 				DEV_BSIZE);
647 	vap->va_blksize = fsp->pcfs_clsize;
648 	pc_unlockfs(fsp);
649 	return (0);
650 }
651 
652 
653 /*ARGSUSED*/
654 static int
655 pcfs_setattr(
656 	struct vnode *vp,
657 	struct vattr *vap,
658 	int flags,
659 	struct cred *cr,
660 	caller_context_t *ct)
661 {
662 	struct pcnode *pcp;
663 	mode_t mask = vap->va_mask;
664 	int error;
665 	struct pcfs *fsp;
666 	timestruc_t now, *timep;
667 
668 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
669 	/*
670 	 * cannot set these attributes
671 	 */
672 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
673 		return (EINVAL);
674 	}
675 	/*
676 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
677 	 * from 'tar' when it tries to set times on a directory, and console
678 	 * printf's on the NFS server when it gets EINVAL back on such a
679 	 * request. One possible problem with that since a directory entry
680 	 * identifies a file, '.' and all the '..' entries in subdirectories
681 	 * may get out of sync when the directory is updated since they're
682 	 * treated like separate files. We could fix that by looking for
683 	 * '.' and giving it the same attributes, and then looking for
684 	 * all the subdirectories and updating '..', but that's pretty
685 	 * expensive for something that doesn't seem likely to matter.
686 	 */
687 	/* can't do some ops on directories anyway */
688 	if ((vp->v_type == VDIR) &&
689 	    (mask & AT_SIZE)) {
690 		return (EINVAL);
691 	}
692 
693 	fsp = VFSTOPCFS(vp->v_vfsp);
694 	error = pc_lockfs(fsp, 0, 0);
695 	if (error)
696 		return (error);
697 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
698 		pc_unlockfs(fsp);
699 		return (EIO);
700 	}
701 
702 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
703 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
704 			pc_unlockfs(fsp);
705 			return (EACCES);
706 		}
707 	}
708 
709 	/*
710 	 * Change file access modes.
711 	 * If nobody has write permission, file is marked readonly.
712 	 * Otherwise file is writable by anyone.
713 	 */
714 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
715 		if ((vap->va_mode & 0222) == 0)
716 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
717 		else
718 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
719 		pcp->pc_flags |= PC_CHG;
720 	}
721 	/*
722 	 * Truncate file. Must have write permission.
723 	 */
724 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
725 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
726 			error = EACCES;
727 			goto out;
728 		}
729 		if (vap->va_size > UINT32_MAX) {
730 			error = EFBIG;
731 			goto out;
732 		}
733 		error = pc_truncate(pcp, (uint_t)vap->va_size);
734 		if (error)
735 			goto out;
736 	}
737 	/*
738 	 * Change file modified times.
739 	 */
740 	if (mask & (AT_MTIME | AT_CTIME)) {
741 		/*
742 		 * If SysV-compatible option to set access and
743 		 * modified times if privileged, owner, or write access,
744 		 * use current time rather than va_mtime.
745 		 *
746 		 * XXX - va_mtime.tv_sec == -1 flags this.
747 		 */
748 		timep = &vap->va_mtime;
749 		if (vap->va_mtime.tv_sec == -1) {
750 			gethrestime(&now);
751 			timep = &now;
752 		}
753 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
754 		    timep->tv_sec > INT32_MAX) {
755 			error = EOVERFLOW;
756 			goto out;
757 		}
758 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
759 		if (error)
760 			goto out;
761 		pcp->pc_flags |= PC_CHG;
762 	}
763 	/*
764 	 * Change file access times.
765 	 */
766 	if (mask & AT_ATIME) {
767 		/*
768 		 * If SysV-compatible option to set access and
769 		 * modified times if privileged, owner, or write access,
770 		 * use current time rather than va_mtime.
771 		 *
772 		 * XXX - va_atime.tv_sec == -1 flags this.
773 		 */
774 		struct pctime	atime;
775 
776 		timep = &vap->va_atime;
777 		if (vap->va_atime.tv_sec == -1) {
778 			gethrestime(&now);
779 			timep = &now;
780 		}
781 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
782 		    timep->tv_sec > INT32_MAX) {
783 			error = EOVERFLOW;
784 			goto out;
785 		}
786 		error = pc_tvtopct(timep, &atime);
787 		if (error)
788 			goto out;
789 		pcp->pc_entry.pcd_ladate = atime.pct_date;
790 		pcp->pc_flags |= PC_CHG;
791 	}
792 out:
793 	pc_unlockfs(fsp);
794 	return (error);
795 }
796 
797 
798 /*ARGSUSED*/
799 static int
800 pcfs_access(
801 	struct vnode *vp,
802 	int mode,
803 	int flags,
804 	struct cred *cr)
805 {
806 	struct pcnode *pcp;
807 	struct pcfs *fsp;
808 
809 
810 	fsp = VFSTOPCFS(vp->v_vfsp);
811 
812 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
813 		return (EIO);
814 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
815 		return (EACCES);
816 
817 	/*
818 	 * If this is a boot partition, privileged users have full access while
819 	 * others have read-only access.
820 	 */
821 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
822 		if ((mode & VWRITE) &&
823 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
824 			return (EACCES);
825 	}
826 	return (0);
827 }
828 
829 
830 /*ARGSUSED*/
831 static int
832 pcfs_fsync(
833 	struct vnode *vp,
834 	int syncflag,
835 	struct cred *cr)
836 {
837 	struct pcfs *fsp;
838 	struct pcnode *pcp;
839 	int error;
840 
841 	fsp = VFSTOPCFS(vp->v_vfsp);
842 	if (error = pc_verify(fsp))
843 		return (error);
844 	error = pc_lockfs(fsp, 0, 0);
845 	if (error)
846 		return (error);
847 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
848 		pc_unlockfs(fsp);
849 		return (EIO);
850 	}
851 	rw_enter(&pcnodes_lock, RW_WRITER);
852 	error = pc_nodesync(pcp);
853 	rw_exit(&pcnodes_lock);
854 	pc_unlockfs(fsp);
855 	return (error);
856 }
857 
858 
859 /*ARGSUSED*/
860 static void
861 pcfs_inactive(
862 	struct vnode *vp,
863 	struct cred *cr)
864 {
865 	struct pcnode *pcp;
866 	struct pcfs *fsp;
867 	int error;
868 
869 	fsp = VFSTOPCFS(vp->v_vfsp);
870 	error = pc_lockfs(fsp, 0, 1);
871 
872 	/*
873 	 * If the filesystem was umounted by force, all dirty
874 	 * pages associated with this vnode are invalidated
875 	 * and then the vnode will be freed.
876 	 */
877 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
878 		pcp = VTOPC(vp);
879 		if (vn_has_cached_data(vp)) {
880 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
881 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
882 		}
883 		remque(pcp);
884 		if (error == 0)
885 			pc_unlockfs(fsp);
886 		vn_free(vp);
887 		kmem_free(pcp, sizeof (struct pcnode));
888 		VFS_RELE(PCFSTOVFS(fsp));
889 		return;
890 	}
891 
892 	mutex_enter(&vp->v_lock);
893 	ASSERT(vp->v_count >= 1);
894 	if (vp->v_count > 1) {
895 		vp->v_count--;  /* release our hold from vn_rele */
896 		mutex_exit(&vp->v_lock);
897 		pc_unlockfs(fsp);
898 		return;
899 	}
900 	mutex_exit(&vp->v_lock);
901 
902 	/*
903 	 * Check again to confirm that no intervening I/O error
904 	 * with a subsequent pc_diskchanged() call has released
905 	 * the pcnode. If it has then release the vnode as above.
906 	 */
907 	pcp = VTOPC(vp);
908 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
909 		if (vn_has_cached_data(vp))
910 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
911 			    pcfs_putapage, B_INVAL | B_TRUNC,
912 			    (struct cred *)NULL);
913 	}
914 
915 	if (pcp == NULL) {
916 		vn_free(vp);
917 	} else {
918 		pc_rele(pcp);
919 	}
920 
921 	if (!error)
922 		pc_unlockfs(fsp);
923 }
924 
925 /*ARGSUSED*/
926 static int
927 pcfs_lookup(
928 	struct vnode *dvp,
929 	char *nm,
930 	struct vnode **vpp,
931 	struct pathname *pnp,
932 	int flags,
933 	struct vnode *rdir,
934 	struct cred *cr)
935 {
936 	struct pcfs *fsp;
937 	struct pcnode *pcp;
938 	int error;
939 
940 	/*
941 	 * If the filesystem was umounted by force, return immediately.
942 	 */
943 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
944 		return (EIO);
945 
946 	/*
947 	 * verify that the dvp is still valid on the disk
948 	 */
949 	fsp = VFSTOPCFS(dvp->v_vfsp);
950 	if (error = pc_verify(fsp))
951 		return (error);
952 	error = pc_lockfs(fsp, 0, 0);
953 	if (error)
954 		return (error);
955 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
956 		pc_unlockfs(fsp);
957 		return (EIO);
958 	}
959 	/*
960 	 * Null component name is a synonym for directory being searched.
961 	 */
962 	if (*nm == '\0') {
963 		VN_HOLD(dvp);
964 		*vpp = dvp;
965 		pc_unlockfs(fsp);
966 		return (0);
967 	}
968 
969 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
970 	if (!error) {
971 		*vpp = PCTOV(pcp);
972 		pcp->pc_flags |= PC_EXTERNAL;
973 	}
974 	pc_unlockfs(fsp);
975 	return (error);
976 }
977 
978 
979 /*ARGSUSED*/
980 static int
981 pcfs_create(
982 	struct vnode *dvp,
983 	char *nm,
984 	struct vattr *vap,
985 	enum vcexcl exclusive,
986 	int mode,
987 	struct vnode **vpp,
988 	struct cred *cr,
989 	int flag)
990 {
991 	int error;
992 	struct pcnode *pcp;
993 	struct vnode *vp;
994 	struct pcfs *fsp;
995 
996 	/*
997 	 * can't create directories. use pcfs_mkdir.
998 	 * can't create anything other than files.
999 	 */
1000 	if (vap->va_type == VDIR)
1001 		return (EISDIR);
1002 	else if (vap->va_type != VREG)
1003 		return (EINVAL);
1004 
1005 	pcp = NULL;
1006 	fsp = VFSTOPCFS(dvp->v_vfsp);
1007 	error = pc_lockfs(fsp, 0, 0);
1008 	if (error)
1009 		return (error);
1010 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1011 		pc_unlockfs(fsp);
1012 		return (EIO);
1013 	}
1014 
1015 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1016 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1017 			pc_unlockfs(fsp);
1018 			return (EACCES);
1019 		}
1020 	}
1021 
1022 	if (*nm == '\0') {
1023 		/*
1024 		 * Null component name refers to the directory itself.
1025 		 */
1026 		VN_HOLD(dvp);
1027 		pcp = VTOPC(dvp);
1028 		error = EEXIST;
1029 	} else {
1030 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1031 	}
1032 	/*
1033 	 * if file exists and this is a nonexclusive create,
1034 	 * check for access permissions
1035 	 */
1036 	if (error == EEXIST) {
1037 		vp = PCTOV(pcp);
1038 		if (exclusive == NONEXCL) {
1039 			if (vp->v_type == VDIR) {
1040 				error = EISDIR;
1041 			} else if (mode) {
1042 				error = pcfs_access(PCTOV(pcp), mode, 0,
1043 					cr);
1044 			} else {
1045 				error = 0;
1046 			}
1047 		}
1048 		if (error) {
1049 			VN_RELE(PCTOV(pcp));
1050 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1051 			(vap->va_size == 0)) {
1052 			error = pc_truncate(pcp, 0L);
1053 			if (error)
1054 				VN_RELE(PCTOV(pcp));
1055 		}
1056 	}
1057 	if (error) {
1058 		pc_unlockfs(fsp);
1059 		return (error);
1060 	}
1061 	*vpp = PCTOV(pcp);
1062 	pcp->pc_flags |= PC_EXTERNAL;
1063 	pc_unlockfs(fsp);
1064 	return (error);
1065 }
1066 
1067 /*ARGSUSED*/
1068 static int
1069 pcfs_remove(
1070 	struct vnode *vp,
1071 	char *nm,
1072 	struct cred *cr)
1073 {
1074 	struct pcfs *fsp;
1075 	struct pcnode *pcp;
1076 	int error;
1077 
1078 	fsp = VFSTOPCFS(vp->v_vfsp);
1079 	if (error = pc_verify(fsp))
1080 		return (error);
1081 	error = pc_lockfs(fsp, 0, 0);
1082 	if (error)
1083 		return (error);
1084 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1085 		pc_unlockfs(fsp);
1086 		return (EIO);
1087 	}
1088 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1089 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1090 			pc_unlockfs(fsp);
1091 			return (EACCES);
1092 		}
1093 	}
1094 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG);
1095 	pc_unlockfs(fsp);
1096 	return (error);
1097 }
1098 
1099 /*
1100  * Rename a file or directory
1101  * This rename is restricted to only rename files within a directory.
1102  * XX should make rename more general
1103  */
1104 /*ARGSUSED*/
1105 static int
1106 pcfs_rename(
1107 	struct vnode *sdvp,		/* old (source) parent vnode */
1108 	char *snm,			/* old (source) entry name */
1109 	struct vnode *tdvp,		/* new (target) parent vnode */
1110 	char *tnm,			/* new (target) entry name */
1111 	struct cred *cr)
1112 {
1113 	struct pcfs *fsp;
1114 	struct pcnode *dp;	/* parent pcnode */
1115 	struct pcnode *tdp;
1116 	int error;
1117 
1118 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1119 	if (error = pc_verify(fsp))
1120 		return (error);
1121 
1122 	/*
1123 	 * make sure we can muck with this directory.
1124 	 */
1125 	error = pcfs_access(sdvp, VWRITE, 0, cr);
1126 	if (error) {
1127 		return (error);
1128 	}
1129 	error = pc_lockfs(fsp, 0, 0);
1130 	if (error)
1131 		return (error);
1132 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1133 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1134 		pc_unlockfs(fsp);
1135 		return (EIO);
1136 	}
1137 	error = pc_rename(dp, tdp, snm, tnm);
1138 	pc_unlockfs(fsp);
1139 	return (error);
1140 }
1141 
1142 /*ARGSUSED*/
1143 static int
1144 pcfs_mkdir(
1145 	struct vnode *dvp,
1146 	char *nm,
1147 	struct vattr *vap,
1148 	struct vnode **vpp,
1149 	struct cred *cr)
1150 {
1151 	struct pcfs *fsp;
1152 	struct pcnode *pcp;
1153 	int error;
1154 
1155 	fsp = VFSTOPCFS(dvp->v_vfsp);
1156 	if (error = pc_verify(fsp))
1157 		return (error);
1158 	error = pc_lockfs(fsp, 0, 0);
1159 	if (error)
1160 		return (error);
1161 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1162 		pc_unlockfs(fsp);
1163 		return (EIO);
1164 	}
1165 
1166 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1167 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1168 			pc_unlockfs(fsp);
1169 			return (EACCES);
1170 		}
1171 	}
1172 
1173 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1174 
1175 	if (!error) {
1176 		pcp -> pc_flags |= PC_EXTERNAL;
1177 		*vpp = PCTOV(pcp);
1178 	} else if (error == EEXIST) {
1179 		VN_RELE(PCTOV(pcp));
1180 	}
1181 	pc_unlockfs(fsp);
1182 	return (error);
1183 }
1184 
1185 /*ARGSUSED*/
1186 static int
1187 pcfs_rmdir(
1188 	struct vnode *dvp,
1189 	char *nm,
1190 	struct vnode *cdir,
1191 	struct cred *cr)
1192 {
1193 	struct pcfs *fsp;
1194 	struct pcnode *pcp;
1195 	int error;
1196 
1197 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1198 	if (error = pc_verify(fsp))
1199 		return (error);
1200 	if (error = pc_lockfs(fsp, 0, 0))
1201 		return (error);
1202 
1203 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1204 		pc_unlockfs(fsp);
1205 		return (EIO);
1206 	}
1207 
1208 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1209 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1210 			pc_unlockfs(fsp);
1211 			return (EACCES);
1212 		}
1213 	}
1214 
1215 	error = pc_dirremove(pcp, nm, cdir, VDIR);
1216 	pc_unlockfs(fsp);
1217 	return (error);
1218 }
1219 
/*
 * Read entries from the directory 'dvp' into the caller's uio.
 * The on-disk PC format entries are converted to pc_dirent records.
 *
 * uiop->uio_loffset is the directory offset to start from.  It must be a
 * multiple of sizeof (struct pcdir) and the uio must carry exactly one
 * iovec, otherwise EINVAL is returned.  If 'eofp' is non-NULL it is
 * cleared once the filesystem has been locked and set to 1 when the end
 * of the directory is reached.
 *
 * Returns 0 or an errno value.  A non-zero return from
 * pc_read_long_fn()/pc_read_short_fn() only terminates the scan; it is
 * not assigned to 'error' and therefore not reported to the caller.
 */

/*ARGSUSED*/
static int
pcfs_readdir(
	struct vnode *dvp,
	struct uio *uiop,
	struct cred *cr,
	int *eofp)
{
	struct pcnode *pcp;
	struct pcfs *fsp;
	struct pcdir *ep;
	struct buf *bp = NULL;
	offset_t offset;
	int boff;
	struct pc_dirent lbp;
	struct pc_dirent *ld = &lbp;
	int error;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if ((uiop->uio_iovcnt != 1) ||
	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
		return (EINVAL);
	}
	fsp = VFSTOPCFS(dvp->v_vfsp);
	/*
	 * verify that the dp is still valid on the disk
	 */
	if (error = pc_verify(fsp)) {
		return (error);
	}
	error = pc_lockfs(fsp, 0, 0);
	if (error)
		return (error);
	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
		pc_unlockfs(fsp);
		return (EIO);
	}

	/* start from a zeroed record; it is reused for every entry */
	bzero(ld, sizeof (*ld));

	if (eofp != NULL)
		*eofp = 0;
	offset = uiop->uio_loffset;

	if (dvp->v_flag & VROOT) {
		/*
		 * kludge up entries for "." and ".." in the root.
		 * They occupy the first two pcdir-sized slots of the
		 * offset space presented to the caller.
		 */
		if (offset == 0) {
			(void) strcpy(ld->d_name, ".");
			ld->d_reclen = DIRENT64_RECLEN(1);
			ld->d_off = (off64_t)sizeof (struct pcdir);
			ld->d_ino = (ino64_t)UINT_MAX;
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		if (offset == sizeof (struct pcdir)) {
			(void) strcpy(ld->d_name, "..");
			ld->d_reclen = DIRENT64_RECLEN(2);
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			ld->d_off = (off64_t)(uiop->uio_loffset +
			    sizeof (struct pcdir));
			ld->d_ino = (ino64_t)UINT_MAX;
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		offset -= 2 * sizeof (struct pcdir);
		/* offset now has the real offset value into directory file */
	}

	for (;;) {
		/*
		 * (Re)load the directory block when we have none yet, are
		 * at a block boundary, or have walked past the end of the
		 * current buffer.
		 */
		boff = pc_blkoff(fsp, offset);
		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
			if (bp != NULL) {
				brelse(bp);
				bp = NULL;
			}
			error = pc_blkatoff(pcp, offset, &bp, &ep);
			if (error) {
				/* ENOENT here is treated as end of directory */
				if (error == ENOENT) {
					error = 0;
					if (eofp)
						*eofp = 1;
				}
				break;
			}
		}
		/* a PCD_UNUSED first byte ends the scan */
		if (ep->pcd_filename[0] == PCD_UNUSED) {
			if (eofp)
				*eofp = 1;
			break;
		}
		/*
		 * Skip entries marked PCD_ERASED; just step over the slot.
		 */
		if (ep->pcd_filename[0] == PCD_ERASED) {
			uiop->uio_loffset += sizeof (struct pcdir);
			offset += sizeof (struct pcdir);
			ep++;
			continue;
		}
		/*
		 * The helpers below advance ep, offset and the uio
		 * themselves and may replace bp.
		 */
		if (PCDL_IS_LFN(ep)) {
			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
			    0)
				break;
			continue;
		}

		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
			break;
	}
	if (bp)
		brelse(bp);
	pc_unlockfs(fsp);
	return (error);
}
1355 
1356 
/*
 * Called from pvn_getpages or pcfs_getpage to get a particular page.
 * When we are called the pcfs is already locked.
 *
 * If the page at 'off' is not yet in the page cache it is read from the
 * device in one or more transfers mapped by pc_bmap(); any part of the
 * page that was not read is zero-filled.  If the page is already cached
 * it is looked up with a shared lock and returned in pl[0].
 *
 * A NULL 'pl' requests asynchronous I/O (read-ahead), which pcfs does
 * not do; 0 is returned without doing any work.
 *
 * Returns 0 on success, EIO if the filesystem was forcibly unmounted or
 * the pcnode is missing/invalid, or the error from pc_bmap()/biowait().
 */
/*ARGSUSED*/
static int
pcfs_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],		/* NULL if async IO is requested */
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct pcnode *pcp;
	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
	struct vnode *devvp;
	page_t *pp;
	page_t *pagefound;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
	    (void *)vp, off, len);

	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
		return (EIO);
	devvp = fsp->pcfs_devvp;

	/* pcfs doesn't do readaheads */
	if (pl == NULL)
		return (0);

	pl[0] = NULL;
	err = 0;
	/*
	 * If the accessed time on the pcnode has not already been
	 * set elsewhere (e.g. for read/setattr) we set the time now.
	 * This gives us approximate accessed times for mmap'ed files
	 * which are accessed via loads in the user address space.
	 * Nothing is marked on a read-only filesystem.
	 */
	if ((pcp->pc_flags & PC_ACC) == 0 &&
	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
		pcp->pc_flags |= PC_ACC;
		pc_mark_acc(pcp);
	}
reread:
	if ((pagefound = page_exists(vp, off)) == NULL) {
		/*
		 * Need to really do disk IO to get the page(s).
		 */
		struct buf *bp;
		daddr_t lbn, bn;
		u_offset_t io_off;
		size_t io_len;
		u_offset_t lbnoff, xferoffset;
		u_offset_t pgoff;
		uint_t	xfersize;
		int err1;

		/*
		 * lbn:        logical block containing 'off'
		 * lbnoff:     'off' rounded down to a cluster boundary
		 * xferoffset: 'off' rounded down to a sector boundary
		 */
		lbn = pc_lblkno(fsp, off);
		lbnoff = off & ~(fsp->pcfs_clsize - 1);
		xferoffset = off & ~(fsp->pcfs_secsize - 1);

		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
		if (pp == NULL)
			/*
			 * XXX - If pcfs is made MT-hot, this should go
			 * back to reread.
			 */
			panic("pcfs_getapage pvn_read_kluster");

		/*
		 * Fill the page piecewise; each pass reads 'xfersize'
		 * bytes at page offset 'pgoff' and stops at PAGESIZE or
		 * once xferoffset reaches the file size.
		 */
		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
		    pgoff += xfersize,
		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
		    lbnoff += xfersize, xferoffset += xfersize) {
			/*
			 * read as many contiguous blocks as possible to
			 * fill this page
			 */
			xfersize = PAGESIZE - pgoff;
			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
			if (err1) {
				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
				err = err1;
				goto out;
			}
			bp = pageio_setup(pp, xfersize, devvp, B_READ);
			bp->b_edev = devvp->v_rdev;
			bp->b_dev = cmpdev(devvp->v_rdev);
			bp->b_blkno = bn +
			    /* add a sector offset within the cluster */
			    /* when the clustersize > PAGESIZE */
			    (xferoffset - lbnoff) / fsp->pcfs_secsize;
			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
			bp->b_file = vp;
			bp->b_offset = (offset_t)(off + pgoff);

			(void) bdev_strategy(bp);

			lwp_stat_update(LWP_STAT_INBLK, 1);

			/* always wait for the I/O; keep the first error */
			if (err == 0)
				err = biowait(bp);
			else
				(void) biowait(bp);
			pageio_done(bp);
			if (err)
				goto out;
		}
		/* zero whatever part of the page was not read */
		if (pgoff < PAGESIZE) {
			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
		}
		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
	}
out:
	/*
	 * err can only be non-zero here after a failure inside the read
	 * path above, where pp has been set by pvn_read_kluster().
	 */
	if (err) {
		if (pp != NULL)
			pvn_read_done(pp, B_ERROR);
		return (err);
	}

	if (pagefound) {
		/*
		 * Page exists in the cache, acquire the "shared"
		 * lock.  If this fails, go back to reread.
		 */
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			goto reread;
		}
		pl[0] = pp;
		pl[1] = NULL;
	}
	return (err);
}
1502 
1503 /*
1504  * Return all the pages from [off..off+len] in given file
1505  */
1506 static int
1507 pcfs_getpage(
1508 	struct vnode *vp,
1509 	offset_t off,
1510 	size_t len,
1511 	uint_t *protp,
1512 	page_t *pl[],
1513 	size_t plsz,
1514 	struct seg *seg,
1515 	caddr_t addr,
1516 	enum seg_rw rw,
1517 	struct cred *cr)
1518 {
1519 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1520 	int err;
1521 
1522 	PC_DPRINTF0(6, "pcfs_getpage\n");
1523 	if (err = pc_verify(fsp))
1524 		return (err);
1525 	if (vp->v_flag & VNOMAP)
1526 		return (ENOSYS);
1527 	ASSERT(off <= UINT32_MAX);
1528 	err = pc_lockfs(fsp, 0, 0);
1529 	if (err)
1530 		return (err);
1531 	if (protp != NULL)
1532 		*protp = PROT_ALL;
1533 
1534 	ASSERT((off & PAGEOFFSET) == 0);
1535 	if (len <= PAGESIZE) {
1536 		err = pcfs_getapage(vp, off, len, protp, pl,
1537 		    plsz, seg, addr, rw, cr);
1538 	} else {
1539 		err = pvn_getpages(pcfs_getapage, vp, off,
1540 		    len, protp, pl, plsz, seg, addr, rw, cr);
1541 	}
1542 	pc_unlockfs(fsp);
1543 	return (err);
1544 }
1545 
1546 
/*
 * Write out (and optionally invalidate/free) the cached pages of 'vp'
 * in the range starting at 'off'.
 *
 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
 * If len == 0, do from off to EOF.
 *
 * The normal cases should be len == 0 & off == 0 (entire vp list),
 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
 * (from pageout).
 *
 * B_ASYNC is stripped from 'flags' before any work is done, so all
 * writes issued from here are synchronous.
 *
 * Returns 0 on success, EIO for a forcibly unmounted filesystem or a
 * missing/invalid pcnode, ENOSYS for VNOMAP vnodes, ENOMEM when called
 * from the pageout process, or the error from pc_verify(), pc_lockfs(),
 * pvn_vplist_dirty() or pcfs_putapage().
 */
/*ARGSUSED*/
static int
pcfs_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr)
{
	struct pcnode *pcp;
	page_t *pp;
	struct pcfs *fsp;
	u_offset_t io_off;
	size_t io_len;
	offset_t eoff;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	fsp = VFSTOPCFS(vp->v_vfsp);

	if (err = pc_verify(fsp))
		return (err);
	if ((pcp = VTOPC(vp)) == NULL) {
		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
		return (EIO);
	}
	if (pcp->pc_flags & PC_INVAL)
		return (EIO);

	if (curproc == proc_pageout) {
		/*
		 * XXX - This is a quick hack to avoid blocking
		 * pageout. Also to avoid pcfs_getapage deadlocking
		 * with putpage when memory is running out,
		 * since we only have one global lock and we don't
		 * support async putpage.
		 * It should be fixed someday.
		 *
		 * Interestingly, this used to be a test of NOMEMWAIT().
		 * We only ever got here once pcfs started supporting
		 * NFS sharing, and then only because the NFS server
		 * threads seem to do writes in sched's process context.
		 * Since everyone else seems to just care about pageout,
		 * the test was changed to look for pageout directly.
		 */
		return (ENOMEM);
	}

	ASSERT(off <= UINT32_MAX);

	flags &= ~B_ASYNC;	/* XXX should fix this later */

	err = pc_lockfs(fsp, 0, 0);
	if (err)
		return (err);
	/* nothing to do without cached pages or when starting beyond EOF */
	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
		pc_unlockfs(fsp);
		return (0);
	}

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off
		 */
		err = pvn_vplist_dirty(vp, off,
		    pcfs_putapage, flags, cr);
	} else {
		eoff = off + len;

		for (io_off = off; io_off < eoff &&
		    io_off < pcp->pc_size; io_off += io_len) {
			/*
			 * Invalidating or synchronous requests use
			 * page_lookup(); only asynchronous,
			 * non-invalidating requests would use
			 * page_lookup_nowait() to prevent reclaiming
			 * pages from the free list.  Since B_ASYNC was
			 * cleared from flags above, the first branch is
			 * always the one taken here.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
					(flags & (B_INVAL | B_FREE)) ?
					    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
					(flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			/* no page, or page not dirty: step one page ahead */
			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = pcfs_putapage(vp, pp, &io_off, &io_len,
					flags, cr);
				if (err != 0)
					break;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	if (err == 0 && (flags & B_INVAL) &&
	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
		/*
		 * If doing "invalidation", make sure that
		 * all pages on the vnode list are actually
		 * gone.
		 */
		cmn_err(CE_PANIC,
			"pcfs_putpage: B_INVAL, pages not gone");
	} else if (err) {
		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
	}
	pc_unlockfs(fsp);
	return (err);
}
1683 
1684 /*
1685  * Write out a single page, possibly klustering adjacent dirty pages.
1686  */
1687 /*ARGSUSED*/
1688 int
1689 pcfs_putapage(
1690 	struct vnode *vp,
1691 	page_t *pp,
1692 	u_offset_t *offp,
1693 	size_t *lenp,
1694 	int flags,
1695 	struct cred *cr)
1696 {
1697 	struct pcnode *pcp;
1698 	struct pcfs *fsp;
1699 	struct vnode *devvp;
1700 	size_t io_len;
1701 	daddr_t bn;
1702 	u_offset_t lbn, lbnoff, xferoffset;
1703 	uint_t pgoff, xfersize;
1704 	int err = 0;
1705 	u_offset_t io_off;
1706 
1707 	pcp = VTOPC(vp);
1708 	fsp = VFSTOPCFS(vp->v_vfsp);
1709 	devvp = fsp->pcfs_devvp;
1710 
1711 	/*
1712 	 * If the modified time on the inode has not already been
1713 	 * set elsewhere (e.g. for write/setattr) and this is not
1714 	 * a call from msync (B_FORCE) we set the time now.
1715 	 * This gives us approximate modified times for mmap'ed files
1716 	 * which are modified via stores in the user address space.
1717 	 */
1718 	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1719 		pcp->pc_flags |= PC_MOD;
1720 		pc_mark_mod(pcp);
1721 	}
1722 	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1723 	    PAGESIZE, flags);
1724 
1725 	if (fsp->pcfs_flags & PCFS_IRRECOV) {
1726 		goto out;
1727 	}
1728 
1729 	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1730 
1731 	lbn = pc_lblkno(fsp, io_off);
1732 	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1733 	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1734 
1735 	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1736 	    pgoff += xfersize,
1737 	    lbn += howmany(xfersize, fsp->pcfs_clsize),
1738 	    lbnoff += xfersize, xferoffset += xfersize) {
1739 
1740 		struct buf *bp;
1741 		int err1;
1742 
1743 		/*
1744 		 * write as many contiguous blocks as possible from this page
1745 		 */
1746 		xfersize = io_len - pgoff;
1747 		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1748 		if (err1) {
1749 			err = err1;
1750 			goto out;
1751 		}
1752 		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1753 		bp->b_edev = devvp->v_rdev;
1754 		bp->b_dev = cmpdev(devvp->v_rdev);
1755 		bp->b_blkno = bn +
1756 		    /* add a sector offset within the cluster */
1757 		    /* when the clustersize > PAGESIZE */
1758 		    (xferoffset - lbnoff) / fsp->pcfs_secsize;
1759 		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1760 		bp->b_file = vp;
1761 		bp->b_offset = (offset_t)(io_off + pgoff);
1762 
1763 		(void) bdev_strategy(bp);
1764 
1765 		lwp_stat_update(LWP_STAT_OUBLK, 1);
1766 
1767 		if (err == 0)
1768 			err = biowait(bp);
1769 		else
1770 			(void) biowait(bp);
1771 		pageio_done(bp);
1772 	}
1773 	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1774 	pp = NULL;
1775 
1776 out:
1777 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1778 		pvn_write_done(pp, B_WRITE | flags);
1779 	} else if (err != 0 && pp != NULL) {
1780 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1781 	}
1782 
1783 	if (offp)
1784 		*offp = io_off;
1785 	if (lenp)
1786 		*lenp = io_len;
1787 		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1788 		    (void *)vp, (void *)pp, io_off, io_len);
1789 	if (err) {
1790 		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1791 	}
1792 	return (err);
1793 }
1794 
1795 /*ARGSUSED*/
1796 static int
1797 pcfs_map(
1798 	struct vnode *vp,
1799 	offset_t off,
1800 	struct as *as,
1801 	caddr_t *addrp,
1802 	size_t len,
1803 	uchar_t prot,
1804 	uchar_t maxprot,
1805 	uint_t flags,
1806 	struct cred *cr)
1807 {
1808 	struct segvn_crargs vn_a;
1809 	int error;
1810 
1811 	PC_DPRINTF0(6, "pcfs_map\n");
1812 	if (vp->v_flag & VNOMAP)
1813 		return (ENOSYS);
1814 
1815 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1816 		return (ENXIO);
1817 
1818 	as_rangelock(as);
1819 	if ((flags & MAP_FIXED) == 0) {
1820 		map_addr(addrp, len, off, 1, flags);
1821 		if (*addrp == NULL) {
1822 			as_rangeunlock(as);
1823 			return (ENOMEM);
1824 		}
1825 	} else {
1826 		/*
1827 		 * User specified address - blow away any previous mappings
1828 		 */
1829 		(void) as_unmap(as, *addrp, len);
1830 	}
1831 
1832 	vn_a.vp = vp;
1833 	vn_a.offset = off;
1834 	vn_a.type = flags & MAP_TYPE;
1835 	vn_a.prot = prot;
1836 	vn_a.maxprot = maxprot;
1837 	vn_a.flags = flags & ~MAP_TYPE;
1838 	vn_a.cred = cr;
1839 	vn_a.amp = NULL;
1840 	vn_a.szc = 0;
1841 	vn_a.lgrp_mem_policy_flags = 0;
1842 
1843 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1844 	as_rangeunlock(as);
1845 	return (error);
1846 }
1847 
1848 /* ARGSUSED */
1849 static int
1850 pcfs_seek(
1851 	struct vnode *vp,
1852 	offset_t ooff,
1853 	offset_t *noffp)
1854 {
1855 	if (*noffp < 0)
1856 		return (EINVAL);
1857 	else if (*noffp > MAXOFFSET_T)
1858 		return (EINVAL);
1859 	else
1860 		return (0);
1861 }
1862 
1863 /* ARGSUSED */
1864 static int
1865 pcfs_addmap(
1866 	struct vnode *vp,
1867 	offset_t off,
1868 	struct as *as,
1869 	caddr_t addr,
1870 	size_t len,
1871 	uchar_t prot,
1872 	uchar_t maxprot,
1873 	uint_t flags,
1874 	struct cred *cr)
1875 {
1876 	if (vp->v_flag & VNOMAP)
1877 		return (ENOSYS);
1878 	return (0);
1879 }
1880 
1881 /*ARGSUSED*/
1882 static int
1883 pcfs_delmap(
1884 	struct vnode *vp,
1885 	offset_t off,
1886 	struct as *as,
1887 	caddr_t addr,
1888 	size_t len,
1889 	uint_t prot,
1890 	uint_t maxprot,
1891 	uint_t flags,
1892 	struct cred *cr)
1893 {
1894 	if (vp->v_flag & VNOMAP)
1895 		return (ENOSYS);
1896 	return (0);
1897 }
1898 
1899 /*
1900  * POSIX pathconf() support.
1901  */
1902 /* ARGSUSED */
1903 static int
1904 pcfs_pathconf(
1905 	struct vnode *vp,
1906 	int cmd,
1907 	ulong_t *valp,
1908 	struct cred *cr)
1909 {
1910 	ulong_t val;
1911 	int error = 0;
1912 	struct statvfs64 vfsbuf;
1913 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1914 
1915 	switch (cmd) {
1916 
1917 	case _PC_LINK_MAX:
1918 		val = 1;
1919 		break;
1920 
1921 	case _PC_MAX_CANON:
1922 		val = MAX_CANON;
1923 		break;
1924 
1925 	case _PC_MAX_INPUT:
1926 		val = MAX_INPUT;
1927 		break;
1928 
1929 	case _PC_NAME_MAX:
1930 		bzero(&vfsbuf, sizeof (vfsbuf));
1931 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
1932 			break;
1933 		val = vfsbuf.f_namemax;
1934 		break;
1935 
1936 	case _PC_PATH_MAX:
1937 	case _PC_SYMLINK_MAX:
1938 		val = PCMAXPATHLEN;
1939 		break;
1940 
1941 	case _PC_PIPE_BUF:
1942 		val = PIPE_BUF;
1943 		break;
1944 
1945 	case _PC_NO_TRUNC:
1946 		val = (ulong_t)-1; 	/* Will truncate long file name */
1947 		break;
1948 
1949 	case _PC_VDISABLE:
1950 		val = _POSIX_VDISABLE;
1951 		break;
1952 
1953 	case _PC_CHOWN_RESTRICTED:
1954 		if (rstchown)
1955 			val = rstchown;		/* chown restricted enabled */
1956 		else
1957 			val = (ulong_t)-1;
1958 		break;
1959 
1960 	case _PC_ACL_ENABLED:
1961 		val = 0;
1962 		break;
1963 
1964 	case _PC_FILESIZEBITS:
1965 		/*
1966 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1967 		 * FAT12 can only go up to the maximum filesystem capacity
1968 		 * which is ~509MB.
1969 		 */
1970 		val = IS_FAT12(fsp) ? 30 : 33;
1971 		break;
1972 	default:
1973 		error = EINVAL;
1974 		break;
1975 	}
1976 
1977 	if (error == 0)
1978 		*valp = val;
1979 	return (error);
1980 }
1981 
1982 /* ARGSUSED */
1983 static int
1984 pcfs_space(
1985 	struct vnode *vp,
1986 	int cmd,
1987 	struct flock64 *bfp,
1988 	int flag,
1989 	offset_t offset,
1990 	cred_t *cr,
1991 	caller_context_t *ct)
1992 {
1993 	struct vattr vattr;
1994 	int error;
1995 
1996 	if (cmd != F_FREESP)
1997 		return (EINVAL);
1998 
1999 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2000 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2001 			return (EFBIG);
2002 		/*
2003 		 * we only support the special case of l_len == 0,
2004 		 * meaning free to end of file at this moment.
2005 		 */
2006 		if (bfp->l_len != 0)
2007 			return (EINVAL);
2008 		vattr.va_mask = AT_SIZE;
2009 		vattr.va_size = bfp->l_start;
2010 		error = VOP_SETATTR(vp, &vattr, 0, cr, ct);
2011 	}
2012 	return (error);
2013 }
2014 
2015 /*
2016  * Break up 'len' chars from 'buf' into a long file name chunk.
2017  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2018  */
2019 void
2020 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2021 {
2022 	char 	*tmp = buf;
2023 	int	i;
2024 
2025 
2026 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2027 		if (len > 0) {
2028 			ep->pcdl_firstfilename[i] = *tmp;
2029 			ep->pcdl_firstfilename[i+1] = 0;
2030 			len--;
2031 			tmp++;
2032 		} else {
2033 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2034 			ep->pcdl_firstfilename[i+1] = (uchar_t)0xff;
2035 		}
2036 	}
2037 
2038 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2039 		if (len > 0) {
2040 			ep->pcdl_secondfilename[i] = *tmp;
2041 			ep->pcdl_secondfilename[i+1] = 0;
2042 			len--;
2043 			tmp++;
2044 		} else {
2045 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2046 			ep->pcdl_secondfilename[i+1] = (uchar_t)0xff;
2047 		}
2048 	}
2049 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2050 		if (len > 0) {
2051 			ep->pcdl_thirdfilename[i] = *tmp;
2052 			ep->pcdl_thirdfilename[i+1] = 0;
2053 			len--;
2054 			tmp++;
2055 		} else {
2056 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2057 			ep->pcdl_thirdfilename[i+1] = (uchar_t)0xff;
2058 		}
2059 	}
2060 }
2061 
2062 /*
2063  * Extract the characters from the long filename chunk into 'buf'.
2064  * Return the number of characters extracted.
2065  */
2066 static int
2067 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase)
2068 {
2069 	char 	*tmp = buf;
2070 	int	i;
2071 
2072 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp++) {
2073 		if (ep->pcdl_firstfilename[i+1] != '\0')
2074 			return (-1);
2075 		if (foldcase)
2076 			*tmp = tolower(ep->pcdl_firstfilename[i]);
2077 		else
2078 			*tmp = ep->pcdl_firstfilename[i];
2079 		if (*tmp == '\0')
2080 			return (tmp - buf);
2081 	}
2082 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp++) {
2083 		if (ep->pcdl_secondfilename[i+1] != '\0')
2084 			return (-1);
2085 		if (foldcase)
2086 			*tmp = tolower(ep->pcdl_secondfilename[i]);
2087 		else
2088 			*tmp = ep->pcdl_secondfilename[i];
2089 		if (*tmp == '\0')
2090 			return (tmp - buf);
2091 	}
2092 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp++) {
2093 		if (ep->pcdl_thirdfilename[i+1] != '\0')
2094 			return (-1);
2095 		if (foldcase)
2096 			*tmp = tolower(ep->pcdl_thirdfilename[i]);
2097 		else
2098 			*tmp = ep->pcdl_thirdfilename[i];
2099 		if (*tmp == '\0')
2100 			return (tmp - buf);
2101 	}
2102 	*tmp = '\0';
2103 	return (tmp - buf);
2104 }
2105 
2106 
2107 /*
2108  * Checksum the passed in short filename.
2109  * This is used to validate each component of the long name to make
2110  * sure the long name is valid (it hasn't been "detached" from the
2111  * short filename). This algorithm was found in FreeBSD.
2112  * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2113  */
2114 
2115 uchar_t
2116 pc_checksum_long_fn(char *name, char *ext)
2117 {
2118 	uchar_t c;
2119 	char	b[11];
2120 
2121 	bcopy(name, b, 8);
2122 	bcopy(ext, b+8, 3);
2123 
2124 	c = b[0];
2125 	c = ((c << 7) | (c >> 1)) + b[1];
2126 	c = ((c << 7) | (c >> 1)) + b[2];
2127 	c = ((c << 7) | (c >> 1)) + b[3];
2128 	c = ((c << 7) | (c >> 1)) + b[4];
2129 	c = ((c << 7) | (c >> 1)) + b[5];
2130 	c = ((c << 7) | (c >> 1)) + b[6];
2131 	c = ((c << 7) | (c >> 1)) + b[7];
2132 	c = ((c << 7) | (c >> 1)) + b[8];
2133 	c = ((c << 7) | (c >> 1)) + b[9];
2134 	c = ((c << 7) | (c >> 1)) + b[10];
2135 
2136 	return (c);
2137 }
2138 
2139 /*
2140  * Read a chunk of long filename entries into 'namep'.
2141  * Return with offset pointing to short entry (on success), or next
2142  * entry to read (if this wasn't a valid lfn really).
2143  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2144  * a long filename.
2145  *
2146  * Can also be called with a NULL namep, in which case it just returns
2147  * whether this was really a valid long filename and consumes it
2148  * (used by pc_dirempty()).
2149  */
2150 int
2151 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2152     struct pcdir **epp, offset_t *offset, struct buf **bp)
2153 {
2154 	struct pcdir *ep = *epp;
2155 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2156 	struct vnode *dvp = PCTOV(pcp);
2157 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2158 	char	*lfn;
2159 	char	*lfn_base;
2160 	int	boff;
2161 	int	i, cs;
2162 	char	buf[20];
2163 	uchar_t	cksum;
2164 	int	detached = 0;
2165 	int	error = 0;
2166 	int	foldcase;
2167 
2168 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2169 	/* use callers buffer unless we didn't get one */
2170 	if (namep)
2171 		lfn_base = namep;
2172 	else
2173 		lfn_base = kmem_alloc(PCMAXNAMLEN+1, KM_SLEEP);
2174 	lfn = lfn_base + PCMAXNAMLEN - 1;
2175 	*lfn = '\0';
2176 	cksum = lep->pcdl_checksum;
2177 
2178 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2179 		/* read next block if necessary */
2180 		boff = pc_blkoff(fsp, *offset);
2181 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2182 			if (*bp != NULL) {
2183 				brelse(*bp);
2184 				*bp = NULL;
2185 			}
2186 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2187 			if (error) {
2188 				if (namep == NULL)
2189 					kmem_free(lfn_base, PCMAXNAMLEN+1);
2190 				return (error);
2191 			}
2192 			lep = (struct pcdir_lfn *)ep;
2193 		}
2194 		/* can this happen? Bad fs? */
2195 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2196 			detached = 1;
2197 			break;
2198 		}
2199 		if (cksum != lep->pcdl_checksum)
2200 			detached = 1;
2201 		/* process current entry */
2202 		cs = get_long_fn_chunk(lep, buf, foldcase);
2203 		if (cs == -1) {
2204 			detached = 1;
2205 		} else {
2206 			for (; cs > 0; cs--) {
2207 				/* see if we underflow */
2208 				if (lfn >= lfn_base)
2209 					*--lfn = buf[cs - 1];
2210 				else
2211 					detached = 1;
2212 			}
2213 		}
2214 		lep++;
2215 		*offset += sizeof (struct pcdir);
2216 	}
2217 	/* read next block if necessary */
2218 	boff = pc_blkoff(fsp, *offset);
2219 	ep = (struct pcdir *)lep;
2220 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2221 		if (*bp != NULL) {
2222 			brelse(*bp);
2223 			*bp = NULL;
2224 		}
2225 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2226 		if (error) {
2227 			if (namep == NULL)
2228 				kmem_free(lfn_base, PCMAXNAMLEN+1);
2229 			return (error);
2230 		}
2231 	}
2232 	/* should be on the short one */
2233 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2234 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2235 		detached = 1;
2236 	}
2237 	if (detached ||
2238 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2239 	    !pc_valid_long_fn(lfn)) {
2240 		/*
2241 		 * process current entry again. This may end up another lfn
2242 		 * or a short name.
2243 		 */
2244 		*epp = ep;
2245 		if (namep == NULL)
2246 			kmem_free(lfn_base, PCMAXNAMLEN+1);
2247 		return (EINVAL);
2248 	}
2249 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2250 		/*
2251 		 * Don't display label because it may contain
2252 		 * funny characters.
2253 		 */
2254 		*offset += sizeof (struct pcdir);
2255 		ep++;
2256 		*epp = ep;
2257 		if (namep == NULL)
2258 			kmem_free(lfn_base, PCMAXNAMLEN+1);
2259 		return (EINVAL);
2260 	}
2261 	if (namep) {
2262 		/* lfn is part of namep, but shifted. shift it back */
2263 		cs = strlen(lfn);
2264 		for (i = 0; i < cs; i++)
2265 			namep[i] = lfn[i];
2266 		namep[i] = '\0';
2267 	} else {
2268 		kmem_free(lfn_base, PCMAXNAMLEN+1);
2269 	}
2270 	*epp = ep;
2271 	return (0);
2272 }
2273 /*
2274  * Read a long filename into the pc_dirent structure and copy it out.
2275  */
2276 int
2277 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2278     struct pcdir **epp, offset_t *offset, struct buf **bp)
2279 {
2280 	struct pcdir *ep;
2281 	struct pcnode *pcp = VTOPC(dvp);
2282 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2283 	offset_t uiooffset = uiop->uio_loffset;
2284 	int	error = 0;
2285 	offset_t oldoffset;
2286 
2287 	oldoffset = *offset;
2288 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2289 	if (error) {
2290 		if (error == EINVAL) {
2291 			uiop->uio_loffset += *offset - oldoffset;
2292 			return (0);
2293 		} else
2294 			return (error);
2295 	}
2296 
2297 	ep = *epp;
2298 	uiop->uio_loffset += *offset - oldoffset;
2299 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2300 	if (ld->d_reclen > uiop->uio_resid) {
2301 		uiop->uio_loffset = uiooffset;
2302 		return (ENOSPC);
2303 	}
2304 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2305 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2306 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2307 	    pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2308 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2309 	uiop->uio_loffset = ld->d_off;
2310 	*offset += sizeof (struct pcdir);
2311 	ep++;
2312 	*epp = ep;
2313 	return (0);
2314 }
2315 
2316 /*
2317  * Read a short filename into the pc_dirent structure and copy it out.
2318  */
2319 int
2320 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2321     struct pcdir **epp, offset_t *offset, struct buf **bp)
2322 {
2323 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2324 	int	boff = pc_blkoff(fsp, *offset);
2325 	struct pcdir *ep = *epp;
2326 	offset_t	oldoffset = uiop->uio_loffset;
2327 	int	error;
2328 	int	foldcase;
2329 
2330 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2331 		uiop->uio_loffset += sizeof (struct pcdir);
2332 		*offset += sizeof (struct pcdir);
2333 		ep++;
2334 		*epp = ep;
2335 		return (0);
2336 	}
2337 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2338 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2339 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2340 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2341 	    &ep->pcd_ext[0], foldcase);
2342 	if (error == 0) {
2343 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2344 		if (ld->d_reclen > uiop->uio_resid) {
2345 			uiop->uio_loffset = oldoffset;
2346 			return (ENOSPC);
2347 		}
2348 		ld->d_off = (off64_t)(uiop->uio_loffset +
2349 		    sizeof (struct pcdir));
2350 		(void) uiomove((caddr_t)ld,
2351 		    ld->d_reclen, UIO_READ, uiop);
2352 		uiop->uio_loffset = ld->d_off;
2353 	} else {
2354 		uiop->uio_loffset += sizeof (struct pcdir);
2355 	}
2356 	*offset += sizeof (struct pcdir);
2357 	ep++;
2358 	*epp = ep;
2359 	return (0);
2360 }
2361 
2362 static int
2363 pcfs_fid(struct vnode *vp, struct fid *fidp)
2364 {
2365 	struct pc_fid *pcfid;
2366 	struct pcnode *pcp;
2367 	struct pcfs	*fsp;
2368 	int	error;
2369 
2370 	fsp = VFSTOPCFS(vp->v_vfsp);
2371 	if (fsp == NULL)
2372 		return (EIO);
2373 	error = pc_lockfs(fsp, 0, 0);
2374 	if (error)
2375 		return (error);
2376 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2377 		pc_unlockfs(fsp);
2378 		return (EIO);
2379 	}
2380 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2381 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2382 		pc_unlockfs(fsp);
2383 		return (ENOSPC);
2384 	}
2385 
2386 	pcfid = (struct pc_fid *)fidp;
2387 	bzero(pcfid, sizeof (struct pc_fid));
2388 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2389 	if (vp->v_flag & VROOT) {
2390 		pcfid->pcfid_block = 0;
2391 		pcfid->pcfid_offset = 0;
2392 		pcfid->pcfid_ctime = 0;
2393 	} else {
2394 		pcfid->pcfid_block = pcp->pc_eblkno;
2395 		pcfid->pcfid_offset = pcp->pc_eoffset;
2396 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2397 	}
2398 	pc_unlockfs(fsp);
2399 	return (0);
2400 }
2401