xref: /titanic_51/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 81f63062a60a29358c252e0d10807f8a8547fbb5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/t_lock.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/user.h>
33 #include <sys/buf.h>
34 #include <sys/stat.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/dirent.h>
38 #include <sys/vnode.h>
39 #include <sys/proc.h>
40 #include <sys/file.h>
41 #include <sys/fcntl.h>
42 #include <sys/uio.h>
43 #include <sys/fs/pc_label.h>
44 #include <sys/fs/pc_fs.h>
45 #include <sys/fs/pc_dir.h>
46 #include <sys/fs/pc_node.h>
47 #include <sys/mman.h>
48 #include <sys/pathname.h>
49 #include <sys/vmsystm.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/statvfs.h>
53 #include <sys/unistd.h>
54 #include <sys/kmem.h>
55 #include <sys/conf.h>
56 #include <sys/flock.h>
57 #include <sys/policy.h>
58 #include <sys/sdt.h>
59 #include <sys/sunddi.h>
60 
61 #include <vm/seg.h>
62 #include <vm/page.h>
63 #include <vm/pvn.h>
64 #include <vm/seg_map.h>
65 #include <vm/seg_vn.h>
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/seg_kmem.h>
69 
70 #include <fs/fs_subr.h>
71 
72 static int pcfs_open(struct vnode **, int, struct cred *);
73 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *);
74 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
75 	struct caller_context *);
76 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
77 	struct caller_context *);
78 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *);
79 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
80 	caller_context_t *);
81 static int pcfs_access(struct vnode *, int, int, struct cred *);
82 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
83 	struct pathname *, int, struct vnode *, struct cred *);
84 static int pcfs_create(struct vnode *, char *, struct vattr *,
85 	enum vcexcl, int mode, struct vnode **, struct cred *, int);
86 static int pcfs_remove(struct vnode *, char *, struct cred *);
87 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
88 	struct cred *);
89 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
90 	struct cred *);
91 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *);
92 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *);
93 static int pcfs_fsync(struct vnode *, int, struct cred *);
94 static void pcfs_inactive(struct vnode *, struct cred *);
95 static int pcfs_fid(struct vnode *vp, struct fid *fidp);
96 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
97 	offset_t, cred_t *, caller_context_t *);
98 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
99 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
100 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
101 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
102 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *);
103 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
104 	uchar_t, uchar_t, uint_t, struct cred *);
105 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
106 	size_t, uchar_t, uchar_t, uint_t, struct cred *);
107 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
108 	size_t, uint_t, uint_t, uint_t, struct cred *);
109 static int pcfs_seek(struct vnode *, offset_t, offset_t *);
110 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *);
111 
112 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
113 	struct cred *);
114 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
115 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase);
116 
117 extern krwlock_t pcnodes_lock;
118 
/*
 * Round r up to the next multiple of sizeof (long long).
 */
#define	lround(r)	\
	(((r) + sizeof (long long) - 1) & ~(sizeof (long long) - 1))
120 
121 /*
122  * vnode op vectors for files and directories.
123  */
124 struct vnodeops *pcfs_fvnodeops;
125 struct vnodeops *pcfs_dvnodeops;
126 
127 const fs_operation_def_t pcfs_fvnodeops_template[] = {
128 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
129 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
130 	VOPNAME_READ,		{ .vop_read = pcfs_read },
131 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
132 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
133 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
134 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
135 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
136 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
137 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
138 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
139 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
140 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
141 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
142 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
143 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
144 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
145 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
146 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
147 	NULL,			NULL
148 };
149 
150 const fs_operation_def_t pcfs_dvnodeops_template[] = {
151 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
152 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
153 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
154 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
155 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
156 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
157 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
158 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
159 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
160 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
161 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
162 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
163 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
164 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
165 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
166 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
167 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
168 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
169 	NULL,			NULL
170 };
171 
172 
/*
 * pcfs_open - open entry point.
 *
 * No per-open work is done; every argument is ignored and success (0)
 * is always returned.
 */
/*ARGSUSED*/
static int
pcfs_open(struct vnode **vpp, int flag, struct cred *cr)
{
	return (0);
}
182 
183 /*
184  * files are sync'ed on close to keep floppy up to date
185  */
186 
187 /*ARGSUSED*/
188 static int
189 pcfs_close(
190 	struct vnode *vp,
191 	int flag,
192 	int count,
193 	offset_t offset,
194 	struct cred *cr)
195 {
196 	return (0);
197 }
198 
199 /*ARGSUSED*/
200 static int
201 pcfs_read(
202 	struct vnode *vp,
203 	struct uio *uiop,
204 	int ioflag,
205 	struct cred *cr,
206 	struct caller_context *ct)
207 {
208 	struct pcfs *fsp;
209 	struct pcnode *pcp;
210 	int error;
211 
212 	fsp = VFSTOPCFS(vp->v_vfsp);
213 	if (error = pc_verify(fsp))
214 		return (error);
215 	error = pc_lockfs(fsp, 0, 0);
216 	if (error)
217 		return (error);
218 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
219 		pc_unlockfs(fsp);
220 		return (EIO);
221 	}
222 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
223 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
224 		pc_mark_acc(fsp, pcp);
225 	}
226 	pc_unlockfs(fsp);
227 	if (error) {
228 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
229 	}
230 	return (error);
231 }
232 
233 /*ARGSUSED*/
234 static int
235 pcfs_write(
236 	struct vnode *vp,
237 	struct uio *uiop,
238 	int ioflag,
239 	struct cred *cr,
240 	struct caller_context *ct)
241 {
242 	struct pcfs *fsp;
243 	struct pcnode *pcp;
244 	int error;
245 
246 	fsp = VFSTOPCFS(vp->v_vfsp);
247 	if (error = pc_verify(fsp))
248 		return (error);
249 	error = pc_lockfs(fsp, 0, 0);
250 	if (error)
251 		return (error);
252 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
253 		pc_unlockfs(fsp);
254 		return (EIO);
255 	}
256 	if (ioflag & FAPPEND) {
257 		/*
258 		 * in append mode start at end of file.
259 		 */
260 		uiop->uio_loffset = pcp->pc_size;
261 	}
262 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
263 	pcp->pc_flags |= PC_MOD;
264 	pc_mark_mod(fsp, pcp);
265 	if (ioflag & (FSYNC|FDSYNC))
266 		(void) pc_nodeupdate(pcp);
267 
268 	pc_unlockfs(fsp);
269 	if (error) {
270 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
271 	}
272 	return (error);
273 }
274 
275 /*
276  * read or write a vnode
277  */
278 static int
279 rwpcp(
280 	struct pcnode *pcp,
281 	struct uio *uio,
282 	enum uio_rw rw,
283 	int ioflag)
284 {
285 	struct vnode *vp = PCTOV(pcp);
286 	struct pcfs *fsp;
287 	daddr_t bn;			/* phys block number */
288 	int n;
289 	offset_t off;
290 	caddr_t base;
291 	int mapon, pagecreate;
292 	int newpage;
293 	int error = 0;
294 	rlim64_t limit = uio->uio_llimit;
295 	int oresid = uio->uio_resid;
296 
297 	/*
298 	 * If the filesystem was umounted by force, return immediately.
299 	 */
300 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
301 		return (EIO);
302 
303 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
304 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
305 
306 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
307 	ASSERT(vp->v_type == VREG);
308 
309 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
310 		return (0);
311 	}
312 
313 	if (uio->uio_loffset < 0)
314 		return (EINVAL);
315 
316 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
317 		limit = MAXOFFSET_T;
318 
319 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
320 		proc_t *p = ttoproc(curthread);
321 
322 		mutex_enter(&p->p_lock);
323 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
324 		    p, RCA_UNSAFE_SIGINFO);
325 		mutex_exit(&p->p_lock);
326 		return (EFBIG);
327 	}
328 
329 	/* the following condition will occur only for write */
330 
331 	if (uio->uio_loffset >= UINT32_MAX)
332 		return (EFBIG);
333 
334 	if (uio->uio_resid == 0)
335 		return (0);
336 
337 	if (limit > UINT32_MAX)
338 		limit = UINT32_MAX;
339 
340 	fsp = VFSTOPCFS(vp->v_vfsp);
341 	if (fsp->pcfs_flags & PCFS_IRRECOV)
342 		return (EIO);
343 
344 	do {
345 		/*
346 		 * Assignments to "n" in this block may appear
347 		 * to overflow in some cases.  However, after careful
348 		 * analysis it was determined that all assignments to
349 		 * "n" serve only to make "n" smaller.  Since "n"
350 		 * starts out as no larger than MAXBSIZE, "int" is
351 		 * safe.
352 		 */
353 		off = uio->uio_loffset & MAXBMASK;
354 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
355 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
356 		if (rw == UIO_READ) {
357 			offset_t diff;
358 
359 			diff = pcp->pc_size - uio->uio_loffset;
360 			if (diff <= 0)
361 				return (0);
362 			if (diff < n)
363 				n = (int)diff;
364 		}
365 		/*
366 		 * Compare limit with the actual offset + n, not the
367 		 * rounded down offset "off" or we will overflow
368 		 * the maximum file size after all.
369 		 */
370 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
371 			if (uio->uio_loffset >= limit) {
372 				error = EFBIG;
373 				break;
374 			}
375 			n = (int)(limit - uio->uio_loffset);
376 		}
377 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
378 		pagecreate = 0;
379 		newpage = 0;
380 		if (rw == UIO_WRITE) {
381 			/*
382 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
383 			 * with one page at a time, instead of one MAXBSIZE
384 			 * at a time, so we can fully explore pagecreate
385 			 * optimization??
386 			 */
387 			if (uio->uio_loffset + n > pcp->pc_size) {
388 				uint_t ncl, lcn;
389 
390 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
391 				    fsp->pcfs_clsize);
392 				if (uio->uio_loffset > pcp->pc_size &&
393 				    ncl < (uint_t)howmany(uio->uio_loffset,
394 				    fsp->pcfs_clsize)) {
395 					/*
396 					 * Allocate and zerofill skipped
397 					 * clusters. This may not be worth the
398 					 * effort since a small lseek beyond
399 					 * eof but still within the cluster
400 					 * will not be zeroed out.
401 					 */
402 					lcn = pc_lblkno(fsp, uio->uio_loffset);
403 					error = pc_balloc(pcp, (daddr_t)lcn,
404 					    1, &bn);
405 					ncl = lcn + 1;
406 				}
407 				if (!error &&
408 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
409 				    fsp->pcfs_clsize))
410 					/*
411 					 * allocate clusters w/o zerofill
412 					 */
413 					error = pc_balloc(pcp,
414 					    (daddr_t)pc_lblkno(fsp,
415 					    uio->uio_loffset + n - 1),
416 					    0, &bn);
417 
418 				pcp->pc_flags |= PC_CHG;
419 
420 				if (error) {
421 					pc_cluster32_t ncl;
422 					int nerror;
423 
424 					/*
425 					 * figure out new file size from
426 					 * cluster chain length. If this
427 					 * is detected to loop, the chain
428 					 * is corrupted and we'd better
429 					 * keep our fingers off that file.
430 					 */
431 					nerror = pc_fileclsize(fsp,
432 					    pcp->pc_scluster, &ncl);
433 					if (nerror) {
434 						PC_DPRINTF1(2,
435 						    "cluster chain "
436 						    "corruption, "
437 						    "scluster=%d\n",
438 						    pcp->pc_scluster);
439 						pcp->pc_size = 0;
440 						pcp->pc_flags |= PC_INVAL;
441 						error = nerror;
442 						(void) segmap_release(segkmap,
443 						    base, 0);
444 						break;
445 					}
446 					pcp->pc_size = fsp->pcfs_clsize * ncl;
447 
448 					if (error == ENOSPC &&
449 					    (pcp->pc_size - uio->uio_loffset)
450 					    > 0) {
451 						PC_DPRINTF3(2, "rwpcp ENOSPC "
452 						    "off=%lld n=%d size=%d\n",
453 						    uio->uio_loffset,
454 						    n, pcp->pc_size);
455 						n = (int)(pcp->pc_size -
456 						    uio->uio_loffset);
457 					} else {
458 						PC_DPRINTF1(1,
459 						    "rwpcp error1=%d\n", error);
460 						(void) segmap_release(segkmap,
461 						    base, 0);
462 						break;
463 					}
464 				} else {
465 					pcp->pc_size =
466 					    (uint_t)(uio->uio_loffset + n);
467 				}
468 				if (mapon == 0) {
469 					newpage = segmap_pagecreate(segkmap,
470 					    base, (size_t)n, 0);
471 					pagecreate = 1;
472 				}
473 			} else if (n == MAXBSIZE) {
474 				newpage = segmap_pagecreate(segkmap, base,
475 				    (size_t)n, 0);
476 				pagecreate = 1;
477 			}
478 		}
479 		error = uiomove(base + mapon, (size_t)n, rw, uio);
480 
481 		if (pagecreate && uio->uio_loffset <
482 		    roundup(off + mapon + n, PAGESIZE)) {
483 			offset_t nzero, nmoved;
484 
485 			nmoved = uio->uio_loffset - (off + mapon);
486 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
487 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
488 		}
489 
490 		/*
491 		 * Unlock the pages which have been allocated by
492 		 * page_create_va() in segmap_pagecreate().
493 		 */
494 		if (newpage) {
495 			segmap_pageunlock(segkmap, base, (size_t)n,
496 			    rw == UIO_WRITE ? S_WRITE : S_READ);
497 		}
498 
499 		if (error) {
500 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
501 			/*
502 			 * If we failed on a write, we may have already
503 			 * allocated file blocks as well as pages.  It's hard
504 			 * to undo the block allocation, but we must be sure
505 			 * to invalidate any pages that may have been
506 			 * allocated.
507 			 */
508 			if (rw == UIO_WRITE)
509 				(void) segmap_release(segkmap, base, SM_INVAL);
510 			else
511 				(void) segmap_release(segkmap, base, 0);
512 		} else {
513 			uint_t flags = 0;
514 
515 			if (rw == UIO_READ) {
516 				if (n + mapon == MAXBSIZE ||
517 				    uio->uio_loffset == pcp->pc_size)
518 					flags = SM_DONTNEED;
519 			} else if (ioflag & (FSYNC|FDSYNC)) {
520 				flags = SM_WRITE;
521 			} else if (n + mapon == MAXBSIZE) {
522 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
523 			}
524 			error = segmap_release(segkmap, base, flags);
525 		}
526 
527 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
528 
529 	if (oresid != uio->uio_resid)
530 		error = 0;
531 	return (error);
532 }
533 
534 /*ARGSUSED*/
535 static int
536 pcfs_getattr(
537 	struct vnode *vp,
538 	struct vattr *vap,
539 	int flags,
540 	struct cred *cr)
541 {
542 	struct pcnode *pcp;
543 	struct pcfs *fsp;
544 	int error;
545 	char attr;
546 	struct pctime atime;
547 	int64_t unixtime;
548 
549 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
550 
551 	fsp = VFSTOPCFS(vp->v_vfsp);
552 	error = pc_lockfs(fsp, 0, 0);
553 	if (error)
554 		return (error);
555 
556 	/*
557 	 * Note that we don't check for "invalid node" (PC_INVAL) here
558 	 * only in order to make stat() succeed. We allow no I/O on such
559 	 * a node, but do allow to check for its existance.
560 	 */
561 	if ((pcp = VTOPC(vp)) == NULL) {
562 		pc_unlockfs(fsp);
563 		return (EIO);
564 	}
565 	/*
566 	 * Copy from pcnode.
567 	 */
568 	vap->va_type = vp->v_type;
569 	attr = pcp->pc_entry.pcd_attr;
570 	if (PCA_IS_HIDDEN(fsp, attr))
571 		vap->va_mode = 0;
572 	else if (attr & PCA_LABEL)
573 		vap->va_mode = 0444;
574 	else if (attr & PCA_RDONLY)
575 		vap->va_mode = 0555;
576 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
577 		vap->va_mode = 0755;
578 	} else {
579 		vap->va_mode = 0777;
580 	}
581 
582 	if (attr & PCA_DIR)
583 		vap->va_mode |= S_IFDIR;
584 	else
585 		vap->va_mode |= S_IFREG;
586 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
587 		vap->va_uid = 0;
588 		vap->va_gid = 0;
589 	} else {
590 		vap->va_uid = crgetuid(cr);
591 		vap->va_gid = crgetgid(cr);
592 	}
593 	vap->va_fsid = vp->v_vfsp->vfs_dev;
594 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
595 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
596 	    pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
597 	vap->va_nlink = 1;
598 	vap->va_size = (u_offset_t)pcp->pc_size;
599 	vap->va_rdev = 0;
600 	vap->va_nblocks =
601 	    (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
602 	vap->va_blksize = fsp->pcfs_clsize;
603 
604 	/*
605 	 * FAT root directories have no timestamps. In order not to return
606 	 * "time zero" (1/1/1970), we record the time of the mount and give
607 	 * that. This breaks less expectations.
608 	 */
609 	if (vp->v_flag & VROOT) {
610 		vap->va_mtime = fsp->pcfs_mounttime;
611 		vap->va_atime = fsp->pcfs_mounttime;
612 		vap->va_ctime = fsp->pcfs_mounttime;
613 		pc_unlockfs(fsp);
614 		return (0);
615 	}
616 
617 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
618 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
619 		if (unixtime > INT32_MAX)
620 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
621 		unixtime = MIN(unixtime, INT32_MAX);
622 	} else if (unixtime > INT32_MAX &&
623 	    get_udatamodel() == DATAMODEL_ILP32) {
624 		pc_unlockfs(fsp);
625 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
626 		return (EOVERFLOW);
627 	}
628 
629 	vap->va_mtime.tv_sec = (time_t)unixtime;
630 	vap->va_mtime.tv_nsec = 0;
631 
632 	/*
633 	 * FAT doesn't know about POSIX ctime.
634 	 * Best approximation is to always set it to mtime.
635 	 */
636 	vap->va_ctime = vap->va_mtime;
637 
638 	/*
639 	 * FAT only stores "last access date". If that's the
640 	 * same as the date of last modification then the time
641 	 * of last access is known. Otherwise, use midnight.
642 	 */
643 	atime.pct_date = pcp->pc_entry.pcd_ladate;
644 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
645 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
646 	else
647 		atime.pct_time = 0;
648 	pc_pcttotv(&atime, &unixtime);
649 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
650 		if (unixtime > INT32_MAX)
651 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
652 		unixtime = MIN(unixtime, INT32_MAX);
653 	} else if (unixtime > INT32_MAX &&
654 	    get_udatamodel() == DATAMODEL_ILP32) {
655 		pc_unlockfs(fsp);
656 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
657 		return (EOVERFLOW);
658 	}
659 
660 	vap->va_atime.tv_sec = (time_t)unixtime;
661 	vap->va_atime.tv_nsec = 0;
662 
663 	pc_unlockfs(fsp);
664 	return (0);
665 }
666 
667 
668 /*ARGSUSED*/
669 static int
670 pcfs_setattr(
671 	struct vnode *vp,
672 	struct vattr *vap,
673 	int flags,
674 	struct cred *cr,
675 	caller_context_t *ct)
676 {
677 	struct pcnode *pcp;
678 	mode_t mask = vap->va_mask;
679 	int error;
680 	struct pcfs *fsp;
681 	timestruc_t now, *timep;
682 
683 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
684 	/*
685 	 * cannot set these attributes
686 	 */
687 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
688 		return (EINVAL);
689 	}
690 	/*
691 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
692 	 * from 'tar' when it tries to set times on a directory, and console
693 	 * printf's on the NFS server when it gets EINVAL back on such a
694 	 * request. One possible problem with that since a directory entry
695 	 * identifies a file, '.' and all the '..' entries in subdirectories
696 	 * may get out of sync when the directory is updated since they're
697 	 * treated like separate files. We could fix that by looking for
698 	 * '.' and giving it the same attributes, and then looking for
699 	 * all the subdirectories and updating '..', but that's pretty
700 	 * expensive for something that doesn't seem likely to matter.
701 	 */
702 	/* can't do some ops on directories anyway */
703 	if ((vp->v_type == VDIR) &&
704 	    (mask & AT_SIZE)) {
705 		return (EINVAL);
706 	}
707 
708 	fsp = VFSTOPCFS(vp->v_vfsp);
709 	error = pc_lockfs(fsp, 0, 0);
710 	if (error)
711 		return (error);
712 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
713 		pc_unlockfs(fsp);
714 		return (EIO);
715 	}
716 
717 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
718 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
719 			pc_unlockfs(fsp);
720 			return (EACCES);
721 		}
722 	}
723 
724 	/*
725 	 * Change file access modes.
726 	 * If nobody has write permission, file is marked readonly.
727 	 * Otherwise file is writable by anyone.
728 	 */
729 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
730 		if ((vap->va_mode & 0222) == 0)
731 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
732 		else
733 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
734 		pcp->pc_flags |= PC_CHG;
735 	}
736 	/*
737 	 * Truncate file. Must have write permission.
738 	 */
739 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
740 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
741 			error = EACCES;
742 			goto out;
743 		}
744 		if (vap->va_size > UINT32_MAX) {
745 			error = EFBIG;
746 			goto out;
747 		}
748 		error = pc_truncate(pcp, (uint_t)vap->va_size);
749 		if (error)
750 			goto out;
751 	}
752 	/*
753 	 * Change file modified times.
754 	 */
755 	if (mask & (AT_MTIME | AT_CTIME)) {
756 		/*
757 		 * If SysV-compatible option to set access and
758 		 * modified times if privileged, owner, or write access,
759 		 * use current time rather than va_mtime.
760 		 *
761 		 * XXX - va_mtime.tv_sec == -1 flags this.
762 		 */
763 		timep = &vap->va_mtime;
764 		if (vap->va_mtime.tv_sec == -1) {
765 			gethrestime(&now);
766 			timep = &now;
767 		}
768 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
769 		    timep->tv_sec > INT32_MAX) {
770 			error = EOVERFLOW;
771 			goto out;
772 		}
773 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
774 		if (error)
775 			goto out;
776 		pcp->pc_flags |= PC_CHG;
777 	}
778 	/*
779 	 * Change file access times.
780 	 */
781 	if (mask & AT_ATIME) {
782 		/*
783 		 * If SysV-compatible option to set access and
784 		 * modified times if privileged, owner, or write access,
785 		 * use current time rather than va_mtime.
786 		 *
787 		 * XXX - va_atime.tv_sec == -1 flags this.
788 		 */
789 		struct pctime	atime;
790 
791 		timep = &vap->va_atime;
792 		if (vap->va_atime.tv_sec == -1) {
793 			gethrestime(&now);
794 			timep = &now;
795 		}
796 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
797 		    timep->tv_sec > INT32_MAX) {
798 			error = EOVERFLOW;
799 			goto out;
800 		}
801 		error = pc_tvtopct(timep, &atime);
802 		if (error)
803 			goto out;
804 		pcp->pc_entry.pcd_ladate = atime.pct_date;
805 		pcp->pc_flags |= PC_CHG;
806 	}
807 out:
808 	pc_unlockfs(fsp);
809 	return (error);
810 }
811 
812 
813 /*ARGSUSED*/
814 static int
815 pcfs_access(
816 	struct vnode *vp,
817 	int mode,
818 	int flags,
819 	struct cred *cr)
820 {
821 	struct pcnode *pcp;
822 	struct pcfs *fsp;
823 
824 
825 	fsp = VFSTOPCFS(vp->v_vfsp);
826 
827 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
828 		return (EIO);
829 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
830 		return (EACCES);
831 
832 	/*
833 	 * If this is a boot partition, privileged users have full access while
834 	 * others have read-only access.
835 	 */
836 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
837 		if ((mode & VWRITE) &&
838 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
839 			return (EACCES);
840 	}
841 	return (0);
842 }
843 
844 
845 /*ARGSUSED*/
846 static int
847 pcfs_fsync(
848 	struct vnode *vp,
849 	int syncflag,
850 	struct cred *cr)
851 {
852 	struct pcfs *fsp;
853 	struct pcnode *pcp;
854 	int error;
855 
856 	fsp = VFSTOPCFS(vp->v_vfsp);
857 	if (error = pc_verify(fsp))
858 		return (error);
859 	error = pc_lockfs(fsp, 0, 0);
860 	if (error)
861 		return (error);
862 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
863 		pc_unlockfs(fsp);
864 		return (EIO);
865 	}
866 	rw_enter(&pcnodes_lock, RW_WRITER);
867 	error = pc_nodesync(pcp);
868 	rw_exit(&pcnodes_lock);
869 	pc_unlockfs(fsp);
870 	return (error);
871 }
872 
873 
874 /*ARGSUSED*/
875 static void
876 pcfs_inactive(
877 	struct vnode *vp,
878 	struct cred *cr)
879 {
880 	struct pcnode *pcp;
881 	struct pcfs *fsp;
882 	int error;
883 
884 	fsp = VFSTOPCFS(vp->v_vfsp);
885 	error = pc_lockfs(fsp, 0, 1);
886 
887 	/*
888 	 * If the filesystem was umounted by force, all dirty
889 	 * pages associated with this vnode are invalidated
890 	 * and then the vnode will be freed.
891 	 */
892 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
893 		pcp = VTOPC(vp);
894 		if (vn_has_cached_data(vp)) {
895 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
896 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
897 		}
898 		remque(pcp);
899 		if (error == 0)
900 			pc_unlockfs(fsp);
901 		vn_free(vp);
902 		kmem_free(pcp, sizeof (struct pcnode));
903 		VFS_RELE(PCFSTOVFS(fsp));
904 		return;
905 	}
906 
907 	mutex_enter(&vp->v_lock);
908 	ASSERT(vp->v_count >= 1);
909 	if (vp->v_count > 1) {
910 		vp->v_count--;  /* release our hold from vn_rele */
911 		mutex_exit(&vp->v_lock);
912 		pc_unlockfs(fsp);
913 		return;
914 	}
915 	mutex_exit(&vp->v_lock);
916 
917 	/*
918 	 * Check again to confirm that no intervening I/O error
919 	 * with a subsequent pc_diskchanged() call has released
920 	 * the pcnode. If it has then release the vnode as above.
921 	 */
922 	pcp = VTOPC(vp);
923 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
924 		if (vn_has_cached_data(vp))
925 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
926 			    pcfs_putapage, B_INVAL | B_TRUNC,
927 			    (struct cred *)NULL);
928 	}
929 
930 	if (pcp == NULL) {
931 		vn_free(vp);
932 	} else {
933 		pc_rele(pcp);
934 	}
935 
936 	if (!error)
937 		pc_unlockfs(fsp);
938 }
939 
940 /*ARGSUSED*/
941 static int
942 pcfs_lookup(
943 	struct vnode *dvp,
944 	char *nm,
945 	struct vnode **vpp,
946 	struct pathname *pnp,
947 	int flags,
948 	struct vnode *rdir,
949 	struct cred *cr)
950 {
951 	struct pcfs *fsp;
952 	struct pcnode *pcp;
953 	int error;
954 
955 	/*
956 	 * If the filesystem was umounted by force, return immediately.
957 	 */
958 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
959 		return (EIO);
960 
961 	/*
962 	 * verify that the dvp is still valid on the disk
963 	 */
964 	fsp = VFSTOPCFS(dvp->v_vfsp);
965 	if (error = pc_verify(fsp))
966 		return (error);
967 	error = pc_lockfs(fsp, 0, 0);
968 	if (error)
969 		return (error);
970 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
971 		pc_unlockfs(fsp);
972 		return (EIO);
973 	}
974 	/*
975 	 * Null component name is a synonym for directory being searched.
976 	 */
977 	if (*nm == '\0') {
978 		VN_HOLD(dvp);
979 		*vpp = dvp;
980 		pc_unlockfs(fsp);
981 		return (0);
982 	}
983 
984 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
985 	if (!error) {
986 		*vpp = PCTOV(pcp);
987 		pcp->pc_flags |= PC_EXTERNAL;
988 	}
989 	pc_unlockfs(fsp);
990 	return (error);
991 }
992 
993 
994 /*ARGSUSED*/
995 static int
996 pcfs_create(
997 	struct vnode *dvp,
998 	char *nm,
999 	struct vattr *vap,
1000 	enum vcexcl exclusive,
1001 	int mode,
1002 	struct vnode **vpp,
1003 	struct cred *cr,
1004 	int flag)
1005 {
1006 	int error;
1007 	struct pcnode *pcp;
1008 	struct vnode *vp;
1009 	struct pcfs *fsp;
1010 
1011 	/*
1012 	 * can't create directories. use pcfs_mkdir.
1013 	 * can't create anything other than files.
1014 	 */
1015 	if (vap->va_type == VDIR)
1016 		return (EISDIR);
1017 	else if (vap->va_type != VREG)
1018 		return (EINVAL);
1019 
1020 	pcp = NULL;
1021 	fsp = VFSTOPCFS(dvp->v_vfsp);
1022 	error = pc_lockfs(fsp, 0, 0);
1023 	if (error)
1024 		return (error);
1025 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1026 		pc_unlockfs(fsp);
1027 		return (EIO);
1028 	}
1029 
1030 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1031 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1032 			pc_unlockfs(fsp);
1033 			return (EACCES);
1034 		}
1035 	}
1036 
1037 	if (*nm == '\0') {
1038 		/*
1039 		 * Null component name refers to the directory itself.
1040 		 */
1041 		VN_HOLD(dvp);
1042 		pcp = VTOPC(dvp);
1043 		error = EEXIST;
1044 	} else {
1045 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1046 	}
1047 	/*
1048 	 * if file exists and this is a nonexclusive create,
1049 	 * check for access permissions
1050 	 */
1051 	if (error == EEXIST) {
1052 		vp = PCTOV(pcp);
1053 		if (exclusive == NONEXCL) {
1054 			if (vp->v_type == VDIR) {
1055 				error = EISDIR;
1056 			} else if (mode) {
1057 				error = pcfs_access(PCTOV(pcp), mode, 0,
1058 				    cr);
1059 			} else {
1060 				error = 0;
1061 			}
1062 		}
1063 		if (error) {
1064 			VN_RELE(PCTOV(pcp));
1065 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1066 		    (vap->va_size == 0)) {
1067 			error = pc_truncate(pcp, 0L);
1068 			if (error) {
1069 				VN_RELE(PCTOV(pcp));
1070 			} else {
1071 				vnevent_create(PCTOV(pcp));
1072 			}
1073 		}
1074 	}
1075 	if (error) {
1076 		pc_unlockfs(fsp);
1077 		return (error);
1078 	}
1079 	*vpp = PCTOV(pcp);
1080 	pcp->pc_flags |= PC_EXTERNAL;
1081 	pc_unlockfs(fsp);
1082 	return (error);
1083 }
1084 
1085 /*ARGSUSED*/
1086 static int
1087 pcfs_remove(
1088 	struct vnode *vp,
1089 	char *nm,
1090 	struct cred *cr)
1091 {
1092 	struct pcfs *fsp;
1093 	struct pcnode *pcp;
1094 	int error;
1095 
1096 	fsp = VFSTOPCFS(vp->v_vfsp);
1097 	if (error = pc_verify(fsp))
1098 		return (error);
1099 	error = pc_lockfs(fsp, 0, 0);
1100 	if (error)
1101 		return (error);
1102 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1103 		pc_unlockfs(fsp);
1104 		return (EIO);
1105 	}
1106 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1107 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1108 			pc_unlockfs(fsp);
1109 			return (EACCES);
1110 		}
1111 	}
1112 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG);
1113 	pc_unlockfs(fsp);
1114 	return (error);
1115 }
1116 
1117 /*
1118  * Rename a file or directory
1119  * This rename is restricted to only rename files within a directory.
1120  * XX should make rename more general
1121  */
1122 /*ARGSUSED*/
1123 static int
1124 pcfs_rename(
1125 	struct vnode *sdvp,		/* old (source) parent vnode */
1126 	char *snm,			/* old (source) entry name */
1127 	struct vnode *tdvp,		/* new (target) parent vnode */
1128 	char *tnm,			/* new (target) entry name */
1129 	struct cred *cr)
1130 {
1131 	struct pcfs *fsp;
1132 	struct pcnode *dp;	/* parent pcnode */
1133 	struct pcnode *tdp;
1134 	int error;
1135 
1136 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1137 	if (error = pc_verify(fsp))
1138 		return (error);
1139 
1140 	/*
1141 	 * make sure we can muck with this directory.
1142 	 */
1143 	error = pcfs_access(sdvp, VWRITE, 0, cr);
1144 	if (error) {
1145 		return (error);
1146 	}
1147 	error = pc_lockfs(fsp, 0, 0);
1148 	if (error)
1149 		return (error);
1150 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1151 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1152 		pc_unlockfs(fsp);
1153 		return (EIO);
1154 	}
1155 	error = pc_rename(dp, tdp, snm, tnm);
1156 	pc_unlockfs(fsp);
1157 	return (error);
1158 }
1159 
1160 /*ARGSUSED*/
1161 static int
1162 pcfs_mkdir(
1163 	struct vnode *dvp,
1164 	char *nm,
1165 	struct vattr *vap,
1166 	struct vnode **vpp,
1167 	struct cred *cr)
1168 {
1169 	struct pcfs *fsp;
1170 	struct pcnode *pcp;
1171 	int error;
1172 
1173 	fsp = VFSTOPCFS(dvp->v_vfsp);
1174 	if (error = pc_verify(fsp))
1175 		return (error);
1176 	error = pc_lockfs(fsp, 0, 0);
1177 	if (error)
1178 		return (error);
1179 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1180 		pc_unlockfs(fsp);
1181 		return (EIO);
1182 	}
1183 
1184 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1185 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1186 			pc_unlockfs(fsp);
1187 			return (EACCES);
1188 		}
1189 	}
1190 
1191 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1192 
1193 	if (!error) {
1194 		pcp -> pc_flags |= PC_EXTERNAL;
1195 		*vpp = PCTOV(pcp);
1196 	} else if (error == EEXIST) {
1197 		VN_RELE(PCTOV(pcp));
1198 	}
1199 	pc_unlockfs(fsp);
1200 	return (error);
1201 }
1202 
1203 /*ARGSUSED*/
1204 static int
1205 pcfs_rmdir(
1206 	struct vnode *dvp,
1207 	char *nm,
1208 	struct vnode *cdir,
1209 	struct cred *cr)
1210 {
1211 	struct pcfs *fsp;
1212 	struct pcnode *pcp;
1213 	int error;
1214 
1215 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1216 	if (error = pc_verify(fsp))
1217 		return (error);
1218 	if (error = pc_lockfs(fsp, 0, 0))
1219 		return (error);
1220 
1221 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1222 		pc_unlockfs(fsp);
1223 		return (EIO);
1224 	}
1225 
1226 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1227 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1228 			pc_unlockfs(fsp);
1229 			return (EACCES);
1230 		}
1231 	}
1232 
1233 	error = pc_dirremove(pcp, nm, cdir, VDIR);
1234 	pc_unlockfs(fsp);
1235 	return (error);
1236 }
1237 
/*
 * Read entries in a directory.
 * We must convert pc format to unix format.
 *
 * The uio offset is a byte offset in units of sizeof (struct pcdir); a
 * request with more than one iovec or a misaligned offset gets EINVAL.
 * For the root directory (VROOT) two extra leading slots are synthesized
 * for "." and "..", so the uio offset there is ahead of the on-disk
 * directory offset by 2 * sizeof (struct pcdir).
 *
 * If 'eofp' is non-NULL it is cleared on entry and set to 1 when the end
 * of the directory is reached (pc_blkatoff() returns ENOENT, or an entry
 * whose first name byte is PCD_UNUSED is found).
 *
 * Returns 0, EIO (forced unmount or invalid pcnode), EINVAL (bad uio),
 * ENOSPC (no room for the synthesized root entries), or an error from
 * pc_verify()/pc_lockfs()/pc_blkatoff().
 */

/*ARGSUSED*/
static int
pcfs_readdir(
	struct vnode *dvp,
	struct uio *uiop,
	struct cred *cr,
	int *eofp)
{
	struct pcnode *pcp;
	struct pcfs *fsp;
	struct pcdir *ep;
	struct buf *bp = NULL;
	offset_t offset;
	int boff;
	struct pc_dirent lbp;
	struct pc_dirent *ld = &lbp;
	int error;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if ((uiop->uio_iovcnt != 1) ||
	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
		return (EINVAL);
	}
	fsp = VFSTOPCFS(dvp->v_vfsp);
	/*
	 * verify that the dp is still valid on the disk
	 */
	if (error = pc_verify(fsp)) {
		return (error);
	}
	error = pc_lockfs(fsp, 0, 0);
	if (error)
		return (error);
	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
		pc_unlockfs(fsp);
		return (EIO);
	}

	/* Start from a zeroed dirent so no stale stack bytes are copied out. */
	bzero(ld, sizeof (*ld));

	if (eofp != NULL)
		*eofp = 0;
	offset = uiop->uio_loffset;

	if (dvp->v_flag & VROOT) {
		/*
		 * kludge up entries for "." and ".." in the root.
		 * Both are reported with d_ino == UINT_MAX.
		 */
		if (offset == 0) {
			(void) strcpy(ld->d_name, ".");
			ld->d_reclen = DIRENT64_RECLEN(1);
			ld->d_off = (off64_t)sizeof (struct pcdir);
			ld->d_ino = (ino64_t)UINT_MAX;
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		if (offset == sizeof (struct pcdir)) {
			(void) strcpy(ld->d_name, "..");
			ld->d_reclen = DIRENT64_RECLEN(2);
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			ld->d_off = (off64_t)(uiop->uio_loffset +
			    sizeof (struct pcdir));
			ld->d_ino = (ino64_t)UINT_MAX;
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		offset -= 2 * sizeof (struct pcdir);
		/* offset now has the real offset value into directory file */
	}

	for (;;) {
		/*
		 * (Re)load the directory block when starting a new block,
		 * when no buffer is held yet, or when the offset has run
		 * past the data in the current buffer.
		 */
		boff = pc_blkoff(fsp, offset);
		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
			if (bp != NULL) {
				brelse(bp);
				bp = NULL;
			}
			error = pc_blkatoff(pcp, offset, &bp, &ep);
			if (error) {
				/* ENOENT here means end of directory. */
				if (error == ENOENT) {
					error = 0;
					if (eofp)
						*eofp = 1;
				}
				break;
			}
		}
		/* A PCD_UNUSED first byte ends the scan and reports EOF. */
		if (ep->pcd_filename[0] == PCD_UNUSED) {
			if (eofp)
				*eofp = 1;
			break;
		}
		/*
		 * Step over entries marked PCD_ERASED, advancing both the
		 * uio offset and the directory offset by one slot.
		 */
		if (ep->pcd_filename[0] == PCD_ERASED) {
			uiop->uio_loffset += sizeof (struct pcdir);
			offset += sizeof (struct pcdir);
			ep++;
			continue;
		}
		/*
		 * NOTE(review): a nonzero return from either helper below
		 * only ends the loop; its value is not stored in 'error',
		 * which is 0 at this point, so this function then returns 0.
		 */
		if (PCDL_IS_LFN(ep)) {
			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
			    0)
				break;
			continue;
		}

		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
			break;
	}
	if (bp)
		brelse(bp);
	pc_unlockfs(fsp);
	return (error);
}
1373 
1374 
/*
 * Called from pvn_getpages or pcfs_getpage to get a particular page.
 * When we are called the pcfs is already locked.
 *
 * If 'pl' is NULL (async/read-ahead request) nothing is done and 0 is
 * returned.  Otherwise, on success, 'pl' is filled in either with the
 * already-cached page (pl[0], pl[1] == NULL) or, after reading from the
 * device, by pvn_plist_init().
 *
 * Returns 0 on success, EIO for a forcibly unmounted filesystem or an
 * invalid pcnode, or the error from pc_bmap()/biowait().
 */
/*ARGSUSED*/
static int
pcfs_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],		/* NULL if async IO is requested */
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct pcnode *pcp;
	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
	struct vnode *devvp;
	page_t *pp;
	page_t *pagefound;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
	    (void *)vp, off, len);

	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
		return (EIO);
	devvp = fsp->pcfs_devvp;

	/* pcfs doesn't do readaheads */
	if (pl == NULL)
		return (0);

	pl[0] = NULL;
	err = 0;
	/*
	 * If the accessed time on the pcnode has not already been
	 * set elsewhere (e.g. for read/setattr) we set the time now.
	 * This gives us approximate access times for mmap'ed files
	 * which are accessed via loads in the user address space.
	 * Nothing is marked on a read-only mount.
	 */
	if ((pcp->pc_flags & PC_ACC) == 0 &&
	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
		pc_mark_acc(fsp, pcp);
	}
reread:
	if ((pagefound = page_exists(vp, off)) == NULL) {
		/*
		 * Need to really do disk IO to get the page(s).
		 */
		struct buf *bp;
		daddr_t lbn, bn;
		u_offset_t io_off;
		size_t io_len;
		u_offset_t lbnoff, xferoffset;
		u_offset_t pgoff;
		uint_t	xfersize;
		int err1;

		/*
		 * lbn:        logical block containing 'off'
		 * lbnoff:     'off' rounded down to a cluster boundary
		 * xferoffset: 'off' rounded down to a sector boundary
		 */
		lbn = pc_lblkno(fsp, off);
		lbnoff = off & ~(fsp->pcfs_clsize - 1);
		xferoffset = off & ~(fsp->pcfs_secsize - 1);

		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
		if (pp == NULL)
			/*
			 * XXX - If pcfs is made MT-hot, this should go
			 * back to reread.
			 */
			panic("pcfs_getapage pvn_read_kluster");

		/*
		 * Fill the page piece by piece.  Each pass maps the next
		 * logical block, reads up to 'xfersize' bytes (pc_bmap()
		 * may change xfersize), and advances all cursors by that
		 * amount.  The loop stops at the end of the page or at
		 * the end of the file (pc_size).
		 */
		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
		    pgoff += xfersize,
		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
		    lbnoff += xfersize, xferoffset += xfersize) {
			/*
			 * read as many contiguous blocks as possible to
			 * fill this page
			 */
			xfersize = PAGESIZE - pgoff;
			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
			if (err1) {
				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
				err = err1;
				goto out;
			}
			bp = pageio_setup(pp, xfersize, devvp, B_READ);
			bp->b_edev = devvp->v_rdev;
			bp->b_dev = cmpdev(devvp->v_rdev);
			bp->b_blkno = bn + btodt(xferoffset - lbnoff);
			/* b_addr carries the byte offset within the page. */
			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
			bp->b_file = vp;
			bp->b_offset = (offset_t)(off + pgoff);

			(void) bdev_strategy(bp);

			lwp_stat_update(LWP_STAT_INBLK, 1);

			/* Always wait for the I/O; keep the first error. */
			if (err == 0)
				err = biowait(bp);
			else
				(void) biowait(bp);
			pageio_done(bp);
			if (err)
				goto out;
		}
		/* Zero whatever part of the page was not read from disk. */
		if (pgoff < PAGESIZE) {
			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
		}
		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
	}
out:
	/*
	 * 'err' can only be nonzero after pvn_read_kluster() above has
	 * assigned 'pp', so 'pp' is initialized whenever it is tested here.
	 */
	if (err) {
		if (pp != NULL)
			pvn_read_done(pp, B_ERROR);
		return (err);
	}

	if (pagefound) {
		/*
		 * Page exists in the cache, acquire the "shared"
		 * lock.  If this fails, go back to reread.
		 */
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			goto reread;
		}
		pl[0] = pp;
		pl[1] = NULL;
	}
	return (err);
}
1516 
1517 /*
1518  * Return all the pages from [off..off+len] in given file
1519  */
1520 static int
1521 pcfs_getpage(
1522 	struct vnode *vp,
1523 	offset_t off,
1524 	size_t len,
1525 	uint_t *protp,
1526 	page_t *pl[],
1527 	size_t plsz,
1528 	struct seg *seg,
1529 	caddr_t addr,
1530 	enum seg_rw rw,
1531 	struct cred *cr)
1532 {
1533 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1534 	int err;
1535 
1536 	PC_DPRINTF0(6, "pcfs_getpage\n");
1537 	if (err = pc_verify(fsp))
1538 		return (err);
1539 	if (vp->v_flag & VNOMAP)
1540 		return (ENOSYS);
1541 	ASSERT(off <= UINT32_MAX);
1542 	err = pc_lockfs(fsp, 0, 0);
1543 	if (err)
1544 		return (err);
1545 	if (protp != NULL)
1546 		*protp = PROT_ALL;
1547 
1548 	ASSERT((off & PAGEOFFSET) == 0);
1549 	if (len <= PAGESIZE) {
1550 		err = pcfs_getapage(vp, off, len, protp, pl,
1551 		    plsz, seg, addr, rw, cr);
1552 	} else {
1553 		err = pvn_getpages(pcfs_getapage, vp, off,
1554 		    len, protp, pl, plsz, seg, addr, rw, cr);
1555 	}
1556 	pc_unlockfs(fsp);
1557 	return (err);
1558 }
1559 
1560 
/*
 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
 * If len == 0, do from off to EOF.
 *
 * The normal cases should be len == 0 & off == 0 (entire vp list),
 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
 * (from pageout).
 *
 * B_ASYNC is always stripped from 'flags', so all writes issued from
 * here are synchronous.
 *
 * Returns 0 on success (including when there is nothing cached or 'off'
 * is at/after the end of the file), EIO for a forcibly unmounted
 * filesystem or a missing/invalid pcnode, ENOSYS for a VNOMAP vnode,
 * ENOMEM when called from the pageout process, or an error from
 * pc_verify(), pc_lockfs(), pvn_vplist_dirty() or pcfs_putapage().
 */
/*ARGSUSED*/
static int
pcfs_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr)
{
	struct pcnode *pcp;
	page_t *pp;
	struct pcfs *fsp;
	u_offset_t io_off;
	size_t io_len;
	offset_t eoff;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	fsp = VFSTOPCFS(vp->v_vfsp);

	if (err = pc_verify(fsp))
		return (err);
	if ((pcp = VTOPC(vp)) == NULL) {
		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
		return (EIO);
	}
	if (pcp->pc_flags & PC_INVAL)
		return (EIO);

	if (curproc == proc_pageout) {
		/*
		 * XXX - This is a quick hack to avoid blocking
		 * pageout. Also to avoid pcfs_getapage deadlocking
		 * with putpage when memory is running out,
		 * since we only have one global lock and we don't
		 * support async putpage.
		 * It should be fixed someday.
		 *
		 * Interestingly, this used to be a test of NOMEMWAIT().
		 * We only ever got here once pcfs started supporting
		 * NFS sharing, and then only because the NFS server
		 * threads seem to do writes in sched's process context.
		 * Since everyone else seems to just care about pageout,
		 * the test was changed to look for pageout directly.
		 */
		return (ENOMEM);
	}

	ASSERT(off <= UINT32_MAX);

	flags &= ~B_ASYNC;	/* XXX should fix this later */

	err = pc_lockfs(fsp, 0, 0);
	if (err)
		return (err);
	/* Nothing to do if no pages are cached or 'off' is beyond EOF. */
	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
		pc_unlockfs(fsp);
		return (0);
	}

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off
		 */
		err = pvn_vplist_dirty(vp, off,
		    pcfs_putapage, flags, cr);
	} else {
		eoff = off + len;

		/*
		 * Walk the requested range a page (or written kluster) at
		 * a time, stopping at the end of the range or of the file.
		 */
		for (io_off = off; io_off < eoff &&
		    io_off < pcp->pc_size; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 *
			 * NOTE: B_ASYNC was cleared from 'flags' above, so
			 * the condition below is always true and the
			 * page_lookup_nowait() branch is currently never
			 * taken.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			/*
			 * No page here, or pvn_getdirty() returned 0:
			 * just step to the next page.
			 */
			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = pcfs_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	if (err == 0 && (flags & B_INVAL) &&
	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
		/*
		 * If doing "invalidation", make sure that
		 * all pages on the vnode list are actually
		 * gone.
		 */
		cmn_err(CE_PANIC,
		    "pcfs_putpage: B_INVAL, pages not gone");
	} else if (err) {
		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
	}
	pc_unlockfs(fsp);
	return (err);
}
1697 
/*
 * Write out a single page, possibly klustering adjacent dirty pages.
 *
 * 'pp' is handed to pvn_write_kluster(), which yields the page list to
 * write together with its file offset and length; those two values are
 * returned through 'offp' and 'lenp' when they are non-NULL.
 *
 * If the filesystem is flagged PCFS_IRRECOV no I/O is issued and the
 * pages are completed through pvn_write_done() without B_ERROR.
 *
 * No NULL check is made on the pcnode here.
 * NOTE(review): presumably callers have validated it (pcfs_putpage()
 * does) -- confirm for other callers.
 *
 * Returns 0 or the first error seen from pc_bmap()/biowait().
 */
/*ARGSUSED*/
int
pcfs_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	struct pcnode *pcp;
	struct pcfs *fsp;
	struct vnode *devvp;
	size_t io_len;
	daddr_t bn;
	u_offset_t lbn, lbnoff, xferoffset;
	uint_t pgoff, xfersize;
	int err = 0;
	u_offset_t io_off;

	pcp = VTOPC(vp);
	fsp = VFSTOPCFS(vp->v_vfsp);
	devvp = fsp->pcfs_devvp;

	/*
	 * If the modified time on the inode has not already been
	 * set elsewhere (e.g. for write/setattr) we set the time now.
	 * This gives us approximate modified times for mmap'ed files
	 * which are modified via stores in the user address space.
	 *
	 * NOTE(review): an earlier version of this comment said the time
	 * is set only when this is "not a call from msync (B_FORCE)", but
	 * the test below marks the pcnode modified whenever PC_MOD is
	 * clear OR B_FORCE is set.  Confirm which is intended.
	 */
	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
		pcp->pc_flags |= PC_MOD;
		pc_mark_mod(fsp, pcp);
	}
	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
	    PAGESIZE, flags);

	/* Unrecoverable filesystem: skip the I/O, clean up at 'out'. */
	if (fsp->pcfs_flags & PCFS_IRRECOV) {
		goto out;
	}

	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);

	/*
	 * lbn:        logical block containing io_off
	 * lbnoff:     io_off rounded down to a cluster boundary
	 * xferoffset: io_off rounded down to a sector boundary
	 */
	lbn = pc_lblkno(fsp, io_off);
	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);

	/*
	 * Write the kluster piece by piece; each pass covers 'xfersize'
	 * bytes (pc_bmap() may change xfersize).  The loop stops at the
	 * end of the kluster or at the end of the file (pc_size).
	 */
	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
	    pgoff += xfersize,
	    lbn += howmany(xfersize, fsp->pcfs_clsize),
	    lbnoff += xfersize, xferoffset += xfersize) {

		struct buf *bp;
		int err1;

		/*
		 * write as many contiguous blocks as possible from this page
		 */
		xfersize = io_len - pgoff;
		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
		if (err1) {
			err = err1;
			goto out;
		}
		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
		bp->b_edev = devvp->v_rdev;
		bp->b_dev = cmpdev(devvp->v_rdev);
		bp->b_blkno = bn + btodt(xferoffset - lbnoff);
		/* b_addr carries the byte offset within the page list. */
		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
		bp->b_file = vp;
		bp->b_offset = (offset_t)(io_off + pgoff);

		(void) bdev_strategy(bp);

		lwp_stat_update(LWP_STAT_OUBLK, 1);

		/*
		 * Always wait for the I/O; keep the first error.  Note that
		 * an I/O error does not end the loop.
		 */
		if (err == 0)
			err = biowait(bp);
		else
			(void) biowait(bp);
		pageio_done(bp);
	}
	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
	/* The pages are finished with; keep 'out' from completing them again. */
	pp = NULL;

out:
	/*
	 * Reached with pp != NULL only via the two 'goto out' statements
	 * above (PCFS_IRRECOV, or a pc_bmap() failure).
	 */
	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
		pvn_write_done(pp, B_WRITE | flags);
	} else if (err != 0 && pp != NULL) {
		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
	}

	if (offp)
		*offp = io_off;
	if (lenp)
		*lenp = io_len;
		/*
		 * NOTE: despite its indentation, the trace below is not
		 * governed by the 'if (lenp)' above; it always runs.
		 */
		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
		    (void *)vp, (void *)pp, io_off, io_len);
	if (err) {
		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
	}
	return (err);
}
1805 
1806 /*ARGSUSED*/
1807 static int
1808 pcfs_map(
1809 	struct vnode *vp,
1810 	offset_t off,
1811 	struct as *as,
1812 	caddr_t *addrp,
1813 	size_t len,
1814 	uchar_t prot,
1815 	uchar_t maxprot,
1816 	uint_t flags,
1817 	struct cred *cr)
1818 {
1819 	struct segvn_crargs vn_a;
1820 	int error;
1821 
1822 	PC_DPRINTF0(6, "pcfs_map\n");
1823 	if (vp->v_flag & VNOMAP)
1824 		return (ENOSYS);
1825 
1826 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1827 		return (ENXIO);
1828 
1829 	as_rangelock(as);
1830 	if ((flags & MAP_FIXED) == 0) {
1831 		map_addr(addrp, len, off, 1, flags);
1832 		if (*addrp == NULL) {
1833 			as_rangeunlock(as);
1834 			return (ENOMEM);
1835 		}
1836 	} else {
1837 		/*
1838 		 * User specified address - blow away any previous mappings
1839 		 */
1840 		(void) as_unmap(as, *addrp, len);
1841 	}
1842 
1843 	vn_a.vp = vp;
1844 	vn_a.offset = off;
1845 	vn_a.type = flags & MAP_TYPE;
1846 	vn_a.prot = prot;
1847 	vn_a.maxprot = maxprot;
1848 	vn_a.flags = flags & ~MAP_TYPE;
1849 	vn_a.cred = cr;
1850 	vn_a.amp = NULL;
1851 	vn_a.szc = 0;
1852 	vn_a.lgrp_mem_policy_flags = 0;
1853 
1854 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1855 	as_rangeunlock(as);
1856 	return (error);
1857 }
1858 
1859 /* ARGSUSED */
1860 static int
1861 pcfs_seek(
1862 	struct vnode *vp,
1863 	offset_t ooff,
1864 	offset_t *noffp)
1865 {
1866 	if (*noffp < 0)
1867 		return (EINVAL);
1868 	else if (*noffp > MAXOFFSET_T)
1869 		return (EINVAL);
1870 	else
1871 		return (0);
1872 }
1873 
1874 /* ARGSUSED */
1875 static int
1876 pcfs_addmap(
1877 	struct vnode *vp,
1878 	offset_t off,
1879 	struct as *as,
1880 	caddr_t addr,
1881 	size_t len,
1882 	uchar_t prot,
1883 	uchar_t maxprot,
1884 	uint_t flags,
1885 	struct cred *cr)
1886 {
1887 	if (vp->v_flag & VNOMAP)
1888 		return (ENOSYS);
1889 	return (0);
1890 }
1891 
1892 /*ARGSUSED*/
1893 static int
1894 pcfs_delmap(
1895 	struct vnode *vp,
1896 	offset_t off,
1897 	struct as *as,
1898 	caddr_t addr,
1899 	size_t len,
1900 	uint_t prot,
1901 	uint_t maxprot,
1902 	uint_t flags,
1903 	struct cred *cr)
1904 {
1905 	if (vp->v_flag & VNOMAP)
1906 		return (ENOSYS);
1907 	return (0);
1908 }
1909 
1910 /*
1911  * POSIX pathconf() support.
1912  */
1913 /* ARGSUSED */
1914 static int
1915 pcfs_pathconf(
1916 	struct vnode *vp,
1917 	int cmd,
1918 	ulong_t *valp,
1919 	struct cred *cr)
1920 {
1921 	ulong_t val;
1922 	int error = 0;
1923 	struct statvfs64 vfsbuf;
1924 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1925 
1926 	switch (cmd) {
1927 
1928 	case _PC_LINK_MAX:
1929 		val = 1;
1930 		break;
1931 
1932 	case _PC_MAX_CANON:
1933 		val = MAX_CANON;
1934 		break;
1935 
1936 	case _PC_MAX_INPUT:
1937 		val = MAX_INPUT;
1938 		break;
1939 
1940 	case _PC_NAME_MAX:
1941 		bzero(&vfsbuf, sizeof (vfsbuf));
1942 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
1943 			break;
1944 		val = vfsbuf.f_namemax;
1945 		break;
1946 
1947 	case _PC_PATH_MAX:
1948 	case _PC_SYMLINK_MAX:
1949 		val = PCMAXPATHLEN;
1950 		break;
1951 
1952 	case _PC_PIPE_BUF:
1953 		val = PIPE_BUF;
1954 		break;
1955 
1956 	case _PC_NO_TRUNC:
1957 		val = (ulong_t)-1; 	/* Will truncate long file name */
1958 		break;
1959 
1960 	case _PC_VDISABLE:
1961 		val = _POSIX_VDISABLE;
1962 		break;
1963 
1964 	case _PC_CHOWN_RESTRICTED:
1965 		if (rstchown)
1966 			val = rstchown;		/* chown restricted enabled */
1967 		else
1968 			val = (ulong_t)-1;
1969 		break;
1970 
1971 	case _PC_ACL_ENABLED:
1972 		val = 0;
1973 		break;
1974 
1975 	case _PC_FILESIZEBITS:
1976 		/*
1977 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1978 		 * FAT12 can only go up to the maximum filesystem capacity
1979 		 * which is ~509MB.
1980 		 */
1981 		val = IS_FAT12(fsp) ? 30 : 33;
1982 		break;
1983 	default:
1984 		error = EINVAL;
1985 		break;
1986 	}
1987 
1988 	if (error == 0)
1989 		*valp = val;
1990 	return (error);
1991 }
1992 
1993 /* ARGSUSED */
1994 static int
1995 pcfs_space(
1996 	struct vnode *vp,
1997 	int cmd,
1998 	struct flock64 *bfp,
1999 	int flag,
2000 	offset_t offset,
2001 	cred_t *cr,
2002 	caller_context_t *ct)
2003 {
2004 	struct vattr vattr;
2005 	int error;
2006 
2007 	if (cmd != F_FREESP)
2008 		return (EINVAL);
2009 
2010 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2011 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2012 			return (EFBIG);
2013 		/*
2014 		 * we only support the special case of l_len == 0,
2015 		 * meaning free to end of file at this moment.
2016 		 */
2017 		if (bfp->l_len != 0)
2018 			return (EINVAL);
2019 		vattr.va_mask = AT_SIZE;
2020 		vattr.va_size = bfp->l_start;
2021 		error = VOP_SETATTR(vp, &vattr, 0, cr, ct);
2022 	}
2023 	return (error);
2024 }
2025 
2026 /*
2027  * Break up 'len' chars from 'buf' into a long file name chunk.
2028  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2029  */
2030 void
2031 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2032 {
2033 	int	i;
2034 
2035 	ASSERT(buf != NULL);
2036 
2037 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2038 		if (len > 0) {
2039 			ep->pcdl_firstfilename[i] = *buf++;
2040 			ep->pcdl_firstfilename[i + 1] = *buf++;
2041 			len -= 2;
2042 		} else {
2043 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2044 			ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2045 		}
2046 	}
2047 
2048 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2049 		if (len > 0) {
2050 			ep->pcdl_secondfilename[i] = *buf++;
2051 			ep->pcdl_secondfilename[i + 1] = *buf++;
2052 			len -= 2;
2053 		} else {
2054 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2055 			ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2056 		}
2057 	}
2058 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2059 		if (len > 0) {
2060 			ep->pcdl_thirdfilename[i] = *buf++;
2061 			ep->pcdl_thirdfilename[i + 1] = *buf++;
2062 			len -= 2;
2063 		} else {
2064 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2065 			ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2066 		}
2067 	}
2068 }
2069 
2070 /*
2071  * Extract the characters from the long filename chunk into 'buf'.
2072  * Return the number of characters extracted.
2073  */
2074 static int
2075 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase)
2076 {
2077 	char 	*tmp = buf;
2078 	int	i;
2079 
2080 	/* Copy all the names, no filtering now */
2081 
2082 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2083 		*tmp = ep->pcdl_firstfilename[i];
2084 		*(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2085 
2086 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2087 			return (tmp - buf);
2088 		if (*(tmp + 1) == '\0' && foldcase) {
2089 			*tmp = toupper(*tmp);
2090 		}
2091 	}
2092 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2093 		*tmp = ep->pcdl_secondfilename[i];
2094 		*(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2095 
2096 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2097 			return (tmp - buf);
2098 		if (*(tmp + 1) == '\0' && foldcase) {
2099 			*tmp = toupper(*tmp);
2100 		}
2101 	}
2102 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2103 		*tmp = ep->pcdl_thirdfilename[i];
2104 		*(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2105 
2106 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2107 			return (tmp - buf);
2108 		if (*(tmp + 1) == '\0' && foldcase) {
2109 			*tmp = toupper(*tmp);
2110 		}
2111 	}
2112 	return (tmp - buf);
2113 }
2114 
2115 
2116 /*
2117  * Checksum the passed in short filename.
2118  * This is used to validate each component of the long name to make
2119  * sure the long name is valid (it hasn't been "detached" from the
2120  * short filename). This algorithm was found in FreeBSD.
2121  * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2122  */
2123 
2124 uchar_t
2125 pc_checksum_long_fn(char *name, char *ext)
2126 {
2127 	uchar_t c;
2128 	char	b[11];
2129 
2130 	bcopy(name, b, 8);
2131 	bcopy(ext, b+8, 3);
2132 
2133 	c = b[0];
2134 	c = ((c << 7) | (c >> 1)) + b[1];
2135 	c = ((c << 7) | (c >> 1)) + b[2];
2136 	c = ((c << 7) | (c >> 1)) + b[3];
2137 	c = ((c << 7) | (c >> 1)) + b[4];
2138 	c = ((c << 7) | (c >> 1)) + b[5];
2139 	c = ((c << 7) | (c >> 1)) + b[6];
2140 	c = ((c << 7) | (c >> 1)) + b[7];
2141 	c = ((c << 7) | (c >> 1)) + b[8];
2142 	c = ((c << 7) | (c >> 1)) + b[9];
2143 	c = ((c << 7) | (c >> 1)) + b[10];
2144 
2145 	return (c);
2146 }
2147 
2148 /*
2149  * Read a chunk of long filename entries into 'namep'.
2150  * Return with offset pointing to short entry (on success), or next
2151  * entry to read (if this wasn't a valid lfn really).
2152  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2153  * a long filename.
2154  *
2155  * Can also be called with a NULL namep, in which case it just returns
2156  * whether this was really a valid long filename and consumes it
2157  * (used by pc_dirempty()).
2158  */
2159 int
2160 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2161     struct pcdir **epp, offset_t *offset, struct buf **bp)
2162 {
2163 	struct pcdir *ep = *epp;
2164 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2165 	struct vnode *dvp = PCTOV(pcp);
2166 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2167 	char	*lfn;
2168 	char	*lfn_base;
2169 	int	boff;
2170 	int	i, cs;
2171 	char	*buf;
2172 	uchar_t	cksum;
2173 	int	detached = 0;
2174 	int	error = 0;
2175 	int	foldcase;
2176 	int	count = 0;
2177 	size_t	u16l = 0, u8l = 0;
2178 
2179 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2180 	lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2181 	lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2182 	*lfn = '\0';
2183 	*(lfn + 1) = '\0';
2184 	cksum = lep->pcdl_checksum;
2185 
2186 	buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2187 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2188 		/* read next block if necessary */
2189 		boff = pc_blkoff(fsp, *offset);
2190 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2191 			if (*bp != NULL) {
2192 				brelse(*bp);
2193 				*bp = NULL;
2194 			}
2195 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2196 			if (error) {
2197 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2198 				kmem_free(buf, PCMAXNAM_UTF16);
2199 				return (error);
2200 			}
2201 			lep = (struct pcdir_lfn *)ep;
2202 		}
2203 		/* can this happen? Bad fs? */
2204 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2205 			detached = 1;
2206 			break;
2207 		}
2208 		if (cksum != lep->pcdl_checksum)
2209 			detached = 1;
2210 		/* process current entry */
2211 		cs = get_long_fn_chunk(lep, buf, foldcase);
2212 		count += cs;
2213 		for (; cs > 0; cs--) {
2214 			/* see if we underflow */
2215 			if (lfn >= lfn_base)
2216 				*--lfn = buf[cs - 1];
2217 			else
2218 				detached = 1;
2219 		}
2220 		lep++;
2221 		*offset += sizeof (struct pcdir);
2222 	}
2223 	kmem_free(buf, PCMAXNAM_UTF16);
2224 	/* read next block if necessary */
2225 	boff = pc_blkoff(fsp, *offset);
2226 	ep = (struct pcdir *)lep;
2227 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2228 		if (*bp != NULL) {
2229 			brelse(*bp);
2230 			*bp = NULL;
2231 		}
2232 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2233 		if (error) {
2234 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2235 			return (error);
2236 		}
2237 	}
2238 	/* should be on the short one */
2239 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2240 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2241 		detached = 1;
2242 	}
2243 	if (detached ||
2244 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2245 	    !pc_valid_long_fn(lfn, 0)) {
2246 		/*
2247 		 * process current entry again. This may end up another lfn
2248 		 * or a short name.
2249 		 */
2250 		*epp = ep;
2251 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2252 		return (EINVAL);
2253 	}
2254 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2255 		/*
2256 		 * Don't display label because it may contain
2257 		 * funny characters.
2258 		 */
2259 		*offset += sizeof (struct pcdir);
2260 		ep++;
2261 		*epp = ep;
2262 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2263 		return (EINVAL);
2264 	}
2265 	if (namep) {
2266 		u16l = count / 2;
2267 		u8l = PCMAXNAMLEN;
2268 		error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2269 		    (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2270 		/*
2271 		 * uconv_u16tou8() will catch conversion errors including
2272 		 * the case where there is not enough room to write the
2273 		 * converted result and the u8l will never go over the given
2274 		 * PCMAXNAMLEN.
2275 		 */
2276 		if (error != 0) {
2277 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2278 			return (EINVAL);
2279 		}
2280 		namep[u8l] = '\0';
2281 	}
2282 	kmem_free(lfn_base, PCMAXNAM_UTF16);
2283 	*epp = ep;
2284 	return (0);
2285 }
2286 /*
2287  * Read a long filename into the pc_dirent structure and copy it out.
2288  */
2289 int
2290 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2291     struct pcdir **epp, offset_t *offset, struct buf **bp)
2292 {
2293 	struct pcdir *ep;
2294 	struct pcnode *pcp = VTOPC(dvp);
2295 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2296 	offset_t uiooffset = uiop->uio_loffset;
2297 	int	error = 0;
2298 	offset_t oldoffset;
2299 
2300 	oldoffset = *offset;
2301 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2302 	if (error) {
2303 		if (error == EINVAL) {
2304 			uiop->uio_loffset += *offset - oldoffset;
2305 			return (0);
2306 		} else
2307 			return (error);
2308 	}
2309 
2310 	ep = *epp;
2311 	uiop->uio_loffset += *offset - oldoffset;
2312 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2313 	if (ld->d_reclen > uiop->uio_resid) {
2314 		uiop->uio_loffset = uiooffset;
2315 		return (ENOSPC);
2316 	}
2317 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2318 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2319 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2320 	    pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2321 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2322 	uiop->uio_loffset = ld->d_off;
2323 	*offset += sizeof (struct pcdir);
2324 	ep++;
2325 	*epp = ep;
2326 	return (0);
2327 }
2328 
2329 /*
2330  * Read a short filename into the pc_dirent structure and copy it out.
2331  */
2332 int
2333 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2334     struct pcdir **epp, offset_t *offset, struct buf **bp)
2335 {
2336 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2337 	int	boff = pc_blkoff(fsp, *offset);
2338 	struct pcdir *ep = *epp;
2339 	offset_t	oldoffset = uiop->uio_loffset;
2340 	int	error;
2341 	int	foldcase;
2342 
2343 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2344 		uiop->uio_loffset += sizeof (struct pcdir);
2345 		*offset += sizeof (struct pcdir);
2346 		ep++;
2347 		*epp = ep;
2348 		return (0);
2349 	}
2350 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2351 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2352 	    pc_direntpersec(fsp));
2353 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2354 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2355 	    &ep->pcd_ext[0], foldcase);
2356 	if (error == 0) {
2357 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2358 		if (ld->d_reclen > uiop->uio_resid) {
2359 			uiop->uio_loffset = oldoffset;
2360 			return (ENOSPC);
2361 		}
2362 		ld->d_off = (off64_t)(uiop->uio_loffset +
2363 		    sizeof (struct pcdir));
2364 		(void) uiomove((caddr_t)ld,
2365 		    ld->d_reclen, UIO_READ, uiop);
2366 		uiop->uio_loffset = ld->d_off;
2367 	} else {
2368 		uiop->uio_loffset += sizeof (struct pcdir);
2369 	}
2370 	*offset += sizeof (struct pcdir);
2371 	ep++;
2372 	*epp = ep;
2373 	return (0);
2374 }
2375 
2376 static int
2377 pcfs_fid(struct vnode *vp, struct fid *fidp)
2378 {
2379 	struct pc_fid *pcfid;
2380 	struct pcnode *pcp;
2381 	struct pcfs	*fsp;
2382 	int	error;
2383 
2384 	fsp = VFSTOPCFS(vp->v_vfsp);
2385 	if (fsp == NULL)
2386 		return (EIO);
2387 	error = pc_lockfs(fsp, 0, 0);
2388 	if (error)
2389 		return (error);
2390 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2391 		pc_unlockfs(fsp);
2392 		return (EIO);
2393 	}
2394 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2395 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2396 		pc_unlockfs(fsp);
2397 		return (ENOSPC);
2398 	}
2399 
2400 	pcfid = (struct pc_fid *)fidp;
2401 	bzero(pcfid, sizeof (struct pc_fid));
2402 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2403 	if (vp->v_flag & VROOT) {
2404 		pcfid->pcfid_block = 0;
2405 		pcfid->pcfid_offset = 0;
2406 		pcfid->pcfid_ctime = 0;
2407 	} else {
2408 		pcfid->pcfid_block = pcp->pc_eblkno;
2409 		pcfid->pcfid_offset = pcp->pc_eoffset;
2410 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2411 	}
2412 	pc_unlockfs(fsp);
2413 	return (0);
2414 }
2415