xref: /titanic_41/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 31ceb98b622e1a310256f4c4a1472beb92046db3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/t_lock.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/user.h>
33 #include <sys/buf.h>
34 #include <sys/stat.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/dirent.h>
38 #include <sys/vnode.h>
39 #include <sys/proc.h>
40 #include <sys/file.h>
41 #include <sys/fcntl.h>
42 #include <sys/uio.h>
43 #include <sys/fs/pc_label.h>
44 #include <sys/fs/pc_fs.h>
45 #include <sys/fs/pc_dir.h>
46 #include <sys/fs/pc_node.h>
47 #include <sys/mman.h>
48 #include <sys/pathname.h>
49 #include <sys/vmsystm.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/statvfs.h>
53 #include <sys/unistd.h>
54 #include <sys/kmem.h>
55 #include <sys/conf.h>
56 #include <sys/flock.h>
57 #include <sys/policy.h>
58 #include <sys/sdt.h>
59 #include <sys/sunddi.h>
60 
61 #include <vm/seg.h>
62 #include <vm/page.h>
63 #include <vm/pvn.h>
64 #include <vm/seg_map.h>
65 #include <vm/seg_vn.h>
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/seg_kmem.h>
69 
70 #include <fs/fs_subr.h>
71 
72 static int pcfs_open(struct vnode **, int, struct cred *);
73 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *);
74 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
75 	struct caller_context *);
76 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
77 	struct caller_context *);
78 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *);
79 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
80 	caller_context_t *);
81 static int pcfs_access(struct vnode *, int, int, struct cred *);
82 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
83 	struct pathname *, int, struct vnode *, struct cred *);
84 static int pcfs_create(struct vnode *, char *, struct vattr *,
85 	enum vcexcl, int mode, struct vnode **, struct cred *, int);
86 static int pcfs_remove(struct vnode *, char *, struct cred *);
87 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
88 	struct cred *);
89 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
90 	struct cred *);
91 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *);
92 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *);
93 static int pcfs_fsync(struct vnode *, int, struct cred *);
94 static void pcfs_inactive(struct vnode *, struct cred *);
95 static int pcfs_fid(struct vnode *vp, struct fid *fidp);
96 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
97 	offset_t, cred_t *, caller_context_t *);
98 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
99 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
100 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
101 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
102 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *);
103 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
104 	uchar_t, uchar_t, uint_t, struct cred *);
105 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
106 	size_t, uchar_t, uchar_t, uint_t, struct cred *);
107 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
108 	size_t, uint_t, uint_t, uint_t, struct cred *);
109 static int pcfs_seek(struct vnode *, offset_t, offset_t *);
110 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *);
111 
112 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
113 	struct cred *);
114 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
115 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase);
116 
117 extern krwlock_t pcnodes_lock;
118 
119 #define	lround(r)	(((r)+sizeof (long long)-1)&(~(sizeof (long long)-1)))
120 
121 /*
122  * vnode op vectors for files and directories.
123  */
124 struct vnodeops *pcfs_fvnodeops;
125 struct vnodeops *pcfs_dvnodeops;
126 
127 const fs_operation_def_t pcfs_fvnodeops_template[] = {
128 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
129 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
130 	VOPNAME_READ,		{ .vop_read = pcfs_read },
131 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
132 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
133 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
134 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
135 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
136 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
137 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
138 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
139 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
140 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
141 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
142 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
143 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
144 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
145 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
146 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
147 	NULL,			NULL
148 };
149 
150 const fs_operation_def_t pcfs_dvnodeops_template[] = {
151 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
152 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
153 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
154 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
155 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
156 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
157 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
158 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
159 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
160 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
161 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
162 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
163 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
164 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
165 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
166 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
167 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
168 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
169 	NULL,			NULL
170 };
171 
172 
/*
 * Open a pcfs vnode.  No per-open work is done here, so every request
 * is reported as successful and none of the arguments are examined.
 */
/*ARGSUSED*/
static int
pcfs_open(struct vnode **vpp, int flag, struct cred *cr)
{
	return (0);
}
182 
183 /*
184  * files are sync'ed on close to keep floppy up to date
185  */
186 
187 /*ARGSUSED*/
188 static int
189 pcfs_close(
190 	struct vnode *vp,
191 	int flag,
192 	int count,
193 	offset_t offset,
194 	struct cred *cr)
195 {
196 	return (0);
197 }
198 
199 /*ARGSUSED*/
200 static int
201 pcfs_read(
202 	struct vnode *vp,
203 	struct uio *uiop,
204 	int ioflag,
205 	struct cred *cr,
206 	struct caller_context *ct)
207 {
208 	struct pcfs *fsp;
209 	struct pcnode *pcp;
210 	int error;
211 
212 	fsp = VFSTOPCFS(vp->v_vfsp);
213 	if (error = pc_verify(fsp))
214 		return (error);
215 	error = pc_lockfs(fsp, 0, 0);
216 	if (error)
217 		return (error);
218 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
219 		pc_unlockfs(fsp);
220 		return (EIO);
221 	}
222 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
223 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
224 		pcp->pc_flags |= PC_ACC;
225 		pc_mark_acc(pcp);
226 	}
227 	pc_unlockfs(fsp);
228 	if (error) {
229 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
230 	}
231 	return (error);
232 }
233 
234 /*ARGSUSED*/
235 static int
236 pcfs_write(
237 	struct vnode *vp,
238 	struct uio *uiop,
239 	int ioflag,
240 	struct cred *cr,
241 	struct caller_context *ct)
242 {
243 	struct pcfs *fsp;
244 	struct pcnode *pcp;
245 	int error;
246 
247 	fsp = VFSTOPCFS(vp->v_vfsp);
248 	if (error = pc_verify(fsp))
249 		return (error);
250 	error = pc_lockfs(fsp, 0, 0);
251 	if (error)
252 		return (error);
253 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
254 		pc_unlockfs(fsp);
255 		return (EIO);
256 	}
257 	if (ioflag & FAPPEND) {
258 		/*
259 		 * in append mode start at end of file.
260 		 */
261 		uiop->uio_loffset = pcp->pc_size;
262 	}
263 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
264 	pcp->pc_flags |= PC_MOD;
265 	pc_mark_mod(pcp);
266 	if (ioflag & (FSYNC|FDSYNC))
267 		(void) pc_nodeupdate(pcp);
268 
269 	pc_unlockfs(fsp);
270 	if (error) {
271 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
272 	}
273 	return (error);
274 }
275 
276 /*
277  * read or write a vnode
278  */
279 static int
280 rwpcp(
281 	struct pcnode *pcp,
282 	struct uio *uio,
283 	enum uio_rw rw,
284 	int ioflag)
285 {
286 	struct vnode *vp = PCTOV(pcp);
287 	struct pcfs *fsp;
288 	daddr_t bn;			/* phys block number */
289 	int n;
290 	offset_t off;
291 	caddr_t base;
292 	int mapon, pagecreate;
293 	int newpage;
294 	int error = 0;
295 	rlim64_t limit = uio->uio_llimit;
296 	int oresid = uio->uio_resid;
297 
298 	/*
299 	 * If the filesystem was umounted by force, return immediately.
300 	 */
301 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
302 		return (EIO);
303 
304 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
305 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
306 
307 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
308 	ASSERT(vp->v_type == VREG);
309 
310 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
311 		return (0);
312 	}
313 
314 	if (uio->uio_loffset < 0)
315 		return (EINVAL);
316 
317 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
318 		limit = MAXOFFSET_T;
319 
320 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
321 		proc_t *p = ttoproc(curthread);
322 
323 		mutex_enter(&p->p_lock);
324 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
325 		    p, RCA_UNSAFE_SIGINFO);
326 		mutex_exit(&p->p_lock);
327 		return (EFBIG);
328 	}
329 
330 	/* the following condition will occur only for write */
331 
332 	if (uio->uio_loffset >= UINT32_MAX)
333 		return (EFBIG);
334 
335 	if (uio->uio_resid == 0)
336 		return (0);
337 
338 	if (limit > UINT32_MAX)
339 		limit = UINT32_MAX;
340 
341 	fsp = VFSTOPCFS(vp->v_vfsp);
342 	if (fsp->pcfs_flags & PCFS_IRRECOV)
343 		return (EIO);
344 
345 	do {
346 		/*
347 		 * Assignments to "n" in this block may appear
348 		 * to overflow in some cases.  However, after careful
349 		 * analysis it was determined that all assignments to
350 		 * "n" serve only to make "n" smaller.  Since "n"
351 		 * starts out as no larger than MAXBSIZE, "int" is
352 		 * safe.
353 		 */
354 		off = uio->uio_loffset & MAXBMASK;
355 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
356 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
357 		if (rw == UIO_READ) {
358 			offset_t diff;
359 
360 			diff = pcp->pc_size - uio->uio_loffset;
361 			if (diff <= 0)
362 				return (0);
363 			if (diff < n)
364 				n = (int)diff;
365 		}
366 		/*
367 		 * Compare limit with the actual offset + n, not the
368 		 * rounded down offset "off" or we will overflow
369 		 * the maximum file size after all.
370 		 */
371 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
372 			if (uio->uio_loffset >= limit) {
373 				error = EFBIG;
374 				break;
375 			}
376 			n = (int)(limit - uio->uio_loffset);
377 		}
378 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
379 		pagecreate = 0;
380 		newpage = 0;
381 		if (rw == UIO_WRITE) {
382 			/*
383 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
384 			 * with one page at a time, instead of one MAXBSIZE
385 			 * at a time, so we can fully explore pagecreate
386 			 * optimization??
387 			 */
388 			if (uio->uio_loffset + n > pcp->pc_size) {
389 				uint_t ncl, lcn;
390 
391 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
392 				    fsp->pcfs_clsize);
393 				if (uio->uio_loffset > pcp->pc_size &&
394 				    ncl < (uint_t)howmany(uio->uio_loffset,
395 				    fsp->pcfs_clsize)) {
396 					/*
397 					 * Allocate and zerofill skipped
398 					 * clusters. This may not be worth the
399 					 * effort since a small lseek beyond
400 					 * eof but still within the cluster
401 					 * will not be zeroed out.
402 					 */
403 					lcn = pc_lblkno(fsp, uio->uio_loffset);
404 					error = pc_balloc(pcp, (daddr_t)lcn,
405 					    1, &bn);
406 					ncl = lcn + 1;
407 				}
408 				if (!error &&
409 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
410 				    fsp->pcfs_clsize))
411 					/*
412 					 * allocate clusters w/o zerofill
413 					 */
414 					error = pc_balloc(pcp,
415 					    (daddr_t)pc_lblkno(fsp,
416 					    uio->uio_loffset + n - 1),
417 					    0, &bn);
418 
419 				pcp->pc_flags |= PC_CHG;
420 
421 				if (error) {
422 					pc_cluster32_t ncl;
423 					int nerror;
424 
425 					/*
426 					 * figure out new file size from
427 					 * cluster chain length. If this
428 					 * is detected to loop, the chain
429 					 * is corrupted and we'd better
430 					 * keep our fingers off that file.
431 					 */
432 					nerror = pc_fileclsize(fsp,
433 					    pcp->pc_scluster, &ncl);
434 					if (nerror) {
435 						PC_DPRINTF1(2,
436 						    "cluster chain "
437 						    "corruption, "
438 						    "scluster=%d\n",
439 						    pcp->pc_scluster);
440 						pcp->pc_size = 0;
441 						pcp->pc_flags |= PC_INVAL;
442 						error = nerror;
443 						(void) segmap_release(segkmap,
444 						    base, 0);
445 						break;
446 					}
447 					pcp->pc_size = fsp->pcfs_clsize * ncl;
448 
449 					if (error == ENOSPC &&
450 					    (pcp->pc_size - uio->uio_loffset)
451 					    > 0) {
452 						PC_DPRINTF3(2, "rwpcp ENOSPC "
453 						    "off=%lld n=%d size=%d\n",
454 						    uio->uio_loffset,
455 						    n, pcp->pc_size);
456 						n = (int)(pcp->pc_size -
457 						    uio->uio_loffset);
458 					} else {
459 						PC_DPRINTF1(1,
460 						    "rwpcp error1=%d\n", error);
461 						(void) segmap_release(segkmap,
462 						    base, 0);
463 						break;
464 					}
465 				} else {
466 					pcp->pc_size =
467 					    (uint_t)(uio->uio_loffset + n);
468 				}
469 				if (mapon == 0) {
470 					newpage = segmap_pagecreate(segkmap,
471 					    base, (size_t)n, 0);
472 					pagecreate = 1;
473 				}
474 			} else if (n == MAXBSIZE) {
475 				newpage = segmap_pagecreate(segkmap, base,
476 				    (size_t)n, 0);
477 				pagecreate = 1;
478 			}
479 		}
480 		error = uiomove(base + mapon, (size_t)n, rw, uio);
481 
482 		if (pagecreate && uio->uio_loffset <
483 		    roundup(off + mapon + n, PAGESIZE)) {
484 			offset_t nzero, nmoved;
485 
486 			nmoved = uio->uio_loffset - (off + mapon);
487 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
488 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
489 		}
490 
491 		/*
492 		 * Unlock the pages which have been allocated by
493 		 * page_create_va() in segmap_pagecreate().
494 		 */
495 		if (newpage)
496 			segmap_pageunlock(segkmap, base, (size_t)n,
497 			    rw == UIO_WRITE ? S_WRITE : S_READ);
498 
499 		if (error) {
500 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
501 			/*
502 			 * If we failed on a write, we may have already
503 			 * allocated file blocks as well as pages.  It's hard
504 			 * to undo the block allocation, but we must be sure
505 			 * to invalidate any pages that may have been
506 			 * allocated.
507 			 */
508 			if (rw == UIO_WRITE)
509 				(void) segmap_release(segkmap, base, SM_INVAL);
510 			else
511 				(void) segmap_release(segkmap, base, 0);
512 		} else {
513 			uint_t flags = 0;
514 
515 			if (rw == UIO_READ) {
516 				if (n + mapon == MAXBSIZE ||
517 				    uio->uio_loffset == pcp->pc_size)
518 					flags = SM_DONTNEED;
519 			} else if (ioflag & (FSYNC|FDSYNC)) {
520 				flags = SM_WRITE;
521 			} else if (n + mapon == MAXBSIZE) {
522 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
523 			}
524 			error = segmap_release(segkmap, base, flags);
525 		}
526 
527 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
528 
529 	if (oresid != uio->uio_resid)
530 		error = 0;
531 	return (error);
532 }
533 
534 /*ARGSUSED*/
535 static int
536 pcfs_getattr(
537 	struct vnode *vp,
538 	struct vattr *vap,
539 	int flags,
540 	struct cred *cr)
541 {
542 	struct pcnode *pcp;
543 	struct pcfs *fsp;
544 	int error;
545 	char attr;
546 	struct pctime atime;
547 	int64_t unixtime;
548 
549 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
550 
551 	fsp = VFSTOPCFS(vp->v_vfsp);
552 	error = pc_lockfs(fsp, 0, 0);
553 	if (error)
554 		return (error);
555 
556 	/*
557 	 * Note that we don't check for "invalid node" (PC_INVAL) here
558 	 * only in order to make stat() succeed. We allow no I/O on such
559 	 * a node, but do allow to check for its existance.
560 	 */
561 	if ((pcp = VTOPC(vp)) == NULL) {
562 		pc_unlockfs(fsp);
563 		return (EIO);
564 	}
565 	/*
566 	 * Copy from pcnode.
567 	 */
568 	vap->va_type = vp->v_type;
569 	attr = pcp->pc_entry.pcd_attr;
570 	if (PCA_IS_HIDDEN(fsp, attr))
571 		vap->va_mode = 0;
572 	else if (attr & PCA_LABEL)
573 		vap->va_mode = 0444;
574 	else if (attr & PCA_RDONLY)
575 		vap->va_mode = 0555;
576 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
577 		vap->va_mode = 0755;
578 	} else {
579 		vap->va_mode = 0777;
580 	}
581 
582 	if (attr & PCA_DIR)
583 		vap->va_mode |= S_IFDIR;
584 	else
585 		vap->va_mode |= S_IFREG;
586 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
587 		vap->va_uid = 0;
588 		vap->va_gid = 0;
589 	} else {
590 		vap->va_uid = crgetuid(cr);
591 		vap->va_gid = crgetgid(cr);
592 	}
593 	vap->va_fsid = vp->v_vfsp->vfs_dev;
594 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
595 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
596 	    pc_getstartcluster(fsp, &pcp->pc_entry), fsp->pcfs_entps);
597 	vap->va_nlink = 1;
598 	vap->va_size = (u_offset_t)pcp->pc_size;
599 
600 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
601 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
602 		if (unixtime > INT32_MAX)
603 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
604 		unixtime = MIN(unixtime, INT32_MAX);
605 	} else if (unixtime > INT32_MAX &&
606 	    get_udatamodel() == DATAMODEL_ILP32) {
607 		pc_unlockfs(fsp);
608 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
609 		return (EOVERFLOW);
610 	}
611 
612 	vap->va_mtime.tv_sec = (time_t)unixtime;
613 	vap->va_mtime.tv_nsec = 0;
614 
615 	/*
616 	 * FAT doesn't know about POSIX ctime.
617 	 * Best approximation is to always set it to mtime.
618 	 */
619 	vap->va_ctime = vap->va_mtime;
620 
621 	/*
622 	 * FAT only stores "last access date". If that's the
623 	 * same as the date of last modification then the time
624 	 * of last access is known. Otherwise, use midnight.
625 	 */
626 	atime.pct_date = pcp->pc_entry.pcd_ladate;
627 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
628 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
629 	else
630 		atime.pct_time = 0;
631 	pc_pcttotv(&atime, &unixtime);
632 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
633 		if (unixtime > INT32_MAX)
634 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
635 		unixtime = MIN(unixtime, INT32_MAX);
636 	} else if (unixtime > INT32_MAX &&
637 	    get_udatamodel() == DATAMODEL_ILP32) {
638 		pc_unlockfs(fsp);
639 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
640 		return (EOVERFLOW);
641 	}
642 
643 	vap->va_atime.tv_sec = (time_t)unixtime;
644 	vap->va_atime.tv_nsec = 0;
645 
646 	vap->va_rdev = 0;
647 	vap->va_nblocks = (fsblkcnt64_t)howmany((offset_t)pcp->pc_size,
648 	    DEV_BSIZE);
649 	vap->va_blksize = fsp->pcfs_clsize;
650 	pc_unlockfs(fsp);
651 	return (0);
652 }
653 
654 
655 /*ARGSUSED*/
656 static int
657 pcfs_setattr(
658 	struct vnode *vp,
659 	struct vattr *vap,
660 	int flags,
661 	struct cred *cr,
662 	caller_context_t *ct)
663 {
664 	struct pcnode *pcp;
665 	mode_t mask = vap->va_mask;
666 	int error;
667 	struct pcfs *fsp;
668 	timestruc_t now, *timep;
669 
670 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
671 	/*
672 	 * cannot set these attributes
673 	 */
674 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
675 		return (EINVAL);
676 	}
677 	/*
678 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
679 	 * from 'tar' when it tries to set times on a directory, and console
680 	 * printf's on the NFS server when it gets EINVAL back on such a
681 	 * request. One possible problem with that since a directory entry
682 	 * identifies a file, '.' and all the '..' entries in subdirectories
683 	 * may get out of sync when the directory is updated since they're
684 	 * treated like separate files. We could fix that by looking for
685 	 * '.' and giving it the same attributes, and then looking for
686 	 * all the subdirectories and updating '..', but that's pretty
687 	 * expensive for something that doesn't seem likely to matter.
688 	 */
689 	/* can't do some ops on directories anyway */
690 	if ((vp->v_type == VDIR) &&
691 	    (mask & AT_SIZE)) {
692 		return (EINVAL);
693 	}
694 
695 	fsp = VFSTOPCFS(vp->v_vfsp);
696 	error = pc_lockfs(fsp, 0, 0);
697 	if (error)
698 		return (error);
699 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
700 		pc_unlockfs(fsp);
701 		return (EIO);
702 	}
703 
704 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
705 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
706 			pc_unlockfs(fsp);
707 			return (EACCES);
708 		}
709 	}
710 
711 	/*
712 	 * Change file access modes.
713 	 * If nobody has write permission, file is marked readonly.
714 	 * Otherwise file is writable by anyone.
715 	 */
716 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
717 		if ((vap->va_mode & 0222) == 0)
718 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
719 		else
720 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
721 		pcp->pc_flags |= PC_CHG;
722 	}
723 	/*
724 	 * Truncate file. Must have write permission.
725 	 */
726 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
727 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
728 			error = EACCES;
729 			goto out;
730 		}
731 		if (vap->va_size > UINT32_MAX) {
732 			error = EFBIG;
733 			goto out;
734 		}
735 		error = pc_truncate(pcp, (uint_t)vap->va_size);
736 		if (error)
737 			goto out;
738 	}
739 	/*
740 	 * Change file modified times.
741 	 */
742 	if (mask & (AT_MTIME | AT_CTIME)) {
743 		/*
744 		 * If SysV-compatible option to set access and
745 		 * modified times if privileged, owner, or write access,
746 		 * use current time rather than va_mtime.
747 		 *
748 		 * XXX - va_mtime.tv_sec == -1 flags this.
749 		 */
750 		timep = &vap->va_mtime;
751 		if (vap->va_mtime.tv_sec == -1) {
752 			gethrestime(&now);
753 			timep = &now;
754 		}
755 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
756 		    timep->tv_sec > INT32_MAX) {
757 			error = EOVERFLOW;
758 			goto out;
759 		}
760 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
761 		if (error)
762 			goto out;
763 		pcp->pc_flags |= PC_CHG;
764 	}
765 	/*
766 	 * Change file access times.
767 	 */
768 	if (mask & AT_ATIME) {
769 		/*
770 		 * If SysV-compatible option to set access and
771 		 * modified times if privileged, owner, or write access,
772 		 * use current time rather than va_mtime.
773 		 *
774 		 * XXX - va_atime.tv_sec == -1 flags this.
775 		 */
776 		struct pctime	atime;
777 
778 		timep = &vap->va_atime;
779 		if (vap->va_atime.tv_sec == -1) {
780 			gethrestime(&now);
781 			timep = &now;
782 		}
783 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
784 		    timep->tv_sec > INT32_MAX) {
785 			error = EOVERFLOW;
786 			goto out;
787 		}
788 		error = pc_tvtopct(timep, &atime);
789 		if (error)
790 			goto out;
791 		pcp->pc_entry.pcd_ladate = atime.pct_date;
792 		pcp->pc_flags |= PC_CHG;
793 	}
794 out:
795 	pc_unlockfs(fsp);
796 	return (error);
797 }
798 
799 
800 /*ARGSUSED*/
801 static int
802 pcfs_access(
803 	struct vnode *vp,
804 	int mode,
805 	int flags,
806 	struct cred *cr)
807 {
808 	struct pcnode *pcp;
809 	struct pcfs *fsp;
810 
811 
812 	fsp = VFSTOPCFS(vp->v_vfsp);
813 
814 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
815 		return (EIO);
816 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
817 		return (EACCES);
818 
819 	/*
820 	 * If this is a boot partition, privileged users have full access while
821 	 * others have read-only access.
822 	 */
823 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
824 		if ((mode & VWRITE) &&
825 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
826 			return (EACCES);
827 	}
828 	return (0);
829 }
830 
831 
832 /*ARGSUSED*/
833 static int
834 pcfs_fsync(
835 	struct vnode *vp,
836 	int syncflag,
837 	struct cred *cr)
838 {
839 	struct pcfs *fsp;
840 	struct pcnode *pcp;
841 	int error;
842 
843 	fsp = VFSTOPCFS(vp->v_vfsp);
844 	if (error = pc_verify(fsp))
845 		return (error);
846 	error = pc_lockfs(fsp, 0, 0);
847 	if (error)
848 		return (error);
849 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
850 		pc_unlockfs(fsp);
851 		return (EIO);
852 	}
853 	rw_enter(&pcnodes_lock, RW_WRITER);
854 	error = pc_nodesync(pcp);
855 	rw_exit(&pcnodes_lock);
856 	pc_unlockfs(fsp);
857 	return (error);
858 }
859 
860 
861 /*ARGSUSED*/
862 static void
863 pcfs_inactive(
864 	struct vnode *vp,
865 	struct cred *cr)
866 {
867 	struct pcnode *pcp;
868 	struct pcfs *fsp;
869 	int error;
870 
871 	fsp = VFSTOPCFS(vp->v_vfsp);
872 	error = pc_lockfs(fsp, 0, 1);
873 
874 	/*
875 	 * If the filesystem was umounted by force, all dirty
876 	 * pages associated with this vnode are invalidated
877 	 * and then the vnode will be freed.
878 	 */
879 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
880 		pcp = VTOPC(vp);
881 		if (vn_has_cached_data(vp)) {
882 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
883 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
884 		}
885 		remque(pcp);
886 		if (error == 0)
887 			pc_unlockfs(fsp);
888 		vn_free(vp);
889 		kmem_free(pcp, sizeof (struct pcnode));
890 		VFS_RELE(PCFSTOVFS(fsp));
891 		return;
892 	}
893 
894 	mutex_enter(&vp->v_lock);
895 	ASSERT(vp->v_count >= 1);
896 	if (vp->v_count > 1) {
897 		vp->v_count--;  /* release our hold from vn_rele */
898 		mutex_exit(&vp->v_lock);
899 		pc_unlockfs(fsp);
900 		return;
901 	}
902 	mutex_exit(&vp->v_lock);
903 
904 	/*
905 	 * Check again to confirm that no intervening I/O error
906 	 * with a subsequent pc_diskchanged() call has released
907 	 * the pcnode. If it has then release the vnode as above.
908 	 */
909 	pcp = VTOPC(vp);
910 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
911 		if (vn_has_cached_data(vp))
912 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
913 			    pcfs_putapage, B_INVAL | B_TRUNC,
914 			    (struct cred *)NULL);
915 	}
916 
917 	if (pcp == NULL) {
918 		vn_free(vp);
919 	} else {
920 		pc_rele(pcp);
921 	}
922 
923 	if (!error)
924 		pc_unlockfs(fsp);
925 }
926 
927 /*ARGSUSED*/
928 static int
929 pcfs_lookup(
930 	struct vnode *dvp,
931 	char *nm,
932 	struct vnode **vpp,
933 	struct pathname *pnp,
934 	int flags,
935 	struct vnode *rdir,
936 	struct cred *cr)
937 {
938 	struct pcfs *fsp;
939 	struct pcnode *pcp;
940 	int error;
941 
942 	/*
943 	 * If the filesystem was umounted by force, return immediately.
944 	 */
945 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
946 		return (EIO);
947 
948 	/*
949 	 * verify that the dvp is still valid on the disk
950 	 */
951 	fsp = VFSTOPCFS(dvp->v_vfsp);
952 	if (error = pc_verify(fsp))
953 		return (error);
954 	error = pc_lockfs(fsp, 0, 0);
955 	if (error)
956 		return (error);
957 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
958 		pc_unlockfs(fsp);
959 		return (EIO);
960 	}
961 	/*
962 	 * Null component name is a synonym for directory being searched.
963 	 */
964 	if (*nm == '\0') {
965 		VN_HOLD(dvp);
966 		*vpp = dvp;
967 		pc_unlockfs(fsp);
968 		return (0);
969 	}
970 
971 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
972 	if (!error) {
973 		*vpp = PCTOV(pcp);
974 		pcp->pc_flags |= PC_EXTERNAL;
975 	}
976 	pc_unlockfs(fsp);
977 	return (error);
978 }
979 
980 
981 /*ARGSUSED*/
982 static int
983 pcfs_create(
984 	struct vnode *dvp,
985 	char *nm,
986 	struct vattr *vap,
987 	enum vcexcl exclusive,
988 	int mode,
989 	struct vnode **vpp,
990 	struct cred *cr,
991 	int flag)
992 {
993 	int error;
994 	struct pcnode *pcp;
995 	struct vnode *vp;
996 	struct pcfs *fsp;
997 
998 	/*
999 	 * can't create directories. use pcfs_mkdir.
1000 	 * can't create anything other than files.
1001 	 */
1002 	if (vap->va_type == VDIR)
1003 		return (EISDIR);
1004 	else if (vap->va_type != VREG)
1005 		return (EINVAL);
1006 
1007 	pcp = NULL;
1008 	fsp = VFSTOPCFS(dvp->v_vfsp);
1009 	error = pc_lockfs(fsp, 0, 0);
1010 	if (error)
1011 		return (error);
1012 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1013 		pc_unlockfs(fsp);
1014 		return (EIO);
1015 	}
1016 
1017 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1018 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1019 			pc_unlockfs(fsp);
1020 			return (EACCES);
1021 		}
1022 	}
1023 
1024 	if (*nm == '\0') {
1025 		/*
1026 		 * Null component name refers to the directory itself.
1027 		 */
1028 		VN_HOLD(dvp);
1029 		pcp = VTOPC(dvp);
1030 		error = EEXIST;
1031 	} else {
1032 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1033 	}
1034 	/*
1035 	 * if file exists and this is a nonexclusive create,
1036 	 * check for access permissions
1037 	 */
1038 	if (error == EEXIST) {
1039 		vp = PCTOV(pcp);
1040 		if (exclusive == NONEXCL) {
1041 			if (vp->v_type == VDIR) {
1042 				error = EISDIR;
1043 			} else if (mode) {
1044 				error = pcfs_access(PCTOV(pcp), mode, 0,
1045 				    cr);
1046 			} else {
1047 				error = 0;
1048 			}
1049 		}
1050 		if (error) {
1051 			VN_RELE(PCTOV(pcp));
1052 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1053 		    (vap->va_size == 0)) {
1054 			error = pc_truncate(pcp, 0L);
1055 			if (error) {
1056 				VN_RELE(PCTOV(pcp));
1057 			} else {
1058 				vnevent_create(PCTOV(pcp));
1059 			}
1060 		}
1061 	}
1062 	if (error) {
1063 		pc_unlockfs(fsp);
1064 		return (error);
1065 	}
1066 	*vpp = PCTOV(pcp);
1067 	pcp->pc_flags |= PC_EXTERNAL;
1068 	pc_unlockfs(fsp);
1069 	return (error);
1070 }
1071 
1072 /*ARGSUSED*/
1073 static int
1074 pcfs_remove(
1075 	struct vnode *vp,
1076 	char *nm,
1077 	struct cred *cr)
1078 {
1079 	struct pcfs *fsp;
1080 	struct pcnode *pcp;
1081 	int error;
1082 
1083 	fsp = VFSTOPCFS(vp->v_vfsp);
1084 	if (error = pc_verify(fsp))
1085 		return (error);
1086 	error = pc_lockfs(fsp, 0, 0);
1087 	if (error)
1088 		return (error);
1089 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1090 		pc_unlockfs(fsp);
1091 		return (EIO);
1092 	}
1093 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1094 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1095 			pc_unlockfs(fsp);
1096 			return (EACCES);
1097 		}
1098 	}
1099 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG);
1100 	pc_unlockfs(fsp);
1101 	return (error);
1102 }
1103 
1104 /*
1105  * Rename a file or directory
1106  * This rename is restricted to only rename files within a directory.
1107  * XX should make rename more general
1108  */
1109 /*ARGSUSED*/
1110 static int
1111 pcfs_rename(
1112 	struct vnode *sdvp,		/* old (source) parent vnode */
1113 	char *snm,			/* old (source) entry name */
1114 	struct vnode *tdvp,		/* new (target) parent vnode */
1115 	char *tnm,			/* new (target) entry name */
1116 	struct cred *cr)
1117 {
1118 	struct pcfs *fsp;
1119 	struct pcnode *dp;	/* parent pcnode */
1120 	struct pcnode *tdp;
1121 	int error;
1122 
1123 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1124 	if (error = pc_verify(fsp))
1125 		return (error);
1126 
1127 	/*
1128 	 * make sure we can muck with this directory.
1129 	 */
1130 	error = pcfs_access(sdvp, VWRITE, 0, cr);
1131 	if (error) {
1132 		return (error);
1133 	}
1134 	error = pc_lockfs(fsp, 0, 0);
1135 	if (error)
1136 		return (error);
1137 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1138 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1139 		pc_unlockfs(fsp);
1140 		return (EIO);
1141 	}
1142 	error = pc_rename(dp, tdp, snm, tnm);
1143 	pc_unlockfs(fsp);
1144 	return (error);
1145 }
1146 
1147 /*ARGSUSED*/
1148 static int
1149 pcfs_mkdir(
1150 	struct vnode *dvp,
1151 	char *nm,
1152 	struct vattr *vap,
1153 	struct vnode **vpp,
1154 	struct cred *cr)
1155 {
1156 	struct pcfs *fsp;
1157 	struct pcnode *pcp;
1158 	int error;
1159 
1160 	fsp = VFSTOPCFS(dvp->v_vfsp);
1161 	if (error = pc_verify(fsp))
1162 		return (error);
1163 	error = pc_lockfs(fsp, 0, 0);
1164 	if (error)
1165 		return (error);
1166 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1167 		pc_unlockfs(fsp);
1168 		return (EIO);
1169 	}
1170 
1171 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1172 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1173 			pc_unlockfs(fsp);
1174 			return (EACCES);
1175 		}
1176 	}
1177 
1178 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1179 
1180 	if (!error) {
1181 		pcp -> pc_flags |= PC_EXTERNAL;
1182 		*vpp = PCTOV(pcp);
1183 	} else if (error == EEXIST) {
1184 		VN_RELE(PCTOV(pcp));
1185 	}
1186 	pc_unlockfs(fsp);
1187 	return (error);
1188 }
1189 
1190 /*ARGSUSED*/
1191 static int
1192 pcfs_rmdir(
1193 	struct vnode *dvp,
1194 	char *nm,
1195 	struct vnode *cdir,
1196 	struct cred *cr)
1197 {
1198 	struct pcfs *fsp;
1199 	struct pcnode *pcp;
1200 	int error;
1201 
1202 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1203 	if (error = pc_verify(fsp))
1204 		return (error);
1205 	if (error = pc_lockfs(fsp, 0, 0))
1206 		return (error);
1207 
1208 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1209 		pc_unlockfs(fsp);
1210 		return (EIO);
1211 	}
1212 
1213 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1214 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1215 			pc_unlockfs(fsp);
1216 			return (EACCES);
1217 		}
1218 	}
1219 
1220 	error = pc_dirremove(pcp, nm, cdir, VDIR);
1221 	pc_unlockfs(fsp);
1222 	return (error);
1223 }
1224 
1225 /*
1226  * read entries in a directory.
1227  * we must convert pc format to unix format
1228  */
1229 
1230 /*ARGSUSED*/
1231 static int
1232 pcfs_readdir(
1233 	struct vnode *dvp,
1234 	struct uio *uiop,
1235 	struct cred *cr,
1236 	int *eofp)
1237 {
1238 	struct pcnode *pcp;
1239 	struct pcfs *fsp;
1240 	struct pcdir *ep;
1241 	struct buf *bp = NULL;
1242 	offset_t offset;
1243 	int boff;
1244 	struct pc_dirent lbp;
1245 	struct pc_dirent *ld = &lbp;
1246 	int error;
1247 
1248 	/*
1249 	 * If the filesystem was umounted by force, return immediately.
1250 	 */
1251 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1252 		return (EIO);
1253 
1254 	if ((uiop->uio_iovcnt != 1) ||
1255 	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1256 		return (EINVAL);
1257 	}
1258 	fsp = VFSTOPCFS(dvp->v_vfsp);
1259 	/*
1260 	 * verify that the dp is still valid on the disk
1261 	 */
1262 	if (error = pc_verify(fsp)) {
1263 		return (error);
1264 	}
1265 	error = pc_lockfs(fsp, 0, 0);
1266 	if (error)
1267 		return (error);
1268 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1269 		pc_unlockfs(fsp);
1270 		return (EIO);
1271 	}
1272 
1273 	bzero(ld, sizeof (*ld));
1274 
1275 	if (eofp != NULL)
1276 		*eofp = 0;
1277 	offset = uiop->uio_loffset;
1278 
1279 	if (dvp->v_flag & VROOT) {
1280 		/*
1281 		 * kludge up entries for "." and ".." in the root.
1282 		 */
1283 		if (offset == 0) {
1284 			(void) strcpy(ld->d_name, ".");
1285 			ld->d_reclen = DIRENT64_RECLEN(1);
1286 			ld->d_off = (off64_t)sizeof (struct pcdir);
1287 			ld->d_ino = (ino64_t)UINT_MAX;
1288 			if (ld->d_reclen > uiop->uio_resid) {
1289 				pc_unlockfs(fsp);
1290 				return (ENOSPC);
1291 			}
1292 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1293 			uiop->uio_loffset = ld->d_off;
1294 			offset = uiop->uio_loffset;
1295 		}
1296 		if (offset == sizeof (struct pcdir)) {
1297 			(void) strcpy(ld->d_name, "..");
1298 			ld->d_reclen = DIRENT64_RECLEN(2);
1299 			if (ld->d_reclen > uiop->uio_resid) {
1300 				pc_unlockfs(fsp);
1301 				return (ENOSPC);
1302 			}
1303 			ld->d_off = (off64_t)(uiop->uio_loffset +
1304 			    sizeof (struct pcdir));
1305 			ld->d_ino = (ino64_t)UINT_MAX;
1306 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1307 			uiop->uio_loffset = ld->d_off;
1308 			offset = uiop->uio_loffset;
1309 		}
1310 		offset -= 2 * sizeof (struct pcdir);
1311 		/* offset now has the real offset value into directory file */
1312 	}
1313 
1314 	for (;;) {
1315 		boff = pc_blkoff(fsp, offset);
1316 		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1317 			if (bp != NULL) {
1318 				brelse(bp);
1319 				bp = NULL;
1320 			}
1321 			error = pc_blkatoff(pcp, offset, &bp, &ep);
1322 			if (error) {
1323 				if (error == ENOENT) {
1324 					error = 0;
1325 					if (eofp)
1326 						*eofp = 1;
1327 				}
1328 				break;
1329 			}
1330 		}
1331 		if (ep->pcd_filename[0] == PCD_UNUSED) {
1332 			if (eofp)
1333 				*eofp = 1;
1334 			break;
1335 		}
1336 		/*
1337 		 * Don't display label because it may contain funny characters.
1338 		 */
1339 		if (ep->pcd_filename[0] == PCD_ERASED) {
1340 			uiop->uio_loffset += sizeof (struct pcdir);
1341 			offset += sizeof (struct pcdir);
1342 			ep++;
1343 			continue;
1344 		}
1345 		if (PCDL_IS_LFN(ep)) {
1346 			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1347 			    0)
1348 				break;
1349 			continue;
1350 		}
1351 
1352 		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1353 			break;
1354 	}
1355 	if (bp)
1356 		brelse(bp);
1357 	pc_unlockfs(fsp);
1358 	return (error);
1359 }
1360 
1361 
1362 /*
1363  * Called from pvn_getpages or pcfs_getpage to get a particular page.
1364  * When we are called the pcfs is already locked.
1365  */
1366 /*ARGSUSED*/
1367 static int
1368 pcfs_getapage(
1369 	struct vnode *vp,
1370 	u_offset_t off,
1371 	size_t len,
1372 	uint_t *protp,
1373 	page_t *pl[],		/* NULL if async IO is requested */
1374 	size_t plsz,
1375 	struct seg *seg,
1376 	caddr_t addr,
1377 	enum seg_rw rw,
1378 	struct cred *cr)
1379 {
1380 	struct pcnode *pcp;
1381 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1382 	struct vnode *devvp;
1383 	page_t *pp;
1384 	page_t *pagefound;
1385 	int err;
1386 
1387 	/*
1388 	 * If the filesystem was umounted by force, return immediately.
1389 	 */
1390 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1391 		return (EIO);
1392 
1393 	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1394 	    (void *)vp, off, len);
1395 
1396 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1397 		return (EIO);
1398 	devvp = fsp->pcfs_devvp;
1399 
1400 	/* pcfs doesn't do readaheads */
1401 	if (pl == NULL)
1402 		return (0);
1403 
1404 	pl[0] = NULL;
1405 	err = 0;
1406 	/*
1407 	 * If the accessed time on the pcnode has not already been
1408 	 * set elsewhere (e.g. for read/setattr) we set the time now.
1409 	 * This gives us approximate modified times for mmap'ed files
1410 	 * which are accessed via loads in the user address space.
1411 	 */
1412 	if ((pcp->pc_flags & PC_ACC) == 0 &&
1413 	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1414 		pcp->pc_flags |= PC_ACC;
1415 		pc_mark_acc(pcp);
1416 	}
1417 reread:
1418 	if ((pagefound = page_exists(vp, off)) == NULL) {
1419 		/*
1420 		 * Need to really do disk IO to get the page(s).
1421 		 */
1422 		struct buf *bp;
1423 		daddr_t lbn, bn;
1424 		u_offset_t io_off;
1425 		size_t io_len;
1426 		u_offset_t lbnoff, xferoffset;
1427 		u_offset_t pgoff;
1428 		uint_t	xfersize;
1429 		int err1;
1430 
1431 		lbn = pc_lblkno(fsp, off);
1432 		lbnoff = off & ~(fsp->pcfs_clsize - 1);
1433 		xferoffset = off & ~(fsp->pcfs_secsize - 1);
1434 
1435 		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1436 		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1437 		if (pp == NULL)
1438 			/*
1439 			 * XXX - If pcfs is made MT-hot, this should go
1440 			 * back to reread.
1441 			 */
1442 			panic("pcfs_getapage pvn_read_kluster");
1443 
1444 		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1445 		    pgoff += xfersize,
1446 		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
1447 		    lbnoff += xfersize, xferoffset += xfersize) {
1448 			/*
1449 			 * read as many contiguous blocks as possible to
1450 			 * fill this page
1451 			 */
1452 			xfersize = PAGESIZE - pgoff;
1453 			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1454 			if (err1) {
1455 				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1456 				err = err1;
1457 				goto out;
1458 			}
1459 			bp = pageio_setup(pp, xfersize, devvp, B_READ);
1460 			bp->b_edev = devvp->v_rdev;
1461 			bp->b_dev = cmpdev(devvp->v_rdev);
1462 			bp->b_blkno = bn +
1463 			    /* add a sector offset within the cluster */
1464 			    /* when the clustersize > PAGESIZE */
1465 			    (xferoffset - lbnoff) / fsp->pcfs_secsize;
1466 			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1467 			bp->b_file = vp;
1468 			bp->b_offset = (offset_t)(off + pgoff);
1469 
1470 			(void) bdev_strategy(bp);
1471 
1472 			lwp_stat_update(LWP_STAT_INBLK, 1);
1473 
1474 			if (err == 0)
1475 				err = biowait(bp);
1476 			else
1477 				(void) biowait(bp);
1478 			pageio_done(bp);
1479 			if (err)
1480 				goto out;
1481 		}
1482 		if (pgoff < PAGESIZE) {
1483 			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1484 		}
1485 		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1486 	}
1487 out:
1488 	if (err) {
1489 		if (pp != NULL)
1490 			pvn_read_done(pp, B_ERROR);
1491 		return (err);
1492 	}
1493 
1494 	if (pagefound) {
1495 		/*
1496 		 * Page exists in the cache, acquire the "shared"
1497 		 * lock.  If this fails, go back to reread.
1498 		 */
1499 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1500 			goto reread;
1501 		}
1502 		pl[0] = pp;
1503 		pl[1] = NULL;
1504 	}
1505 	return (err);
1506 }
1507 
1508 /*
1509  * Return all the pages from [off..off+len] in given file
1510  */
1511 static int
1512 pcfs_getpage(
1513 	struct vnode *vp,
1514 	offset_t off,
1515 	size_t len,
1516 	uint_t *protp,
1517 	page_t *pl[],
1518 	size_t plsz,
1519 	struct seg *seg,
1520 	caddr_t addr,
1521 	enum seg_rw rw,
1522 	struct cred *cr)
1523 {
1524 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1525 	int err;
1526 
1527 	PC_DPRINTF0(6, "pcfs_getpage\n");
1528 	if (err = pc_verify(fsp))
1529 		return (err);
1530 	if (vp->v_flag & VNOMAP)
1531 		return (ENOSYS);
1532 	ASSERT(off <= UINT32_MAX);
1533 	err = pc_lockfs(fsp, 0, 0);
1534 	if (err)
1535 		return (err);
1536 	if (protp != NULL)
1537 		*protp = PROT_ALL;
1538 
1539 	ASSERT((off & PAGEOFFSET) == 0);
1540 	if (len <= PAGESIZE) {
1541 		err = pcfs_getapage(vp, off, len, protp, pl,
1542 		    plsz, seg, addr, rw, cr);
1543 	} else {
1544 		err = pvn_getpages(pcfs_getapage, vp, off,
1545 		    len, protp, pl, plsz, seg, addr, rw, cr);
1546 	}
1547 	pc_unlockfs(fsp);
1548 	return (err);
1549 }
1550 
1551 
1552 /*
1553  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1554  * If len == 0, do from off to EOF.
1555  *
1556  * The normal cases should be len == 0 & off == 0 (entire vp list),
1557  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1558  * (from pageout).
1559  *
1560  */
1561 /*ARGSUSED*/
1562 static int
1563 pcfs_putpage(
1564 	struct vnode *vp,
1565 	offset_t off,
1566 	size_t len,
1567 	int flags,
1568 	struct cred *cr)
1569 {
1570 	struct pcnode *pcp;
1571 	page_t *pp;
1572 	struct pcfs *fsp;
1573 	u_offset_t io_off;
1574 	size_t io_len;
1575 	offset_t eoff;
1576 	int err;
1577 
1578 	/*
1579 	 * If the filesystem was umounted by force, return immediately.
1580 	 */
1581 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1582 		return (EIO);
1583 
1584 	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1585 	if (vp->v_flag & VNOMAP)
1586 		return (ENOSYS);
1587 
1588 	fsp = VFSTOPCFS(vp->v_vfsp);
1589 
1590 	if (err = pc_verify(fsp))
1591 		return (err);
1592 	if ((pcp = VTOPC(vp)) == NULL) {
1593 		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1594 		return (EIO);
1595 	}
1596 	if (pcp->pc_flags & PC_INVAL)
1597 		return (EIO);
1598 
1599 	if (curproc == proc_pageout) {
1600 		/*
1601 		 * XXX - This is a quick hack to avoid blocking
1602 		 * pageout. Also to avoid pcfs_getapage deadlocking
1603 		 * with putpage when memory is running out,
1604 		 * since we only have one global lock and we don't
1605 		 * support async putpage.
1606 		 * It should be fixed someday.
1607 		 *
1608 		 * Interestingly, this used to be a test of NOMEMWAIT().
1609 		 * We only ever got here once pcfs started supporting
1610 		 * NFS sharing, and then only because the NFS server
1611 		 * threads seem to do writes in sched's process context.
1612 		 * Since everyone else seems to just care about pageout,
1613 		 * the test was changed to look for pageout directly.
1614 		 */
1615 		return (ENOMEM);
1616 	}
1617 
1618 	ASSERT(off <= UINT32_MAX);
1619 
1620 	flags &= ~B_ASYNC;	/* XXX should fix this later */
1621 
1622 	err = pc_lockfs(fsp, 0, 0);
1623 	if (err)
1624 		return (err);
1625 	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1626 		pc_unlockfs(fsp);
1627 		return (0);
1628 	}
1629 
1630 	if (len == 0) {
1631 		/*
1632 		 * Search the entire vp list for pages >= off
1633 		 */
1634 		err = pvn_vplist_dirty(vp, off,
1635 		    pcfs_putapage, flags, cr);
1636 	} else {
1637 		eoff = off + len;
1638 
1639 		for (io_off = off; io_off < eoff &&
1640 		    io_off < pcp->pc_size; io_off += io_len) {
1641 			/*
1642 			 * If we are not invalidating, synchronously
1643 			 * freeing or writing pages use the routine
1644 			 * page_lookup_nowait() to prevent reclaiming
1645 			 * them from the free list.
1646 			 */
1647 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1648 				pp = page_lookup(vp, io_off,
1649 				    (flags & (B_INVAL | B_FREE)) ?
1650 				    SE_EXCL : SE_SHARED);
1651 			} else {
1652 				pp = page_lookup_nowait(vp, io_off,
1653 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1654 			}
1655 
1656 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1657 				io_len = PAGESIZE;
1658 			else {
1659 				err = pcfs_putapage(vp, pp, &io_off, &io_len,
1660 				    flags, cr);
1661 				if (err != 0)
1662 					break;
1663 				/*
1664 				 * "io_off" and "io_len" are returned as
1665 				 * the range of pages we actually wrote.
1666 				 * This allows us to skip ahead more quickly
1667 				 * since several pages may've been dealt
1668 				 * with by this iteration of the loop.
1669 				 */
1670 			}
1671 		}
1672 	}
1673 	if (err == 0 && (flags & B_INVAL) &&
1674 	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
1675 		/*
1676 		 * If doing "invalidation", make sure that
1677 		 * all pages on the vnode list are actually
1678 		 * gone.
1679 		 */
1680 		cmn_err(CE_PANIC,
1681 		    "pcfs_putpage: B_INVAL, pages not gone");
1682 	} else if (err) {
1683 		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1684 	}
1685 	pc_unlockfs(fsp);
1686 	return (err);
1687 }
1688 
1689 /*
1690  * Write out a single page, possibly klustering adjacent dirty pages.
1691  */
1692 /*ARGSUSED*/
1693 int
1694 pcfs_putapage(
1695 	struct vnode *vp,
1696 	page_t *pp,
1697 	u_offset_t *offp,
1698 	size_t *lenp,
1699 	int flags,
1700 	struct cred *cr)
1701 {
1702 	struct pcnode *pcp;
1703 	struct pcfs *fsp;
1704 	struct vnode *devvp;
1705 	size_t io_len;
1706 	daddr_t bn;
1707 	u_offset_t lbn, lbnoff, xferoffset;
1708 	uint_t pgoff, xfersize;
1709 	int err = 0;
1710 	u_offset_t io_off;
1711 
1712 	pcp = VTOPC(vp);
1713 	fsp = VFSTOPCFS(vp->v_vfsp);
1714 	devvp = fsp->pcfs_devvp;
1715 
1716 	/*
1717 	 * If the modified time on the inode has not already been
1718 	 * set elsewhere (e.g. for write/setattr) and this is not
1719 	 * a call from msync (B_FORCE) we set the time now.
1720 	 * This gives us approximate modified times for mmap'ed files
1721 	 * which are modified via stores in the user address space.
1722 	 */
1723 	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1724 		pcp->pc_flags |= PC_MOD;
1725 		pc_mark_mod(pcp);
1726 	}
1727 	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1728 	    PAGESIZE, flags);
1729 
1730 	if (fsp->pcfs_flags & PCFS_IRRECOV) {
1731 		goto out;
1732 	}
1733 
1734 	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1735 
1736 	lbn = pc_lblkno(fsp, io_off);
1737 	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1738 	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1739 
1740 	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1741 	    pgoff += xfersize,
1742 	    lbn += howmany(xfersize, fsp->pcfs_clsize),
1743 	    lbnoff += xfersize, xferoffset += xfersize) {
1744 
1745 		struct buf *bp;
1746 		int err1;
1747 
1748 		/*
1749 		 * write as many contiguous blocks as possible from this page
1750 		 */
1751 		xfersize = io_len - pgoff;
1752 		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1753 		if (err1) {
1754 			err = err1;
1755 			goto out;
1756 		}
1757 		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1758 		bp->b_edev = devvp->v_rdev;
1759 		bp->b_dev = cmpdev(devvp->v_rdev);
1760 		bp->b_blkno = bn +
1761 		    /* add a sector offset within the cluster */
1762 		    /* when the clustersize > PAGESIZE */
1763 		    (xferoffset - lbnoff) / fsp->pcfs_secsize;
1764 		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1765 		bp->b_file = vp;
1766 		bp->b_offset = (offset_t)(io_off + pgoff);
1767 
1768 		(void) bdev_strategy(bp);
1769 
1770 		lwp_stat_update(LWP_STAT_OUBLK, 1);
1771 
1772 		if (err == 0)
1773 			err = biowait(bp);
1774 		else
1775 			(void) biowait(bp);
1776 		pageio_done(bp);
1777 	}
1778 	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1779 	pp = NULL;
1780 
1781 out:
1782 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1783 		pvn_write_done(pp, B_WRITE | flags);
1784 	} else if (err != 0 && pp != NULL) {
1785 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1786 	}
1787 
1788 	if (offp)
1789 		*offp = io_off;
1790 	if (lenp)
1791 		*lenp = io_len;
1792 		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1793 		    (void *)vp, (void *)pp, io_off, io_len);
1794 	if (err) {
1795 		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1796 	}
1797 	return (err);
1798 }
1799 
1800 /*ARGSUSED*/
1801 static int
1802 pcfs_map(
1803 	struct vnode *vp,
1804 	offset_t off,
1805 	struct as *as,
1806 	caddr_t *addrp,
1807 	size_t len,
1808 	uchar_t prot,
1809 	uchar_t maxprot,
1810 	uint_t flags,
1811 	struct cred *cr)
1812 {
1813 	struct segvn_crargs vn_a;
1814 	int error;
1815 
1816 	PC_DPRINTF0(6, "pcfs_map\n");
1817 	if (vp->v_flag & VNOMAP)
1818 		return (ENOSYS);
1819 
1820 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1821 		return (ENXIO);
1822 
1823 	as_rangelock(as);
1824 	if ((flags & MAP_FIXED) == 0) {
1825 		map_addr(addrp, len, off, 1, flags);
1826 		if (*addrp == NULL) {
1827 			as_rangeunlock(as);
1828 			return (ENOMEM);
1829 		}
1830 	} else {
1831 		/*
1832 		 * User specified address - blow away any previous mappings
1833 		 */
1834 		(void) as_unmap(as, *addrp, len);
1835 	}
1836 
1837 	vn_a.vp = vp;
1838 	vn_a.offset = off;
1839 	vn_a.type = flags & MAP_TYPE;
1840 	vn_a.prot = prot;
1841 	vn_a.maxprot = maxprot;
1842 	vn_a.flags = flags & ~MAP_TYPE;
1843 	vn_a.cred = cr;
1844 	vn_a.amp = NULL;
1845 	vn_a.szc = 0;
1846 	vn_a.lgrp_mem_policy_flags = 0;
1847 
1848 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1849 	as_rangeunlock(as);
1850 	return (error);
1851 }
1852 
1853 /* ARGSUSED */
1854 static int
1855 pcfs_seek(
1856 	struct vnode *vp,
1857 	offset_t ooff,
1858 	offset_t *noffp)
1859 {
1860 	if (*noffp < 0)
1861 		return (EINVAL);
1862 	else if (*noffp > MAXOFFSET_T)
1863 		return (EINVAL);
1864 	else
1865 		return (0);
1866 }
1867 
1868 /* ARGSUSED */
1869 static int
1870 pcfs_addmap(
1871 	struct vnode *vp,
1872 	offset_t off,
1873 	struct as *as,
1874 	caddr_t addr,
1875 	size_t len,
1876 	uchar_t prot,
1877 	uchar_t maxprot,
1878 	uint_t flags,
1879 	struct cred *cr)
1880 {
1881 	if (vp->v_flag & VNOMAP)
1882 		return (ENOSYS);
1883 	return (0);
1884 }
1885 
1886 /*ARGSUSED*/
1887 static int
1888 pcfs_delmap(
1889 	struct vnode *vp,
1890 	offset_t off,
1891 	struct as *as,
1892 	caddr_t addr,
1893 	size_t len,
1894 	uint_t prot,
1895 	uint_t maxprot,
1896 	uint_t flags,
1897 	struct cred *cr)
1898 {
1899 	if (vp->v_flag & VNOMAP)
1900 		return (ENOSYS);
1901 	return (0);
1902 }
1903 
1904 /*
1905  * POSIX pathconf() support.
1906  */
1907 /* ARGSUSED */
1908 static int
1909 pcfs_pathconf(
1910 	struct vnode *vp,
1911 	int cmd,
1912 	ulong_t *valp,
1913 	struct cred *cr)
1914 {
1915 	ulong_t val;
1916 	int error = 0;
1917 	struct statvfs64 vfsbuf;
1918 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1919 
1920 	switch (cmd) {
1921 
1922 	case _PC_LINK_MAX:
1923 		val = 1;
1924 		break;
1925 
1926 	case _PC_MAX_CANON:
1927 		val = MAX_CANON;
1928 		break;
1929 
1930 	case _PC_MAX_INPUT:
1931 		val = MAX_INPUT;
1932 		break;
1933 
1934 	case _PC_NAME_MAX:
1935 		bzero(&vfsbuf, sizeof (vfsbuf));
1936 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
1937 			break;
1938 		val = vfsbuf.f_namemax;
1939 		break;
1940 
1941 	case _PC_PATH_MAX:
1942 	case _PC_SYMLINK_MAX:
1943 		val = PCMAXPATHLEN;
1944 		break;
1945 
1946 	case _PC_PIPE_BUF:
1947 		val = PIPE_BUF;
1948 		break;
1949 
1950 	case _PC_NO_TRUNC:
1951 		val = (ulong_t)-1; 	/* Will truncate long file name */
1952 		break;
1953 
1954 	case _PC_VDISABLE:
1955 		val = _POSIX_VDISABLE;
1956 		break;
1957 
1958 	case _PC_CHOWN_RESTRICTED:
1959 		if (rstchown)
1960 			val = rstchown;		/* chown restricted enabled */
1961 		else
1962 			val = (ulong_t)-1;
1963 		break;
1964 
1965 	case _PC_ACL_ENABLED:
1966 		val = 0;
1967 		break;
1968 
1969 	case _PC_FILESIZEBITS:
1970 		/*
1971 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1972 		 * FAT12 can only go up to the maximum filesystem capacity
1973 		 * which is ~509MB.
1974 		 */
1975 		val = IS_FAT12(fsp) ? 30 : 33;
1976 		break;
1977 	default:
1978 		error = EINVAL;
1979 		break;
1980 	}
1981 
1982 	if (error == 0)
1983 		*valp = val;
1984 	return (error);
1985 }
1986 
1987 /* ARGSUSED */
1988 static int
1989 pcfs_space(
1990 	struct vnode *vp,
1991 	int cmd,
1992 	struct flock64 *bfp,
1993 	int flag,
1994 	offset_t offset,
1995 	cred_t *cr,
1996 	caller_context_t *ct)
1997 {
1998 	struct vattr vattr;
1999 	int error;
2000 
2001 	if (cmd != F_FREESP)
2002 		return (EINVAL);
2003 
2004 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2005 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2006 			return (EFBIG);
2007 		/*
2008 		 * we only support the special case of l_len == 0,
2009 		 * meaning free to end of file at this moment.
2010 		 */
2011 		if (bfp->l_len != 0)
2012 			return (EINVAL);
2013 		vattr.va_mask = AT_SIZE;
2014 		vattr.va_size = bfp->l_start;
2015 		error = VOP_SETATTR(vp, &vattr, 0, cr, ct);
2016 	}
2017 	return (error);
2018 }
2019 
2020 /*
2021  * Break up 'len' chars from 'buf' into a long file name chunk.
2022  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2023  */
2024 void
2025 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2026 {
2027 	int	i;
2028 
2029 	ASSERT(buf != NULL);
2030 
2031 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2032 		if (len > 0) {
2033 			ep->pcdl_firstfilename[i] = *buf++;
2034 			ep->pcdl_firstfilename[i + 1] = *buf++;
2035 			len -= 2;
2036 		} else {
2037 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2038 			ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2039 		}
2040 	}
2041 
2042 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2043 		if (len > 0) {
2044 			ep->pcdl_secondfilename[i] = *buf++;
2045 			ep->pcdl_secondfilename[i + 1] = *buf++;
2046 			len -= 2;
2047 		} else {
2048 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2049 			ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2050 		}
2051 	}
2052 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2053 		if (len > 0) {
2054 			ep->pcdl_thirdfilename[i] = *buf++;
2055 			ep->pcdl_thirdfilename[i + 1] = *buf++;
2056 			len -= 2;
2057 		} else {
2058 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2059 			ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2060 		}
2061 	}
2062 }
2063 
2064 /*
2065  * Extract the characters from the long filename chunk into 'buf'.
2066  * Return the number of characters extracted.
2067  */
2068 static int
2069 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase)
2070 {
2071 	char 	*tmp = buf;
2072 	int	i;
2073 
2074 	/* Copy all the names, no filtering now */
2075 
2076 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2077 		*tmp = ep->pcdl_firstfilename[i];
2078 		*(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2079 
2080 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2081 			return (tmp - buf);
2082 		if (*(tmp + 1) == '\0' && foldcase) {
2083 			*tmp = toupper(*tmp);
2084 		}
2085 	}
2086 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2087 		*tmp = ep->pcdl_secondfilename[i];
2088 		*(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2089 
2090 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2091 			return (tmp - buf);
2092 		if (*(tmp + 1) == '\0' && foldcase) {
2093 			*tmp = toupper(*tmp);
2094 		}
2095 	}
2096 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2097 		*tmp = ep->pcdl_thirdfilename[i];
2098 		*(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2099 
2100 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2101 			return (tmp - buf);
2102 		if (*(tmp + 1) == '\0' && foldcase) {
2103 			*tmp = toupper(*tmp);
2104 		}
2105 	}
2106 	return (tmp - buf);
2107 }
2108 
2109 
2110 /*
2111  * Checksum the passed in short filename.
2112  * This is used to validate each component of the long name to make
2113  * sure the long name is valid (it hasn't been "detached" from the
2114  * short filename). This algorithm was found in FreeBSD.
2115  * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2116  */
2117 
2118 uchar_t
2119 pc_checksum_long_fn(char *name, char *ext)
2120 {
2121 	uchar_t c;
2122 	char	b[11];
2123 
2124 	bcopy(name, b, 8);
2125 	bcopy(ext, b+8, 3);
2126 
2127 	c = b[0];
2128 	c = ((c << 7) | (c >> 1)) + b[1];
2129 	c = ((c << 7) | (c >> 1)) + b[2];
2130 	c = ((c << 7) | (c >> 1)) + b[3];
2131 	c = ((c << 7) | (c >> 1)) + b[4];
2132 	c = ((c << 7) | (c >> 1)) + b[5];
2133 	c = ((c << 7) | (c >> 1)) + b[6];
2134 	c = ((c << 7) | (c >> 1)) + b[7];
2135 	c = ((c << 7) | (c >> 1)) + b[8];
2136 	c = ((c << 7) | (c >> 1)) + b[9];
2137 	c = ((c << 7) | (c >> 1)) + b[10];
2138 
2139 	return (c);
2140 }
2141 
2142 /*
2143  * Read a chunk of long filename entries into 'namep'.
2144  * Return with offset pointing to short entry (on success), or next
2145  * entry to read (if this wasn't a valid lfn really).
2146  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2147  * a long filename.
2148  *
2149  * Can also be called with a NULL namep, in which case it just returns
2150  * whether this was really a valid long filename and consumes it
2151  * (used by pc_dirempty()).
2152  */
2153 int
2154 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2155     struct pcdir **epp, offset_t *offset, struct buf **bp)
2156 {
2157 	struct pcdir *ep = *epp;
2158 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2159 	struct vnode *dvp = PCTOV(pcp);
2160 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2161 	char	*lfn;
2162 	char	*lfn_base;
2163 	int	boff;
2164 	int	i, cs;
2165 	char 	*buf;
2166 	uchar_t	cksum;
2167 	int 	detached = 0;
2168 	int	error = 0;
2169 	int	foldcase;
2170 	int	count = 0;
2171 	size_t u16l = 0, u8l = 0;
2172 
2173 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2174 	lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2175 	lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2176 	*lfn = '\0';
2177 	*(lfn + 1) = '\0';
2178 	cksum = lep->pcdl_checksum;
2179 
2180 	buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2181 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2182 		/* read next block if necessary */
2183 		boff = pc_blkoff(fsp, *offset);
2184 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2185 			if (*bp != NULL) {
2186 				brelse(*bp);
2187 				*bp = NULL;
2188 			}
2189 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2190 			if (error) {
2191 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2192 				kmem_free(buf, PCMAXNAM_UTF16);
2193 				return (error);
2194 			}
2195 			lep = (struct pcdir_lfn *)ep;
2196 		}
2197 		/* can this happen? Bad fs? */
2198 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2199 			detached = 1;
2200 			break;
2201 		}
2202 		if (cksum != lep->pcdl_checksum)
2203 			detached = 1;
2204 		/* process current entry */
2205 		cs = get_long_fn_chunk(lep, buf, foldcase);
2206 		count += cs;
2207 		for (; cs > 0; cs--) {
2208 			/* see if we underflow */
2209 			if (lfn >= lfn_base)
2210 				*--lfn = buf[cs - 1];
2211 			else
2212 				detached = 1;
2213 		}
2214 		lep++;
2215 		*offset += sizeof (struct pcdir);
2216 	}
2217 	kmem_free(buf, PCMAXNAM_UTF16);
2218 	/* read next block if necessary */
2219 	boff = pc_blkoff(fsp, *offset);
2220 	ep = (struct pcdir *)lep;
2221 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2222 		if (*bp != NULL) {
2223 			brelse(*bp);
2224 			*bp = NULL;
2225 		}
2226 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2227 		if (error) {
2228 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2229 			return (error);
2230 		}
2231 	}
2232 	/* should be on the short one */
2233 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2234 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2235 		detached = 1;
2236 	}
2237 	if (detached ||
2238 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2239 	    !pc_valid_long_fn(lfn, 0)) {
2240 		/*
2241 		 * process current entry again. This may end up another lfn
2242 		 * or a short name.
2243 		 */
2244 		*epp = ep;
2245 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2246 		return (EINVAL);
2247 	}
2248 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2249 		/*
2250 		 * Don't display label because it may contain
2251 		 * funny characters.
2252 		 */
2253 		*offset += sizeof (struct pcdir);
2254 		ep++;
2255 		*epp = ep;
2256 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2257 		return (EINVAL);
2258 	}
2259 	if (namep) {
2260 		u16l = count / 2;
2261 		u8l = PCMAXNAMLEN;
2262 		error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2263 		    (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2264 		/*
2265 		 * uconv_u16tou8() will catch conversion errors including
2266 		 * the case where there is not enough room to write the
2267 		 * converted result and the u8l will never go over the given
2268 		 * PCMAXNAMLEN.
2269 		 */
2270 		if (error != 0) {
2271 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2272 			return (EINVAL);
2273 		}
2274 		namep[u8l] = '\0';
2275 	}
2276 	kmem_free(lfn_base, PCMAXNAM_UTF16);
2277 	*epp = ep;
2278 	return (0);
2279 }
2280 /*
2281  * Read a long filename into the pc_dirent structure and copy it out.
2282  */
2283 int
2284 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2285     struct pcdir **epp, offset_t *offset, struct buf **bp)
2286 {
2287 	struct pcdir *ep;
2288 	struct pcnode *pcp = VTOPC(dvp);
2289 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2290 	offset_t uiooffset = uiop->uio_loffset;
2291 	int	error = 0;
2292 	offset_t oldoffset;
2293 
2294 	oldoffset = *offset;
2295 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2296 	if (error) {
2297 		if (error == EINVAL) {
2298 			uiop->uio_loffset += *offset - oldoffset;
2299 			return (0);
2300 		} else
2301 			return (error);
2302 	}
2303 
2304 	ep = *epp;
2305 	uiop->uio_loffset += *offset - oldoffset;
2306 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2307 	if (ld->d_reclen > uiop->uio_resid) {
2308 		uiop->uio_loffset = uiooffset;
2309 		return (ENOSPC);
2310 	}
2311 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2312 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2313 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2314 	    pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2315 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2316 	uiop->uio_loffset = ld->d_off;
2317 	*offset += sizeof (struct pcdir);
2318 	ep++;
2319 	*epp = ep;
2320 	return (0);
2321 }
2322 
2323 /*
2324  * Read a short filename into the pc_dirent structure and copy it out.
2325  */
2326 int
2327 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2328     struct pcdir **epp, offset_t *offset, struct buf **bp)
2329 {
2330 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2331 	int	boff = pc_blkoff(fsp, *offset);
2332 	struct pcdir *ep = *epp;
2333 	offset_t	oldoffset = uiop->uio_loffset;
2334 	int	error;
2335 	int	foldcase;
2336 
2337 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2338 		uiop->uio_loffset += sizeof (struct pcdir);
2339 		*offset += sizeof (struct pcdir);
2340 		ep++;
2341 		*epp = ep;
2342 		return (0);
2343 	}
2344 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2345 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2346 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2347 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2348 	    &ep->pcd_ext[0], foldcase);
2349 	if (error == 0) {
2350 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2351 		if (ld->d_reclen > uiop->uio_resid) {
2352 			uiop->uio_loffset = oldoffset;
2353 			return (ENOSPC);
2354 		}
2355 		ld->d_off = (off64_t)(uiop->uio_loffset +
2356 		    sizeof (struct pcdir));
2357 		(void) uiomove((caddr_t)ld,
2358 		    ld->d_reclen, UIO_READ, uiop);
2359 		uiop->uio_loffset = ld->d_off;
2360 	} else {
2361 		uiop->uio_loffset += sizeof (struct pcdir);
2362 	}
2363 	*offset += sizeof (struct pcdir);
2364 	ep++;
2365 	*epp = ep;
2366 	return (0);
2367 }
2368 
2369 static int
2370 pcfs_fid(struct vnode *vp, struct fid *fidp)
2371 {
2372 	struct pc_fid *pcfid;
2373 	struct pcnode *pcp;
2374 	struct pcfs	*fsp;
2375 	int	error;
2376 
2377 	fsp = VFSTOPCFS(vp->v_vfsp);
2378 	if (fsp == NULL)
2379 		return (EIO);
2380 	error = pc_lockfs(fsp, 0, 0);
2381 	if (error)
2382 		return (error);
2383 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2384 		pc_unlockfs(fsp);
2385 		return (EIO);
2386 	}
2387 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2388 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2389 		pc_unlockfs(fsp);
2390 		return (ENOSPC);
2391 	}
2392 
2393 	pcfid = (struct pc_fid *)fidp;
2394 	bzero(pcfid, sizeof (struct pc_fid));
2395 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2396 	if (vp->v_flag & VROOT) {
2397 		pcfid->pcfid_block = 0;
2398 		pcfid->pcfid_offset = 0;
2399 		pcfid->pcfid_ctime = 0;
2400 	} else {
2401 		pcfid->pcfid_block = pcp->pc_eblkno;
2402 		pcfid->pcfid_offset = pcp->pc_eoffset;
2403 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2404 	}
2405 	pc_unlockfs(fsp);
2406 	return (0);
2407 }
2408