xref: /illumos-gate/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision bb57d1f5164aca913cbd286ae1b61c896167cfa7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/t_lock.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/user.h>
33 #include <sys/buf.h>
34 #include <sys/stat.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/dirent.h>
38 #include <sys/vnode.h>
39 #include <sys/proc.h>
40 #include <sys/file.h>
41 #include <sys/fcntl.h>
42 #include <sys/uio.h>
43 #include <sys/fs/pc_label.h>
44 #include <sys/fs/pc_fs.h>
45 #include <sys/fs/pc_dir.h>
46 #include <sys/fs/pc_node.h>
47 #include <sys/mman.h>
48 #include <sys/pathname.h>
49 #include <sys/vmsystm.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/statvfs.h>
53 #include <sys/unistd.h>
54 #include <sys/kmem.h>
55 #include <sys/conf.h>
56 #include <sys/flock.h>
57 #include <sys/policy.h>
58 #include <sys/sdt.h>
59 #include <sys/sunddi.h>
60 
61 #include <vm/seg.h>
62 #include <vm/page.h>
63 #include <vm/pvn.h>
64 #include <vm/seg_map.h>
65 #include <vm/seg_vn.h>
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/seg_kmem.h>
69 
70 #include <fs/fs_subr.h>
71 
72 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
73 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
74 	caller_context_t *ct);
75 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
76 	caller_context_t *);
77 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
78 	caller_context_t *);
79 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
80 	caller_context_t *ct);
81 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
82 	caller_context_t *);
83 static int pcfs_access(struct vnode *, int, int, struct cred *,
84 	caller_context_t *ct);
85 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
86 	struct pathname *, int, struct vnode *, struct cred *,
87 	caller_context_t *, int *, pathname_t *);
88 static int pcfs_create(struct vnode *, char *, struct vattr *,
89 	enum vcexcl, int mode, struct vnode **, struct cred *, int,
90 	caller_context_t *, vsecattr_t *);
91 static int pcfs_remove(struct vnode *, char *, struct cred *,
92 	caller_context_t *, int);
93 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
94 	struct cred *, caller_context_t *, int);
95 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
96 	struct cred *, caller_context_t *, int, vsecattr_t *);
97 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
98 	caller_context_t *, int);
99 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
100 	caller_context_t *, int);
101 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
102 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
103 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
104 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
105 	offset_t, cred_t *, caller_context_t *);
106 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
107 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
108 	caller_context_t *);
109 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
110 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
111 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
112 	caller_context_t *);
113 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
114 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
115 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
116 	size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
117 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
118 	size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
119 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
120 	caller_context_t *);
121 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
122 	caller_context_t *);
123 
124 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
125 	struct cred *);
126 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
127 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase);
128 
129 extern krwlock_t pcnodes_lock;
130 
131 #define	lround(r)	(((r)+sizeof (long long)-1)&(~(sizeof (long long)-1)))
132 
133 /*
134  * vnode op vectors for files and directories.
135  */
136 struct vnodeops *pcfs_fvnodeops;
137 struct vnodeops *pcfs_dvnodeops;
138 
139 const fs_operation_def_t pcfs_fvnodeops_template[] = {
140 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
141 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
142 	VOPNAME_READ,		{ .vop_read = pcfs_read },
143 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
144 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
145 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
146 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
147 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
148 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
149 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
150 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
151 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
152 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
153 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
154 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
155 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
156 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
157 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
158 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
159 	NULL,			NULL
160 };
161 
162 const fs_operation_def_t pcfs_dvnodeops_template[] = {
163 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
164 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
165 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
166 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
167 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
168 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
169 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
170 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
171 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
172 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
173 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
174 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
175 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
176 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
177 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
178 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
179 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
180 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
181 	NULL,			NULL
182 };
183 
184 
185 /*ARGSUSED*/
186 static int
187 pcfs_open(
188 	struct vnode **vpp,
189 	int flag,
190 	struct cred *cr,
191 	caller_context_t *ct)
192 {
193 	return (0);
194 }
195 
196 /*
197  * files are sync'ed on close to keep floppy up to date
198  */
199 
200 /*ARGSUSED*/
201 static int
202 pcfs_close(
203 	struct vnode *vp,
204 	int flag,
205 	int count,
206 	offset_t offset,
207 	struct cred *cr,
208 	caller_context_t *ct)
209 {
210 	return (0);
211 }
212 
213 /*ARGSUSED*/
214 static int
215 pcfs_read(
216 	struct vnode *vp,
217 	struct uio *uiop,
218 	int ioflag,
219 	struct cred *cr,
220 	struct caller_context *ct)
221 {
222 	struct pcfs *fsp;
223 	struct pcnode *pcp;
224 	int error;
225 
226 	fsp = VFSTOPCFS(vp->v_vfsp);
227 	if (error = pc_verify(fsp))
228 		return (error);
229 	error = pc_lockfs(fsp, 0, 0);
230 	if (error)
231 		return (error);
232 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
233 		pc_unlockfs(fsp);
234 		return (EIO);
235 	}
236 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
237 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
238 		pc_mark_acc(fsp, pcp);
239 	}
240 	pc_unlockfs(fsp);
241 	if (error) {
242 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
243 	}
244 	return (error);
245 }
246 
247 /*ARGSUSED*/
248 static int
249 pcfs_write(
250 	struct vnode *vp,
251 	struct uio *uiop,
252 	int ioflag,
253 	struct cred *cr,
254 	struct caller_context *ct)
255 {
256 	struct pcfs *fsp;
257 	struct pcnode *pcp;
258 	int error;
259 
260 	fsp = VFSTOPCFS(vp->v_vfsp);
261 	if (error = pc_verify(fsp))
262 		return (error);
263 	error = pc_lockfs(fsp, 0, 0);
264 	if (error)
265 		return (error);
266 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
267 		pc_unlockfs(fsp);
268 		return (EIO);
269 	}
270 	if (ioflag & FAPPEND) {
271 		/*
272 		 * in append mode start at end of file.
273 		 */
274 		uiop->uio_loffset = pcp->pc_size;
275 	}
276 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
277 	pcp->pc_flags |= PC_MOD;
278 	pc_mark_mod(fsp, pcp);
279 	if (ioflag & (FSYNC|FDSYNC))
280 		(void) pc_nodeupdate(pcp);
281 
282 	pc_unlockfs(fsp);
283 	if (error) {
284 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
285 	}
286 	return (error);
287 }
288 
289 /*
290  * read or write a vnode
291  */
292 static int
293 rwpcp(
294 	struct pcnode *pcp,
295 	struct uio *uio,
296 	enum uio_rw rw,
297 	int ioflag)
298 {
299 	struct vnode *vp = PCTOV(pcp);
300 	struct pcfs *fsp;
301 	daddr_t bn;			/* phys block number */
302 	int n;
303 	offset_t off;
304 	caddr_t base;
305 	int mapon, pagecreate;
306 	int newpage;
307 	int error = 0;
308 	rlim64_t limit = uio->uio_llimit;
309 	int oresid = uio->uio_resid;
310 
311 	/*
312 	 * If the filesystem was umounted by force, return immediately.
313 	 */
314 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
315 		return (EIO);
316 
317 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
318 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
319 
320 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
321 	ASSERT(vp->v_type == VREG);
322 
323 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
324 		return (0);
325 	}
326 
327 	if (uio->uio_loffset < 0)
328 		return (EINVAL);
329 
330 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
331 		limit = MAXOFFSET_T;
332 
333 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
334 		proc_t *p = ttoproc(curthread);
335 
336 		mutex_enter(&p->p_lock);
337 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
338 		    p, RCA_UNSAFE_SIGINFO);
339 		mutex_exit(&p->p_lock);
340 		return (EFBIG);
341 	}
342 
343 	/* the following condition will occur only for write */
344 
345 	if (uio->uio_loffset >= UINT32_MAX)
346 		return (EFBIG);
347 
348 	if (uio->uio_resid == 0)
349 		return (0);
350 
351 	if (limit > UINT32_MAX)
352 		limit = UINT32_MAX;
353 
354 	fsp = VFSTOPCFS(vp->v_vfsp);
355 	if (fsp->pcfs_flags & PCFS_IRRECOV)
356 		return (EIO);
357 
358 	do {
359 		/*
360 		 * Assignments to "n" in this block may appear
361 		 * to overflow in some cases.  However, after careful
362 		 * analysis it was determined that all assignments to
363 		 * "n" serve only to make "n" smaller.  Since "n"
364 		 * starts out as no larger than MAXBSIZE, "int" is
365 		 * safe.
366 		 */
367 		off = uio->uio_loffset & MAXBMASK;
368 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
369 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
370 		if (rw == UIO_READ) {
371 			offset_t diff;
372 
373 			diff = pcp->pc_size - uio->uio_loffset;
374 			if (diff <= 0)
375 				return (0);
376 			if (diff < n)
377 				n = (int)diff;
378 		}
379 		/*
380 		 * Compare limit with the actual offset + n, not the
381 		 * rounded down offset "off" or we will overflow
382 		 * the maximum file size after all.
383 		 */
384 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
385 			if (uio->uio_loffset >= limit) {
386 				error = EFBIG;
387 				break;
388 			}
389 			n = (int)(limit - uio->uio_loffset);
390 		}
391 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
392 		pagecreate = 0;
393 		newpage = 0;
394 		if (rw == UIO_WRITE) {
395 			/*
396 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
397 			 * with one page at a time, instead of one MAXBSIZE
398 			 * at a time, so we can fully explore pagecreate
399 			 * optimization??
400 			 */
401 			if (uio->uio_loffset + n > pcp->pc_size) {
402 				uint_t ncl, lcn;
403 
404 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
405 				    fsp->pcfs_clsize);
406 				if (uio->uio_loffset > pcp->pc_size &&
407 				    ncl < (uint_t)howmany(uio->uio_loffset,
408 				    fsp->pcfs_clsize)) {
409 					/*
410 					 * Allocate and zerofill skipped
411 					 * clusters. This may not be worth the
412 					 * effort since a small lseek beyond
413 					 * eof but still within the cluster
414 					 * will not be zeroed out.
415 					 */
416 					lcn = pc_lblkno(fsp, uio->uio_loffset);
417 					error = pc_balloc(pcp, (daddr_t)lcn,
418 					    1, &bn);
419 					ncl = lcn + 1;
420 				}
421 				if (!error &&
422 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
423 				    fsp->pcfs_clsize))
424 					/*
425 					 * allocate clusters w/o zerofill
426 					 */
427 					error = pc_balloc(pcp,
428 					    (daddr_t)pc_lblkno(fsp,
429 					    uio->uio_loffset + n - 1),
430 					    0, &bn);
431 
432 				pcp->pc_flags |= PC_CHG;
433 
434 				if (error) {
435 					pc_cluster32_t ncl;
436 					int nerror;
437 
438 					/*
439 					 * figure out new file size from
440 					 * cluster chain length. If this
441 					 * is detected to loop, the chain
442 					 * is corrupted and we'd better
443 					 * keep our fingers off that file.
444 					 */
445 					nerror = pc_fileclsize(fsp,
446 					    pcp->pc_scluster, &ncl);
447 					if (nerror) {
448 						PC_DPRINTF1(2,
449 						    "cluster chain "
450 						    "corruption, "
451 						    "scluster=%d\n",
452 						    pcp->pc_scluster);
453 						pcp->pc_size = 0;
454 						pcp->pc_flags |= PC_INVAL;
455 						error = nerror;
456 						(void) segmap_release(segkmap,
457 						    base, 0);
458 						break;
459 					}
460 					pcp->pc_size = fsp->pcfs_clsize * ncl;
461 
462 					if (error == ENOSPC &&
463 					    (pcp->pc_size - uio->uio_loffset)
464 					    > 0) {
465 						PC_DPRINTF3(2, "rwpcp ENOSPC "
466 						    "off=%lld n=%d size=%d\n",
467 						    uio->uio_loffset,
468 						    n, pcp->pc_size);
469 						n = (int)(pcp->pc_size -
470 						    uio->uio_loffset);
471 					} else {
472 						PC_DPRINTF1(1,
473 						    "rwpcp error1=%d\n", error);
474 						(void) segmap_release(segkmap,
475 						    base, 0);
476 						break;
477 					}
478 				} else {
479 					pcp->pc_size =
480 					    (uint_t)(uio->uio_loffset + n);
481 				}
482 				if (mapon == 0) {
483 					newpage = segmap_pagecreate(segkmap,
484 					    base, (size_t)n, 0);
485 					pagecreate = 1;
486 				}
487 			} else if (n == MAXBSIZE) {
488 				newpage = segmap_pagecreate(segkmap, base,
489 				    (size_t)n, 0);
490 				pagecreate = 1;
491 			}
492 		}
493 		error = uiomove(base + mapon, (size_t)n, rw, uio);
494 
495 		if (pagecreate && uio->uio_loffset <
496 		    roundup(off + mapon + n, PAGESIZE)) {
497 			offset_t nzero, nmoved;
498 
499 			nmoved = uio->uio_loffset - (off + mapon);
500 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
501 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
502 		}
503 
504 		/*
505 		 * Unlock the pages which have been allocated by
506 		 * page_create_va() in segmap_pagecreate().
507 		 */
508 		if (newpage) {
509 			segmap_pageunlock(segkmap, base, (size_t)n,
510 			    rw == UIO_WRITE ? S_WRITE : S_READ);
511 		}
512 
513 		if (error) {
514 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
515 			/*
516 			 * If we failed on a write, we may have already
517 			 * allocated file blocks as well as pages.  It's hard
518 			 * to undo the block allocation, but we must be sure
519 			 * to invalidate any pages that may have been
520 			 * allocated.
521 			 */
522 			if (rw == UIO_WRITE)
523 				(void) segmap_release(segkmap, base, SM_INVAL);
524 			else
525 				(void) segmap_release(segkmap, base, 0);
526 		} else {
527 			uint_t flags = 0;
528 
529 			if (rw == UIO_READ) {
530 				if (n + mapon == MAXBSIZE ||
531 				    uio->uio_loffset == pcp->pc_size)
532 					flags = SM_DONTNEED;
533 			} else if (ioflag & (FSYNC|FDSYNC)) {
534 				flags = SM_WRITE;
535 			} else if (n + mapon == MAXBSIZE) {
536 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
537 			}
538 			error = segmap_release(segkmap, base, flags);
539 		}
540 
541 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
542 
543 	if (oresid != uio->uio_resid)
544 		error = 0;
545 	return (error);
546 }
547 
548 /*ARGSUSED*/
549 static int
550 pcfs_getattr(
551 	struct vnode *vp,
552 	struct vattr *vap,
553 	int flags,
554 	struct cred *cr,
555 	caller_context_t *ct)
556 {
557 	struct pcnode *pcp;
558 	struct pcfs *fsp;
559 	int error;
560 	char attr;
561 	struct pctime atime;
562 	int64_t unixtime;
563 
564 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
565 
566 	fsp = VFSTOPCFS(vp->v_vfsp);
567 	error = pc_lockfs(fsp, 0, 0);
568 	if (error)
569 		return (error);
570 
571 	/*
572 	 * Note that we don't check for "invalid node" (PC_INVAL) here
573 	 * only in order to make stat() succeed. We allow no I/O on such
574 	 * a node, but do allow to check for its existence.
575 	 */
576 	if ((pcp = VTOPC(vp)) == NULL) {
577 		pc_unlockfs(fsp);
578 		return (EIO);
579 	}
580 	/*
581 	 * Copy from pcnode.
582 	 */
583 	vap->va_type = vp->v_type;
584 	attr = pcp->pc_entry.pcd_attr;
585 	if (PCA_IS_HIDDEN(fsp, attr))
586 		vap->va_mode = 0;
587 	else if (attr & PCA_LABEL)
588 		vap->va_mode = 0444;
589 	else if (attr & PCA_RDONLY)
590 		vap->va_mode = 0555;
591 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
592 		vap->va_mode = 0755;
593 	} else {
594 		vap->va_mode = 0777;
595 	}
596 
597 	if (attr & PCA_DIR)
598 		vap->va_mode |= S_IFDIR;
599 	else
600 		vap->va_mode |= S_IFREG;
601 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
602 		vap->va_uid = 0;
603 		vap->va_gid = 0;
604 	} else {
605 		vap->va_uid = crgetuid(cr);
606 		vap->va_gid = crgetgid(cr);
607 	}
608 	vap->va_fsid = vp->v_vfsp->vfs_dev;
609 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
610 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
611 	    pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
612 	vap->va_nlink = 1;
613 	vap->va_size = (u_offset_t)pcp->pc_size;
614 	vap->va_rdev = 0;
615 	vap->va_nblocks =
616 	    (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
617 	vap->va_blksize = fsp->pcfs_clsize;
618 
619 	/*
620 	 * FAT root directories have no timestamps. In order not to return
621 	 * "time zero" (1/1/1970), we record the time of the mount and give
622 	 * that. This breaks less expectations.
623 	 */
624 	if (vp->v_flag & VROOT) {
625 		vap->va_mtime = fsp->pcfs_mounttime;
626 		vap->va_atime = fsp->pcfs_mounttime;
627 		vap->va_ctime = fsp->pcfs_mounttime;
628 		pc_unlockfs(fsp);
629 		return (0);
630 	}
631 
632 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
633 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
634 		if (unixtime > INT32_MAX)
635 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
636 		unixtime = MIN(unixtime, INT32_MAX);
637 	} else if (unixtime > INT32_MAX &&
638 	    get_udatamodel() == DATAMODEL_ILP32) {
639 		pc_unlockfs(fsp);
640 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
641 		return (EOVERFLOW);
642 	}
643 
644 	vap->va_mtime.tv_sec = (time_t)unixtime;
645 	vap->va_mtime.tv_nsec = 0;
646 
647 	/*
648 	 * FAT doesn't know about POSIX ctime.
649 	 * Best approximation is to always set it to mtime.
650 	 */
651 	vap->va_ctime = vap->va_mtime;
652 
653 	/*
654 	 * FAT only stores "last access date". If that's the
655 	 * same as the date of last modification then the time
656 	 * of last access is known. Otherwise, use midnight.
657 	 */
658 	atime.pct_date = pcp->pc_entry.pcd_ladate;
659 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
660 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
661 	else
662 		atime.pct_time = 0;
663 	pc_pcttotv(&atime, &unixtime);
664 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
665 		if (unixtime > INT32_MAX)
666 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
667 		unixtime = MIN(unixtime, INT32_MAX);
668 	} else if (unixtime > INT32_MAX &&
669 	    get_udatamodel() == DATAMODEL_ILP32) {
670 		pc_unlockfs(fsp);
671 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
672 		return (EOVERFLOW);
673 	}
674 
675 	vap->va_atime.tv_sec = (time_t)unixtime;
676 	vap->va_atime.tv_nsec = 0;
677 
678 	pc_unlockfs(fsp);
679 	return (0);
680 }
681 
682 
683 /*ARGSUSED*/
684 static int
685 pcfs_setattr(
686 	struct vnode *vp,
687 	struct vattr *vap,
688 	int flags,
689 	struct cred *cr,
690 	caller_context_t *ct)
691 {
692 	struct pcnode *pcp;
693 	mode_t mask = vap->va_mask;
694 	int error;
695 	struct pcfs *fsp;
696 	timestruc_t now, *timep;
697 
698 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
699 	/*
700 	 * cannot set these attributes
701 	 */
702 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
703 		return (EINVAL);
704 	}
705 	/*
706 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
707 	 * from 'tar' when it tries to set times on a directory, and console
708 	 * printf's on the NFS server when it gets EINVAL back on such a
709 	 * request. One possible problem with that since a directory entry
710 	 * identifies a file, '.' and all the '..' entries in subdirectories
711 	 * may get out of sync when the directory is updated since they're
712 	 * treated like separate files. We could fix that by looking for
713 	 * '.' and giving it the same attributes, and then looking for
714 	 * all the subdirectories and updating '..', but that's pretty
715 	 * expensive for something that doesn't seem likely to matter.
716 	 */
717 	/* can't do some ops on directories anyway */
718 	if ((vp->v_type == VDIR) &&
719 	    (mask & AT_SIZE)) {
720 		return (EINVAL);
721 	}
722 
723 	fsp = VFSTOPCFS(vp->v_vfsp);
724 	error = pc_lockfs(fsp, 0, 0);
725 	if (error)
726 		return (error);
727 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
728 		pc_unlockfs(fsp);
729 		return (EIO);
730 	}
731 
732 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
733 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
734 			pc_unlockfs(fsp);
735 			return (EACCES);
736 		}
737 	}
738 
739 	/*
740 	 * Change file access modes.
741 	 * If nobody has write permission, file is marked readonly.
742 	 * Otherwise file is writable by anyone.
743 	 */
744 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
745 		if ((vap->va_mode & 0222) == 0)
746 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
747 		else
748 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
749 		pcp->pc_flags |= PC_CHG;
750 	}
751 	/*
752 	 * Truncate file. Must have write permission.
753 	 */
754 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
755 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
756 			error = EACCES;
757 			goto out;
758 		}
759 		if (vap->va_size > UINT32_MAX) {
760 			error = EFBIG;
761 			goto out;
762 		}
763 		error = pc_truncate(pcp, (uint_t)vap->va_size);
764 		if (error)
765 			goto out;
766 	}
767 	/*
768 	 * Change file modified times.
769 	 */
770 	if (mask & (AT_MTIME | AT_CTIME)) {
771 		/*
772 		 * If SysV-compatible option to set access and
773 		 * modified times if privileged, owner, or write access,
774 		 * use current time rather than va_mtime.
775 		 *
776 		 * XXX - va_mtime.tv_sec == -1 flags this.
777 		 */
778 		timep = &vap->va_mtime;
779 		if (vap->va_mtime.tv_sec == -1) {
780 			gethrestime(&now);
781 			timep = &now;
782 		}
783 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
784 		    timep->tv_sec > INT32_MAX) {
785 			error = EOVERFLOW;
786 			goto out;
787 		}
788 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
789 		if (error)
790 			goto out;
791 		pcp->pc_flags |= PC_CHG;
792 	}
793 	/*
794 	 * Change file access times.
795 	 */
796 	if (mask & AT_ATIME) {
797 		/*
798 		 * If SysV-compatible option to set access and
799 		 * modified times if privileged, owner, or write access,
800 		 * use current time rather than va_mtime.
801 		 *
802 		 * XXX - va_atime.tv_sec == -1 flags this.
803 		 */
804 		struct pctime	atime;
805 
806 		timep = &vap->va_atime;
807 		if (vap->va_atime.tv_sec == -1) {
808 			gethrestime(&now);
809 			timep = &now;
810 		}
811 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
812 		    timep->tv_sec > INT32_MAX) {
813 			error = EOVERFLOW;
814 			goto out;
815 		}
816 		error = pc_tvtopct(timep, &atime);
817 		if (error)
818 			goto out;
819 		pcp->pc_entry.pcd_ladate = atime.pct_date;
820 		pcp->pc_flags |= PC_CHG;
821 	}
822 out:
823 	pc_unlockfs(fsp);
824 	return (error);
825 }
826 
827 
828 /*ARGSUSED*/
829 static int
830 pcfs_access(
831 	struct vnode *vp,
832 	int mode,
833 	int flags,
834 	struct cred *cr,
835 	caller_context_t *ct)
836 {
837 	struct pcnode *pcp;
838 	struct pcfs *fsp;
839 
840 
841 	fsp = VFSTOPCFS(vp->v_vfsp);
842 
843 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
844 		return (EIO);
845 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
846 		return (EACCES);
847 
848 	/*
849 	 * If this is a boot partition, privileged users have full access while
850 	 * others have read-only access.
851 	 */
852 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
853 		if ((mode & VWRITE) &&
854 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
855 			return (EACCES);
856 	}
857 	return (0);
858 }
859 
860 
861 /*ARGSUSED*/
862 static int
863 pcfs_fsync(
864 	struct vnode *vp,
865 	int syncflag,
866 	struct cred *cr,
867 	caller_context_t *ct)
868 {
869 	struct pcfs *fsp;
870 	struct pcnode *pcp;
871 	int error;
872 
873 	fsp = VFSTOPCFS(vp->v_vfsp);
874 	if (error = pc_verify(fsp))
875 		return (error);
876 	error = pc_lockfs(fsp, 0, 0);
877 	if (error)
878 		return (error);
879 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
880 		pc_unlockfs(fsp);
881 		return (EIO);
882 	}
883 	rw_enter(&pcnodes_lock, RW_WRITER);
884 	error = pc_nodesync(pcp);
885 	rw_exit(&pcnodes_lock);
886 	pc_unlockfs(fsp);
887 	return (error);
888 }
889 
890 
891 /*ARGSUSED*/
892 static void
893 pcfs_inactive(
894 	struct vnode *vp,
895 	struct cred *cr,
896 	caller_context_t *ct)
897 {
898 	struct pcnode *pcp;
899 	struct pcfs *fsp;
900 	int error;
901 
902 	fsp = VFSTOPCFS(vp->v_vfsp);
903 	error = pc_lockfs(fsp, 0, 1);
904 
905 	/*
906 	 * If the filesystem was umounted by force, all dirty
907 	 * pages associated with this vnode are invalidated
908 	 * and then the vnode will be freed.
909 	 */
910 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
911 		pcp = VTOPC(vp);
912 		if (vn_has_cached_data(vp)) {
913 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
914 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
915 		}
916 		remque(pcp);
917 		if (error == 0)
918 			pc_unlockfs(fsp);
919 		vn_free(vp);
920 		kmem_free(pcp, sizeof (struct pcnode));
921 		VFS_RELE(PCFSTOVFS(fsp));
922 		return;
923 	}
924 
925 	mutex_enter(&vp->v_lock);
926 	ASSERT(vp->v_count >= 1);
927 	if (vp->v_count > 1) {
928 		vp->v_count--;  /* release our hold from vn_rele */
929 		mutex_exit(&vp->v_lock);
930 		pc_unlockfs(fsp);
931 		return;
932 	}
933 	mutex_exit(&vp->v_lock);
934 
935 	/*
936 	 * Check again to confirm that no intervening I/O error
937 	 * with a subsequent pc_diskchanged() call has released
938 	 * the pcnode. If it has then release the vnode as above.
939 	 */
940 	pcp = VTOPC(vp);
941 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
942 		if (vn_has_cached_data(vp))
943 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
944 			    pcfs_putapage, B_INVAL | B_TRUNC,
945 			    (struct cred *)NULL);
946 	}
947 
948 	if (pcp == NULL) {
949 		vn_free(vp);
950 	} else {
951 		pc_rele(pcp);
952 	}
953 
954 	if (!error)
955 		pc_unlockfs(fsp);
956 }
957 
958 /*ARGSUSED*/
959 static int
960 pcfs_lookup(
961 	struct vnode *dvp,
962 	char *nm,
963 	struct vnode **vpp,
964 	struct pathname *pnp,
965 	int flags,
966 	struct vnode *rdir,
967 	struct cred *cr,
968 	caller_context_t *ct,
969 	int *direntflags,
970 	pathname_t *realpnp)
971 {
972 	struct pcfs *fsp;
973 	struct pcnode *pcp;
974 	int error;
975 
976 	/*
977 	 * If the filesystem was umounted by force, return immediately.
978 	 */
979 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
980 		return (EIO);
981 
982 	/*
983 	 * verify that the dvp is still valid on the disk
984 	 */
985 	fsp = VFSTOPCFS(dvp->v_vfsp);
986 	if (error = pc_verify(fsp))
987 		return (error);
988 	error = pc_lockfs(fsp, 0, 0);
989 	if (error)
990 		return (error);
991 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
992 		pc_unlockfs(fsp);
993 		return (EIO);
994 	}
995 	/*
996 	 * Null component name is a synonym for directory being searched.
997 	 */
998 	if (*nm == '\0') {
999 		VN_HOLD(dvp);
1000 		*vpp = dvp;
1001 		pc_unlockfs(fsp);
1002 		return (0);
1003 	}
1004 
1005 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1006 	if (!error) {
1007 		*vpp = PCTOV(pcp);
1008 		pcp->pc_flags |= PC_EXTERNAL;
1009 	}
1010 	pc_unlockfs(fsp);
1011 	return (error);
1012 }
1013 
1014 
1015 /*ARGSUSED*/
1016 static int
1017 pcfs_create(
1018 	struct vnode *dvp,
1019 	char *nm,
1020 	struct vattr *vap,
1021 	enum vcexcl exclusive,
1022 	int mode,
1023 	struct vnode **vpp,
1024 	struct cred *cr,
1025 	int flag,
1026 	caller_context_t *ct,
1027 	vsecattr_t *vsecp)
1028 {
1029 	int error;
1030 	struct pcnode *pcp;
1031 	struct vnode *vp;
1032 	struct pcfs *fsp;
1033 
1034 	/*
1035 	 * can't create directories. use pcfs_mkdir.
1036 	 * can't create anything other than files.
1037 	 */
1038 	if (vap->va_type == VDIR)
1039 		return (EISDIR);
1040 	else if (vap->va_type != VREG)
1041 		return (EINVAL);
1042 
1043 	pcp = NULL;
1044 	fsp = VFSTOPCFS(dvp->v_vfsp);
1045 	error = pc_lockfs(fsp, 0, 0);
1046 	if (error)
1047 		return (error);
1048 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1049 		pc_unlockfs(fsp);
1050 		return (EIO);
1051 	}
1052 
1053 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1054 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1055 			pc_unlockfs(fsp);
1056 			return (EACCES);
1057 		}
1058 	}
1059 
1060 	if (*nm == '\0') {
1061 		/*
1062 		 * Null component name refers to the directory itself.
1063 		 */
1064 		VN_HOLD(dvp);
1065 		pcp = VTOPC(dvp);
1066 		error = EEXIST;
1067 	} else {
1068 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1069 	}
1070 	/*
1071 	 * if file exists and this is a nonexclusive create,
1072 	 * check for access permissions
1073 	 */
1074 	if (error == EEXIST) {
1075 		vp = PCTOV(pcp);
1076 		if (exclusive == NONEXCL) {
1077 			if (vp->v_type == VDIR) {
1078 				error = EISDIR;
1079 			} else if (mode) {
1080 				error = pcfs_access(PCTOV(pcp), mode, 0,
1081 				    cr, ct);
1082 			} else {
1083 				error = 0;
1084 			}
1085 		}
1086 		if (error) {
1087 			VN_RELE(PCTOV(pcp));
1088 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1089 		    (vap->va_size == 0)) {
1090 			error = pc_truncate(pcp, 0L);
1091 			if (error) {
1092 				VN_RELE(PCTOV(pcp));
1093 			} else {
1094 				vnevent_create(PCTOV(pcp), ct);
1095 			}
1096 		}
1097 	}
1098 	if (error) {
1099 		pc_unlockfs(fsp);
1100 		return (error);
1101 	}
1102 	*vpp = PCTOV(pcp);
1103 	pcp->pc_flags |= PC_EXTERNAL;
1104 	pc_unlockfs(fsp);
1105 	return (error);
1106 }
1107 
1108 /*ARGSUSED*/
1109 static int
1110 pcfs_remove(
1111 	struct vnode *vp,
1112 	char *nm,
1113 	struct cred *cr,
1114 	caller_context_t *ct,
1115 	int flags)
1116 {
1117 	struct pcfs *fsp;
1118 	struct pcnode *pcp;
1119 	int error;
1120 
1121 	fsp = VFSTOPCFS(vp->v_vfsp);
1122 	if (error = pc_verify(fsp))
1123 		return (error);
1124 	error = pc_lockfs(fsp, 0, 0);
1125 	if (error)
1126 		return (error);
1127 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1128 		pc_unlockfs(fsp);
1129 		return (EIO);
1130 	}
1131 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1132 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1133 			pc_unlockfs(fsp);
1134 			return (EACCES);
1135 		}
1136 	}
1137 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1138 	pc_unlockfs(fsp);
1139 	return (error);
1140 }
1141 
1142 /*
1143  * Rename a file or directory
1144  * This rename is restricted to only rename files within a directory.
1145  * XX should make rename more general
1146  */
1147 /*ARGSUSED*/
1148 static int
1149 pcfs_rename(
1150 	struct vnode *sdvp,		/* old (source) parent vnode */
1151 	char *snm,			/* old (source) entry name */
1152 	struct vnode *tdvp,		/* new (target) parent vnode */
1153 	char *tnm,			/* new (target) entry name */
1154 	struct cred *cr,
1155 	caller_context_t *ct,
1156 	int flags)
1157 {
1158 	struct pcfs *fsp;
1159 	struct pcnode *dp;	/* parent pcnode */
1160 	struct pcnode *tdp;
1161 	int error;
1162 
1163 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1164 	if (error = pc_verify(fsp))
1165 		return (error);
1166 
1167 	/*
1168 	 * make sure we can muck with this directory.
1169 	 */
1170 	error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1171 	if (error) {
1172 		return (error);
1173 	}
1174 	error = pc_lockfs(fsp, 0, 0);
1175 	if (error)
1176 		return (error);
1177 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1178 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1179 		pc_unlockfs(fsp);
1180 		return (EIO);
1181 	}
1182 	error = pc_rename(dp, tdp, snm, tnm, ct);
1183 	pc_unlockfs(fsp);
1184 	return (error);
1185 }
1186 
1187 /*ARGSUSED*/
1188 static int
1189 pcfs_mkdir(
1190 	struct vnode *dvp,
1191 	char *nm,
1192 	struct vattr *vap,
1193 	struct vnode **vpp,
1194 	struct cred *cr,
1195 	caller_context_t *ct,
1196 	int flags,
1197 	vsecattr_t *vsecp)
1198 {
1199 	struct pcfs *fsp;
1200 	struct pcnode *pcp;
1201 	int error;
1202 
1203 	fsp = VFSTOPCFS(dvp->v_vfsp);
1204 	if (error = pc_verify(fsp))
1205 		return (error);
1206 	error = pc_lockfs(fsp, 0, 0);
1207 	if (error)
1208 		return (error);
1209 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1210 		pc_unlockfs(fsp);
1211 		return (EIO);
1212 	}
1213 
1214 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1215 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1216 			pc_unlockfs(fsp);
1217 			return (EACCES);
1218 		}
1219 	}
1220 
1221 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1222 
1223 	if (!error) {
1224 		pcp -> pc_flags |= PC_EXTERNAL;
1225 		*vpp = PCTOV(pcp);
1226 	} else if (error == EEXIST) {
1227 		VN_RELE(PCTOV(pcp));
1228 	}
1229 	pc_unlockfs(fsp);
1230 	return (error);
1231 }
1232 
1233 /*ARGSUSED*/
1234 static int
1235 pcfs_rmdir(
1236 	struct vnode *dvp,
1237 	char *nm,
1238 	struct vnode *cdir,
1239 	struct cred *cr,
1240 	caller_context_t *ct,
1241 	int flags)
1242 {
1243 	struct pcfs *fsp;
1244 	struct pcnode *pcp;
1245 	int error;
1246 
1247 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1248 	if (error = pc_verify(fsp))
1249 		return (error);
1250 	if (error = pc_lockfs(fsp, 0, 0))
1251 		return (error);
1252 
1253 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1254 		pc_unlockfs(fsp);
1255 		return (EIO);
1256 	}
1257 
1258 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1259 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1260 			pc_unlockfs(fsp);
1261 			return (EACCES);
1262 		}
1263 	}
1264 
1265 	error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1266 	pc_unlockfs(fsp);
1267 	return (error);
1268 }
1269 
1270 /*
1271  * read entries in a directory.
1272  * we must convert pc format to unix format
1273  */
1274 
1275 /*ARGSUSED*/
1276 static int
1277 pcfs_readdir(
1278 	struct vnode *dvp,
1279 	struct uio *uiop,
1280 	struct cred *cr,
1281 	int *eofp,
1282 	caller_context_t *ct,
1283 	int flags)
1284 {
1285 	struct pcnode *pcp;
1286 	struct pcfs *fsp;
1287 	struct pcdir *ep;
1288 	struct buf *bp = NULL;
1289 	offset_t offset;
1290 	int boff;
1291 	struct pc_dirent lbp;
1292 	struct pc_dirent *ld = &lbp;
1293 	int error;
1294 
1295 	/*
1296 	 * If the filesystem was umounted by force, return immediately.
1297 	 */
1298 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1299 		return (EIO);
1300 
1301 	if ((uiop->uio_iovcnt != 1) ||
1302 	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1303 		return (EINVAL);
1304 	}
1305 	fsp = VFSTOPCFS(dvp->v_vfsp);
1306 	/*
1307 	 * verify that the dp is still valid on the disk
1308 	 */
1309 	if (error = pc_verify(fsp)) {
1310 		return (error);
1311 	}
1312 	error = pc_lockfs(fsp, 0, 0);
1313 	if (error)
1314 		return (error);
1315 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1316 		pc_unlockfs(fsp);
1317 		return (EIO);
1318 	}
1319 
1320 	bzero(ld, sizeof (*ld));
1321 
1322 	if (eofp != NULL)
1323 		*eofp = 0;
1324 	offset = uiop->uio_loffset;
1325 
1326 	if (dvp->v_flag & VROOT) {
1327 		/*
1328 		 * kludge up entries for "." and ".." in the root.
1329 		 */
1330 		if (offset == 0) {
1331 			(void) strcpy(ld->d_name, ".");
1332 			ld->d_reclen = DIRENT64_RECLEN(1);
1333 			ld->d_off = (off64_t)sizeof (struct pcdir);
1334 			ld->d_ino = (ino64_t)UINT_MAX;
1335 			if (ld->d_reclen > uiop->uio_resid) {
1336 				pc_unlockfs(fsp);
1337 				return (ENOSPC);
1338 			}
1339 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1340 			uiop->uio_loffset = ld->d_off;
1341 			offset = uiop->uio_loffset;
1342 		}
1343 		if (offset == sizeof (struct pcdir)) {
1344 			(void) strcpy(ld->d_name, "..");
1345 			ld->d_reclen = DIRENT64_RECLEN(2);
1346 			if (ld->d_reclen > uiop->uio_resid) {
1347 				pc_unlockfs(fsp);
1348 				return (ENOSPC);
1349 			}
1350 			ld->d_off = (off64_t)(uiop->uio_loffset +
1351 			    sizeof (struct pcdir));
1352 			ld->d_ino = (ino64_t)UINT_MAX;
1353 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1354 			uiop->uio_loffset = ld->d_off;
1355 			offset = uiop->uio_loffset;
1356 		}
1357 		offset -= 2 * sizeof (struct pcdir);
1358 		/* offset now has the real offset value into directory file */
1359 	}
1360 
1361 	for (;;) {
1362 		boff = pc_blkoff(fsp, offset);
1363 		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1364 			if (bp != NULL) {
1365 				brelse(bp);
1366 				bp = NULL;
1367 			}
1368 			error = pc_blkatoff(pcp, offset, &bp, &ep);
1369 			if (error) {
1370 				if (error == ENOENT) {
1371 					error = 0;
1372 					if (eofp)
1373 						*eofp = 1;
1374 				}
1375 				break;
1376 			}
1377 		}
1378 		if (ep->pcd_filename[0] == PCD_UNUSED) {
1379 			if (eofp)
1380 				*eofp = 1;
1381 			break;
1382 		}
1383 		/*
1384 		 * Don't display label because it may contain funny characters.
1385 		 */
1386 		if (ep->pcd_filename[0] == PCD_ERASED) {
1387 			uiop->uio_loffset += sizeof (struct pcdir);
1388 			offset += sizeof (struct pcdir);
1389 			ep++;
1390 			continue;
1391 		}
1392 		if (PCDL_IS_LFN(ep)) {
1393 			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1394 			    0)
1395 				break;
1396 			continue;
1397 		}
1398 
1399 		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1400 			break;
1401 	}
1402 	if (bp)
1403 		brelse(bp);
1404 	pc_unlockfs(fsp);
1405 	return (error);
1406 }
1407 
1408 
1409 /*
1410  * Called from pvn_getpages or pcfs_getpage to get a particular page.
1411  * When we are called the pcfs is already locked.
1412  */
1413 /*ARGSUSED*/
1414 static int
1415 pcfs_getapage(
1416 	struct vnode *vp,
1417 	u_offset_t off,
1418 	size_t len,
1419 	uint_t *protp,
1420 	page_t *pl[],		/* NULL if async IO is requested */
1421 	size_t plsz,
1422 	struct seg *seg,
1423 	caddr_t addr,
1424 	enum seg_rw rw,
1425 	struct cred *cr)
1426 {
1427 	struct pcnode *pcp;
1428 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1429 	struct vnode *devvp;
1430 	page_t *pp;
1431 	page_t *pagefound;
1432 	int err;
1433 
1434 	/*
1435 	 * If the filesystem was umounted by force, return immediately.
1436 	 */
1437 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1438 		return (EIO);
1439 
1440 	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1441 	    (void *)vp, off, len);
1442 
1443 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1444 		return (EIO);
1445 	devvp = fsp->pcfs_devvp;
1446 
1447 	/* pcfs doesn't do readaheads */
1448 	if (pl == NULL)
1449 		return (0);
1450 
1451 	pl[0] = NULL;
1452 	err = 0;
1453 	/*
1454 	 * If the accessed time on the pcnode has not already been
1455 	 * set elsewhere (e.g. for read/setattr) we set the time now.
1456 	 * This gives us approximate modified times for mmap'ed files
1457 	 * which are accessed via loads in the user address space.
1458 	 */
1459 	if ((pcp->pc_flags & PC_ACC) == 0 &&
1460 	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1461 		pc_mark_acc(fsp, pcp);
1462 	}
1463 reread:
1464 	if ((pagefound = page_exists(vp, off)) == NULL) {
1465 		/*
1466 		 * Need to really do disk IO to get the page(s).
1467 		 */
1468 		struct buf *bp;
1469 		daddr_t lbn, bn;
1470 		u_offset_t io_off;
1471 		size_t io_len;
1472 		u_offset_t lbnoff, xferoffset;
1473 		u_offset_t pgoff;
1474 		uint_t	xfersize;
1475 		int err1;
1476 
1477 		lbn = pc_lblkno(fsp, off);
1478 		lbnoff = off & ~(fsp->pcfs_clsize - 1);
1479 		xferoffset = off & ~(fsp->pcfs_secsize - 1);
1480 
1481 		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1482 		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1483 		if (pp == NULL)
1484 			/*
1485 			 * XXX - If pcfs is made MT-hot, this should go
1486 			 * back to reread.
1487 			 */
1488 			panic("pcfs_getapage pvn_read_kluster");
1489 
1490 		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1491 		    pgoff += xfersize,
1492 		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
1493 		    lbnoff += xfersize, xferoffset += xfersize) {
1494 			/*
1495 			 * read as many contiguous blocks as possible to
1496 			 * fill this page
1497 			 */
1498 			xfersize = PAGESIZE - pgoff;
1499 			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1500 			if (err1) {
1501 				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1502 				err = err1;
1503 				goto out;
1504 			}
1505 			bp = pageio_setup(pp, xfersize, devvp, B_READ);
1506 			bp->b_edev = devvp->v_rdev;
1507 			bp->b_dev = cmpdev(devvp->v_rdev);
1508 			bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1509 			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1510 			bp->b_file = vp;
1511 			bp->b_offset = (offset_t)(off + pgoff);
1512 
1513 			(void) bdev_strategy(bp);
1514 
1515 			lwp_stat_update(LWP_STAT_INBLK, 1);
1516 
1517 			if (err == 0)
1518 				err = biowait(bp);
1519 			else
1520 				(void) biowait(bp);
1521 			pageio_done(bp);
1522 			if (err)
1523 				goto out;
1524 		}
1525 		if (pgoff < PAGESIZE) {
1526 			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1527 		}
1528 		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1529 	}
1530 out:
1531 	if (err) {
1532 		if (pp != NULL)
1533 			pvn_read_done(pp, B_ERROR);
1534 		return (err);
1535 	}
1536 
1537 	if (pagefound) {
1538 		/*
1539 		 * Page exists in the cache, acquire the "shared"
1540 		 * lock.  If this fails, go back to reread.
1541 		 */
1542 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1543 			goto reread;
1544 		}
1545 		pl[0] = pp;
1546 		pl[1] = NULL;
1547 	}
1548 	return (err);
1549 }
1550 
1551 /*
1552  * Return all the pages from [off..off+len] in given file
1553  */
1554 /* ARGSUSED */
1555 static int
1556 pcfs_getpage(
1557 	struct vnode *vp,
1558 	offset_t off,
1559 	size_t len,
1560 	uint_t *protp,
1561 	page_t *pl[],
1562 	size_t plsz,
1563 	struct seg *seg,
1564 	caddr_t addr,
1565 	enum seg_rw rw,
1566 	struct cred *cr,
1567 	caller_context_t *ct)
1568 {
1569 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1570 	int err;
1571 
1572 	PC_DPRINTF0(6, "pcfs_getpage\n");
1573 	if (err = pc_verify(fsp))
1574 		return (err);
1575 	if (vp->v_flag & VNOMAP)
1576 		return (ENOSYS);
1577 	ASSERT(off <= UINT32_MAX);
1578 	err = pc_lockfs(fsp, 0, 0);
1579 	if (err)
1580 		return (err);
1581 	if (protp != NULL)
1582 		*protp = PROT_ALL;
1583 
1584 	ASSERT((off & PAGEOFFSET) == 0);
1585 	if (len <= PAGESIZE) {
1586 		err = pcfs_getapage(vp, off, len, protp, pl,
1587 		    plsz, seg, addr, rw, cr);
1588 	} else {
1589 		err = pvn_getpages(pcfs_getapage, vp, off,
1590 		    len, protp, pl, plsz, seg, addr, rw, cr);
1591 	}
1592 	pc_unlockfs(fsp);
1593 	return (err);
1594 }
1595 
1596 
1597 /*
1598  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1599  * If len == 0, do from off to EOF.
1600  *
1601  * The normal cases should be len == 0 & off == 0 (entire vp list),
1602  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1603  * (from pageout).
1604  *
1605  */
1606 /*ARGSUSED*/
1607 static int
1608 pcfs_putpage(
1609 	struct vnode *vp,
1610 	offset_t off,
1611 	size_t len,
1612 	int flags,
1613 	struct cred *cr,
1614 	caller_context_t *ct)
1615 {
1616 	struct pcnode *pcp;
1617 	page_t *pp;
1618 	struct pcfs *fsp;
1619 	u_offset_t io_off;
1620 	size_t io_len;
1621 	offset_t eoff;
1622 	int err;
1623 
1624 	/*
1625 	 * If the filesystem was umounted by force, return immediately.
1626 	 */
1627 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1628 		return (EIO);
1629 
1630 	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1631 	if (vp->v_flag & VNOMAP)
1632 		return (ENOSYS);
1633 
1634 	fsp = VFSTOPCFS(vp->v_vfsp);
1635 
1636 	if (err = pc_verify(fsp))
1637 		return (err);
1638 	if ((pcp = VTOPC(vp)) == NULL) {
1639 		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1640 		return (EIO);
1641 	}
1642 	if (pcp->pc_flags & PC_INVAL)
1643 		return (EIO);
1644 
1645 	if (curproc == proc_pageout) {
1646 		/*
1647 		 * XXX - This is a quick hack to avoid blocking
1648 		 * pageout. Also to avoid pcfs_getapage deadlocking
1649 		 * with putpage when memory is running out,
1650 		 * since we only have one global lock and we don't
1651 		 * support async putpage.
1652 		 * It should be fixed someday.
1653 		 *
1654 		 * Interestingly, this used to be a test of NOMEMWAIT().
1655 		 * We only ever got here once pcfs started supporting
1656 		 * NFS sharing, and then only because the NFS server
1657 		 * threads seem to do writes in sched's process context.
1658 		 * Since everyone else seems to just care about pageout,
1659 		 * the test was changed to look for pageout directly.
1660 		 */
1661 		return (ENOMEM);
1662 	}
1663 
1664 	ASSERT(off <= UINT32_MAX);
1665 
1666 	flags &= ~B_ASYNC;	/* XXX should fix this later */
1667 
1668 	err = pc_lockfs(fsp, 0, 0);
1669 	if (err)
1670 		return (err);
1671 	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1672 		pc_unlockfs(fsp);
1673 		return (0);
1674 	}
1675 
1676 	if (len == 0) {
1677 		/*
1678 		 * Search the entire vp list for pages >= off
1679 		 */
1680 		err = pvn_vplist_dirty(vp, off,
1681 		    pcfs_putapage, flags, cr);
1682 	} else {
1683 		eoff = off + len;
1684 
1685 		for (io_off = off; io_off < eoff &&
1686 		    io_off < pcp->pc_size; io_off += io_len) {
1687 			/*
1688 			 * If we are not invalidating, synchronously
1689 			 * freeing or writing pages use the routine
1690 			 * page_lookup_nowait() to prevent reclaiming
1691 			 * them from the free list.
1692 			 */
1693 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1694 				pp = page_lookup(vp, io_off,
1695 				    (flags & (B_INVAL | B_FREE)) ?
1696 				    SE_EXCL : SE_SHARED);
1697 			} else {
1698 				pp = page_lookup_nowait(vp, io_off,
1699 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1700 			}
1701 
1702 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1703 				io_len = PAGESIZE;
1704 			else {
1705 				err = pcfs_putapage(vp, pp, &io_off, &io_len,
1706 				    flags, cr);
1707 				if (err != 0)
1708 					break;
1709 				/*
1710 				 * "io_off" and "io_len" are returned as
1711 				 * the range of pages we actually wrote.
1712 				 * This allows us to skip ahead more quickly
1713 				 * since several pages may've been dealt
1714 				 * with by this iteration of the loop.
1715 				 */
1716 			}
1717 		}
1718 	}
1719 	if (err == 0 && (flags & B_INVAL) &&
1720 	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
1721 		/*
1722 		 * If doing "invalidation", make sure that
1723 		 * all pages on the vnode list are actually
1724 		 * gone.
1725 		 */
1726 		cmn_err(CE_PANIC,
1727 		    "pcfs_putpage: B_INVAL, pages not gone");
1728 	} else if (err) {
1729 		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1730 	}
1731 	pc_unlockfs(fsp);
1732 	return (err);
1733 }
1734 
1735 /*
1736  * Write out a single page, possibly klustering adjacent dirty pages.
1737  */
1738 /*ARGSUSED*/
1739 int
1740 pcfs_putapage(
1741 	struct vnode *vp,
1742 	page_t *pp,
1743 	u_offset_t *offp,
1744 	size_t *lenp,
1745 	int flags,
1746 	struct cred *cr)
1747 {
1748 	struct pcnode *pcp;
1749 	struct pcfs *fsp;
1750 	struct vnode *devvp;
1751 	size_t io_len;
1752 	daddr_t bn;
1753 	u_offset_t lbn, lbnoff, xferoffset;
1754 	uint_t pgoff, xfersize;
1755 	int err = 0;
1756 	u_offset_t io_off;
1757 
1758 	pcp = VTOPC(vp);
1759 	fsp = VFSTOPCFS(vp->v_vfsp);
1760 	devvp = fsp->pcfs_devvp;
1761 
1762 	/*
1763 	 * If the modified time on the inode has not already been
1764 	 * set elsewhere (e.g. for write/setattr) and this is not
1765 	 * a call from msync (B_FORCE) we set the time now.
1766 	 * This gives us approximate modified times for mmap'ed files
1767 	 * which are modified via stores in the user address space.
1768 	 */
1769 	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1770 		pcp->pc_flags |= PC_MOD;
1771 		pc_mark_mod(fsp, pcp);
1772 	}
1773 	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1774 	    PAGESIZE, flags);
1775 
1776 	if (fsp->pcfs_flags & PCFS_IRRECOV) {
1777 		goto out;
1778 	}
1779 
1780 	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1781 
1782 	lbn = pc_lblkno(fsp, io_off);
1783 	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1784 	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1785 
1786 	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1787 	    pgoff += xfersize,
1788 	    lbn += howmany(xfersize, fsp->pcfs_clsize),
1789 	    lbnoff += xfersize, xferoffset += xfersize) {
1790 
1791 		struct buf *bp;
1792 		int err1;
1793 
1794 		/*
1795 		 * write as many contiguous blocks as possible from this page
1796 		 */
1797 		xfersize = io_len - pgoff;
1798 		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1799 		if (err1) {
1800 			err = err1;
1801 			goto out;
1802 		}
1803 		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1804 		bp->b_edev = devvp->v_rdev;
1805 		bp->b_dev = cmpdev(devvp->v_rdev);
1806 		bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1807 		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1808 		bp->b_file = vp;
1809 		bp->b_offset = (offset_t)(io_off + pgoff);
1810 
1811 		(void) bdev_strategy(bp);
1812 
1813 		lwp_stat_update(LWP_STAT_OUBLK, 1);
1814 
1815 		if (err == 0)
1816 			err = biowait(bp);
1817 		else
1818 			(void) biowait(bp);
1819 		pageio_done(bp);
1820 	}
1821 	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1822 	pp = NULL;
1823 
1824 out:
1825 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1826 		pvn_write_done(pp, B_WRITE | flags);
1827 	} else if (err != 0 && pp != NULL) {
1828 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1829 	}
1830 
1831 	if (offp)
1832 		*offp = io_off;
1833 	if (lenp)
1834 		*lenp = io_len;
1835 		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1836 		    (void *)vp, (void *)pp, io_off, io_len);
1837 	if (err) {
1838 		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1839 	}
1840 	return (err);
1841 }
1842 
1843 /*ARGSUSED*/
1844 static int
1845 pcfs_map(
1846 	struct vnode *vp,
1847 	offset_t off,
1848 	struct as *as,
1849 	caddr_t *addrp,
1850 	size_t len,
1851 	uchar_t prot,
1852 	uchar_t maxprot,
1853 	uint_t flags,
1854 	struct cred *cr,
1855 	caller_context_t *ct)
1856 {
1857 	struct segvn_crargs vn_a;
1858 	int error;
1859 
1860 	PC_DPRINTF0(6, "pcfs_map\n");
1861 	if (vp->v_flag & VNOMAP)
1862 		return (ENOSYS);
1863 
1864 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1865 		return (ENXIO);
1866 
1867 	as_rangelock(as);
1868 	if ((flags & MAP_FIXED) == 0) {
1869 		map_addr(addrp, len, off, 1, flags);
1870 		if (*addrp == NULL) {
1871 			as_rangeunlock(as);
1872 			return (ENOMEM);
1873 		}
1874 	} else {
1875 		/*
1876 		 * User specified address - blow away any previous mappings
1877 		 */
1878 		(void) as_unmap(as, *addrp, len);
1879 	}
1880 
1881 	vn_a.vp = vp;
1882 	vn_a.offset = off;
1883 	vn_a.type = flags & MAP_TYPE;
1884 	vn_a.prot = prot;
1885 	vn_a.maxprot = maxprot;
1886 	vn_a.flags = flags & ~MAP_TYPE;
1887 	vn_a.cred = cr;
1888 	vn_a.amp = NULL;
1889 	vn_a.szc = 0;
1890 	vn_a.lgrp_mem_policy_flags = 0;
1891 
1892 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1893 	as_rangeunlock(as);
1894 	return (error);
1895 }
1896 
1897 /* ARGSUSED */
1898 static int
1899 pcfs_seek(
1900 	struct vnode *vp,
1901 	offset_t ooff,
1902 	offset_t *noffp,
1903 	caller_context_t *ct)
1904 {
1905 	if (*noffp < 0)
1906 		return (EINVAL);
1907 	else if (*noffp > MAXOFFSET_T)
1908 		return (EINVAL);
1909 	else
1910 		return (0);
1911 }
1912 
1913 /* ARGSUSED */
1914 static int
1915 pcfs_addmap(
1916 	struct vnode *vp,
1917 	offset_t off,
1918 	struct as *as,
1919 	caddr_t addr,
1920 	size_t len,
1921 	uchar_t prot,
1922 	uchar_t maxprot,
1923 	uint_t flags,
1924 	struct cred *cr,
1925 	caller_context_t *ct)
1926 {
1927 	if (vp->v_flag & VNOMAP)
1928 		return (ENOSYS);
1929 	return (0);
1930 }
1931 
1932 /*ARGSUSED*/
1933 static int
1934 pcfs_delmap(
1935 	struct vnode *vp,
1936 	offset_t off,
1937 	struct as *as,
1938 	caddr_t addr,
1939 	size_t len,
1940 	uint_t prot,
1941 	uint_t maxprot,
1942 	uint_t flags,
1943 	struct cred *cr,
1944 	caller_context_t *ct)
1945 {
1946 	if (vp->v_flag & VNOMAP)
1947 		return (ENOSYS);
1948 	return (0);
1949 }
1950 
1951 /*
1952  * POSIX pathconf() support.
1953  */
1954 /* ARGSUSED */
1955 static int
1956 pcfs_pathconf(
1957 	struct vnode *vp,
1958 	int cmd,
1959 	ulong_t *valp,
1960 	struct cred *cr,
1961 	caller_context_t *ct)
1962 {
1963 	ulong_t val;
1964 	int error = 0;
1965 	struct statvfs64 vfsbuf;
1966 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1967 
1968 	switch (cmd) {
1969 
1970 	case _PC_LINK_MAX:
1971 		val = 1;
1972 		break;
1973 
1974 	case _PC_MAX_CANON:
1975 		val = MAX_CANON;
1976 		break;
1977 
1978 	case _PC_MAX_INPUT:
1979 		val = MAX_INPUT;
1980 		break;
1981 
1982 	case _PC_NAME_MAX:
1983 		bzero(&vfsbuf, sizeof (vfsbuf));
1984 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
1985 			break;
1986 		val = vfsbuf.f_namemax;
1987 		break;
1988 
1989 	case _PC_PATH_MAX:
1990 	case _PC_SYMLINK_MAX:
1991 		val = PCMAXPATHLEN;
1992 		break;
1993 
1994 	case _PC_PIPE_BUF:
1995 		val = PIPE_BUF;
1996 		break;
1997 
1998 	case _PC_NO_TRUNC:
1999 		val = (ulong_t)-1; 	/* Will truncate long file name */
2000 		break;
2001 
2002 	case _PC_VDISABLE:
2003 		val = _POSIX_VDISABLE;
2004 		break;
2005 
2006 	case _PC_CHOWN_RESTRICTED:
2007 		if (rstchown)
2008 			val = rstchown;		/* chown restricted enabled */
2009 		else
2010 			val = (ulong_t)-1;
2011 		break;
2012 
2013 	case _PC_ACL_ENABLED:
2014 		val = 0;
2015 		break;
2016 
2017 	case _PC_FILESIZEBITS:
2018 		/*
2019 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
2020 		 * FAT12 can only go up to the maximum filesystem capacity
2021 		 * which is ~509MB.
2022 		 */
2023 		val = IS_FAT12(fsp) ? 30 : 33;
2024 		break;
2025 	default:
2026 		error = EINVAL;
2027 		break;
2028 	}
2029 
2030 	if (error == 0)
2031 		*valp = val;
2032 	return (error);
2033 }
2034 
2035 /* ARGSUSED */
2036 static int
2037 pcfs_space(
2038 	struct vnode *vp,
2039 	int cmd,
2040 	struct flock64 *bfp,
2041 	int flag,
2042 	offset_t offset,
2043 	cred_t *cr,
2044 	caller_context_t *ct)
2045 {
2046 	struct vattr vattr;
2047 	int error;
2048 
2049 	if (cmd != F_FREESP)
2050 		return (EINVAL);
2051 
2052 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2053 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2054 			return (EFBIG);
2055 		/*
2056 		 * we only support the special case of l_len == 0,
2057 		 * meaning free to end of file at this moment.
2058 		 */
2059 		if (bfp->l_len != 0)
2060 			return (EINVAL);
2061 		vattr.va_mask = AT_SIZE;
2062 		vattr.va_size = bfp->l_start;
2063 		error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2064 	}
2065 	return (error);
2066 }
2067 
2068 /*
2069  * Break up 'len' chars from 'buf' into a long file name chunk.
2070  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2071  */
2072 void
2073 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2074 {
2075 	int	i;
2076 
2077 	ASSERT(buf != NULL);
2078 
2079 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2080 		if (len > 0) {
2081 			ep->pcdl_firstfilename[i] = *buf++;
2082 			ep->pcdl_firstfilename[i + 1] = *buf++;
2083 			len -= 2;
2084 		} else {
2085 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2086 			ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2087 		}
2088 	}
2089 
2090 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2091 		if (len > 0) {
2092 			ep->pcdl_secondfilename[i] = *buf++;
2093 			ep->pcdl_secondfilename[i + 1] = *buf++;
2094 			len -= 2;
2095 		} else {
2096 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2097 			ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2098 		}
2099 	}
2100 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2101 		if (len > 0) {
2102 			ep->pcdl_thirdfilename[i] = *buf++;
2103 			ep->pcdl_thirdfilename[i + 1] = *buf++;
2104 			len -= 2;
2105 		} else {
2106 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2107 			ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2108 		}
2109 	}
2110 }
2111 
2112 /*
2113  * Extract the characters from the long filename chunk into 'buf'.
2114  * Return the number of characters extracted.
2115  */
2116 static int
2117 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase)
2118 {
2119 	char 	*tmp = buf;
2120 	int	i;
2121 
2122 	/* Copy all the names, no filtering now */
2123 
2124 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2125 		*tmp = ep->pcdl_firstfilename[i];
2126 		*(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2127 
2128 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2129 			return (tmp - buf);
2130 		if (*(tmp + 1) == '\0' && foldcase) {
2131 			*tmp = toupper(*tmp);
2132 		}
2133 	}
2134 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2135 		*tmp = ep->pcdl_secondfilename[i];
2136 		*(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2137 
2138 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2139 			return (tmp - buf);
2140 		if (*(tmp + 1) == '\0' && foldcase) {
2141 			*tmp = toupper(*tmp);
2142 		}
2143 	}
2144 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2145 		*tmp = ep->pcdl_thirdfilename[i];
2146 		*(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2147 
2148 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2149 			return (tmp - buf);
2150 		if (*(tmp + 1) == '\0' && foldcase) {
2151 			*tmp = toupper(*tmp);
2152 		}
2153 	}
2154 	return (tmp - buf);
2155 }
2156 
2157 
2158 /*
2159  * Checksum the passed in short filename.
2160  * This is used to validate each component of the long name to make
2161  * sure the long name is valid (it hasn't been "detached" from the
2162  * short filename). This algorithm was found in FreeBSD.
2163  * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2164  */
2165 
2166 uchar_t
2167 pc_checksum_long_fn(char *name, char *ext)
2168 {
2169 	uchar_t c;
2170 	char	b[11];
2171 
2172 	bcopy(name, b, 8);
2173 	bcopy(ext, b+8, 3);
2174 
2175 	c = b[0];
2176 	c = ((c << 7) | (c >> 1)) + b[1];
2177 	c = ((c << 7) | (c >> 1)) + b[2];
2178 	c = ((c << 7) | (c >> 1)) + b[3];
2179 	c = ((c << 7) | (c >> 1)) + b[4];
2180 	c = ((c << 7) | (c >> 1)) + b[5];
2181 	c = ((c << 7) | (c >> 1)) + b[6];
2182 	c = ((c << 7) | (c >> 1)) + b[7];
2183 	c = ((c << 7) | (c >> 1)) + b[8];
2184 	c = ((c << 7) | (c >> 1)) + b[9];
2185 	c = ((c << 7) | (c >> 1)) + b[10];
2186 
2187 	return (c);
2188 }
2189 
2190 /*
2191  * Read a chunk of long filename entries into 'namep'.
2192  * Return with offset pointing to short entry (on success), or next
2193  * entry to read (if this wasn't a valid lfn really).
2194  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2195  * a long filename.
2196  *
2197  * Can also be called with a NULL namep, in which case it just returns
2198  * whether this was really a valid long filename and consumes it
2199  * (used by pc_dirempty()).
2200  */
2201 int
2202 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2203     struct pcdir **epp, offset_t *offset, struct buf **bp)
2204 {
2205 	struct pcdir *ep = *epp;
2206 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2207 	struct vnode *dvp = PCTOV(pcp);
2208 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2209 	char	*lfn;
2210 	char	*lfn_base;
2211 	int	boff;
2212 	int	i, cs;
2213 	char	*buf;
2214 	uchar_t	cksum;
2215 	int	detached = 0;
2216 	int	error = 0;
2217 	int	foldcase;
2218 	int	count = 0;
2219 	size_t	u16l = 0, u8l = 0;
2220 
2221 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2222 	lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2223 	lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2224 	*lfn = '\0';
2225 	*(lfn + 1) = '\0';
2226 	cksum = lep->pcdl_checksum;
2227 
2228 	buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2229 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2230 		/* read next block if necessary */
2231 		boff = pc_blkoff(fsp, *offset);
2232 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2233 			if (*bp != NULL) {
2234 				brelse(*bp);
2235 				*bp = NULL;
2236 			}
2237 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2238 			if (error) {
2239 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2240 				kmem_free(buf, PCMAXNAM_UTF16);
2241 				return (error);
2242 			}
2243 			lep = (struct pcdir_lfn *)ep;
2244 		}
2245 		/* can this happen? Bad fs? */
2246 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2247 			detached = 1;
2248 			break;
2249 		}
2250 		if (cksum != lep->pcdl_checksum)
2251 			detached = 1;
2252 		/* process current entry */
2253 		cs = get_long_fn_chunk(lep, buf, foldcase);
2254 		count += cs;
2255 		for (; cs > 0; cs--) {
2256 			/* see if we underflow */
2257 			if (lfn >= lfn_base)
2258 				*--lfn = buf[cs - 1];
2259 			else
2260 				detached = 1;
2261 		}
2262 		lep++;
2263 		*offset += sizeof (struct pcdir);
2264 	}
2265 	kmem_free(buf, PCMAXNAM_UTF16);
2266 	/* read next block if necessary */
2267 	boff = pc_blkoff(fsp, *offset);
2268 	ep = (struct pcdir *)lep;
2269 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2270 		if (*bp != NULL) {
2271 			brelse(*bp);
2272 			*bp = NULL;
2273 		}
2274 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2275 		if (error) {
2276 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2277 			return (error);
2278 		}
2279 	}
2280 	/* should be on the short one */
2281 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2282 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2283 		detached = 1;
2284 	}
2285 	if (detached ||
2286 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2287 	    !pc_valid_long_fn(lfn, 0)) {
2288 		/*
2289 		 * process current entry again. This may end up another lfn
2290 		 * or a short name.
2291 		 */
2292 		*epp = ep;
2293 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2294 		return (EINVAL);
2295 	}
2296 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2297 		/*
2298 		 * Don't display label because it may contain
2299 		 * funny characters.
2300 		 */
2301 		*offset += sizeof (struct pcdir);
2302 		ep++;
2303 		*epp = ep;
2304 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2305 		return (EINVAL);
2306 	}
2307 	if (namep) {
2308 		u16l = count / 2;
2309 		u8l = PCMAXNAMLEN;
2310 		error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2311 		    (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2312 		/*
2313 		 * uconv_u16tou8() will catch conversion errors including
2314 		 * the case where there is not enough room to write the
2315 		 * converted result and the u8l will never go over the given
2316 		 * PCMAXNAMLEN.
2317 		 */
2318 		if (error != 0) {
2319 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2320 			return (EINVAL);
2321 		}
2322 		namep[u8l] = '\0';
2323 	}
2324 	kmem_free(lfn_base, PCMAXNAM_UTF16);
2325 	*epp = ep;
2326 	return (0);
2327 }
2328 /*
2329  * Read a long filename into the pc_dirent structure and copy it out.
2330  */
2331 int
2332 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2333     struct pcdir **epp, offset_t *offset, struct buf **bp)
2334 {
2335 	struct pcdir *ep;
2336 	struct pcnode *pcp = VTOPC(dvp);
2337 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2338 	offset_t uiooffset = uiop->uio_loffset;
2339 	int	error = 0;
2340 	offset_t oldoffset;
2341 
2342 	oldoffset = *offset;
2343 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2344 	if (error) {
2345 		if (error == EINVAL) {
2346 			uiop->uio_loffset += *offset - oldoffset;
2347 			return (0);
2348 		} else
2349 			return (error);
2350 	}
2351 
2352 	ep = *epp;
2353 	uiop->uio_loffset += *offset - oldoffset;
2354 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2355 	if (ld->d_reclen > uiop->uio_resid) {
2356 		uiop->uio_loffset = uiooffset;
2357 		return (ENOSPC);
2358 	}
2359 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2360 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2361 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2362 	    pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2363 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2364 	uiop->uio_loffset = ld->d_off;
2365 	*offset += sizeof (struct pcdir);
2366 	ep++;
2367 	*epp = ep;
2368 	return (0);
2369 }
2370 
2371 /*
2372  * Read a short filename into the pc_dirent structure and copy it out.
2373  */
2374 int
2375 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2376     struct pcdir **epp, offset_t *offset, struct buf **bp)
2377 {
2378 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2379 	int	boff = pc_blkoff(fsp, *offset);
2380 	struct pcdir *ep = *epp;
2381 	offset_t	oldoffset = uiop->uio_loffset;
2382 	int	error;
2383 	int	foldcase;
2384 
2385 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2386 		uiop->uio_loffset += sizeof (struct pcdir);
2387 		*offset += sizeof (struct pcdir);
2388 		ep++;
2389 		*epp = ep;
2390 		return (0);
2391 	}
2392 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2393 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2394 	    pc_direntpersec(fsp));
2395 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2396 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2397 	    &ep->pcd_ext[0], foldcase);
2398 	if (error == 0) {
2399 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2400 		if (ld->d_reclen > uiop->uio_resid) {
2401 			uiop->uio_loffset = oldoffset;
2402 			return (ENOSPC);
2403 		}
2404 		ld->d_off = (off64_t)(uiop->uio_loffset +
2405 		    sizeof (struct pcdir));
2406 		(void) uiomove((caddr_t)ld,
2407 		    ld->d_reclen, UIO_READ, uiop);
2408 		uiop->uio_loffset = ld->d_off;
2409 	} else {
2410 		uiop->uio_loffset += sizeof (struct pcdir);
2411 	}
2412 	*offset += sizeof (struct pcdir);
2413 	ep++;
2414 	*epp = ep;
2415 	return (0);
2416 }
2417 
2418 /* ARGSUSED */
2419 static int
2420 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2421 {
2422 	struct pc_fid *pcfid;
2423 	struct pcnode *pcp;
2424 	struct pcfs	*fsp;
2425 	int	error;
2426 
2427 	fsp = VFSTOPCFS(vp->v_vfsp);
2428 	if (fsp == NULL)
2429 		return (EIO);
2430 	error = pc_lockfs(fsp, 0, 0);
2431 	if (error)
2432 		return (error);
2433 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2434 		pc_unlockfs(fsp);
2435 		return (EIO);
2436 	}
2437 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2438 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2439 		pc_unlockfs(fsp);
2440 		return (ENOSPC);
2441 	}
2442 
2443 	pcfid = (struct pc_fid *)fidp;
2444 	bzero(pcfid, sizeof (struct pc_fid));
2445 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2446 	if (vp->v_flag & VROOT) {
2447 		pcfid->pcfid_block = 0;
2448 		pcfid->pcfid_offset = 0;
2449 		pcfid->pcfid_ctime = 0;
2450 	} else {
2451 		pcfid->pcfid_block = pcp->pc_eblkno;
2452 		pcfid->pcfid_offset = pcp->pc_eoffset;
2453 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2454 	}
2455 	pc_unlockfs(fsp);
2456 	return (0);
2457 }
2458