xref: /titanic_52/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 9525b14bcdeb5b5f6f95ab27c2f48f18bd2ec829)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/param.h>
28 #include <sys/t_lock.h>
29 #include <sys/systm.h>
30 #include <sys/sysmacros.h>
31 #include <sys/user.h>
32 #include <sys/buf.h>
33 #include <sys/stat.h>
34 #include <sys/vfs.h>
35 #include <sys/vfs_opreg.h>
36 #include <sys/dirent.h>
37 #include <sys/vnode.h>
38 #include <sys/proc.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/uio.h>
42 #include <sys/fs/pc_label.h>
43 #include <sys/fs/pc_fs.h>
44 #include <sys/fs/pc_dir.h>
45 #include <sys/fs/pc_node.h>
46 #include <sys/mman.h>
47 #include <sys/pathname.h>
48 #include <sys/vmsystm.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/statvfs.h>
52 #include <sys/unistd.h>
53 #include <sys/kmem.h>
54 #include <sys/conf.h>
55 #include <sys/flock.h>
56 #include <sys/policy.h>
57 #include <sys/sdt.h>
58 #include <sys/sunddi.h>
59 #include <sys/types.h>
60 #include <sys/errno.h>
61 
62 #include <vm/seg.h>
63 #include <vm/page.h>
64 #include <vm/pvn.h>
65 #include <vm/seg_map.h>
66 #include <vm/seg_vn.h>
67 #include <vm/hat.h>
68 #include <vm/as.h>
69 #include <vm/seg_kmem.h>
70 
71 #include <fs/fs_subr.h>
72 
73 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
74 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
75 	caller_context_t *ct);
76 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
77 	caller_context_t *);
78 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
79 	caller_context_t *);
80 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
81 	caller_context_t *ct);
82 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
83 	caller_context_t *);
84 static int pcfs_access(struct vnode *, int, int, struct cred *,
85 	caller_context_t *ct);
86 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
87 	struct pathname *, int, struct vnode *, struct cred *,
88 	caller_context_t *, int *, pathname_t *);
89 static int pcfs_create(struct vnode *, char *, struct vattr *,
90 	enum vcexcl, int mode, struct vnode **, struct cred *, int,
91 	caller_context_t *, vsecattr_t *);
92 static int pcfs_remove(struct vnode *, char *, struct cred *,
93 	caller_context_t *, int);
94 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
95 	struct cred *, caller_context_t *, int);
96 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
97 	struct cred *, caller_context_t *, int, vsecattr_t *);
98 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
99 	caller_context_t *, int);
100 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
101 	caller_context_t *, int);
102 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
103 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
104 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
105 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
106 	offset_t, cred_t *, caller_context_t *);
107 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
108 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
109 	caller_context_t *);
110 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
111 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
112 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
113 	caller_context_t *);
114 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
115 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
116 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
117 	size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
118 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
119 	size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
120 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
121 	caller_context_t *);
122 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
123 	caller_context_t *);
124 
125 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
126 	struct cred *);
127 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
128 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf);
129 
/*
 * Reader/writer lock defined outside this file.  In this file it is taken
 * as writer around pc_nodesync() in pcfs_fsync().
 */
extern krwlock_t pcnodes_lock;
131 
/*
 * Round r up to the next multiple of sizeof (long long).  The mask trick
 * relies on sizeof (long long) being a power of two.
 */
#define	lround(r)	\
	(((r) + sizeof (long long) - 1) & ~(sizeof (long long) - 1))
133 
/*
 * vnode op vectors for files and directories.
 *
 * Only the pointers are defined here; nothing in this part of the file
 * assigns them.  NOTE(review): presumably they are filled in from the
 * pcfs_fvnodeops_template / pcfs_dvnodeops_template tables below at
 * filesystem initialization time -- confirm against the init code.
 */
struct vnodeops *pcfs_fvnodeops;
struct vnodeops *pcfs_dvnodeops;
139 
140 const fs_operation_def_t pcfs_fvnodeops_template[] = {
141 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
142 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
143 	VOPNAME_READ,		{ .vop_read = pcfs_read },
144 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
145 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
146 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
147 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
148 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
149 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
150 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
151 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
152 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
153 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
154 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
155 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
156 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
157 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
158 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
159 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
160 	NULL,			NULL
161 };
162 
163 const fs_operation_def_t pcfs_dvnodeops_template[] = {
164 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
165 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
166 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
167 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
168 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
169 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
170 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
171 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
172 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
173 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
174 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
175 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
176 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
177 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
178 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
179 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
180 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
181 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
182 	NULL,			NULL
183 };
184 
185 
186 /*ARGSUSED*/
187 static int
188 pcfs_open(
189 	struct vnode **vpp,
190 	int flag,
191 	struct cred *cr,
192 	caller_context_t *ct)
193 {
194 	return (0);
195 }
196 
197 /*
198  * files are sync'ed on close to keep floppy up to date
199  */
200 
201 /*ARGSUSED*/
202 static int
203 pcfs_close(
204 	struct vnode *vp,
205 	int flag,
206 	int count,
207 	offset_t offset,
208 	struct cred *cr,
209 	caller_context_t *ct)
210 {
211 	return (0);
212 }
213 
214 /*ARGSUSED*/
215 static int
216 pcfs_read(
217 	struct vnode *vp,
218 	struct uio *uiop,
219 	int ioflag,
220 	struct cred *cr,
221 	struct caller_context *ct)
222 {
223 	struct pcfs *fsp;
224 	struct pcnode *pcp;
225 	int error;
226 
227 	fsp = VFSTOPCFS(vp->v_vfsp);
228 	if (error = pc_verify(fsp))
229 		return (error);
230 	error = pc_lockfs(fsp, 0, 0);
231 	if (error)
232 		return (error);
233 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
234 		pc_unlockfs(fsp);
235 		return (EIO);
236 	}
237 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
238 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
239 		pc_mark_acc(fsp, pcp);
240 	}
241 	pc_unlockfs(fsp);
242 	if (error) {
243 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
244 	}
245 	return (error);
246 }
247 
248 /*ARGSUSED*/
249 static int
250 pcfs_write(
251 	struct vnode *vp,
252 	struct uio *uiop,
253 	int ioflag,
254 	struct cred *cr,
255 	struct caller_context *ct)
256 {
257 	struct pcfs *fsp;
258 	struct pcnode *pcp;
259 	int error;
260 
261 	fsp = VFSTOPCFS(vp->v_vfsp);
262 	if (error = pc_verify(fsp))
263 		return (error);
264 	error = pc_lockfs(fsp, 0, 0);
265 	if (error)
266 		return (error);
267 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
268 		pc_unlockfs(fsp);
269 		return (EIO);
270 	}
271 	if (ioflag & FAPPEND) {
272 		/*
273 		 * in append mode start at end of file.
274 		 */
275 		uiop->uio_loffset = pcp->pc_size;
276 	}
277 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
278 	pcp->pc_flags |= PC_MOD;
279 	pc_mark_mod(fsp, pcp);
280 	if (ioflag & (FSYNC|FDSYNC))
281 		(void) pc_nodeupdate(pcp);
282 
283 	pc_unlockfs(fsp);
284 	if (error) {
285 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
286 	}
287 	return (error);
288 }
289 
290 /*
291  * read or write a vnode
292  */
293 static int
294 rwpcp(
295 	struct pcnode *pcp,
296 	struct uio *uio,
297 	enum uio_rw rw,
298 	int ioflag)
299 {
300 	struct vnode *vp = PCTOV(pcp);
301 	struct pcfs *fsp;
302 	daddr_t bn;			/* phys block number */
303 	int n;
304 	offset_t off;
305 	caddr_t base;
306 	int mapon, pagecreate;
307 	int newpage;
308 	int error = 0;
309 	rlim64_t limit = uio->uio_llimit;
310 	int oresid = uio->uio_resid;
311 
312 	/*
313 	 * If the filesystem was umounted by force, return immediately.
314 	 */
315 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
316 		return (EIO);
317 
318 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
319 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
320 
321 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
322 	ASSERT(vp->v_type == VREG);
323 
324 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
325 		return (0);
326 	}
327 
328 	if (uio->uio_loffset < 0)
329 		return (EINVAL);
330 
331 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
332 		limit = MAXOFFSET_T;
333 
334 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
335 		proc_t *p = ttoproc(curthread);
336 
337 		mutex_enter(&p->p_lock);
338 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
339 		    p, RCA_UNSAFE_SIGINFO);
340 		mutex_exit(&p->p_lock);
341 		return (EFBIG);
342 	}
343 
344 	/* the following condition will occur only for write */
345 
346 	if (uio->uio_loffset >= UINT32_MAX)
347 		return (EFBIG);
348 
349 	if (uio->uio_resid == 0)
350 		return (0);
351 
352 	if (limit > UINT32_MAX)
353 		limit = UINT32_MAX;
354 
355 	fsp = VFSTOPCFS(vp->v_vfsp);
356 	if (fsp->pcfs_flags & PCFS_IRRECOV)
357 		return (EIO);
358 
359 	do {
360 		/*
361 		 * Assignments to "n" in this block may appear
362 		 * to overflow in some cases.  However, after careful
363 		 * analysis it was determined that all assignments to
364 		 * "n" serve only to make "n" smaller.  Since "n"
365 		 * starts out as no larger than MAXBSIZE, "int" is
366 		 * safe.
367 		 */
368 		off = uio->uio_loffset & MAXBMASK;
369 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
370 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
371 		if (rw == UIO_READ) {
372 			offset_t diff;
373 
374 			diff = pcp->pc_size - uio->uio_loffset;
375 			if (diff <= 0)
376 				return (0);
377 			if (diff < n)
378 				n = (int)diff;
379 		}
380 		/*
381 		 * Compare limit with the actual offset + n, not the
382 		 * rounded down offset "off" or we will overflow
383 		 * the maximum file size after all.
384 		 */
385 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
386 			if (uio->uio_loffset >= limit) {
387 				error = EFBIG;
388 				break;
389 			}
390 			n = (int)(limit - uio->uio_loffset);
391 		}
392 
393 		/*
394 		 * Touch the page and fault it in if it is not in
395 		 * core before segmap_getmapflt can lock it. This
396 		 * is to avoid the deadlock if the buffer is mapped
397 		 * to the same file through mmap which we want to
398 		 * write to.
399 		 */
400 		uio_prefaultpages((long)n, uio);
401 
402 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
403 		pagecreate = 0;
404 		newpage = 0;
405 		if (rw == UIO_WRITE) {
406 			/*
407 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
408 			 * with one page at a time, instead of one MAXBSIZE
409 			 * at a time, so we can fully explore pagecreate
410 			 * optimization??
411 			 */
412 			if (uio->uio_loffset + n > pcp->pc_size) {
413 				uint_t ncl, lcn;
414 
415 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
416 				    fsp->pcfs_clsize);
417 				if (uio->uio_loffset > pcp->pc_size &&
418 				    ncl < (uint_t)howmany(uio->uio_loffset,
419 				    fsp->pcfs_clsize)) {
420 					/*
421 					 * Allocate and zerofill skipped
422 					 * clusters. This may not be worth the
423 					 * effort since a small lseek beyond
424 					 * eof but still within the cluster
425 					 * will not be zeroed out.
426 					 */
427 					lcn = pc_lblkno(fsp, uio->uio_loffset);
428 					error = pc_balloc(pcp, (daddr_t)lcn,
429 					    1, &bn);
430 					ncl = lcn + 1;
431 				}
432 				if (!error &&
433 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
434 				    fsp->pcfs_clsize))
435 					/*
436 					 * allocate clusters w/o zerofill
437 					 */
438 					error = pc_balloc(pcp,
439 					    (daddr_t)pc_lblkno(fsp,
440 					    uio->uio_loffset + n - 1),
441 					    0, &bn);
442 
443 				pcp->pc_flags |= PC_CHG;
444 
445 				if (error) {
446 					pc_cluster32_t ncl;
447 					int nerror;
448 
449 					/*
450 					 * figure out new file size from
451 					 * cluster chain length. If this
452 					 * is detected to loop, the chain
453 					 * is corrupted and we'd better
454 					 * keep our fingers off that file.
455 					 */
456 					nerror = pc_fileclsize(fsp,
457 					    pcp->pc_scluster, &ncl);
458 					if (nerror) {
459 						PC_DPRINTF1(2,
460 						    "cluster chain "
461 						    "corruption, "
462 						    "scluster=%d\n",
463 						    pcp->pc_scluster);
464 						pcp->pc_size = 0;
465 						pcp->pc_flags |= PC_INVAL;
466 						error = nerror;
467 						(void) segmap_release(segkmap,
468 						    base, 0);
469 						break;
470 					}
471 					pcp->pc_size = fsp->pcfs_clsize * ncl;
472 
473 					if (error == ENOSPC &&
474 					    (pcp->pc_size - uio->uio_loffset)
475 					    > 0) {
476 						PC_DPRINTF3(2, "rwpcp ENOSPC "
477 						    "off=%lld n=%d size=%d\n",
478 						    uio->uio_loffset,
479 						    n, pcp->pc_size);
480 						n = (int)(pcp->pc_size -
481 						    uio->uio_loffset);
482 					} else {
483 						PC_DPRINTF1(1,
484 						    "rwpcp error1=%d\n", error);
485 						(void) segmap_release(segkmap,
486 						    base, 0);
487 						break;
488 					}
489 				} else {
490 					pcp->pc_size =
491 					    (uint_t)(uio->uio_loffset + n);
492 				}
493 				if (mapon == 0) {
494 					newpage = segmap_pagecreate(segkmap,
495 					    base, (size_t)n, 0);
496 					pagecreate = 1;
497 				}
498 			} else if (n == MAXBSIZE) {
499 				newpage = segmap_pagecreate(segkmap, base,
500 				    (size_t)n, 0);
501 				pagecreate = 1;
502 			}
503 		}
504 		error = uiomove(base + mapon, (size_t)n, rw, uio);
505 
506 		if (pagecreate && uio->uio_loffset <
507 		    roundup(off + mapon + n, PAGESIZE)) {
508 			offset_t nzero, nmoved;
509 
510 			nmoved = uio->uio_loffset - (off + mapon);
511 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
512 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
513 		}
514 
515 		/*
516 		 * Unlock the pages which have been allocated by
517 		 * page_create_va() in segmap_pagecreate().
518 		 */
519 		if (newpage) {
520 			segmap_pageunlock(segkmap, base, (size_t)n,
521 			    rw == UIO_WRITE ? S_WRITE : S_READ);
522 		}
523 
524 		if (error) {
525 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
526 			/*
527 			 * If we failed on a write, we may have already
528 			 * allocated file blocks as well as pages.  It's hard
529 			 * to undo the block allocation, but we must be sure
530 			 * to invalidate any pages that may have been
531 			 * allocated.
532 			 */
533 			if (rw == UIO_WRITE)
534 				(void) segmap_release(segkmap, base, SM_INVAL);
535 			else
536 				(void) segmap_release(segkmap, base, 0);
537 		} else {
538 			uint_t flags = 0;
539 
540 			if (rw == UIO_READ) {
541 				if (n + mapon == MAXBSIZE ||
542 				    uio->uio_loffset == pcp->pc_size)
543 					flags = SM_DONTNEED;
544 			} else if (ioflag & (FSYNC|FDSYNC)) {
545 				flags = SM_WRITE;
546 			} else if (n + mapon == MAXBSIZE) {
547 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
548 			}
549 			error = segmap_release(segkmap, base, flags);
550 		}
551 
552 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
553 
554 	if (oresid != uio->uio_resid)
555 		error = 0;
556 	return (error);
557 }
558 
559 /*ARGSUSED*/
560 static int
561 pcfs_getattr(
562 	struct vnode *vp,
563 	struct vattr *vap,
564 	int flags,
565 	struct cred *cr,
566 	caller_context_t *ct)
567 {
568 	struct pcnode *pcp;
569 	struct pcfs *fsp;
570 	int error;
571 	char attr;
572 	struct pctime atime;
573 	int64_t unixtime;
574 
575 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
576 
577 	fsp = VFSTOPCFS(vp->v_vfsp);
578 	error = pc_lockfs(fsp, 0, 0);
579 	if (error)
580 		return (error);
581 
582 	/*
583 	 * Note that we don't check for "invalid node" (PC_INVAL) here
584 	 * only in order to make stat() succeed. We allow no I/O on such
585 	 * a node, but do allow to check for its existence.
586 	 */
587 	if ((pcp = VTOPC(vp)) == NULL) {
588 		pc_unlockfs(fsp);
589 		return (EIO);
590 	}
591 	/*
592 	 * Copy from pcnode.
593 	 */
594 	vap->va_type = vp->v_type;
595 	attr = pcp->pc_entry.pcd_attr;
596 	if (PCA_IS_HIDDEN(fsp, attr))
597 		vap->va_mode = 0;
598 	else if (attr & PCA_LABEL)
599 		vap->va_mode = 0444;
600 	else if (attr & PCA_RDONLY)
601 		vap->va_mode = 0555;
602 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
603 		vap->va_mode = 0755;
604 	} else {
605 		vap->va_mode = 0777;
606 	}
607 
608 	if (attr & PCA_DIR)
609 		vap->va_mode |= S_IFDIR;
610 	else
611 		vap->va_mode |= S_IFREG;
612 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
613 		vap->va_uid = 0;
614 		vap->va_gid = 0;
615 	} else {
616 		vap->va_uid = crgetuid(cr);
617 		vap->va_gid = crgetgid(cr);
618 	}
619 	vap->va_fsid = vp->v_vfsp->vfs_dev;
620 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
621 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
622 	    pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
623 	vap->va_nlink = 1;
624 	vap->va_size = (u_offset_t)pcp->pc_size;
625 	vap->va_rdev = 0;
626 	vap->va_nblocks =
627 	    (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
628 	vap->va_blksize = fsp->pcfs_clsize;
629 
630 	/*
631 	 * FAT root directories have no timestamps. In order not to return
632 	 * "time zero" (1/1/1970), we record the time of the mount and give
633 	 * that. This breaks less expectations.
634 	 */
635 	if (vp->v_flag & VROOT) {
636 		vap->va_mtime = fsp->pcfs_mounttime;
637 		vap->va_atime = fsp->pcfs_mounttime;
638 		vap->va_ctime = fsp->pcfs_mounttime;
639 		pc_unlockfs(fsp);
640 		return (0);
641 	}
642 
643 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
644 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
645 		if (unixtime > INT32_MAX)
646 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
647 		unixtime = MIN(unixtime, INT32_MAX);
648 	} else if (unixtime > INT32_MAX &&
649 	    get_udatamodel() == DATAMODEL_ILP32) {
650 		pc_unlockfs(fsp);
651 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
652 		return (EOVERFLOW);
653 	}
654 
655 	vap->va_mtime.tv_sec = (time_t)unixtime;
656 	vap->va_mtime.tv_nsec = 0;
657 
658 	/*
659 	 * FAT doesn't know about POSIX ctime.
660 	 * Best approximation is to always set it to mtime.
661 	 */
662 	vap->va_ctime = vap->va_mtime;
663 
664 	/*
665 	 * FAT only stores "last access date". If that's the
666 	 * same as the date of last modification then the time
667 	 * of last access is known. Otherwise, use midnight.
668 	 */
669 	atime.pct_date = pcp->pc_entry.pcd_ladate;
670 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
671 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
672 	else
673 		atime.pct_time = 0;
674 	pc_pcttotv(&atime, &unixtime);
675 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
676 		if (unixtime > INT32_MAX)
677 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
678 		unixtime = MIN(unixtime, INT32_MAX);
679 	} else if (unixtime > INT32_MAX &&
680 	    get_udatamodel() == DATAMODEL_ILP32) {
681 		pc_unlockfs(fsp);
682 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
683 		return (EOVERFLOW);
684 	}
685 
686 	vap->va_atime.tv_sec = (time_t)unixtime;
687 	vap->va_atime.tv_nsec = 0;
688 
689 	pc_unlockfs(fsp);
690 	return (0);
691 }
692 
693 
694 /*ARGSUSED*/
695 static int
696 pcfs_setattr(
697 	struct vnode *vp,
698 	struct vattr *vap,
699 	int flags,
700 	struct cred *cr,
701 	caller_context_t *ct)
702 {
703 	struct pcnode *pcp;
704 	mode_t mask = vap->va_mask;
705 	int error;
706 	struct pcfs *fsp;
707 	timestruc_t now, *timep;
708 
709 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
710 	/*
711 	 * cannot set these attributes
712 	 */
713 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
714 		return (EINVAL);
715 	}
716 	/*
717 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
718 	 * from 'tar' when it tries to set times on a directory, and console
719 	 * printf's on the NFS server when it gets EINVAL back on such a
720 	 * request. One possible problem with that since a directory entry
721 	 * identifies a file, '.' and all the '..' entries in subdirectories
722 	 * may get out of sync when the directory is updated since they're
723 	 * treated like separate files. We could fix that by looking for
724 	 * '.' and giving it the same attributes, and then looking for
725 	 * all the subdirectories and updating '..', but that's pretty
726 	 * expensive for something that doesn't seem likely to matter.
727 	 */
728 	/* can't do some ops on directories anyway */
729 	if ((vp->v_type == VDIR) &&
730 	    (mask & AT_SIZE)) {
731 		return (EINVAL);
732 	}
733 
734 	fsp = VFSTOPCFS(vp->v_vfsp);
735 	error = pc_lockfs(fsp, 0, 0);
736 	if (error)
737 		return (error);
738 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
739 		pc_unlockfs(fsp);
740 		return (EIO);
741 	}
742 
743 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
744 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
745 			pc_unlockfs(fsp);
746 			return (EACCES);
747 		}
748 	}
749 
750 	/*
751 	 * Change file access modes.
752 	 * If nobody has write permission, file is marked readonly.
753 	 * Otherwise file is writable by anyone.
754 	 */
755 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
756 		if ((vap->va_mode & 0222) == 0)
757 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
758 		else
759 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
760 		pcp->pc_flags |= PC_CHG;
761 	}
762 	/*
763 	 * Truncate file. Must have write permission.
764 	 */
765 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
766 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
767 			error = EACCES;
768 			goto out;
769 		}
770 		if (vap->va_size > UINT32_MAX) {
771 			error = EFBIG;
772 			goto out;
773 		}
774 		error = pc_truncate(pcp, (uint_t)vap->va_size);
775 		if (error)
776 			goto out;
777 	}
778 	/*
779 	 * Change file modified times.
780 	 */
781 	if (mask & (AT_MTIME | AT_CTIME)) {
782 		/*
783 		 * If SysV-compatible option to set access and
784 		 * modified times if privileged, owner, or write access,
785 		 * use current time rather than va_mtime.
786 		 *
787 		 * XXX - va_mtime.tv_sec == -1 flags this.
788 		 */
789 		timep = &vap->va_mtime;
790 		if (vap->va_mtime.tv_sec == -1) {
791 			gethrestime(&now);
792 			timep = &now;
793 		}
794 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
795 		    timep->tv_sec > INT32_MAX) {
796 			error = EOVERFLOW;
797 			goto out;
798 		}
799 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
800 		if (error)
801 			goto out;
802 		pcp->pc_flags |= PC_CHG;
803 	}
804 	/*
805 	 * Change file access times.
806 	 */
807 	if (mask & AT_ATIME) {
808 		/*
809 		 * If SysV-compatible option to set access and
810 		 * modified times if privileged, owner, or write access,
811 		 * use current time rather than va_mtime.
812 		 *
813 		 * XXX - va_atime.tv_sec == -1 flags this.
814 		 */
815 		struct pctime	atime;
816 
817 		timep = &vap->va_atime;
818 		if (vap->va_atime.tv_sec == -1) {
819 			gethrestime(&now);
820 			timep = &now;
821 		}
822 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
823 		    timep->tv_sec > INT32_MAX) {
824 			error = EOVERFLOW;
825 			goto out;
826 		}
827 		error = pc_tvtopct(timep, &atime);
828 		if (error)
829 			goto out;
830 		pcp->pc_entry.pcd_ladate = atime.pct_date;
831 		pcp->pc_flags |= PC_CHG;
832 	}
833 out:
834 	pc_unlockfs(fsp);
835 	return (error);
836 }
837 
838 
839 /*ARGSUSED*/
840 static int
841 pcfs_access(
842 	struct vnode *vp,
843 	int mode,
844 	int flags,
845 	struct cred *cr,
846 	caller_context_t *ct)
847 {
848 	struct pcnode *pcp;
849 	struct pcfs *fsp;
850 
851 
852 	fsp = VFSTOPCFS(vp->v_vfsp);
853 
854 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
855 		return (EIO);
856 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
857 		return (EACCES);
858 
859 	/*
860 	 * If this is a boot partition, privileged users have full access while
861 	 * others have read-only access.
862 	 */
863 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
864 		if ((mode & VWRITE) &&
865 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
866 			return (EACCES);
867 	}
868 	return (0);
869 }
870 
871 
872 /*ARGSUSED*/
873 static int
874 pcfs_fsync(
875 	struct vnode *vp,
876 	int syncflag,
877 	struct cred *cr,
878 	caller_context_t *ct)
879 {
880 	struct pcfs *fsp;
881 	struct pcnode *pcp;
882 	int error;
883 
884 	fsp = VFSTOPCFS(vp->v_vfsp);
885 	if (error = pc_verify(fsp))
886 		return (error);
887 	error = pc_lockfs(fsp, 0, 0);
888 	if (error)
889 		return (error);
890 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
891 		pc_unlockfs(fsp);
892 		return (EIO);
893 	}
894 	rw_enter(&pcnodes_lock, RW_WRITER);
895 	error = pc_nodesync(pcp);
896 	rw_exit(&pcnodes_lock);
897 	pc_unlockfs(fsp);
898 	return (error);
899 }
900 
901 
902 /*ARGSUSED*/
903 static void
904 pcfs_inactive(
905 	struct vnode *vp,
906 	struct cred *cr,
907 	caller_context_t *ct)
908 {
909 	struct pcnode *pcp;
910 	struct pcfs *fsp;
911 	int error;
912 
913 	fsp = VFSTOPCFS(vp->v_vfsp);
914 	error = pc_lockfs(fsp, 0, 1);
915 
916 	/*
917 	 * If the filesystem was umounted by force, all dirty
918 	 * pages associated with this vnode are invalidated
919 	 * and then the vnode will be freed.
920 	 */
921 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
922 		pcp = VTOPC(vp);
923 		if (vn_has_cached_data(vp)) {
924 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
925 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
926 		}
927 		remque(pcp);
928 		if (error == 0)
929 			pc_unlockfs(fsp);
930 		vn_free(vp);
931 		kmem_free(pcp, sizeof (struct pcnode));
932 		VFS_RELE(PCFSTOVFS(fsp));
933 		return;
934 	}
935 
936 	mutex_enter(&vp->v_lock);
937 	ASSERT(vp->v_count >= 1);
938 	if (vp->v_count > 1) {
939 		vp->v_count--;  /* release our hold from vn_rele */
940 		mutex_exit(&vp->v_lock);
941 		pc_unlockfs(fsp);
942 		return;
943 	}
944 	mutex_exit(&vp->v_lock);
945 
946 	/*
947 	 * Check again to confirm that no intervening I/O error
948 	 * with a subsequent pc_diskchanged() call has released
949 	 * the pcnode. If it has then release the vnode as above.
950 	 */
951 	pcp = VTOPC(vp);
952 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
953 		if (vn_has_cached_data(vp))
954 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
955 			    pcfs_putapage, B_INVAL | B_TRUNC,
956 			    (struct cred *)NULL);
957 	}
958 
959 	if (pcp == NULL) {
960 		vn_free(vp);
961 	} else {
962 		pc_rele(pcp);
963 	}
964 
965 	if (!error)
966 		pc_unlockfs(fsp);
967 }
968 
969 /*ARGSUSED*/
970 static int
971 pcfs_lookup(
972 	struct vnode *dvp,
973 	char *nm,
974 	struct vnode **vpp,
975 	struct pathname *pnp,
976 	int flags,
977 	struct vnode *rdir,
978 	struct cred *cr,
979 	caller_context_t *ct,
980 	int *direntflags,
981 	pathname_t *realpnp)
982 {
983 	struct pcfs *fsp;
984 	struct pcnode *pcp;
985 	int error;
986 
987 	/*
988 	 * If the filesystem was umounted by force, return immediately.
989 	 */
990 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
991 		return (EIO);
992 
993 	/*
994 	 * verify that the dvp is still valid on the disk
995 	 */
996 	fsp = VFSTOPCFS(dvp->v_vfsp);
997 	if (error = pc_verify(fsp))
998 		return (error);
999 	error = pc_lockfs(fsp, 0, 0);
1000 	if (error)
1001 		return (error);
1002 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1003 		pc_unlockfs(fsp);
1004 		return (EIO);
1005 	}
1006 	/*
1007 	 * Null component name is a synonym for directory being searched.
1008 	 */
1009 	if (*nm == '\0') {
1010 		VN_HOLD(dvp);
1011 		*vpp = dvp;
1012 		pc_unlockfs(fsp);
1013 		return (0);
1014 	}
1015 
1016 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1017 	if (!error) {
1018 		*vpp = PCTOV(pcp);
1019 		pcp->pc_flags |= PC_EXTERNAL;
1020 	}
1021 	pc_unlockfs(fsp);
1022 	return (error);
1023 }
1024 
1025 
1026 /*ARGSUSED*/
1027 static int
1028 pcfs_create(
1029 	struct vnode *dvp,
1030 	char *nm,
1031 	struct vattr *vap,
1032 	enum vcexcl exclusive,
1033 	int mode,
1034 	struct vnode **vpp,
1035 	struct cred *cr,
1036 	int flag,
1037 	caller_context_t *ct,
1038 	vsecattr_t *vsecp)
1039 {
1040 	int error;
1041 	struct pcnode *pcp;
1042 	struct vnode *vp;
1043 	struct pcfs *fsp;
1044 
1045 	/*
1046 	 * can't create directories. use pcfs_mkdir.
1047 	 * can't create anything other than files.
1048 	 */
1049 	if (vap->va_type == VDIR)
1050 		return (EISDIR);
1051 	else if (vap->va_type != VREG)
1052 		return (EINVAL);
1053 
1054 	pcp = NULL;
1055 	fsp = VFSTOPCFS(dvp->v_vfsp);
1056 	error = pc_lockfs(fsp, 0, 0);
1057 	if (error)
1058 		return (error);
1059 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1060 		pc_unlockfs(fsp);
1061 		return (EIO);
1062 	}
1063 
1064 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1065 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1066 			pc_unlockfs(fsp);
1067 			return (EACCES);
1068 		}
1069 	}
1070 
1071 	if (*nm == '\0') {
1072 		/*
1073 		 * Null component name refers to the directory itself.
1074 		 */
1075 		VN_HOLD(dvp);
1076 		pcp = VTOPC(dvp);
1077 		error = EEXIST;
1078 	} else {
1079 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1080 	}
1081 	/*
1082 	 * if file exists and this is a nonexclusive create,
1083 	 * check for access permissions
1084 	 */
1085 	if (error == EEXIST) {
1086 		vp = PCTOV(pcp);
1087 		if (exclusive == NONEXCL) {
1088 			if (vp->v_type == VDIR) {
1089 				error = EISDIR;
1090 			} else if (mode) {
1091 				error = pcfs_access(PCTOV(pcp), mode, 0,
1092 				    cr, ct);
1093 			} else {
1094 				error = 0;
1095 			}
1096 		}
1097 		if (error) {
1098 			VN_RELE(PCTOV(pcp));
1099 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1100 		    (vap->va_size == 0)) {
1101 			error = pc_truncate(pcp, 0L);
1102 			if (error) {
1103 				VN_RELE(PCTOV(pcp));
1104 			} else {
1105 				vnevent_create(PCTOV(pcp), ct);
1106 			}
1107 		}
1108 	}
1109 	if (error) {
1110 		pc_unlockfs(fsp);
1111 		return (error);
1112 	}
1113 	*vpp = PCTOV(pcp);
1114 	pcp->pc_flags |= PC_EXTERNAL;
1115 	pc_unlockfs(fsp);
1116 	return (error);
1117 }
1118 
1119 /*ARGSUSED*/
1120 static int
1121 pcfs_remove(
1122 	struct vnode *vp,
1123 	char *nm,
1124 	struct cred *cr,
1125 	caller_context_t *ct,
1126 	int flags)
1127 {
1128 	struct pcfs *fsp;
1129 	struct pcnode *pcp;
1130 	int error;
1131 
1132 	fsp = VFSTOPCFS(vp->v_vfsp);
1133 	if (error = pc_verify(fsp))
1134 		return (error);
1135 	error = pc_lockfs(fsp, 0, 0);
1136 	if (error)
1137 		return (error);
1138 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1139 		pc_unlockfs(fsp);
1140 		return (EIO);
1141 	}
1142 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1143 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1144 			pc_unlockfs(fsp);
1145 			return (EACCES);
1146 		}
1147 	}
1148 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1149 	pc_unlockfs(fsp);
1150 	return (error);
1151 }
1152 
1153 /*
1154  * Rename a file or directory
1155  * This rename is restricted to only rename files within a directory.
1156  * XX should make rename more general
1157  */
1158 /*ARGSUSED*/
1159 static int
1160 pcfs_rename(
1161 	struct vnode *sdvp,		/* old (source) parent vnode */
1162 	char *snm,			/* old (source) entry name */
1163 	struct vnode *tdvp,		/* new (target) parent vnode */
1164 	char *tnm,			/* new (target) entry name */
1165 	struct cred *cr,
1166 	caller_context_t *ct,
1167 	int flags)
1168 {
1169 	struct pcfs *fsp;
1170 	struct pcnode *dp;	/* parent pcnode */
1171 	struct pcnode *tdp;
1172 	int error;
1173 
1174 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1175 	if (error = pc_verify(fsp))
1176 		return (error);
1177 
1178 	/*
1179 	 * make sure we can muck with this directory.
1180 	 */
1181 	error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1182 	if (error) {
1183 		return (error);
1184 	}
1185 	error = pc_lockfs(fsp, 0, 0);
1186 	if (error)
1187 		return (error);
1188 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1189 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1190 		pc_unlockfs(fsp);
1191 		return (EIO);
1192 	}
1193 	error = pc_rename(dp, tdp, snm, tnm, ct);
1194 	pc_unlockfs(fsp);
1195 	return (error);
1196 }
1197 
1198 /*ARGSUSED*/
1199 static int
1200 pcfs_mkdir(
1201 	struct vnode *dvp,
1202 	char *nm,
1203 	struct vattr *vap,
1204 	struct vnode **vpp,
1205 	struct cred *cr,
1206 	caller_context_t *ct,
1207 	int flags,
1208 	vsecattr_t *vsecp)
1209 {
1210 	struct pcfs *fsp;
1211 	struct pcnode *pcp;
1212 	int error;
1213 
1214 	fsp = VFSTOPCFS(dvp->v_vfsp);
1215 	if (error = pc_verify(fsp))
1216 		return (error);
1217 	error = pc_lockfs(fsp, 0, 0);
1218 	if (error)
1219 		return (error);
1220 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1221 		pc_unlockfs(fsp);
1222 		return (EIO);
1223 	}
1224 
1225 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1226 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1227 			pc_unlockfs(fsp);
1228 			return (EACCES);
1229 		}
1230 	}
1231 
1232 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1233 
1234 	if (!error) {
1235 		pcp -> pc_flags |= PC_EXTERNAL;
1236 		*vpp = PCTOV(pcp);
1237 	} else if (error == EEXIST) {
1238 		VN_RELE(PCTOV(pcp));
1239 	}
1240 	pc_unlockfs(fsp);
1241 	return (error);
1242 }
1243 
1244 /*ARGSUSED*/
1245 static int
1246 pcfs_rmdir(
1247 	struct vnode *dvp,
1248 	char *nm,
1249 	struct vnode *cdir,
1250 	struct cred *cr,
1251 	caller_context_t *ct,
1252 	int flags)
1253 {
1254 	struct pcfs *fsp;
1255 	struct pcnode *pcp;
1256 	int error;
1257 
1258 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1259 	if (error = pc_verify(fsp))
1260 		return (error);
1261 	if (error = pc_lockfs(fsp, 0, 0))
1262 		return (error);
1263 
1264 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1265 		pc_unlockfs(fsp);
1266 		return (EIO);
1267 	}
1268 
1269 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1270 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1271 			pc_unlockfs(fsp);
1272 			return (EACCES);
1273 		}
1274 	}
1275 
1276 	error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1277 	pc_unlockfs(fsp);
1278 	return (error);
1279 }
1280 
1281 /*
1282  * read entries in a directory.
1283  * we must convert pc format to unix format
1284  */
1285 
1286 /*ARGSUSED*/
1287 static int
1288 pcfs_readdir(
1289 	struct vnode *dvp,
1290 	struct uio *uiop,
1291 	struct cred *cr,
1292 	int *eofp,
1293 	caller_context_t *ct,
1294 	int flags)
1295 {
1296 	struct pcnode *pcp;
1297 	struct pcfs *fsp;
1298 	struct pcdir *ep;
1299 	struct buf *bp = NULL;
1300 	offset_t offset;
1301 	int boff;
1302 	struct pc_dirent lbp;
1303 	struct pc_dirent *ld = &lbp;
1304 	int error;
1305 
1306 	/*
1307 	 * If the filesystem was umounted by force, return immediately.
1308 	 */
1309 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1310 		return (EIO);
1311 
1312 	if ((uiop->uio_iovcnt != 1) ||
1313 	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1314 		return (EINVAL);
1315 	}
1316 	fsp = VFSTOPCFS(dvp->v_vfsp);
1317 	/*
1318 	 * verify that the dp is still valid on the disk
1319 	 */
1320 	if (error = pc_verify(fsp)) {
1321 		return (error);
1322 	}
1323 	error = pc_lockfs(fsp, 0, 0);
1324 	if (error)
1325 		return (error);
1326 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1327 		pc_unlockfs(fsp);
1328 		return (EIO);
1329 	}
1330 
1331 	bzero(ld, sizeof (*ld));
1332 
1333 	if (eofp != NULL)
1334 		*eofp = 0;
1335 	offset = uiop->uio_loffset;
1336 
1337 	if (dvp->v_flag & VROOT) {
1338 		/*
1339 		 * kludge up entries for "." and ".." in the root.
1340 		 */
1341 		if (offset == 0) {
1342 			(void) strcpy(ld->d_name, ".");
1343 			ld->d_reclen = DIRENT64_RECLEN(1);
1344 			ld->d_off = (off64_t)sizeof (struct pcdir);
1345 			ld->d_ino = (ino64_t)UINT_MAX;
1346 			if (ld->d_reclen > uiop->uio_resid) {
1347 				pc_unlockfs(fsp);
1348 				return (ENOSPC);
1349 			}
1350 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1351 			uiop->uio_loffset = ld->d_off;
1352 			offset = uiop->uio_loffset;
1353 		}
1354 		if (offset == sizeof (struct pcdir)) {
1355 			(void) strcpy(ld->d_name, "..");
1356 			ld->d_reclen = DIRENT64_RECLEN(2);
1357 			if (ld->d_reclen > uiop->uio_resid) {
1358 				pc_unlockfs(fsp);
1359 				return (ENOSPC);
1360 			}
1361 			ld->d_off = (off64_t)(uiop->uio_loffset +
1362 			    sizeof (struct pcdir));
1363 			ld->d_ino = (ino64_t)UINT_MAX;
1364 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1365 			uiop->uio_loffset = ld->d_off;
1366 			offset = uiop->uio_loffset;
1367 		}
1368 		offset -= 2 * sizeof (struct pcdir);
1369 		/* offset now has the real offset value into directory file */
1370 	}
1371 
1372 	for (;;) {
1373 		boff = pc_blkoff(fsp, offset);
1374 		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1375 			if (bp != NULL) {
1376 				brelse(bp);
1377 				bp = NULL;
1378 			}
1379 			error = pc_blkatoff(pcp, offset, &bp, &ep);
1380 			if (error) {
1381 				if (error == ENOENT) {
1382 					error = 0;
1383 					if (eofp)
1384 						*eofp = 1;
1385 				}
1386 				break;
1387 			}
1388 		}
1389 		if (ep->pcd_filename[0] == PCD_UNUSED) {
1390 			if (eofp)
1391 				*eofp = 1;
1392 			break;
1393 		}
1394 		/*
1395 		 * Don't display label because it may contain funny characters.
1396 		 */
1397 		if (ep->pcd_filename[0] == PCD_ERASED) {
1398 			uiop->uio_loffset += sizeof (struct pcdir);
1399 			offset += sizeof (struct pcdir);
1400 			ep++;
1401 			continue;
1402 		}
1403 		if (PCDL_IS_LFN(ep)) {
1404 			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1405 			    0)
1406 				break;
1407 			continue;
1408 		}
1409 
1410 		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1411 			break;
1412 	}
1413 	if (bp)
1414 		brelse(bp);
1415 	pc_unlockfs(fsp);
1416 	return (error);
1417 }
1418 
1419 
1420 /*
1421  * Called from pvn_getpages or pcfs_getpage to get a particular page.
1422  * When we are called the pcfs is already locked.
1423  */
1424 /*ARGSUSED*/
1425 static int
1426 pcfs_getapage(
1427 	struct vnode *vp,
1428 	u_offset_t off,
1429 	size_t len,
1430 	uint_t *protp,
1431 	page_t *pl[],		/* NULL if async IO is requested */
1432 	size_t plsz,
1433 	struct seg *seg,
1434 	caddr_t addr,
1435 	enum seg_rw rw,
1436 	struct cred *cr)
1437 {
1438 	struct pcnode *pcp;
1439 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1440 	struct vnode *devvp;
1441 	page_t *pp;
1442 	page_t *pagefound;
1443 	int err;
1444 
1445 	/*
1446 	 * If the filesystem was umounted by force, return immediately.
1447 	 */
1448 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1449 		return (EIO);
1450 
1451 	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1452 	    (void *)vp, off, len);
1453 
1454 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1455 		return (EIO);
1456 	devvp = fsp->pcfs_devvp;
1457 
1458 	/* pcfs doesn't do readaheads */
1459 	if (pl == NULL)
1460 		return (0);
1461 
1462 	pl[0] = NULL;
1463 	err = 0;
1464 	/*
1465 	 * If the accessed time on the pcnode has not already been
1466 	 * set elsewhere (e.g. for read/setattr) we set the time now.
1467 	 * This gives us approximate modified times for mmap'ed files
1468 	 * which are accessed via loads in the user address space.
1469 	 */
1470 	if ((pcp->pc_flags & PC_ACC) == 0 &&
1471 	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1472 		pc_mark_acc(fsp, pcp);
1473 	}
1474 reread:
1475 	if ((pagefound = page_exists(vp, off)) == NULL) {
1476 		/*
1477 		 * Need to really do disk IO to get the page(s).
1478 		 */
1479 		struct buf *bp;
1480 		daddr_t lbn, bn;
1481 		u_offset_t io_off;
1482 		size_t io_len;
1483 		u_offset_t lbnoff, xferoffset;
1484 		u_offset_t pgoff;
1485 		uint_t	xfersize;
1486 		int err1;
1487 
1488 		lbn = pc_lblkno(fsp, off);
1489 		lbnoff = off & ~(fsp->pcfs_clsize - 1);
1490 		xferoffset = off & ~(fsp->pcfs_secsize - 1);
1491 
1492 		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1493 		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1494 		if (pp == NULL)
1495 			/*
1496 			 * XXX - If pcfs is made MT-hot, this should go
1497 			 * back to reread.
1498 			 */
1499 			panic("pcfs_getapage pvn_read_kluster");
1500 
1501 		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1502 		    pgoff += xfersize,
1503 		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
1504 		    lbnoff += xfersize, xferoffset += xfersize) {
1505 			/*
1506 			 * read as many contiguous blocks as possible to
1507 			 * fill this page
1508 			 */
1509 			xfersize = PAGESIZE - pgoff;
1510 			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1511 			if (err1) {
1512 				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1513 				err = err1;
1514 				goto out;
1515 			}
1516 			bp = pageio_setup(pp, xfersize, devvp, B_READ);
1517 			bp->b_edev = devvp->v_rdev;
1518 			bp->b_dev = cmpdev(devvp->v_rdev);
1519 			bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1520 			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1521 			bp->b_file = vp;
1522 			bp->b_offset = (offset_t)(off + pgoff);
1523 
1524 			(void) bdev_strategy(bp);
1525 
1526 			lwp_stat_update(LWP_STAT_INBLK, 1);
1527 
1528 			if (err == 0)
1529 				err = biowait(bp);
1530 			else
1531 				(void) biowait(bp);
1532 			pageio_done(bp);
1533 			if (err)
1534 				goto out;
1535 		}
1536 		if (pgoff < PAGESIZE) {
1537 			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1538 		}
1539 		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1540 	}
1541 out:
1542 	if (err) {
1543 		if (pp != NULL)
1544 			pvn_read_done(pp, B_ERROR);
1545 		return (err);
1546 	}
1547 
1548 	if (pagefound) {
1549 		/*
1550 		 * Page exists in the cache, acquire the "shared"
1551 		 * lock.  If this fails, go back to reread.
1552 		 */
1553 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1554 			goto reread;
1555 		}
1556 		pl[0] = pp;
1557 		pl[1] = NULL;
1558 	}
1559 	return (err);
1560 }
1561 
1562 /*
1563  * Return all the pages from [off..off+len] in given file
1564  */
1565 /* ARGSUSED */
1566 static int
1567 pcfs_getpage(
1568 	struct vnode *vp,
1569 	offset_t off,
1570 	size_t len,
1571 	uint_t *protp,
1572 	page_t *pl[],
1573 	size_t plsz,
1574 	struct seg *seg,
1575 	caddr_t addr,
1576 	enum seg_rw rw,
1577 	struct cred *cr,
1578 	caller_context_t *ct)
1579 {
1580 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1581 	int err;
1582 
1583 	PC_DPRINTF0(6, "pcfs_getpage\n");
1584 	if (err = pc_verify(fsp))
1585 		return (err);
1586 	if (vp->v_flag & VNOMAP)
1587 		return (ENOSYS);
1588 	ASSERT(off <= UINT32_MAX);
1589 	err = pc_lockfs(fsp, 0, 0);
1590 	if (err)
1591 		return (err);
1592 	if (protp != NULL)
1593 		*protp = PROT_ALL;
1594 
1595 	ASSERT((off & PAGEOFFSET) == 0);
1596 	if (len <= PAGESIZE) {
1597 		err = pcfs_getapage(vp, off, len, protp, pl,
1598 		    plsz, seg, addr, rw, cr);
1599 	} else {
1600 		err = pvn_getpages(pcfs_getapage, vp, off,
1601 		    len, protp, pl, plsz, seg, addr, rw, cr);
1602 	}
1603 	pc_unlockfs(fsp);
1604 	return (err);
1605 }
1606 
1607 
1608 /*
1609  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1610  * If len == 0, do from off to EOF.
1611  *
1612  * The normal cases should be len == 0 & off == 0 (entire vp list),
1613  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1614  * (from pageout).
1615  *
1616  */
1617 /*ARGSUSED*/
1618 static int
1619 pcfs_putpage(
1620 	struct vnode *vp,
1621 	offset_t off,
1622 	size_t len,
1623 	int flags,
1624 	struct cred *cr,
1625 	caller_context_t *ct)
1626 {
1627 	struct pcnode *pcp;
1628 	page_t *pp;
1629 	struct pcfs *fsp;
1630 	u_offset_t io_off;
1631 	size_t io_len;
1632 	offset_t eoff;
1633 	int err;
1634 
1635 	/*
1636 	 * If the filesystem was umounted by force, return immediately.
1637 	 */
1638 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1639 		return (EIO);
1640 
1641 	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1642 	if (vp->v_flag & VNOMAP)
1643 		return (ENOSYS);
1644 
1645 	fsp = VFSTOPCFS(vp->v_vfsp);
1646 
1647 	if (err = pc_verify(fsp))
1648 		return (err);
1649 	if ((pcp = VTOPC(vp)) == NULL) {
1650 		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1651 		return (EIO);
1652 	}
1653 	if (pcp->pc_flags & PC_INVAL)
1654 		return (EIO);
1655 
1656 	if (curproc == proc_pageout) {
1657 		/*
1658 		 * XXX - This is a quick hack to avoid blocking
1659 		 * pageout. Also to avoid pcfs_getapage deadlocking
1660 		 * with putpage when memory is running out,
1661 		 * since we only have one global lock and we don't
1662 		 * support async putpage.
1663 		 * It should be fixed someday.
1664 		 *
1665 		 * Interestingly, this used to be a test of NOMEMWAIT().
1666 		 * We only ever got here once pcfs started supporting
1667 		 * NFS sharing, and then only because the NFS server
1668 		 * threads seem to do writes in sched's process context.
1669 		 * Since everyone else seems to just care about pageout,
1670 		 * the test was changed to look for pageout directly.
1671 		 */
1672 		return (ENOMEM);
1673 	}
1674 
1675 	ASSERT(off <= UINT32_MAX);
1676 
1677 	flags &= ~B_ASYNC;	/* XXX should fix this later */
1678 
1679 	err = pc_lockfs(fsp, 0, 0);
1680 	if (err)
1681 		return (err);
1682 	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1683 		pc_unlockfs(fsp);
1684 		return (0);
1685 	}
1686 
1687 	if (len == 0) {
1688 		/*
1689 		 * Search the entire vp list for pages >= off
1690 		 */
1691 		err = pvn_vplist_dirty(vp, off,
1692 		    pcfs_putapage, flags, cr);
1693 	} else {
1694 		eoff = off + len;
1695 
1696 		for (io_off = off; io_off < eoff &&
1697 		    io_off < pcp->pc_size; io_off += io_len) {
1698 			/*
1699 			 * If we are not invalidating, synchronously
1700 			 * freeing or writing pages use the routine
1701 			 * page_lookup_nowait() to prevent reclaiming
1702 			 * them from the free list.
1703 			 */
1704 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1705 				pp = page_lookup(vp, io_off,
1706 				    (flags & (B_INVAL | B_FREE)) ?
1707 				    SE_EXCL : SE_SHARED);
1708 			} else {
1709 				pp = page_lookup_nowait(vp, io_off,
1710 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1711 			}
1712 
1713 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1714 				io_len = PAGESIZE;
1715 			else {
1716 				err = pcfs_putapage(vp, pp, &io_off, &io_len,
1717 				    flags, cr);
1718 				if (err != 0)
1719 					break;
1720 				/*
1721 				 * "io_off" and "io_len" are returned as
1722 				 * the range of pages we actually wrote.
1723 				 * This allows us to skip ahead more quickly
1724 				 * since several pages may've been dealt
1725 				 * with by this iteration of the loop.
1726 				 */
1727 			}
1728 		}
1729 	}
1730 	if (err == 0 && (flags & B_INVAL) &&
1731 	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
1732 		/*
1733 		 * If doing "invalidation", make sure that
1734 		 * all pages on the vnode list are actually
1735 		 * gone.
1736 		 */
1737 		cmn_err(CE_PANIC,
1738 		    "pcfs_putpage: B_INVAL, pages not gone");
1739 	} else if (err) {
1740 		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1741 	}
1742 	pc_unlockfs(fsp);
1743 	return (err);
1744 }
1745 
1746 /*
1747  * Write out a single page, possibly klustering adjacent dirty pages.
1748  */
1749 /*ARGSUSED*/
1750 int
1751 pcfs_putapage(
1752 	struct vnode *vp,
1753 	page_t *pp,
1754 	u_offset_t *offp,
1755 	size_t *lenp,
1756 	int flags,
1757 	struct cred *cr)
1758 {
1759 	struct pcnode *pcp;
1760 	struct pcfs *fsp;
1761 	struct vnode *devvp;
1762 	size_t io_len;
1763 	daddr_t bn;
1764 	u_offset_t lbn, lbnoff, xferoffset;
1765 	uint_t pgoff, xfersize;
1766 	int err = 0;
1767 	u_offset_t io_off;
1768 
1769 	pcp = VTOPC(vp);
1770 	fsp = VFSTOPCFS(vp->v_vfsp);
1771 	devvp = fsp->pcfs_devvp;
1772 
1773 	/*
1774 	 * If the modified time on the inode has not already been
1775 	 * set elsewhere (e.g. for write/setattr) and this is not
1776 	 * a call from msync (B_FORCE) we set the time now.
1777 	 * This gives us approximate modified times for mmap'ed files
1778 	 * which are modified via stores in the user address space.
1779 	 */
1780 	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1781 		pcp->pc_flags |= PC_MOD;
1782 		pc_mark_mod(fsp, pcp);
1783 	}
1784 	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1785 	    PAGESIZE, flags);
1786 
1787 	if (fsp->pcfs_flags & PCFS_IRRECOV) {
1788 		goto out;
1789 	}
1790 
1791 	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1792 
1793 	lbn = pc_lblkno(fsp, io_off);
1794 	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1795 	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1796 
1797 	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1798 	    pgoff += xfersize,
1799 	    lbn += howmany(xfersize, fsp->pcfs_clsize),
1800 	    lbnoff += xfersize, xferoffset += xfersize) {
1801 
1802 		struct buf *bp;
1803 		int err1;
1804 
1805 		/*
1806 		 * write as many contiguous blocks as possible from this page
1807 		 */
1808 		xfersize = io_len - pgoff;
1809 		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1810 		if (err1) {
1811 			err = err1;
1812 			goto out;
1813 		}
1814 		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1815 		bp->b_edev = devvp->v_rdev;
1816 		bp->b_dev = cmpdev(devvp->v_rdev);
1817 		bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1818 		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1819 		bp->b_file = vp;
1820 		bp->b_offset = (offset_t)(io_off + pgoff);
1821 
1822 		(void) bdev_strategy(bp);
1823 
1824 		lwp_stat_update(LWP_STAT_OUBLK, 1);
1825 
1826 		if (err == 0)
1827 			err = biowait(bp);
1828 		else
1829 			(void) biowait(bp);
1830 		pageio_done(bp);
1831 	}
1832 	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1833 	pp = NULL;
1834 
1835 out:
1836 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1837 		pvn_write_done(pp, B_WRITE | flags);
1838 	} else if (err != 0 && pp != NULL) {
1839 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1840 	}
1841 
1842 	if (offp)
1843 		*offp = io_off;
1844 	if (lenp)
1845 		*lenp = io_len;
1846 		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1847 		    (void *)vp, (void *)pp, io_off, io_len);
1848 	if (err) {
1849 		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1850 	}
1851 	return (err);
1852 }
1853 
1854 /*ARGSUSED*/
1855 static int
1856 pcfs_map(
1857 	struct vnode *vp,
1858 	offset_t off,
1859 	struct as *as,
1860 	caddr_t *addrp,
1861 	size_t len,
1862 	uchar_t prot,
1863 	uchar_t maxprot,
1864 	uint_t flags,
1865 	struct cred *cr,
1866 	caller_context_t *ct)
1867 {
1868 	struct segvn_crargs vn_a;
1869 	int error;
1870 
1871 	PC_DPRINTF0(6, "pcfs_map\n");
1872 	if (vp->v_flag & VNOMAP)
1873 		return (ENOSYS);
1874 
1875 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1876 		return (ENXIO);
1877 
1878 	as_rangelock(as);
1879 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
1880 	if (error != 0) {
1881 		as_rangeunlock(as);
1882 		return (error);
1883 	}
1884 
1885 	vn_a.vp = vp;
1886 	vn_a.offset = off;
1887 	vn_a.type = flags & MAP_TYPE;
1888 	vn_a.prot = prot;
1889 	vn_a.maxprot = maxprot;
1890 	vn_a.flags = flags & ~MAP_TYPE;
1891 	vn_a.cred = cr;
1892 	vn_a.amp = NULL;
1893 	vn_a.szc = 0;
1894 	vn_a.lgrp_mem_policy_flags = 0;
1895 
1896 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1897 	as_rangeunlock(as);
1898 	return (error);
1899 }
1900 
1901 /* ARGSUSED */
1902 static int
1903 pcfs_seek(
1904 	struct vnode *vp,
1905 	offset_t ooff,
1906 	offset_t *noffp,
1907 	caller_context_t *ct)
1908 {
1909 	if (*noffp < 0)
1910 		return (EINVAL);
1911 	else if (*noffp > MAXOFFSET_T)
1912 		return (EINVAL);
1913 	else
1914 		return (0);
1915 }
1916 
1917 /* ARGSUSED */
1918 static int
1919 pcfs_addmap(
1920 	struct vnode *vp,
1921 	offset_t off,
1922 	struct as *as,
1923 	caddr_t addr,
1924 	size_t len,
1925 	uchar_t prot,
1926 	uchar_t maxprot,
1927 	uint_t flags,
1928 	struct cred *cr,
1929 	caller_context_t *ct)
1930 {
1931 	if (vp->v_flag & VNOMAP)
1932 		return (ENOSYS);
1933 	return (0);
1934 }
1935 
1936 /*ARGSUSED*/
1937 static int
1938 pcfs_delmap(
1939 	struct vnode *vp,
1940 	offset_t off,
1941 	struct as *as,
1942 	caddr_t addr,
1943 	size_t len,
1944 	uint_t prot,
1945 	uint_t maxprot,
1946 	uint_t flags,
1947 	struct cred *cr,
1948 	caller_context_t *ct)
1949 {
1950 	if (vp->v_flag & VNOMAP)
1951 		return (ENOSYS);
1952 	return (0);
1953 }
1954 
1955 /*
1956  * POSIX pathconf() support.
1957  */
1958 /* ARGSUSED */
1959 static int
1960 pcfs_pathconf(
1961 	struct vnode *vp,
1962 	int cmd,
1963 	ulong_t *valp,
1964 	struct cred *cr,
1965 	caller_context_t *ct)
1966 {
1967 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1968 
1969 	switch (cmd) {
1970 	case _PC_LINK_MAX:
1971 		*valp = 1;
1972 		return (0);
1973 
1974 	case _PC_CASE_BEHAVIOR:
1975 		return (EINVAL);
1976 
1977 	case _PC_FILESIZEBITS:
1978 		/*
1979 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1980 		 * FAT12 can only go up to the maximum filesystem capacity
1981 		 * which is ~509MB.
1982 		 */
1983 		*valp = IS_FAT12(fsp) ? 30 : 33;
1984 		return (0);
1985 
1986 	case _PC_TIMESTAMP_RESOLUTION:
1987 		/*
1988 		 * PCFS keeps track of modification times, it its own
1989 		 * internal format, to a resolution of 2 seconds.
1990 		 * Since 2000 million is representable in an int32_t
1991 		 * without overflow (or becoming negative), we allow
1992 		 * this value to be returned.
1993 		 */
1994 		*valp = 2000000000L;
1995 		return (0);
1996 
1997 	default:
1998 		return (fs_pathconf(vp, cmd, valp, cr, ct));
1999 	}
2000 
2001 }
2002 
2003 /* ARGSUSED */
2004 static int
2005 pcfs_space(
2006 	struct vnode *vp,
2007 	int cmd,
2008 	struct flock64 *bfp,
2009 	int flag,
2010 	offset_t offset,
2011 	cred_t *cr,
2012 	caller_context_t *ct)
2013 {
2014 	struct vattr vattr;
2015 	int error;
2016 
2017 	if (cmd != F_FREESP)
2018 		return (EINVAL);
2019 
2020 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2021 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2022 			return (EFBIG);
2023 		/*
2024 		 * we only support the special case of l_len == 0,
2025 		 * meaning free to end of file at this moment.
2026 		 */
2027 		if (bfp->l_len != 0)
2028 			return (EINVAL);
2029 		vattr.va_mask = AT_SIZE;
2030 		vattr.va_size = bfp->l_start;
2031 		error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2032 	}
2033 	return (error);
2034 }
2035 
2036 /*
2037  * Break up 'len' chars from 'buf' into a long file name chunk.
2038  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2039  */
2040 void
2041 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2042 {
2043 	int	i;
2044 
2045 	ASSERT(buf != NULL);
2046 
2047 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2048 		if (len > 0) {
2049 			ep->pcdl_firstfilename[i] = *buf++;
2050 			ep->pcdl_firstfilename[i + 1] = *buf++;
2051 			len -= 2;
2052 		} else {
2053 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2054 			ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2055 		}
2056 	}
2057 
2058 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2059 		if (len > 0) {
2060 			ep->pcdl_secondfilename[i] = *buf++;
2061 			ep->pcdl_secondfilename[i + 1] = *buf++;
2062 			len -= 2;
2063 		} else {
2064 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2065 			ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2066 		}
2067 	}
2068 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2069 		if (len > 0) {
2070 			ep->pcdl_thirdfilename[i] = *buf++;
2071 			ep->pcdl_thirdfilename[i + 1] = *buf++;
2072 			len -= 2;
2073 		} else {
2074 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2075 			ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2076 		}
2077 	}
2078 }
2079 
2080 /*
2081  * Extract the characters from the long filename chunk into 'buf'.
2082  * Return the number of characters extracted.
2083  */
2084 static int
2085 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf)
2086 {
2087 	char 	*tmp = buf;
2088 	int	i;
2089 
2090 	/* Copy all the names, no filtering now */
2091 
2092 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2093 		*tmp = ep->pcdl_firstfilename[i];
2094 		*(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2095 
2096 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2097 			return (tmp - buf);
2098 	}
2099 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2100 		*tmp = ep->pcdl_secondfilename[i];
2101 		*(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2102 
2103 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2104 			return (tmp - buf);
2105 	}
2106 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2107 		*tmp = ep->pcdl_thirdfilename[i];
2108 		*(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2109 
2110 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2111 			return (tmp - buf);
2112 	}
2113 	return (tmp - buf);
2114 }
2115 
2116 
2117 /*
2118  * Checksum the passed in short filename.
2119  * This is used to validate each component of the long name to make
2120  * sure the long name is valid (it hasn't been "detached" from the
2121  * short filename). This algorithm was found in FreeBSD.
2122  * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2123  */
2124 
2125 uchar_t
2126 pc_checksum_long_fn(char *name, char *ext)
2127 {
2128 	uchar_t c;
2129 	char	b[11];
2130 
2131 	bcopy(name, b, 8);
2132 	bcopy(ext, b+8, 3);
2133 
2134 	c = b[0];
2135 	c = ((c << 7) | (c >> 1)) + b[1];
2136 	c = ((c << 7) | (c >> 1)) + b[2];
2137 	c = ((c << 7) | (c >> 1)) + b[3];
2138 	c = ((c << 7) | (c >> 1)) + b[4];
2139 	c = ((c << 7) | (c >> 1)) + b[5];
2140 	c = ((c << 7) | (c >> 1)) + b[6];
2141 	c = ((c << 7) | (c >> 1)) + b[7];
2142 	c = ((c << 7) | (c >> 1)) + b[8];
2143 	c = ((c << 7) | (c >> 1)) + b[9];
2144 	c = ((c << 7) | (c >> 1)) + b[10];
2145 
2146 	return (c);
2147 }
2148 
2149 /*
2150  * Read a chunk of long filename entries into 'namep'.
2151  * Return with offset pointing to short entry (on success), or next
2152  * entry to read (if this wasn't a valid lfn really).
2153  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2154  * a long filename.
2155  *
2156  * Can also be called with a NULL namep, in which case it just returns
2157  * whether this was really a valid long filename and consumes it
2158  * (used by pc_dirempty()).
2159  */
2160 int
2161 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2162     struct pcdir **epp, offset_t *offset, struct buf **bp)
2163 {
2164 	struct pcdir *ep = *epp;
2165 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2166 	struct vnode *dvp = PCTOV(pcp);
2167 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2168 	char	*lfn;
2169 	char	*lfn_base;
2170 	int	boff;
2171 	int	i, cs;
2172 	char	*buf;
2173 	uchar_t	cksum;
2174 	int	detached = 0;
2175 	int	error = 0;
2176 	int	foldcase;
2177 	int	count = 0;
2178 	size_t	u16l = 0, u8l = 0;
2179 	char	*outbuf;
2180 	size_t	ret, inlen, outlen;
2181 
2182 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2183 	lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2184 	lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2185 	*lfn = '\0';
2186 	*(lfn + 1) = '\0';
2187 	cksum = lep->pcdl_checksum;
2188 
2189 	buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2190 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2191 		/* read next block if necessary */
2192 		boff = pc_blkoff(fsp, *offset);
2193 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2194 			if (*bp != NULL) {
2195 				brelse(*bp);
2196 				*bp = NULL;
2197 			}
2198 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2199 			if (error) {
2200 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2201 				kmem_free(buf, PCMAXNAM_UTF16);
2202 				return (error);
2203 			}
2204 			lep = (struct pcdir_lfn *)ep;
2205 		}
2206 		/* can this happen? Bad fs? */
2207 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2208 			detached = 1;
2209 			break;
2210 		}
2211 		if (cksum != lep->pcdl_checksum)
2212 			detached = 1;
2213 		/* process current entry */
2214 		cs = get_long_fn_chunk(lep, buf);
2215 		count += cs;
2216 		for (; cs > 0; cs--) {
2217 			/* see if we underflow */
2218 			if (lfn >= lfn_base)
2219 				*--lfn = buf[cs - 1];
2220 			else
2221 				detached = 1;
2222 		}
2223 		lep++;
2224 		*offset += sizeof (struct pcdir);
2225 	}
2226 	kmem_free(buf, PCMAXNAM_UTF16);
2227 	/* read next block if necessary */
2228 	boff = pc_blkoff(fsp, *offset);
2229 	ep = (struct pcdir *)lep;
2230 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2231 		if (*bp != NULL) {
2232 			brelse(*bp);
2233 			*bp = NULL;
2234 		}
2235 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2236 		if (error) {
2237 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2238 			return (error);
2239 		}
2240 	}
2241 	/* should be on the short one */
2242 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2243 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2244 		detached = 1;
2245 	}
2246 	if (detached ||
2247 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2248 	    !pc_valid_long_fn(lfn, 0)) {
2249 		/*
2250 		 * process current entry again. This may end up another lfn
2251 		 * or a short name.
2252 		 */
2253 		*epp = ep;
2254 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2255 		return (EINVAL);
2256 	}
2257 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2258 		/*
2259 		 * Don't display label because it may contain
2260 		 * funny characters.
2261 		 */
2262 		*offset += sizeof (struct pcdir);
2263 		ep++;
2264 		*epp = ep;
2265 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2266 		return (EINVAL);
2267 	}
2268 	if (namep) {
2269 		u16l = count / 2;
2270 		u8l = PCMAXNAMLEN;
2271 		error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2272 		    (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2273 		/*
2274 		 * uconv_u16tou8() will catch conversion errors including
2275 		 * the case where there is not enough room to write the
2276 		 * converted result and the u8l will never go over the given
2277 		 * PCMAXNAMLEN.
2278 		 */
2279 		if (error != 0) {
2280 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2281 			return (EINVAL);
2282 		}
2283 		namep[u8l] = '\0';
2284 		if (foldcase) {
2285 			inlen = strlen(namep);
2286 			outlen = PCMAXNAMLEN;
2287 			outbuf = kmem_alloc(PCMAXNAMLEN + 1, KM_SLEEP);
2288 			ret = u8_textprep_str(namep, &inlen, outbuf,
2289 			    &outlen, U8_TEXTPREP_TOLOWER, U8_UNICODE_LATEST,
2290 			    &error);
2291 			if (ret == -1) {
2292 				kmem_free(outbuf, PCMAXNAMLEN + 1);
2293 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2294 				return (EINVAL);
2295 			}
2296 			outbuf[PCMAXNAMLEN - outlen] = '\0';
2297 			(void) strncpy(namep, outbuf, PCMAXNAMLEN + 1);
2298 			kmem_free(outbuf, PCMAXNAMLEN + 1);
2299 		}
2300 	}
2301 	kmem_free(lfn_base, PCMAXNAM_UTF16);
2302 	*epp = ep;
2303 	return (0);
2304 }
2305 /*
2306  * Read a long filename into the pc_dirent structure and copy it out.
2307  */
2308 int
2309 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2310     struct pcdir **epp, offset_t *offset, struct buf **bp)
2311 {
2312 	struct pcdir *ep;
2313 	struct pcnode *pcp = VTOPC(dvp);
2314 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2315 	offset_t uiooffset = uiop->uio_loffset;
2316 	int	error = 0;
2317 	offset_t oldoffset;
2318 
2319 	oldoffset = *offset;
2320 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2321 	if (error) {
2322 		if (error == EINVAL) {
2323 			uiop->uio_loffset += *offset - oldoffset;
2324 			return (0);
2325 		} else
2326 			return (error);
2327 	}
2328 
2329 	ep = *epp;
2330 	uiop->uio_loffset += *offset - oldoffset;
2331 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2332 	if (ld->d_reclen > uiop->uio_resid) {
2333 		uiop->uio_loffset = uiooffset;
2334 		return (ENOSPC);
2335 	}
2336 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2337 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2338 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2339 	    pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2340 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2341 	uiop->uio_loffset = ld->d_off;
2342 	*offset += sizeof (struct pcdir);
2343 	ep++;
2344 	*epp = ep;
2345 	return (0);
2346 }
2347 
2348 /*
2349  * Read a short filename into the pc_dirent structure and copy it out.
2350  */
2351 int
2352 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2353     struct pcdir **epp, offset_t *offset, struct buf **bp)
2354 {
2355 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2356 	int	boff = pc_blkoff(fsp, *offset);
2357 	struct pcdir *ep = *epp;
2358 	offset_t	oldoffset = uiop->uio_loffset;
2359 	int	error;
2360 	int	foldcase;
2361 
2362 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2363 		uiop->uio_loffset += sizeof (struct pcdir);
2364 		*offset += sizeof (struct pcdir);
2365 		ep++;
2366 		*epp = ep;
2367 		return (0);
2368 	}
2369 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2370 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2371 	    pc_direntpersec(fsp));
2372 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2373 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2374 	    &ep->pcd_ext[0], foldcase);
2375 	if (error == 0) {
2376 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2377 		if (ld->d_reclen > uiop->uio_resid) {
2378 			uiop->uio_loffset = oldoffset;
2379 			return (ENOSPC);
2380 		}
2381 		ld->d_off = (off64_t)(uiop->uio_loffset +
2382 		    sizeof (struct pcdir));
2383 		(void) uiomove((caddr_t)ld,
2384 		    ld->d_reclen, UIO_READ, uiop);
2385 		uiop->uio_loffset = ld->d_off;
2386 	} else {
2387 		uiop->uio_loffset += sizeof (struct pcdir);
2388 	}
2389 	*offset += sizeof (struct pcdir);
2390 	ep++;
2391 	*epp = ep;
2392 	return (0);
2393 }
2394 
2395 /* ARGSUSED */
2396 static int
2397 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2398 {
2399 	struct pc_fid *pcfid;
2400 	struct pcnode *pcp;
2401 	struct pcfs	*fsp;
2402 	int	error;
2403 
2404 	fsp = VFSTOPCFS(vp->v_vfsp);
2405 	if (fsp == NULL)
2406 		return (EIO);
2407 	error = pc_lockfs(fsp, 0, 0);
2408 	if (error)
2409 		return (error);
2410 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2411 		pc_unlockfs(fsp);
2412 		return (EIO);
2413 	}
2414 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2415 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2416 		pc_unlockfs(fsp);
2417 		return (ENOSPC);
2418 	}
2419 
2420 	pcfid = (struct pc_fid *)fidp;
2421 	bzero(pcfid, sizeof (struct pc_fid));
2422 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2423 	if (vp->v_flag & VROOT) {
2424 		pcfid->pcfid_block = 0;
2425 		pcfid->pcfid_offset = 0;
2426 		pcfid->pcfid_ctime = 0;
2427 	} else {
2428 		pcfid->pcfid_block = pcp->pc_eblkno;
2429 		pcfid->pcfid_offset = pcp->pc_eoffset;
2430 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2431 	}
2432 	pc_unlockfs(fsp);
2433 	return (0);
2434 }
2435