xref: /illumos-gate/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 9c2acf00e275b6b2125a306f33cdddcc58393220)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
30  * Copyright (c) 2017 by Delphix. All rights reserved.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/t_lock.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/user.h>
38 #include <sys/buf.h>
39 #include <sys/stat.h>
40 #include <sys/vfs.h>
41 #include <sys/vfs_opreg.h>
42 #include <sys/dirent.h>
43 #include <sys/vnode.h>
44 #include <sys/proc.h>
45 #include <sys/file.h>
46 #include <sys/fcntl.h>
47 #include <sys/uio.h>
48 #include <sys/fs/pc_label.h>
49 #include <sys/fs/pc_fs.h>
50 #include <sys/fs/pc_dir.h>
51 #include <sys/fs/pc_node.h>
52 #include <sys/mman.h>
53 #include <sys/pathname.h>
54 #include <sys/vmsystm.h>
55 #include <sys/cmn_err.h>
56 #include <sys/debug.h>
57 #include <sys/statvfs.h>
58 #include <sys/unistd.h>
59 #include <sys/kmem.h>
60 #include <sys/conf.h>
61 #include <sys/flock.h>
62 #include <sys/policy.h>
63 #include <sys/sdt.h>
64 #include <sys/sunddi.h>
65 #include <sys/types.h>
66 #include <sys/errno.h>
67 
68 #include <vm/seg.h>
69 #include <vm/page.h>
70 #include <vm/pvn.h>
71 #include <vm/seg_map.h>
72 #include <vm/seg_vn.h>
73 #include <vm/hat.h>
74 #include <vm/as.h>
75 #include <vm/seg_kmem.h>
76 
77 #include <fs/fs_subr.h>
78 
79 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
80 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
81 	caller_context_t *ct);
82 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
83 	caller_context_t *);
84 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
85 	caller_context_t *);
86 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
87 	caller_context_t *ct);
88 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
89 	caller_context_t *);
90 static int pcfs_access(struct vnode *, int, int, struct cred *,
91 	caller_context_t *ct);
92 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
93 	struct pathname *, int, struct vnode *, struct cred *,
94 	caller_context_t *, int *, pathname_t *);
95 static int pcfs_create(struct vnode *, char *, struct vattr *,
96 	enum vcexcl, int mode, struct vnode **, struct cred *, int,
97 	caller_context_t *, vsecattr_t *);
98 static int pcfs_remove(struct vnode *, char *, struct cred *,
99 	caller_context_t *, int);
100 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
101 	struct cred *, caller_context_t *, int);
102 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
103 	struct cred *, caller_context_t *, int, vsecattr_t *);
104 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
105 	caller_context_t *, int);
106 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
107 	caller_context_t *, int);
108 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
109 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
110 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
111 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
112 	offset_t, cred_t *, caller_context_t *);
113 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
114 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
115 	caller_context_t *);
116 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
117 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
118 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
119 	caller_context_t *);
120 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
121 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
122 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
123 	size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
124 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
125 	size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
126 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
127 	caller_context_t *);
128 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
129 	caller_context_t *);
130 
131 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
132 	struct cred *);
133 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
134 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf);
135 
136 extern krwlock_t pcnodes_lock;
137 
138 #define	lround(r)	(((r)+sizeof (long long)-1)&(~(sizeof (long long)-1)))
139 
140 /*
141  * vnode op vectors for files and directories.
142  */
143 struct vnodeops *pcfs_fvnodeops;
144 struct vnodeops *pcfs_dvnodeops;
145 
146 const fs_operation_def_t pcfs_fvnodeops_template[] = {
147 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
148 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
149 	VOPNAME_READ,		{ .vop_read = pcfs_read },
150 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
151 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
152 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
153 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
154 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
155 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
156 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
157 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
158 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
159 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
160 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
161 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
162 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
163 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
164 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
165 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
166 	NULL,			NULL
167 };
168 
169 const fs_operation_def_t pcfs_dvnodeops_template[] = {
170 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
171 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
172 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
173 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
174 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
175 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
176 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
177 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
178 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
179 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
180 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
181 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
182 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
183 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
184 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
185 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
186 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
187 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
188 	NULL,			NULL
189 };
190 
191 
192 /*ARGSUSED*/
193 static int
194 pcfs_open(
195 	struct vnode **vpp,
196 	int flag,
197 	struct cred *cr,
198 	caller_context_t *ct)
199 {
200 	return (0);
201 }
202 
/*
 * NOTE: despite the historical intent of syncing files on close to keep
 * the floppy up to date, pcfs_close() below performs no sync; it only
 * returns success.
 */
206 
207 /*ARGSUSED*/
208 static int
209 pcfs_close(
210 	struct vnode *vp,
211 	int flag,
212 	int count,
213 	offset_t offset,
214 	struct cred *cr,
215 	caller_context_t *ct)
216 {
217 	return (0);
218 }
219 
220 /*ARGSUSED*/
221 static int
222 pcfs_read(
223 	struct vnode *vp,
224 	struct uio *uiop,
225 	int ioflag,
226 	struct cred *cr,
227 	struct caller_context *ct)
228 {
229 	struct pcfs *fsp;
230 	struct pcnode *pcp;
231 	int error;
232 
233 	fsp = VFSTOPCFS(vp->v_vfsp);
234 	if (error = pc_verify(fsp))
235 		return (error);
236 	error = pc_lockfs(fsp, 0, 0);
237 	if (error)
238 		return (error);
239 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
240 		pc_unlockfs(fsp);
241 		return (EIO);
242 	}
243 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
244 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
245 		pc_mark_acc(fsp, pcp);
246 	}
247 	pc_unlockfs(fsp);
248 	if (error) {
249 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
250 	}
251 	return (error);
252 }
253 
254 /*ARGSUSED*/
255 static int
256 pcfs_write(
257 	struct vnode *vp,
258 	struct uio *uiop,
259 	int ioflag,
260 	struct cred *cr,
261 	struct caller_context *ct)
262 {
263 	struct pcfs *fsp;
264 	struct pcnode *pcp;
265 	int error;
266 
267 	fsp = VFSTOPCFS(vp->v_vfsp);
268 	if (error = pc_verify(fsp))
269 		return (error);
270 	error = pc_lockfs(fsp, 0, 0);
271 	if (error)
272 		return (error);
273 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
274 		pc_unlockfs(fsp);
275 		return (EIO);
276 	}
277 	if (ioflag & FAPPEND) {
278 		/*
279 		 * in append mode start at end of file.
280 		 */
281 		uiop->uio_loffset = pcp->pc_size;
282 	}
283 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
284 	pcp->pc_flags |= PC_MOD;
285 	pc_mark_mod(fsp, pcp);
286 	if (ioflag & (FSYNC|FDSYNC))
287 		(void) pc_nodeupdate(pcp);
288 
289 	pc_unlockfs(fsp);
290 	if (error) {
291 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
292 	}
293 	return (error);
294 }
295 
296 /*
297  * read or write a vnode
298  */
299 static int
300 rwpcp(
301 	struct pcnode *pcp,
302 	struct uio *uio,
303 	enum uio_rw rw,
304 	int ioflag)
305 {
306 	struct vnode *vp = PCTOV(pcp);
307 	struct pcfs *fsp;
308 	daddr_t bn;			/* phys block number */
309 	int n;
310 	offset_t off;
311 	caddr_t base;
312 	int mapon, pagecreate;
313 	int newpage;
314 	int error = 0;
315 	rlim64_t limit = uio->uio_llimit;
316 	int oresid = uio->uio_resid;
317 
318 	/*
319 	 * If the filesystem was umounted by force, return immediately.
320 	 */
321 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
322 		return (EIO);
323 
324 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
325 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
326 
327 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
328 	ASSERT(vp->v_type == VREG);
329 
330 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
331 		return (0);
332 	}
333 
334 	if (uio->uio_loffset < 0)
335 		return (EINVAL);
336 
337 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
338 		limit = MAXOFFSET_T;
339 
340 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
341 		proc_t *p = ttoproc(curthread);
342 
343 		mutex_enter(&p->p_lock);
344 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
345 		    p, RCA_UNSAFE_SIGINFO);
346 		mutex_exit(&p->p_lock);
347 		return (EFBIG);
348 	}
349 
350 	/* the following condition will occur only for write */
351 
352 	if (uio->uio_loffset >= UINT32_MAX)
353 		return (EFBIG);
354 
355 	if (uio->uio_resid == 0)
356 		return (0);
357 
358 	if (limit > UINT32_MAX)
359 		limit = UINT32_MAX;
360 
361 	fsp = VFSTOPCFS(vp->v_vfsp);
362 	if (fsp->pcfs_flags & PCFS_IRRECOV)
363 		return (EIO);
364 
365 	do {
366 		/*
367 		 * Assignments to "n" in this block may appear
368 		 * to overflow in some cases.  However, after careful
369 		 * analysis it was determined that all assignments to
370 		 * "n" serve only to make "n" smaller.  Since "n"
371 		 * starts out as no larger than MAXBSIZE, "int" is
372 		 * safe.
373 		 */
374 		off = uio->uio_loffset & MAXBMASK;
375 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
376 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
377 		if (rw == UIO_READ) {
378 			offset_t diff;
379 
380 			diff = pcp->pc_size - uio->uio_loffset;
381 			if (diff <= 0)
382 				return (0);
383 			if (diff < n)
384 				n = (int)diff;
385 		}
386 		/*
387 		 * Compare limit with the actual offset + n, not the
388 		 * rounded down offset "off" or we will overflow
389 		 * the maximum file size after all.
390 		 */
391 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
392 			if (uio->uio_loffset >= limit) {
393 				error = EFBIG;
394 				break;
395 			}
396 			n = (int)(limit - uio->uio_loffset);
397 		}
398 
399 		/*
400 		 * Touch the page and fault it in if it is not in
401 		 * core before segmap_getmapflt can lock it. This
402 		 * is to avoid the deadlock if the buffer is mapped
403 		 * to the same file through mmap which we want to
404 		 * write to.
405 		 */
406 		uio_prefaultpages((long)n, uio);
407 
408 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
409 		pagecreate = 0;
410 		newpage = 0;
411 		if (rw == UIO_WRITE) {
412 			/*
413 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
414 			 * with one page at a time, instead of one MAXBSIZE
415 			 * at a time, so we can fully explore pagecreate
416 			 * optimization??
417 			 */
418 			if (uio->uio_loffset + n > pcp->pc_size) {
419 				uint_t ncl, lcn;
420 
421 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
422 				    fsp->pcfs_clsize);
423 				if (uio->uio_loffset > pcp->pc_size &&
424 				    ncl < (uint_t)howmany(uio->uio_loffset,
425 				    fsp->pcfs_clsize)) {
426 					/*
427 					 * Allocate and zerofill skipped
428 					 * clusters. This may not be worth the
429 					 * effort since a small lseek beyond
430 					 * eof but still within the cluster
431 					 * will not be zeroed out.
432 					 */
433 					lcn = pc_lblkno(fsp, uio->uio_loffset);
434 					error = pc_balloc(pcp, (daddr_t)lcn,
435 					    1, &bn);
436 					ncl = lcn + 1;
437 				}
438 				if (!error &&
439 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
440 				    fsp->pcfs_clsize))
441 					/*
442 					 * allocate clusters w/o zerofill
443 					 */
444 					error = pc_balloc(pcp,
445 					    (daddr_t)pc_lblkno(fsp,
446 					    uio->uio_loffset + n - 1),
447 					    0, &bn);
448 
449 				pcp->pc_flags |= PC_CHG;
450 
451 				if (error) {
452 					pc_cluster32_t ncl;
453 					int nerror;
454 
455 					/*
456 					 * figure out new file size from
457 					 * cluster chain length. If this
458 					 * is detected to loop, the chain
459 					 * is corrupted and we'd better
460 					 * keep our fingers off that file.
461 					 */
462 					nerror = pc_fileclsize(fsp,
463 					    pcp->pc_scluster, &ncl);
464 					if (nerror) {
465 						PC_DPRINTF1(2,
466 						    "cluster chain "
467 						    "corruption, "
468 						    "scluster=%d\n",
469 						    pcp->pc_scluster);
470 						pcp->pc_size = 0;
471 						pcp->pc_flags |= PC_INVAL;
472 						error = nerror;
473 						(void) segmap_release(segkmap,
474 						    base, 0);
475 						break;
476 					}
477 					pcp->pc_size = fsp->pcfs_clsize * ncl;
478 
479 					if (error == ENOSPC &&
480 					    (pcp->pc_size - uio->uio_loffset)
481 					    > 0) {
482 						PC_DPRINTF3(2, "rwpcp ENOSPC "
483 						    "off=%lld n=%d size=%d\n",
484 						    uio->uio_loffset,
485 						    n, pcp->pc_size);
486 						n = (int)(pcp->pc_size -
487 						    uio->uio_loffset);
488 					} else {
489 						PC_DPRINTF1(1,
490 						    "rwpcp error1=%d\n", error);
491 						(void) segmap_release(segkmap,
492 						    base, 0);
493 						break;
494 					}
495 				} else {
496 					pcp->pc_size =
497 					    (uint_t)(uio->uio_loffset + n);
498 				}
499 				if (mapon == 0) {
500 					newpage = segmap_pagecreate(segkmap,
501 					    base, (size_t)n, 0);
502 					pagecreate = 1;
503 				}
504 			} else if (n == MAXBSIZE) {
505 				newpage = segmap_pagecreate(segkmap, base,
506 				    (size_t)n, 0);
507 				pagecreate = 1;
508 			}
509 		}
510 		error = uiomove(base + mapon, (size_t)n, rw, uio);
511 
512 		if (pagecreate && uio->uio_loffset <
513 		    roundup(off + mapon + n, PAGESIZE)) {
514 			offset_t nzero, nmoved;
515 
516 			nmoved = uio->uio_loffset - (off + mapon);
517 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
518 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
519 		}
520 
521 		/*
522 		 * Unlock the pages which have been allocated by
523 		 * page_create_va() in segmap_pagecreate().
524 		 */
525 		if (newpage) {
526 			segmap_pageunlock(segkmap, base, (size_t)n,
527 			    rw == UIO_WRITE ? S_WRITE : S_READ);
528 		}
529 
530 		if (error) {
531 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
532 			/*
533 			 * If we failed on a write, we may have already
534 			 * allocated file blocks as well as pages.  It's hard
535 			 * to undo the block allocation, but we must be sure
536 			 * to invalidate any pages that may have been
537 			 * allocated.
538 			 */
539 			if (rw == UIO_WRITE)
540 				(void) segmap_release(segkmap, base, SM_INVAL);
541 			else
542 				(void) segmap_release(segkmap, base, 0);
543 		} else {
544 			uint_t flags = 0;
545 
546 			if (rw == UIO_READ) {
547 				if (n + mapon == MAXBSIZE ||
548 				    uio->uio_loffset == pcp->pc_size)
549 					flags = SM_DONTNEED;
550 			} else if (ioflag & (FSYNC|FDSYNC)) {
551 				flags = SM_WRITE;
552 			} else if (n + mapon == MAXBSIZE) {
553 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
554 			}
555 			error = segmap_release(segkmap, base, flags);
556 		}
557 
558 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
559 
560 	if (oresid != uio->uio_resid)
561 		error = 0;
562 	return (error);
563 }
564 
565 /*ARGSUSED*/
566 static int
567 pcfs_getattr(
568 	struct vnode *vp,
569 	struct vattr *vap,
570 	int flags,
571 	struct cred *cr,
572 	caller_context_t *ct)
573 {
574 	struct pcnode *pcp;
575 	struct pcfs *fsp;
576 	int error;
577 	char attr;
578 	struct pctime atime;
579 	int64_t unixtime;
580 
581 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
582 
583 	fsp = VFSTOPCFS(vp->v_vfsp);
584 	error = pc_lockfs(fsp, 0, 0);
585 	if (error)
586 		return (error);
587 
588 	/*
589 	 * Note that we don't check for "invalid node" (PC_INVAL) here
590 	 * only in order to make stat() succeed. We allow no I/O on such
591 	 * a node, but do allow to check for its existence.
592 	 */
593 	if ((pcp = VTOPC(vp)) == NULL) {
594 		pc_unlockfs(fsp);
595 		return (EIO);
596 	}
597 	/*
598 	 * Copy from pcnode.
599 	 */
600 	vap->va_type = vp->v_type;
601 	attr = pcp->pc_entry.pcd_attr;
602 	if (PCA_IS_HIDDEN(fsp, attr))
603 		vap->va_mode = 0;
604 	else if (attr & PCA_LABEL)
605 		vap->va_mode = 0444;
606 	else if (attr & PCA_RDONLY)
607 		vap->va_mode = 0555;
608 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
609 		vap->va_mode = 0755;
610 	} else {
611 		vap->va_mode = 0777;
612 	}
613 
614 	if (attr & PCA_DIR)
615 		vap->va_mode |= S_IFDIR;
616 	else
617 		vap->va_mode |= S_IFREG;
618 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
619 		vap->va_uid = 0;
620 		vap->va_gid = 0;
621 	} else {
622 		vap->va_uid = crgetuid(cr);
623 		vap->va_gid = crgetgid(cr);
624 	}
625 	vap->va_fsid = vp->v_vfsp->vfs_dev;
626 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
627 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
628 	    pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
629 	vap->va_nlink = 1;
630 	vap->va_size = (u_offset_t)pcp->pc_size;
631 	vap->va_rdev = 0;
632 	vap->va_nblocks =
633 	    (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
634 	vap->va_blksize = fsp->pcfs_clsize;
635 
636 	/*
637 	 * FAT root directories have no timestamps. In order not to return
638 	 * "time zero" (1/1/1970), we record the time of the mount and give
639 	 * that. This breaks less expectations.
640 	 */
641 	if (vp->v_flag & VROOT) {
642 		vap->va_mtime = fsp->pcfs_mounttime;
643 		vap->va_atime = fsp->pcfs_mounttime;
644 		vap->va_ctime = fsp->pcfs_mounttime;
645 		pc_unlockfs(fsp);
646 		return (0);
647 	}
648 
649 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
650 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
651 		if (unixtime > INT32_MAX)
652 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
653 		unixtime = MIN(unixtime, INT32_MAX);
654 	} else if (unixtime > INT32_MAX &&
655 	    get_udatamodel() == DATAMODEL_ILP32) {
656 		pc_unlockfs(fsp);
657 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
658 		return (EOVERFLOW);
659 	}
660 
661 	vap->va_mtime.tv_sec = (time_t)unixtime;
662 	vap->va_mtime.tv_nsec = 0;
663 
664 	/*
665 	 * FAT doesn't know about POSIX ctime.
666 	 * Best approximation is to always set it to mtime.
667 	 */
668 	vap->va_ctime = vap->va_mtime;
669 
670 	/*
671 	 * FAT only stores "last access date". If that's the
672 	 * same as the date of last modification then the time
673 	 * of last access is known. Otherwise, use midnight.
674 	 */
675 	atime.pct_date = pcp->pc_entry.pcd_ladate;
676 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
677 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
678 	else
679 		atime.pct_time = 0;
680 	pc_pcttotv(&atime, &unixtime);
681 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
682 		if (unixtime > INT32_MAX)
683 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
684 		unixtime = MIN(unixtime, INT32_MAX);
685 	} else if (unixtime > INT32_MAX &&
686 	    get_udatamodel() == DATAMODEL_ILP32) {
687 		pc_unlockfs(fsp);
688 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
689 		return (EOVERFLOW);
690 	}
691 
692 	vap->va_atime.tv_sec = (time_t)unixtime;
693 	vap->va_atime.tv_nsec = 0;
694 
695 	pc_unlockfs(fsp);
696 	return (0);
697 }
698 
699 
700 /*ARGSUSED*/
701 static int
702 pcfs_setattr(
703 	struct vnode *vp,
704 	struct vattr *vap,
705 	int flags,
706 	struct cred *cr,
707 	caller_context_t *ct)
708 {
709 	struct pcnode *pcp;
710 	mode_t mask = vap->va_mask;
711 	int error;
712 	struct pcfs *fsp;
713 	timestruc_t now, *timep;
714 
715 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
716 	/*
717 	 * cannot set these attributes
718 	 */
719 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
720 		return (EINVAL);
721 	}
722 	/*
723 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
724 	 * from 'tar' when it tries to set times on a directory, and console
725 	 * printf's on the NFS server when it gets EINVAL back on such a
726 	 * request. One possible problem with that since a directory entry
727 	 * identifies a file, '.' and all the '..' entries in subdirectories
728 	 * may get out of sync when the directory is updated since they're
729 	 * treated like separate files. We could fix that by looking for
730 	 * '.' and giving it the same attributes, and then looking for
731 	 * all the subdirectories and updating '..', but that's pretty
732 	 * expensive for something that doesn't seem likely to matter.
733 	 */
734 	/* can't do some ops on directories anyway */
735 	if ((vp->v_type == VDIR) &&
736 	    (mask & AT_SIZE)) {
737 		return (EINVAL);
738 	}
739 
740 	fsp = VFSTOPCFS(vp->v_vfsp);
741 	error = pc_lockfs(fsp, 0, 0);
742 	if (error)
743 		return (error);
744 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
745 		pc_unlockfs(fsp);
746 		return (EIO);
747 	}
748 
749 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
750 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
751 			pc_unlockfs(fsp);
752 			return (EACCES);
753 		}
754 	}
755 
756 	/*
757 	 * Change file access modes.
758 	 * If nobody has write permission, file is marked readonly.
759 	 * Otherwise file is writable by anyone.
760 	 */
761 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
762 		if ((vap->va_mode & 0222) == 0)
763 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
764 		else
765 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
766 		pcp->pc_flags |= PC_CHG;
767 	}
768 	/*
769 	 * Truncate file. Must have write permission.
770 	 */
771 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
772 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
773 			error = EACCES;
774 			goto out;
775 		}
776 		if (vap->va_size > UINT32_MAX) {
777 			error = EFBIG;
778 			goto out;
779 		}
780 		error = pc_truncate(pcp, (uint_t)vap->va_size);
781 
782 		if (error)
783 			goto out;
784 
785 		if (vap->va_size == 0)
786 			vnevent_truncate(vp, ct);
787 	}
788 	/*
789 	 * Change file modified times.
790 	 */
791 	if (mask & (AT_MTIME | AT_CTIME)) {
792 		/*
793 		 * If SysV-compatible option to set access and
794 		 * modified times if privileged, owner, or write access,
795 		 * use current time rather than va_mtime.
796 		 *
797 		 * XXX - va_mtime.tv_sec == -1 flags this.
798 		 */
799 		timep = &vap->va_mtime;
800 		if (vap->va_mtime.tv_sec == -1) {
801 			gethrestime(&now);
802 			timep = &now;
803 		}
804 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
805 		    timep->tv_sec > INT32_MAX) {
806 			error = EOVERFLOW;
807 			goto out;
808 		}
809 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
810 		if (error)
811 			goto out;
812 		pcp->pc_flags |= PC_CHG;
813 	}
814 	/*
815 	 * Change file access times.
816 	 */
817 	if (mask & AT_ATIME) {
818 		/*
819 		 * If SysV-compatible option to set access and
820 		 * modified times if privileged, owner, or write access,
821 		 * use current time rather than va_mtime.
822 		 *
823 		 * XXX - va_atime.tv_sec == -1 flags this.
824 		 */
825 		struct pctime	atime;
826 
827 		timep = &vap->va_atime;
828 		if (vap->va_atime.tv_sec == -1) {
829 			gethrestime(&now);
830 			timep = &now;
831 		}
832 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
833 		    timep->tv_sec > INT32_MAX) {
834 			error = EOVERFLOW;
835 			goto out;
836 		}
837 		error = pc_tvtopct(timep, &atime);
838 		if (error)
839 			goto out;
840 		pcp->pc_entry.pcd_ladate = atime.pct_date;
841 		pcp->pc_flags |= PC_CHG;
842 	}
843 out:
844 	pc_unlockfs(fsp);
845 	return (error);
846 }
847 
848 
849 /*ARGSUSED*/
850 static int
851 pcfs_access(
852 	struct vnode *vp,
853 	int mode,
854 	int flags,
855 	struct cred *cr,
856 	caller_context_t *ct)
857 {
858 	struct pcnode *pcp;
859 	struct pcfs *fsp;
860 
861 
862 	fsp = VFSTOPCFS(vp->v_vfsp);
863 
864 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
865 		return (EIO);
866 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
867 		return (EACCES);
868 
869 	/*
870 	 * If this is a boot partition, privileged users have full access while
871 	 * others have read-only access.
872 	 */
873 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
874 		if ((mode & VWRITE) &&
875 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
876 			return (EACCES);
877 	}
878 	return (0);
879 }
880 
881 
882 /*ARGSUSED*/
883 static int
884 pcfs_fsync(
885 	struct vnode *vp,
886 	int syncflag,
887 	struct cred *cr,
888 	caller_context_t *ct)
889 {
890 	struct pcfs *fsp;
891 	struct pcnode *pcp;
892 	int error;
893 
894 	fsp = VFSTOPCFS(vp->v_vfsp);
895 	if (error = pc_verify(fsp))
896 		return (error);
897 	error = pc_lockfs(fsp, 0, 0);
898 	if (error)
899 		return (error);
900 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
901 		pc_unlockfs(fsp);
902 		return (EIO);
903 	}
904 	rw_enter(&pcnodes_lock, RW_WRITER);
905 	error = pc_nodesync(pcp);
906 	rw_exit(&pcnodes_lock);
907 	pc_unlockfs(fsp);
908 	return (error);
909 }
910 
911 
912 /*ARGSUSED*/
913 static void
914 pcfs_inactive(
915 	struct vnode *vp,
916 	struct cred *cr,
917 	caller_context_t *ct)
918 {
919 	struct pcnode *pcp;
920 	struct pcfs *fsp;
921 	int error;
922 
923 	fsp = VFSTOPCFS(vp->v_vfsp);
924 	error = pc_lockfs(fsp, 0, 1);
925 
926 	/*
927 	 * If the filesystem was umounted by force, all dirty
928 	 * pages associated with this vnode are invalidated
929 	 * and then the vnode will be freed.
930 	 */
931 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
932 		pcp = VTOPC(vp);
933 		if (vn_has_cached_data(vp)) {
934 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
935 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
936 		}
937 		remque(pcp);
938 		if (error == 0)
939 			pc_unlockfs(fsp);
940 		vn_free(vp);
941 		kmem_free(pcp, sizeof (struct pcnode));
942 		VFS_RELE(PCFSTOVFS(fsp));
943 		return;
944 	}
945 
946 	mutex_enter(&vp->v_lock);
947 	ASSERT(vp->v_count >= 1);
948 	if (vp->v_count > 1) {
949 		VN_RELE_LOCKED(vp);
950 		mutex_exit(&vp->v_lock);
951 		pc_unlockfs(fsp);
952 		return;
953 	}
954 	mutex_exit(&vp->v_lock);
955 
956 	/*
957 	 * Check again to confirm that no intervening I/O error
958 	 * with a subsequent pc_diskchanged() call has released
959 	 * the pcnode. If it has then release the vnode as above.
960 	 */
961 	pcp = VTOPC(vp);
962 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
963 		if (vn_has_cached_data(vp))
964 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
965 			    pcfs_putapage, B_INVAL | B_TRUNC,
966 			    (struct cred *)NULL);
967 	}
968 
969 	if (pcp == NULL) {
970 		vn_free(vp);
971 	} else {
972 		pc_rele(pcp);
973 	}
974 
975 	if (!error)
976 		pc_unlockfs(fsp);
977 }
978 
979 /*ARGSUSED*/
980 static int
981 pcfs_lookup(
982 	struct vnode *dvp,
983 	char *nm,
984 	struct vnode **vpp,
985 	struct pathname *pnp,
986 	int flags,
987 	struct vnode *rdir,
988 	struct cred *cr,
989 	caller_context_t *ct,
990 	int *direntflags,
991 	pathname_t *realpnp)
992 {
993 	struct pcfs *fsp;
994 	struct pcnode *pcp;
995 	int error;
996 
997 	/*
998 	 * If the filesystem was umounted by force, return immediately.
999 	 */
1000 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1001 		return (EIO);
1002 
1003 	/*
1004 	 * verify that the dvp is still valid on the disk
1005 	 */
1006 	fsp = VFSTOPCFS(dvp->v_vfsp);
1007 	if (error = pc_verify(fsp))
1008 		return (error);
1009 	error = pc_lockfs(fsp, 0, 0);
1010 	if (error)
1011 		return (error);
1012 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1013 		pc_unlockfs(fsp);
1014 		return (EIO);
1015 	}
1016 	/*
1017 	 * Null component name is a synonym for directory being searched.
1018 	 */
1019 	if (*nm == '\0') {
1020 		VN_HOLD(dvp);
1021 		*vpp = dvp;
1022 		pc_unlockfs(fsp);
1023 		return (0);
1024 	}
1025 
1026 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1027 	if (!error) {
1028 		*vpp = PCTOV(pcp);
1029 		pcp->pc_flags |= PC_EXTERNAL;
1030 	}
1031 	pc_unlockfs(fsp);
1032 	return (error);
1033 }
1034 
1035 
1036 /*ARGSUSED*/
1037 static int
1038 pcfs_create(
1039 	struct vnode *dvp,
1040 	char *nm,
1041 	struct vattr *vap,
1042 	enum vcexcl exclusive,
1043 	int mode,
1044 	struct vnode **vpp,
1045 	struct cred *cr,
1046 	int flag,
1047 	caller_context_t *ct,
1048 	vsecattr_t *vsecp)
1049 {
1050 	int error;
1051 	struct pcnode *pcp;
1052 	struct vnode *vp;
1053 	struct pcfs *fsp;
1054 
1055 	/*
1056 	 * can't create directories. use pcfs_mkdir.
1057 	 * can't create anything other than files.
1058 	 */
1059 	if (vap->va_type == VDIR)
1060 		return (EISDIR);
1061 	else if (vap->va_type != VREG)
1062 		return (EINVAL);
1063 
1064 	pcp = NULL;
1065 	fsp = VFSTOPCFS(dvp->v_vfsp);
1066 	error = pc_lockfs(fsp, 0, 0);
1067 	if (error)
1068 		return (error);
1069 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1070 		pc_unlockfs(fsp);
1071 		return (EIO);
1072 	}
1073 
1074 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1075 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1076 			pc_unlockfs(fsp);
1077 			return (EACCES);
1078 		}
1079 	}
1080 
1081 	if (*nm == '\0') {
1082 		/*
1083 		 * Null component name refers to the directory itself.
1084 		 */
1085 		VN_HOLD(dvp);
1086 		pcp = VTOPC(dvp);
1087 		error = EEXIST;
1088 	} else {
1089 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1090 	}
1091 	/*
1092 	 * if file exists and this is a nonexclusive create,
1093 	 * check for access permissions
1094 	 */
1095 	if (error == EEXIST) {
1096 		vp = PCTOV(pcp);
1097 		if (exclusive == NONEXCL) {
1098 			if (vp->v_type == VDIR) {
1099 				error = EISDIR;
1100 			} else if (mode) {
1101 				error = pcfs_access(PCTOV(pcp), mode, 0,
1102 				    cr, ct);
1103 			} else {
1104 				error = 0;
1105 			}
1106 		}
1107 		if (error) {
1108 			VN_RELE(PCTOV(pcp));
1109 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1110 		    (vap->va_size == 0)) {
1111 			error = pc_truncate(pcp, 0L);
1112 			if (error) {
1113 				VN_RELE(PCTOV(pcp));
1114 			} else {
1115 				vnevent_create(PCTOV(pcp), ct);
1116 			}
1117 		}
1118 	}
1119 	if (error) {
1120 		pc_unlockfs(fsp);
1121 		return (error);
1122 	}
1123 	*vpp = PCTOV(pcp);
1124 	pcp->pc_flags |= PC_EXTERNAL;
1125 	pc_unlockfs(fsp);
1126 	return (error);
1127 }
1128 
1129 /*ARGSUSED*/
1130 static int
1131 pcfs_remove(
1132 	struct vnode *vp,
1133 	char *nm,
1134 	struct cred *cr,
1135 	caller_context_t *ct,
1136 	int flags)
1137 {
1138 	struct pcfs *fsp;
1139 	struct pcnode *pcp;
1140 	int error;
1141 
1142 	fsp = VFSTOPCFS(vp->v_vfsp);
1143 	if (error = pc_verify(fsp))
1144 		return (error);
1145 	error = pc_lockfs(fsp, 0, 0);
1146 	if (error)
1147 		return (error);
1148 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1149 		pc_unlockfs(fsp);
1150 		return (EIO);
1151 	}
1152 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1153 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1154 			pc_unlockfs(fsp);
1155 			return (EACCES);
1156 		}
1157 	}
1158 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1159 	pc_unlockfs(fsp);
1160 	return (error);
1161 }
1162 
1163 /*
1164  * Rename a file or directory
1165  * This rename is restricted to only rename files within a directory.
1166  * XX should make rename more general
1167  */
1168 /*ARGSUSED*/
1169 static int
1170 pcfs_rename(
1171 	struct vnode *sdvp,		/* old (source) parent vnode */
1172 	char *snm,			/* old (source) entry name */
1173 	struct vnode *tdvp,		/* new (target) parent vnode */
1174 	char *tnm,			/* new (target) entry name */
1175 	struct cred *cr,
1176 	caller_context_t *ct,
1177 	int flags)
1178 {
1179 	struct pcfs *fsp;
1180 	struct pcnode *dp;	/* parent pcnode */
1181 	struct pcnode *tdp;
1182 	int error;
1183 
1184 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1185 	if (error = pc_verify(fsp))
1186 		return (error);
1187 
1188 	/*
1189 	 * make sure we can muck with this directory.
1190 	 */
1191 	error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1192 	if (error) {
1193 		return (error);
1194 	}
1195 	error = pc_lockfs(fsp, 0, 0);
1196 	if (error)
1197 		return (error);
1198 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1199 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1200 		pc_unlockfs(fsp);
1201 		return (EIO);
1202 	}
1203 	error = pc_rename(dp, tdp, snm, tnm, ct);
1204 	pc_unlockfs(fsp);
1205 	return (error);
1206 }
1207 
1208 /*ARGSUSED*/
1209 static int
1210 pcfs_mkdir(
1211 	struct vnode *dvp,
1212 	char *nm,
1213 	struct vattr *vap,
1214 	struct vnode **vpp,
1215 	struct cred *cr,
1216 	caller_context_t *ct,
1217 	int flags,
1218 	vsecattr_t *vsecp)
1219 {
1220 	struct pcfs *fsp;
1221 	struct pcnode *pcp;
1222 	int error;
1223 
1224 	fsp = VFSTOPCFS(dvp->v_vfsp);
1225 	if (error = pc_verify(fsp))
1226 		return (error);
1227 	error = pc_lockfs(fsp, 0, 0);
1228 	if (error)
1229 		return (error);
1230 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1231 		pc_unlockfs(fsp);
1232 		return (EIO);
1233 	}
1234 
1235 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1236 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1237 			pc_unlockfs(fsp);
1238 			return (EACCES);
1239 		}
1240 	}
1241 
1242 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1243 
1244 	if (!error) {
1245 		pcp -> pc_flags |= PC_EXTERNAL;
1246 		*vpp = PCTOV(pcp);
1247 	} else if (error == EEXIST) {
1248 		VN_RELE(PCTOV(pcp));
1249 	}
1250 	pc_unlockfs(fsp);
1251 	return (error);
1252 }
1253 
1254 /*ARGSUSED*/
1255 static int
1256 pcfs_rmdir(
1257 	struct vnode *dvp,
1258 	char *nm,
1259 	struct vnode *cdir,
1260 	struct cred *cr,
1261 	caller_context_t *ct,
1262 	int flags)
1263 {
1264 	struct pcfs *fsp;
1265 	struct pcnode *pcp;
1266 	int error;
1267 
1268 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1269 	if (error = pc_verify(fsp))
1270 		return (error);
1271 	if (error = pc_lockfs(fsp, 0, 0))
1272 		return (error);
1273 
1274 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1275 		pc_unlockfs(fsp);
1276 		return (EIO);
1277 	}
1278 
1279 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1280 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1281 			pc_unlockfs(fsp);
1282 			return (EACCES);
1283 		}
1284 	}
1285 
1286 	error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1287 	pc_unlockfs(fsp);
1288 	return (error);
1289 }
1290 
/*
 * read entries in a directory.
 * we must convert pc format to unix format
 *
 * The uio offset is expressed in units of on-disk directory slots
 * (sizeof (struct pcdir)) and must be slot-aligned; exactly one iovec is
 * accepted, otherwise EINVAL is returned.  In the root directory two extra
 * leading slots are synthesized for "." and "..".
 *
 * *eofp (when non-NULL) is cleared on entry and set to 1 when the scan hits
 * an unused slot or pc_blkatoff() reports ENOENT.  Returns EIO after a forced
 * unmount or when the directory's pcnode is missing/invalid, ENOSPC when a
 * synthesized root entry does not fit in the caller's buffer, otherwise the
 * error from pc_verify(), pc_lockfs() or pc_blkatoff().
 */

/*ARGSUSED*/
static int
pcfs_readdir(
	struct vnode *dvp,
	struct uio *uiop,
	struct cred *cr,
	int *eofp,
	caller_context_t *ct,
	int flags)
{
	struct pcnode *pcp;
	struct pcfs *fsp;
	struct pcdir *ep;
	struct buf *bp = NULL;
	offset_t offset;
	int boff;
	struct pc_dirent lbp;
	struct pc_dirent *ld = &lbp;
	int error;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if ((uiop->uio_iovcnt != 1) ||
	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
		return (EINVAL);
	}
	fsp = VFSTOPCFS(dvp->v_vfsp);
	/*
	 * verify that the dp is still valid on the disk
	 */
	if (error = pc_verify(fsp)) {
		return (error);
	}
	error = pc_lockfs(fsp, 0, 0);
	if (error)
		return (error);
	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
		pc_unlockfs(fsp);
		return (EIO);
	}

	bzero(ld, sizeof (*ld));

	if (eofp != NULL)
		*eofp = 0;
	offset = uiop->uio_loffset;

	if (dvp->v_flag & VROOT) {
		/*
		 * kludge up entries for "." and ".." in the root.
		 * Both use UINT_MAX as their inode number.
		 */
		if (offset == 0) {
			(void) strcpy(ld->d_name, ".");
			ld->d_reclen = DIRENT64_RECLEN(1);
			ld->d_off = (off64_t)sizeof (struct pcdir);
			ld->d_ino = (ino64_t)UINT_MAX;
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			/* NOTE(review): a uiomove() failure is ignored here. */
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		if (offset == sizeof (struct pcdir)) {
			(void) strcpy(ld->d_name, "..");
			ld->d_reclen = DIRENT64_RECLEN(2);
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			ld->d_off = (off64_t)(uiop->uio_loffset +
			    sizeof (struct pcdir));
			ld->d_ino = (ino64_t)UINT_MAX;
			/* NOTE(review): a uiomove() failure is ignored here. */
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		/* Discount the two synthesized slots. */
		offset -= 2 * sizeof (struct pcdir);
		/* offset now has the real offset value into directory file */
	}

	for (;;) {
		/*
		 * (Re)load the directory block when starting a new block,
		 * when no buffer is held yet, or when the in-block offset
		 * lies beyond the data in the current buffer.
		 */
		boff = pc_blkoff(fsp, offset);
		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
			if (bp != NULL) {
				brelse(bp);
				bp = NULL;
			}
			error = pc_blkatoff(pcp, offset, &bp, &ep);
			if (error) {
				/* ENOENT is reported as end-of-directory. */
				if (error == ENOENT) {
					error = 0;
					if (eofp)
						*eofp = 1;
				}
				break;
			}
		}
		/* An unused slot ends the scan. */
		if (ep->pcd_filename[0] == PCD_UNUSED) {
			if (eofp)
				*eofp = 1;
			break;
		}
		/*
		 * Skip erased slots, advancing both the caller-visible
		 * offset and our own directory offset by one slot.
		 */
		if (ep->pcd_filename[0] == PCD_ERASED) {
			uiop->uio_loffset += sizeof (struct pcdir);
			offset += sizeof (struct pcdir);
			ep++;
			continue;
		}
		/*
		 * The helpers below advance ep/offset/bp themselves.  A
		 * nonzero return from either one stops the scan, but that
		 * value is not stored in 'error' and so is not returned.
		 */
		if (PCDL_IS_LFN(ep)) {
			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
			    0)
				break;
			continue;
		}

		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
			break;
	}
	if (bp)
		brelse(bp);
	pc_unlockfs(fsp);
	return (error);
}
1428 
1429 
/*
 * Called from pvn_getpages to get a particular page.  When we are called
 * the pcfs is already locked.
 *
 * If the page at 'off' is not in the page cache it is read from the backing
 * device (one or more synchronous transfers, depending on how pc_bmap()
 * maps the file range) and returned through pl[] via pvn_plist_init().
 * If it is cached, it is looked up with a shared lock and returned as
 * pl[0], with pl[1] set to NULL.
 *
 * Returns EIO after a forced unmount or when the pcnode is missing/invalid,
 * 0 without doing anything when pl is NULL, otherwise 0 or the error from
 * pc_bmap()/biowait().
 */
/*ARGSUSED*/
static int
pcfs_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],		/* NULL if async IO is requested */
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct pcnode *pcp;
	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
	struct vnode *devvp;
	/*
	 * pp is only assigned on the read path or by page_lookup(); the
	 * error path at 'out' that reads it is reachable only from the
	 * read path, after pvn_read_kluster() has set it.
	 */
	page_t *pp;
	page_t *pagefound;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
	    (void *)vp, off, len);

	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
		return (EIO);
	devvp = fsp->pcfs_devvp;

	/* pcfs doesn't do readaheads */
	if (pl == NULL)
		return (0);

	pl[0] = NULL;
	err = 0;
	/*
	 * If the accessed time on the pcnode has not already been
	 * set elsewhere (e.g. for read/setattr) we set the time now.
	 * This gives us approximate access times for mmap'ed files
	 * which are accessed via loads in the user address space.
	 * Nothing is marked on a read-only mount.
	 */
	if ((pcp->pc_flags & PC_ACC) == 0 &&
	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
		pc_mark_acc(fsp, pcp);
	}
reread:
	if ((pagefound = page_exists(vp, off)) == NULL) {
		/*
		 * Need to really do disk IO to get the page(s).
		 */
		struct buf *bp;
		daddr_t lbn, bn;
		u_offset_t io_off;
		size_t io_len;
		u_offset_t lbnoff, xferoffset;
		u_offset_t pgoff;
		uint_t	xfersize;
		int err1;

		/*
		 * lbn:        logical block containing 'off'
		 * lbnoff:     'off' rounded down to a cluster boundary
		 * xferoffset: 'off' rounded down to a sector boundary
		 */
		lbn = pc_lblkno(fsp, off);
		lbnoff = off & ~(fsp->pcfs_clsize - 1);
		xferoffset = off & ~(fsp->pcfs_secsize - 1);

		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
		if (pp == NULL)
			/*
			 * XXX - If pcfs is made MT-hot, this should go
			 * back to reread.
			 */
			panic("pcfs_getapage pvn_read_kluster");

		/*
		 * Fill the page piecewise.  Each pass transfers 'xfersize'
		 * bytes (as adjusted by pc_bmap()) and then advances the
		 * page offset, block number and file offsets by that amount.
		 * The loop stops at the end of the page or of the file.
		 */
		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
		    pgoff += xfersize,
		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
		    lbnoff += xfersize, xferoffset += xfersize) {
			/*
			 * read as many contiguous blocks as possible to
			 * fill this page
			 */
			xfersize = PAGESIZE - pgoff;
			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
			if (err1) {
				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
				err = err1;
				goto out;
			}
			bp = pageio_setup(pp, xfersize, devvp, B_READ);
			bp->b_edev = devvp->v_rdev;
			bp->b_dev = cmpdev(devvp->v_rdev);
			bp->b_blkno = bn + btodt(xferoffset - lbnoff);
			/* b_addr carries the byte offset within the page */
			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
			bp->b_file = vp;
			bp->b_offset = (offset_t)(off + pgoff);

			(void) bdev_strategy(bp);

			lwp_stat_update(LWP_STAT_INBLK, 1);

			/* Always wait for the I/O; keep the first error. */
			if (err == 0)
				err = biowait(bp);
			else
				(void) biowait(bp);
			pageio_done(bp);
			if (err)
				goto out;
		}
		/* Zero whatever part of the page was not read. */
		if (pgoff < PAGESIZE) {
			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
		}
		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
	}
out:
	if (err) {
		if (pp != NULL)
			pvn_read_done(pp, B_ERROR);
		return (err);
	}

	if (pagefound) {
		/*
		 * Page exists in the cache, acquire the "shared"
		 * lock.  If this fails, go back to reread.
		 */
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			goto reread;
		}
		pl[0] = pp;
		pl[1] = NULL;
	}
	return (err);
}
1571 
1572 /*
1573  * Return all the pages from [off..off+len] in given file
1574  */
1575 /* ARGSUSED */
1576 static int
1577 pcfs_getpage(
1578 	struct vnode *vp,
1579 	offset_t off,
1580 	size_t len,
1581 	uint_t *protp,
1582 	page_t *pl[],
1583 	size_t plsz,
1584 	struct seg *seg,
1585 	caddr_t addr,
1586 	enum seg_rw rw,
1587 	struct cred *cr,
1588 	caller_context_t *ct)
1589 {
1590 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1591 	int err;
1592 
1593 	PC_DPRINTF0(6, "pcfs_getpage\n");
1594 	if (err = pc_verify(fsp))
1595 		return (err);
1596 	if (vp->v_flag & VNOMAP)
1597 		return (ENOSYS);
1598 	ASSERT(off <= UINT32_MAX);
1599 	err = pc_lockfs(fsp, 0, 0);
1600 	if (err)
1601 		return (err);
1602 	if (protp != NULL)
1603 		*protp = PROT_ALL;
1604 
1605 	ASSERT((off & PAGEOFFSET) == 0);
1606 	err = pvn_getpages(pcfs_getapage, vp, off, len, protp, pl, plsz,
1607 	    seg, addr, rw, cr);
1608 
1609 	pc_unlockfs(fsp);
1610 	return (err);
1611 }
1612 
1613 
/*
 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
 * If len == 0, do from off to EOF.
 *
 * The normal cases should be len == 0 & off == 0 (entire vp list),
 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
 * (from pageout).
 *
 * B_ASYNC is stripped from 'flags', so all writes issued from here are
 * synchronous.  Returns EIO after a forced unmount or for a missing/invalid
 * pcnode, ENOSYS for VNOMAP vnodes, ENOMEM when called from the pageout
 * process, 0 when there is nothing cached or 'off' is at/after end of file,
 * otherwise the result of the page flush.
 */
/*ARGSUSED*/
static int
pcfs_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr,
	caller_context_t *ct)
{
	struct pcnode *pcp;
	page_t *pp;
	struct pcfs *fsp;
	u_offset_t io_off;
	size_t io_len;
	offset_t eoff;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	fsp = VFSTOPCFS(vp->v_vfsp);

	if (err = pc_verify(fsp))
		return (err);
	if ((pcp = VTOPC(vp)) == NULL) {
		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
		return (EIO);
	}
	if (pcp->pc_flags & PC_INVAL)
		return (EIO);

	if (curproc == proc_pageout) {
		/*
		 * XXX - This is a quick hack to avoid blocking
		 * pageout. Also to avoid pcfs_getapage deadlocking
		 * with putpage when memory is running out,
		 * since we only have one global lock and we don't
		 * support async putpage.
		 * It should be fixed someday.
		 *
		 * Interestingly, this used to be a test of NOMEMWAIT().
		 * We only ever got here once pcfs started supporting
		 * NFS sharing, and then only because the NFS server
		 * threads seem to do writes in sched's process context.
		 * Since everyone else seems to just care about pageout,
		 * the test was changed to look for pageout directly.
		 */
		return (ENOMEM);
	}

	ASSERT(off <= UINT32_MAX);

	flags &= ~B_ASYNC;	/* XXX should fix this later */

	err = pc_lockfs(fsp, 0, 0);
	if (err)
		return (err);
	/* Nothing to do without cached pages or past end of file. */
	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
		pc_unlockfs(fsp);
		return (0);
	}

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off
		 */
		err = pvn_vplist_dirty(vp, off,
		    pcfs_putapage, flags, cr);
	} else {
		eoff = off + len;

		/*
		 * Walk [off, eoff), clipped to the file size.  io_len is
		 * set on every pass: to PAGESIZE when the page is skipped,
		 * or by pcfs_putapage() to the length it handled.
		 */
		for (io_off = off; io_off < eoff &&
		    io_off < pcp->pc_size; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 *
			 * Since B_ASYNC was cleared above, the first
			 * branch is always the one taken here.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			/* No page, or page not dirty: step one page. */
			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = pcfs_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	if (err == 0 && (flags & B_INVAL) &&
	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
		/*
		 * If doing "invalidation", make sure that
		 * all pages on the vnode list are actually
		 * gone.
		 */
		cmn_err(CE_PANIC,
		    "pcfs_putpage: B_INVAL, pages not gone");
	} else if (err) {
		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
	}
	pc_unlockfs(fsp);
	return (err);
}
1751 
/*
 * Write out a single page, possibly klustering adjacent dirty pages.
 *
 * 'pp' is handed to pvn_write_kluster(), which yields the page list and
 * the [io_off, io_off + io_len) range to write.  That range is written to
 * the backing device in one or more synchronous transfers, as mapped by
 * pc_bmap().  On return *offp and *lenp (when non-NULL) hold the range
 * chosen by pvn_write_kluster().
 *
 * If the filesystem is flagged PCFS_IRRECOV no I/O is issued; the pages
 * are completed without B_ERROR and the function returns 0.  Otherwise
 * returns 0 or the first error from pc_bmap()/biowait().
 */
/*ARGSUSED*/
int
pcfs_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	struct pcnode *pcp;
	struct pcfs *fsp;
	struct vnode *devvp;
	size_t io_len;
	daddr_t bn;
	u_offset_t lbn, lbnoff, xferoffset;
	uint_t pgoff, xfersize;
	int err = 0;
	u_offset_t io_off;

	pcp = VTOPC(vp);
	fsp = VFSTOPCFS(vp->v_vfsp);
	devvp = fsp->pcfs_devvp;

	/*
	 * If the modified time on the inode has not already been
	 * set elsewhere (e.g. for write/setattr) and this is not
	 * a call from msync (B_FORCE) we set the time now.
	 * This gives us approximate modified times for mmap'ed files
	 * which are modified via stores in the user address space.
	 *
	 * NOTE(review): the test below also marks the node when B_FORCE
	 * is set, which does not match the "not a call from msync"
	 * wording above -- confirm which of the two is intended.
	 */
	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
		pcp->pc_flags |= PC_MOD;
		pc_mark_mod(fsp, pcp);
	}
	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
	    PAGESIZE, flags);

	/* No I/O on an irrecoverable fs; 'out' completes the pages. */
	if (fsp->pcfs_flags & PCFS_IRRECOV) {
		goto out;
	}

	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);

	/*
	 * lbn:        logical block containing io_off
	 * lbnoff:     io_off rounded down to a cluster boundary
	 * xferoffset: io_off rounded down to a sector boundary
	 */
	lbn = pc_lblkno(fsp, io_off);
	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);

	/*
	 * Write the range piecewise; each pass covers 'xfersize' bytes as
	 * adjusted by pc_bmap().  The loop stops at the end of the range or
	 * of the file.  An I/O error does not stop the loop: later passes
	 * still run, but only the first error is kept in 'err'.
	 */
	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
	    pgoff += xfersize,
	    lbn += howmany(xfersize, fsp->pcfs_clsize),
	    lbnoff += xfersize, xferoffset += xfersize) {

		struct buf *bp;
		int err1;

		/*
		 * write as many contiguous blocks as possible from this page
		 */
		xfersize = io_len - pgoff;
		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
		if (err1) {
			err = err1;
			goto out;
		}
		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
		bp->b_edev = devvp->v_rdev;
		bp->b_dev = cmpdev(devvp->v_rdev);
		bp->b_blkno = bn + btodt(xferoffset - lbnoff);
		/* b_addr carries the byte offset within the page list */
		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
		bp->b_file = vp;
		bp->b_offset = (offset_t)(io_off + pgoff);

		(void) bdev_strategy(bp);

		lwp_stat_update(LWP_STAT_OUBLK, 1);

		if (err == 0)
			err = biowait(bp);
		else
			(void) biowait(bp);
		pageio_done(bp);
	}
	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
	/* Pages are completed; keep 'out' from completing them again. */
	pp = NULL;

out:
	/*
	 * Reached with pp != NULL only via the gotos above: either the
	 * irrecoverable-fs shortcut (complete without B_ERROR) or a
	 * pc_bmap() failure (complete with B_ERROR).
	 */
	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
		pvn_write_done(pp, B_WRITE | flags);
	} else if (err != 0 && pp != NULL) {
		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
	}

	if (offp)
		*offp = io_off;
	if (lenp)
		*lenp = io_len;
		/*
		 * NOTE: despite its indentation, the trace below is not
		 * part of the 'if (lenp)' body.
		 */
		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
		    (void *)vp, (void *)pp, io_off, io_len);
	if (err) {
		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
	}
	return (err);
}
1859 
1860 /*ARGSUSED*/
1861 static int
1862 pcfs_map(
1863 	struct vnode *vp,
1864 	offset_t off,
1865 	struct as *as,
1866 	caddr_t *addrp,
1867 	size_t len,
1868 	uchar_t prot,
1869 	uchar_t maxprot,
1870 	uint_t flags,
1871 	struct cred *cr,
1872 	caller_context_t *ct)
1873 {
1874 	struct segvn_crargs vn_a;
1875 	int error;
1876 
1877 	PC_DPRINTF0(6, "pcfs_map\n");
1878 	if (vp->v_flag & VNOMAP)
1879 		return (ENOSYS);
1880 
1881 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1882 		return (ENXIO);
1883 
1884 	as_rangelock(as);
1885 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
1886 	if (error != 0) {
1887 		as_rangeunlock(as);
1888 		return (error);
1889 	}
1890 
1891 	vn_a.vp = vp;
1892 	vn_a.offset = off;
1893 	vn_a.type = flags & MAP_TYPE;
1894 	vn_a.prot = prot;
1895 	vn_a.maxprot = maxprot;
1896 	vn_a.flags = flags & ~MAP_TYPE;
1897 	vn_a.cred = cr;
1898 	vn_a.amp = NULL;
1899 	vn_a.szc = 0;
1900 	vn_a.lgrp_mem_policy_flags = 0;
1901 
1902 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1903 	as_rangeunlock(as);
1904 	return (error);
1905 }
1906 
1907 /* ARGSUSED */
1908 static int
1909 pcfs_seek(
1910 	struct vnode *vp,
1911 	offset_t ooff,
1912 	offset_t *noffp,
1913 	caller_context_t *ct)
1914 {
1915 	if (*noffp < 0)
1916 		return (EINVAL);
1917 	else if (*noffp > MAXOFFSET_T)
1918 		return (EINVAL);
1919 	else
1920 		return (0);
1921 }
1922 
1923 /* ARGSUSED */
1924 static int
1925 pcfs_addmap(
1926 	struct vnode *vp,
1927 	offset_t off,
1928 	struct as *as,
1929 	caddr_t addr,
1930 	size_t len,
1931 	uchar_t prot,
1932 	uchar_t maxprot,
1933 	uint_t flags,
1934 	struct cred *cr,
1935 	caller_context_t *ct)
1936 {
1937 	if (vp->v_flag & VNOMAP)
1938 		return (ENOSYS);
1939 	return (0);
1940 }
1941 
1942 /*ARGSUSED*/
1943 static int
1944 pcfs_delmap(
1945 	struct vnode *vp,
1946 	offset_t off,
1947 	struct as *as,
1948 	caddr_t addr,
1949 	size_t len,
1950 	uint_t prot,
1951 	uint_t maxprot,
1952 	uint_t flags,
1953 	struct cred *cr,
1954 	caller_context_t *ct)
1955 {
1956 	if (vp->v_flag & VNOMAP)
1957 		return (ENOSYS);
1958 	return (0);
1959 }
1960 
1961 /*
1962  * POSIX pathconf() support.
1963  */
1964 /* ARGSUSED */
1965 static int
1966 pcfs_pathconf(
1967 	struct vnode *vp,
1968 	int cmd,
1969 	ulong_t *valp,
1970 	struct cred *cr,
1971 	caller_context_t *ct)
1972 {
1973 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1974 
1975 	switch (cmd) {
1976 	case _PC_LINK_MAX:
1977 		*valp = 1;
1978 		return (0);
1979 
1980 	case _PC_CASE_BEHAVIOR:
1981 		return (EINVAL);
1982 
1983 	case _PC_FILESIZEBITS:
1984 		/*
1985 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1986 		 * FAT12 can only go up to the maximum filesystem capacity
1987 		 * which is ~509MB.
1988 		 */
1989 		*valp = IS_FAT12(fsp) ? 30 : 33;
1990 		return (0);
1991 
1992 	case _PC_TIMESTAMP_RESOLUTION:
1993 		/*
1994 		 * PCFS keeps track of modification times, it its own
1995 		 * internal format, to a resolution of 2 seconds.
1996 		 * Since 2000 million is representable in an int32_t
1997 		 * without overflow (or becoming negative), we allow
1998 		 * this value to be returned.
1999 		 */
2000 		*valp = 2000000000L;
2001 		return (0);
2002 
2003 	default:
2004 		return (fs_pathconf(vp, cmd, valp, cr, ct));
2005 	}
2006 
2007 }
2008 
2009 /* ARGSUSED */
2010 static int
2011 pcfs_space(
2012 	struct vnode *vp,
2013 	int cmd,
2014 	struct flock64 *bfp,
2015 	int flag,
2016 	offset_t offset,
2017 	cred_t *cr,
2018 	caller_context_t *ct)
2019 {
2020 	struct vattr vattr;
2021 	int error;
2022 
2023 	if (cmd != F_FREESP)
2024 		return (EINVAL);
2025 
2026 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2027 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2028 			return (EFBIG);
2029 		/*
2030 		 * we only support the special case of l_len == 0,
2031 		 * meaning free to end of file at this moment.
2032 		 */
2033 		if (bfp->l_len != 0)
2034 			return (EINVAL);
2035 		vattr.va_mask = AT_SIZE;
2036 		vattr.va_size = bfp->l_start;
2037 		error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2038 	}
2039 	return (error);
2040 }
2041 
2042 /*
2043  * Break up 'len' chars from 'buf' into a long file name chunk.
2044  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2045  */
2046 void
2047 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2048 {
2049 	int	i;
2050 
2051 	ASSERT(buf != NULL);
2052 
2053 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2054 		if (len > 0) {
2055 			ep->pcdl_firstfilename[i] = *buf++;
2056 			ep->pcdl_firstfilename[i + 1] = *buf++;
2057 			len -= 2;
2058 		} else {
2059 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2060 			ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2061 		}
2062 	}
2063 
2064 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2065 		if (len > 0) {
2066 			ep->pcdl_secondfilename[i] = *buf++;
2067 			ep->pcdl_secondfilename[i + 1] = *buf++;
2068 			len -= 2;
2069 		} else {
2070 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2071 			ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2072 		}
2073 	}
2074 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2075 		if (len > 0) {
2076 			ep->pcdl_thirdfilename[i] = *buf++;
2077 			ep->pcdl_thirdfilename[i + 1] = *buf++;
2078 			len -= 2;
2079 		} else {
2080 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2081 			ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2082 		}
2083 	}
2084 }
2085 
2086 /*
2087  * Extract the characters from the long filename chunk into 'buf'.
2088  * Return the number of characters extracted.
2089  */
2090 static int
2091 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf)
2092 {
2093 	char 	*tmp = buf;
2094 	int	i;
2095 
2096 	/* Copy all the names, no filtering now */
2097 
2098 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2099 		*tmp = ep->pcdl_firstfilename[i];
2100 		*(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2101 
2102 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2103 			return (tmp - buf);
2104 	}
2105 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2106 		*tmp = ep->pcdl_secondfilename[i];
2107 		*(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2108 
2109 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2110 			return (tmp - buf);
2111 	}
2112 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2113 		*tmp = ep->pcdl_thirdfilename[i];
2114 		*(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2115 
2116 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2117 			return (tmp - buf);
2118 	}
2119 	return (tmp - buf);
2120 }
2121 
2122 
2123 /*
2124  * Checksum the passed in short filename.
2125  * This is used to validate each component of the long name to make
2126  * sure the long name is valid (it hasn't been "detached" from the
2127  * short filename). This algorithm was found in FreeBSD.
2128  * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2129  */
2130 
2131 uchar_t
2132 pc_checksum_long_fn(char *name, char *ext)
2133 {
2134 	uchar_t c;
2135 	char	b[11];
2136 
2137 	bcopy(name, b, 8);
2138 	bcopy(ext, b+8, 3);
2139 
2140 	c = b[0];
2141 	c = ((c << 7) | (c >> 1)) + b[1];
2142 	c = ((c << 7) | (c >> 1)) + b[2];
2143 	c = ((c << 7) | (c >> 1)) + b[3];
2144 	c = ((c << 7) | (c >> 1)) + b[4];
2145 	c = ((c << 7) | (c >> 1)) + b[5];
2146 	c = ((c << 7) | (c >> 1)) + b[6];
2147 	c = ((c << 7) | (c >> 1)) + b[7];
2148 	c = ((c << 7) | (c >> 1)) + b[8];
2149 	c = ((c << 7) | (c >> 1)) + b[9];
2150 	c = ((c << 7) | (c >> 1)) + b[10];
2151 
2152 	return (c);
2153 }
2154 
2155 /*
2156  * Read a chunk of long filename entries into 'namep'.
2157  * Return with offset pointing to short entry (on success), or next
2158  * entry to read (if this wasn't a valid lfn really).
2159  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2160  * a long filename.
2161  *
2162  * Can also be called with a NULL namep, in which case it just returns
2163  * whether this was really a valid long filename and consumes it
2164  * (used by pc_dirempty()).
2165  */
2166 int
2167 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2168     struct pcdir **epp, offset_t *offset, struct buf **bp)
2169 {
2170 	struct pcdir *ep = *epp;
2171 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2172 	struct vnode *dvp = PCTOV(pcp);
2173 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2174 	char	*lfn;
2175 	char	*lfn_base;
2176 	int	boff;
2177 	int	i, cs;
2178 	char	*buf;
2179 	uchar_t	cksum;
2180 	int	detached = 0;
2181 	int	error = 0;
2182 	int	foldcase;
2183 	int	count = 0;
2184 	size_t	u16l = 0, u8l = 0;
2185 	char	*outbuf;
2186 	size_t	ret, inlen, outlen;
2187 
2188 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2189 	lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2190 	lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2191 	*lfn = '\0';
2192 	*(lfn + 1) = '\0';
2193 	cksum = lep->pcdl_checksum;
2194 
2195 	buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2196 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2197 		/* read next block if necessary */
2198 		boff = pc_blkoff(fsp, *offset);
2199 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2200 			if (*bp != NULL) {
2201 				brelse(*bp);
2202 				*bp = NULL;
2203 			}
2204 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2205 			if (error) {
2206 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2207 				kmem_free(buf, PCMAXNAM_UTF16);
2208 				return (error);
2209 			}
2210 			lep = (struct pcdir_lfn *)ep;
2211 		}
2212 		/* can this happen? Bad fs? */
2213 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2214 			detached = 1;
2215 			break;
2216 		}
2217 		if (cksum != lep->pcdl_checksum)
2218 			detached = 1;
2219 		/* process current entry */
2220 		cs = get_long_fn_chunk(lep, buf);
2221 		count += cs;
2222 		for (; cs > 0; cs--) {
2223 			/* see if we underflow */
2224 			if (lfn >= lfn_base)
2225 				*--lfn = buf[cs - 1];
2226 			else
2227 				detached = 1;
2228 		}
2229 		lep++;
2230 		*offset += sizeof (struct pcdir);
2231 	}
2232 	kmem_free(buf, PCMAXNAM_UTF16);
2233 	/* read next block if necessary */
2234 	boff = pc_blkoff(fsp, *offset);
2235 	ep = (struct pcdir *)lep;
2236 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2237 		if (*bp != NULL) {
2238 			brelse(*bp);
2239 			*bp = NULL;
2240 		}
2241 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2242 		if (error) {
2243 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2244 			return (error);
2245 		}
2246 	}
2247 	/* should be on the short one */
2248 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2249 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2250 		detached = 1;
2251 	}
2252 	if (detached ||
2253 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2254 	    !pc_valid_long_fn(lfn, 0)) {
2255 		/*
2256 		 * process current entry again. This may end up another lfn
2257 		 * or a short name.
2258 		 */
2259 		*epp = ep;
2260 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2261 		return (EINVAL);
2262 	}
2263 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2264 		/*
2265 		 * Don't display label because it may contain
2266 		 * funny characters.
2267 		 */
2268 		*offset += sizeof (struct pcdir);
2269 		ep++;
2270 		*epp = ep;
2271 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2272 		return (EINVAL);
2273 	}
2274 	if (namep) {
2275 		u16l = count / 2;
2276 		u8l = PCMAXNAMLEN;
2277 		error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2278 		    (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2279 		/*
2280 		 * uconv_u16tou8() will catch conversion errors including
2281 		 * the case where there is not enough room to write the
2282 		 * converted result and the u8l will never go over the given
2283 		 * PCMAXNAMLEN.
2284 		 */
2285 		if (error != 0) {
2286 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2287 			return (EINVAL);
2288 		}
2289 		namep[u8l] = '\0';
2290 		if (foldcase) {
2291 			inlen = strlen(namep);
2292 			outlen = PCMAXNAMLEN;
2293 			outbuf = kmem_alloc(PCMAXNAMLEN + 1, KM_SLEEP);
2294 			ret = u8_textprep_str(namep, &inlen, outbuf,
2295 			    &outlen, U8_TEXTPREP_TOLOWER, U8_UNICODE_LATEST,
2296 			    &error);
2297 			if (ret == -1) {
2298 				kmem_free(outbuf, PCMAXNAMLEN + 1);
2299 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2300 				return (EINVAL);
2301 			}
2302 			outbuf[PCMAXNAMLEN - outlen] = '\0';
2303 			(void) strncpy(namep, outbuf, PCMAXNAMLEN + 1);
2304 			kmem_free(outbuf, PCMAXNAMLEN + 1);
2305 		}
2306 	}
2307 	kmem_free(lfn_base, PCMAXNAM_UTF16);
2308 	*epp = ep;
2309 	return (0);
2310 }
2311 /*
2312  * Read a long filename into the pc_dirent structure and copy it out.
2313  */
2314 int
2315 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2316     struct pcdir **epp, offset_t *offset, struct buf **bp)
2317 {
2318 	struct pcdir *ep;
2319 	struct pcnode *pcp = VTOPC(dvp);
2320 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2321 	offset_t uiooffset = uiop->uio_loffset;
2322 	int	error = 0;
2323 	offset_t oldoffset;
2324 
2325 	oldoffset = *offset;
2326 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2327 	if (error) {
2328 		if (error == EINVAL) {
2329 			uiop->uio_loffset += *offset - oldoffset;
2330 			return (0);
2331 		} else
2332 			return (error);
2333 	}
2334 
2335 	ep = *epp;
2336 	uiop->uio_loffset += *offset - oldoffset;
2337 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2338 	if (ld->d_reclen > uiop->uio_resid) {
2339 		uiop->uio_loffset = uiooffset;
2340 		return (ENOSPC);
2341 	}
2342 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2343 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2344 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2345 	    pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2346 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2347 	uiop->uio_loffset = ld->d_off;
2348 	*offset += sizeof (struct pcdir);
2349 	ep++;
2350 	*epp = ep;
2351 	return (0);
2352 }
2353 
2354 /*
2355  * Read a short filename into the pc_dirent structure and copy it out.
2356  */
2357 int
2358 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2359     struct pcdir **epp, offset_t *offset, struct buf **bp)
2360 {
2361 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2362 	int	boff = pc_blkoff(fsp, *offset);
2363 	struct pcdir *ep = *epp;
2364 	offset_t	oldoffset = uiop->uio_loffset;
2365 	int	error;
2366 	int	foldcase;
2367 
2368 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2369 		uiop->uio_loffset += sizeof (struct pcdir);
2370 		*offset += sizeof (struct pcdir);
2371 		ep++;
2372 		*epp = ep;
2373 		return (0);
2374 	}
2375 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2376 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2377 	    pc_direntpersec(fsp));
2378 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2379 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2380 	    &ep->pcd_ext[0], foldcase);
2381 	if (error == 0) {
2382 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2383 		if (ld->d_reclen > uiop->uio_resid) {
2384 			uiop->uio_loffset = oldoffset;
2385 			return (ENOSPC);
2386 		}
2387 		ld->d_off = (off64_t)(uiop->uio_loffset +
2388 		    sizeof (struct pcdir));
2389 		(void) uiomove((caddr_t)ld,
2390 		    ld->d_reclen, UIO_READ, uiop);
2391 		uiop->uio_loffset = ld->d_off;
2392 	} else {
2393 		uiop->uio_loffset += sizeof (struct pcdir);
2394 	}
2395 	*offset += sizeof (struct pcdir);
2396 	ep++;
2397 	*epp = ep;
2398 	return (0);
2399 }
2400 
2401 /* ARGSUSED */
2402 static int
2403 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2404 {
2405 	struct pc_fid *pcfid;
2406 	struct pcnode *pcp;
2407 	struct pcfs	*fsp;
2408 	int	error;
2409 
2410 	fsp = VFSTOPCFS(vp->v_vfsp);
2411 	if (fsp == NULL)
2412 		return (EIO);
2413 	error = pc_lockfs(fsp, 0, 0);
2414 	if (error)
2415 		return (error);
2416 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2417 		pc_unlockfs(fsp);
2418 		return (EIO);
2419 	}
2420 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2421 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2422 		pc_unlockfs(fsp);
2423 		return (ENOSPC);
2424 	}
2425 
2426 	pcfid = (struct pc_fid *)fidp;
2427 	bzero(pcfid, sizeof (struct pc_fid));
2428 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2429 	if (vp->v_flag & VROOT) {
2430 		pcfid->pcfid_block = 0;
2431 		pcfid->pcfid_offset = 0;
2432 		pcfid->pcfid_ctime = 0;
2433 	} else {
2434 		pcfid->pcfid_block = pcp->pc_eblkno;
2435 		pcfid->pcfid_offset = pcp->pc_eoffset;
2436 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2437 	}
2438 	pc_unlockfs(fsp);
2439 	return (0);
2440 }
2441