xref: /titanic_51/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 08e958452532187958dbff5121d3010ef1de98ee)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/buf.h>
37 #include <sys/stat.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/dirent.h>
41 #include <sys/vnode.h>
42 #include <sys/proc.h>
43 #include <sys/file.h>
44 #include <sys/fcntl.h>
45 #include <sys/uio.h>
46 #include <sys/fs/pc_label.h>
47 #include <sys/fs/pc_fs.h>
48 #include <sys/fs/pc_dir.h>
49 #include <sys/fs/pc_node.h>
50 #include <sys/mman.h>
51 #include <sys/pathname.h>
52 #include <sys/vmsystm.h>
53 #include <sys/cmn_err.h>
54 #include <sys/debug.h>
55 #include <sys/statvfs.h>
56 #include <sys/unistd.h>
57 #include <sys/kmem.h>
58 #include <sys/conf.h>
59 #include <sys/flock.h>
60 #include <sys/policy.h>
61 #include <sys/sdt.h>
62 #include <sys/sunddi.h>
63 #include <sys/types.h>
64 #include <sys/errno.h>
65 
66 #include <vm/seg.h>
67 #include <vm/page.h>
68 #include <vm/pvn.h>
69 #include <vm/seg_map.h>
70 #include <vm/seg_vn.h>
71 #include <vm/hat.h>
72 #include <vm/as.h>
73 #include <vm/seg_kmem.h>
74 
75 #include <fs/fs_subr.h>
76 
77 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
78 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
79 	caller_context_t *ct);
80 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
81 	caller_context_t *);
82 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
83 	caller_context_t *);
84 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
85 	caller_context_t *ct);
86 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
87 	caller_context_t *);
88 static int pcfs_access(struct vnode *, int, int, struct cred *,
89 	caller_context_t *ct);
90 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
91 	struct pathname *, int, struct vnode *, struct cred *,
92 	caller_context_t *, int *, pathname_t *);
93 static int pcfs_create(struct vnode *, char *, struct vattr *,
94 	enum vcexcl, int mode, struct vnode **, struct cred *, int,
95 	caller_context_t *, vsecattr_t *);
96 static int pcfs_remove(struct vnode *, char *, struct cred *,
97 	caller_context_t *, int);
98 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
99 	struct cred *, caller_context_t *, int);
100 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
101 	struct cred *, caller_context_t *, int, vsecattr_t *);
102 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
103 	caller_context_t *, int);
104 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
105 	caller_context_t *, int);
106 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
107 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
108 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
109 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
110 	offset_t, cred_t *, caller_context_t *);
111 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
112 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
113 	caller_context_t *);
114 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
115 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
116 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
117 	caller_context_t *);
118 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
119 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
120 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
121 	size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
122 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
123 	size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
124 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
125 	caller_context_t *);
126 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
127 	caller_context_t *);
128 
129 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
130 	struct cred *);
131 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
132 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf);
133 
134 extern krwlock_t pcnodes_lock;
135 
/*
 * Round r up to the next multiple of sizeof (long long).
 */
#define	lround(r)	\
	(((r) + (sizeof (long long) - 1)) & ~(sizeof (long long) - 1))
137 
/*
 * Operation vectors for pcfs vnodes: one for regular files and one for
 * directories.
 */
struct vnodeops *pcfs_fvnodeops;	/* files */
struct vnodeops *pcfs_dvnodeops;	/* directories */
143 
144 const fs_operation_def_t pcfs_fvnodeops_template[] = {
145 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
146 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
147 	VOPNAME_READ,		{ .vop_read = pcfs_read },
148 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
149 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
150 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
151 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
152 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
153 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
154 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
155 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
156 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
157 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
158 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
159 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
160 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
161 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
162 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
163 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
164 	NULL,			NULL
165 };
166 
167 const fs_operation_def_t pcfs_dvnodeops_template[] = {
168 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
169 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
170 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
171 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
172 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
173 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
174 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
175 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
176 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
177 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
178 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
179 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
180 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
181 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
182 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
183 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
184 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
185 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
186 	NULL,			NULL
187 };
188 
189 
190 /*ARGSUSED*/
191 static int
192 pcfs_open(
193 	struct vnode **vpp,
194 	int flag,
195 	struct cred *cr,
196 	caller_context_t *ct)
197 {
198 	return (0);
199 }
200 
201 /*
202  * files are sync'ed on close to keep floppy up to date
203  */
204 
205 /*ARGSUSED*/
206 static int
207 pcfs_close(
208 	struct vnode *vp,
209 	int flag,
210 	int count,
211 	offset_t offset,
212 	struct cred *cr,
213 	caller_context_t *ct)
214 {
215 	return (0);
216 }
217 
218 /*ARGSUSED*/
219 static int
220 pcfs_read(
221 	struct vnode *vp,
222 	struct uio *uiop,
223 	int ioflag,
224 	struct cred *cr,
225 	struct caller_context *ct)
226 {
227 	struct pcfs *fsp;
228 	struct pcnode *pcp;
229 	int error;
230 
231 	fsp = VFSTOPCFS(vp->v_vfsp);
232 	if (error = pc_verify(fsp))
233 		return (error);
234 	error = pc_lockfs(fsp, 0, 0);
235 	if (error)
236 		return (error);
237 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
238 		pc_unlockfs(fsp);
239 		return (EIO);
240 	}
241 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
242 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
243 		pc_mark_acc(fsp, pcp);
244 	}
245 	pc_unlockfs(fsp);
246 	if (error) {
247 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
248 	}
249 	return (error);
250 }
251 
252 /*ARGSUSED*/
253 static int
254 pcfs_write(
255 	struct vnode *vp,
256 	struct uio *uiop,
257 	int ioflag,
258 	struct cred *cr,
259 	struct caller_context *ct)
260 {
261 	struct pcfs *fsp;
262 	struct pcnode *pcp;
263 	int error;
264 
265 	fsp = VFSTOPCFS(vp->v_vfsp);
266 	if (error = pc_verify(fsp))
267 		return (error);
268 	error = pc_lockfs(fsp, 0, 0);
269 	if (error)
270 		return (error);
271 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
272 		pc_unlockfs(fsp);
273 		return (EIO);
274 	}
275 	if (ioflag & FAPPEND) {
276 		/*
277 		 * in append mode start at end of file.
278 		 */
279 		uiop->uio_loffset = pcp->pc_size;
280 	}
281 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
282 	pcp->pc_flags |= PC_MOD;
283 	pc_mark_mod(fsp, pcp);
284 	if (ioflag & (FSYNC|FDSYNC))
285 		(void) pc_nodeupdate(pcp);
286 
287 	pc_unlockfs(fsp);
288 	if (error) {
289 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
290 	}
291 	return (error);
292 }
293 
294 /*
295  * read or write a vnode
296  */
297 static int
298 rwpcp(
299 	struct pcnode *pcp,
300 	struct uio *uio,
301 	enum uio_rw rw,
302 	int ioflag)
303 {
304 	struct vnode *vp = PCTOV(pcp);
305 	struct pcfs *fsp;
306 	daddr_t bn;			/* phys block number */
307 	int n;
308 	offset_t off;
309 	caddr_t base;
310 	int mapon, pagecreate;
311 	int newpage;
312 	int error = 0;
313 	rlim64_t limit = uio->uio_llimit;
314 	int oresid = uio->uio_resid;
315 
316 	/*
317 	 * If the filesystem was umounted by force, return immediately.
318 	 */
319 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
320 		return (EIO);
321 
322 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
323 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
324 
325 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
326 	ASSERT(vp->v_type == VREG);
327 
328 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
329 		return (0);
330 	}
331 
332 	if (uio->uio_loffset < 0)
333 		return (EINVAL);
334 
335 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
336 		limit = MAXOFFSET_T;
337 
338 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
339 		proc_t *p = ttoproc(curthread);
340 
341 		mutex_enter(&p->p_lock);
342 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
343 		    p, RCA_UNSAFE_SIGINFO);
344 		mutex_exit(&p->p_lock);
345 		return (EFBIG);
346 	}
347 
348 	/* the following condition will occur only for write */
349 
350 	if (uio->uio_loffset >= UINT32_MAX)
351 		return (EFBIG);
352 
353 	if (uio->uio_resid == 0)
354 		return (0);
355 
356 	if (limit > UINT32_MAX)
357 		limit = UINT32_MAX;
358 
359 	fsp = VFSTOPCFS(vp->v_vfsp);
360 	if (fsp->pcfs_flags & PCFS_IRRECOV)
361 		return (EIO);
362 
363 	do {
364 		/*
365 		 * Assignments to "n" in this block may appear
366 		 * to overflow in some cases.  However, after careful
367 		 * analysis it was determined that all assignments to
368 		 * "n" serve only to make "n" smaller.  Since "n"
369 		 * starts out as no larger than MAXBSIZE, "int" is
370 		 * safe.
371 		 */
372 		off = uio->uio_loffset & MAXBMASK;
373 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
374 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
375 		if (rw == UIO_READ) {
376 			offset_t diff;
377 
378 			diff = pcp->pc_size - uio->uio_loffset;
379 			if (diff <= 0)
380 				return (0);
381 			if (diff < n)
382 				n = (int)diff;
383 		}
384 		/*
385 		 * Compare limit with the actual offset + n, not the
386 		 * rounded down offset "off" or we will overflow
387 		 * the maximum file size after all.
388 		 */
389 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
390 			if (uio->uio_loffset >= limit) {
391 				error = EFBIG;
392 				break;
393 			}
394 			n = (int)(limit - uio->uio_loffset);
395 		}
396 
397 		/*
398 		 * Touch the page and fault it in if it is not in
399 		 * core before segmap_getmapflt can lock it. This
400 		 * is to avoid the deadlock if the buffer is mapped
401 		 * to the same file through mmap which we want to
402 		 * write to.
403 		 */
404 		uio_prefaultpages((long)n, uio);
405 
406 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
407 		pagecreate = 0;
408 		newpage = 0;
409 		if (rw == UIO_WRITE) {
410 			/*
411 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
412 			 * with one page at a time, instead of one MAXBSIZE
413 			 * at a time, so we can fully explore pagecreate
414 			 * optimization??
415 			 */
416 			if (uio->uio_loffset + n > pcp->pc_size) {
417 				uint_t ncl, lcn;
418 
419 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
420 				    fsp->pcfs_clsize);
421 				if (uio->uio_loffset > pcp->pc_size &&
422 				    ncl < (uint_t)howmany(uio->uio_loffset,
423 				    fsp->pcfs_clsize)) {
424 					/*
425 					 * Allocate and zerofill skipped
426 					 * clusters. This may not be worth the
427 					 * effort since a small lseek beyond
428 					 * eof but still within the cluster
429 					 * will not be zeroed out.
430 					 */
431 					lcn = pc_lblkno(fsp, uio->uio_loffset);
432 					error = pc_balloc(pcp, (daddr_t)lcn,
433 					    1, &bn);
434 					ncl = lcn + 1;
435 				}
436 				if (!error &&
437 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
438 				    fsp->pcfs_clsize))
439 					/*
440 					 * allocate clusters w/o zerofill
441 					 */
442 					error = pc_balloc(pcp,
443 					    (daddr_t)pc_lblkno(fsp,
444 					    uio->uio_loffset + n - 1),
445 					    0, &bn);
446 
447 				pcp->pc_flags |= PC_CHG;
448 
449 				if (error) {
450 					pc_cluster32_t ncl;
451 					int nerror;
452 
453 					/*
454 					 * figure out new file size from
455 					 * cluster chain length. If this
456 					 * is detected to loop, the chain
457 					 * is corrupted and we'd better
458 					 * keep our fingers off that file.
459 					 */
460 					nerror = pc_fileclsize(fsp,
461 					    pcp->pc_scluster, &ncl);
462 					if (nerror) {
463 						PC_DPRINTF1(2,
464 						    "cluster chain "
465 						    "corruption, "
466 						    "scluster=%d\n",
467 						    pcp->pc_scluster);
468 						pcp->pc_size = 0;
469 						pcp->pc_flags |= PC_INVAL;
470 						error = nerror;
471 						(void) segmap_release(segkmap,
472 						    base, 0);
473 						break;
474 					}
475 					pcp->pc_size = fsp->pcfs_clsize * ncl;
476 
477 					if (error == ENOSPC &&
478 					    (pcp->pc_size - uio->uio_loffset)
479 					    > 0) {
480 						PC_DPRINTF3(2, "rwpcp ENOSPC "
481 						    "off=%lld n=%d size=%d\n",
482 						    uio->uio_loffset,
483 						    n, pcp->pc_size);
484 						n = (int)(pcp->pc_size -
485 						    uio->uio_loffset);
486 					} else {
487 						PC_DPRINTF1(1,
488 						    "rwpcp error1=%d\n", error);
489 						(void) segmap_release(segkmap,
490 						    base, 0);
491 						break;
492 					}
493 				} else {
494 					pcp->pc_size =
495 					    (uint_t)(uio->uio_loffset + n);
496 				}
497 				if (mapon == 0) {
498 					newpage = segmap_pagecreate(segkmap,
499 					    base, (size_t)n, 0);
500 					pagecreate = 1;
501 				}
502 			} else if (n == MAXBSIZE) {
503 				newpage = segmap_pagecreate(segkmap, base,
504 				    (size_t)n, 0);
505 				pagecreate = 1;
506 			}
507 		}
508 		error = uiomove(base + mapon, (size_t)n, rw, uio);
509 
510 		if (pagecreate && uio->uio_loffset <
511 		    roundup(off + mapon + n, PAGESIZE)) {
512 			offset_t nzero, nmoved;
513 
514 			nmoved = uio->uio_loffset - (off + mapon);
515 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
516 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
517 		}
518 
519 		/*
520 		 * Unlock the pages which have been allocated by
521 		 * page_create_va() in segmap_pagecreate().
522 		 */
523 		if (newpage) {
524 			segmap_pageunlock(segkmap, base, (size_t)n,
525 			    rw == UIO_WRITE ? S_WRITE : S_READ);
526 		}
527 
528 		if (error) {
529 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
530 			/*
531 			 * If we failed on a write, we may have already
532 			 * allocated file blocks as well as pages.  It's hard
533 			 * to undo the block allocation, but we must be sure
534 			 * to invalidate any pages that may have been
535 			 * allocated.
536 			 */
537 			if (rw == UIO_WRITE)
538 				(void) segmap_release(segkmap, base, SM_INVAL);
539 			else
540 				(void) segmap_release(segkmap, base, 0);
541 		} else {
542 			uint_t flags = 0;
543 
544 			if (rw == UIO_READ) {
545 				if (n + mapon == MAXBSIZE ||
546 				    uio->uio_loffset == pcp->pc_size)
547 					flags = SM_DONTNEED;
548 			} else if (ioflag & (FSYNC|FDSYNC)) {
549 				flags = SM_WRITE;
550 			} else if (n + mapon == MAXBSIZE) {
551 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
552 			}
553 			error = segmap_release(segkmap, base, flags);
554 		}
555 
556 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
557 
558 	if (oresid != uio->uio_resid)
559 		error = 0;
560 	return (error);
561 }
562 
563 /*ARGSUSED*/
564 static int
565 pcfs_getattr(
566 	struct vnode *vp,
567 	struct vattr *vap,
568 	int flags,
569 	struct cred *cr,
570 	caller_context_t *ct)
571 {
572 	struct pcnode *pcp;
573 	struct pcfs *fsp;
574 	int error;
575 	char attr;
576 	struct pctime atime;
577 	int64_t unixtime;
578 
579 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
580 
581 	fsp = VFSTOPCFS(vp->v_vfsp);
582 	error = pc_lockfs(fsp, 0, 0);
583 	if (error)
584 		return (error);
585 
586 	/*
587 	 * Note that we don't check for "invalid node" (PC_INVAL) here
588 	 * only in order to make stat() succeed. We allow no I/O on such
589 	 * a node, but do allow to check for its existence.
590 	 */
591 	if ((pcp = VTOPC(vp)) == NULL) {
592 		pc_unlockfs(fsp);
593 		return (EIO);
594 	}
595 	/*
596 	 * Copy from pcnode.
597 	 */
598 	vap->va_type = vp->v_type;
599 	attr = pcp->pc_entry.pcd_attr;
600 	if (PCA_IS_HIDDEN(fsp, attr))
601 		vap->va_mode = 0;
602 	else if (attr & PCA_LABEL)
603 		vap->va_mode = 0444;
604 	else if (attr & PCA_RDONLY)
605 		vap->va_mode = 0555;
606 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
607 		vap->va_mode = 0755;
608 	} else {
609 		vap->va_mode = 0777;
610 	}
611 
612 	if (attr & PCA_DIR)
613 		vap->va_mode |= S_IFDIR;
614 	else
615 		vap->va_mode |= S_IFREG;
616 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
617 		vap->va_uid = 0;
618 		vap->va_gid = 0;
619 	} else {
620 		vap->va_uid = crgetuid(cr);
621 		vap->va_gid = crgetgid(cr);
622 	}
623 	vap->va_fsid = vp->v_vfsp->vfs_dev;
624 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
625 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
626 	    pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
627 	vap->va_nlink = 1;
628 	vap->va_size = (u_offset_t)pcp->pc_size;
629 	vap->va_rdev = 0;
630 	vap->va_nblocks =
631 	    (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
632 	vap->va_blksize = fsp->pcfs_clsize;
633 
634 	/*
635 	 * FAT root directories have no timestamps. In order not to return
636 	 * "time zero" (1/1/1970), we record the time of the mount and give
637 	 * that. This breaks less expectations.
638 	 */
639 	if (vp->v_flag & VROOT) {
640 		vap->va_mtime = fsp->pcfs_mounttime;
641 		vap->va_atime = fsp->pcfs_mounttime;
642 		vap->va_ctime = fsp->pcfs_mounttime;
643 		pc_unlockfs(fsp);
644 		return (0);
645 	}
646 
647 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
648 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
649 		if (unixtime > INT32_MAX)
650 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
651 		unixtime = MIN(unixtime, INT32_MAX);
652 	} else if (unixtime > INT32_MAX &&
653 	    get_udatamodel() == DATAMODEL_ILP32) {
654 		pc_unlockfs(fsp);
655 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
656 		return (EOVERFLOW);
657 	}
658 
659 	vap->va_mtime.tv_sec = (time_t)unixtime;
660 	vap->va_mtime.tv_nsec = 0;
661 
662 	/*
663 	 * FAT doesn't know about POSIX ctime.
664 	 * Best approximation is to always set it to mtime.
665 	 */
666 	vap->va_ctime = vap->va_mtime;
667 
668 	/*
669 	 * FAT only stores "last access date". If that's the
670 	 * same as the date of last modification then the time
671 	 * of last access is known. Otherwise, use midnight.
672 	 */
673 	atime.pct_date = pcp->pc_entry.pcd_ladate;
674 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
675 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
676 	else
677 		atime.pct_time = 0;
678 	pc_pcttotv(&atime, &unixtime);
679 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
680 		if (unixtime > INT32_MAX)
681 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
682 		unixtime = MIN(unixtime, INT32_MAX);
683 	} else if (unixtime > INT32_MAX &&
684 	    get_udatamodel() == DATAMODEL_ILP32) {
685 		pc_unlockfs(fsp);
686 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
687 		return (EOVERFLOW);
688 	}
689 
690 	vap->va_atime.tv_sec = (time_t)unixtime;
691 	vap->va_atime.tv_nsec = 0;
692 
693 	pc_unlockfs(fsp);
694 	return (0);
695 }
696 
697 
698 /*ARGSUSED*/
699 static int
700 pcfs_setattr(
701 	struct vnode *vp,
702 	struct vattr *vap,
703 	int flags,
704 	struct cred *cr,
705 	caller_context_t *ct)
706 {
707 	struct pcnode *pcp;
708 	mode_t mask = vap->va_mask;
709 	int error;
710 	struct pcfs *fsp;
711 	timestruc_t now, *timep;
712 
713 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
714 	/*
715 	 * cannot set these attributes
716 	 */
717 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
718 		return (EINVAL);
719 	}
720 	/*
721 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
722 	 * from 'tar' when it tries to set times on a directory, and console
723 	 * printf's on the NFS server when it gets EINVAL back on such a
724 	 * request. One possible problem with that since a directory entry
725 	 * identifies a file, '.' and all the '..' entries in subdirectories
726 	 * may get out of sync when the directory is updated since they're
727 	 * treated like separate files. We could fix that by looking for
728 	 * '.' and giving it the same attributes, and then looking for
729 	 * all the subdirectories and updating '..', but that's pretty
730 	 * expensive for something that doesn't seem likely to matter.
731 	 */
732 	/* can't do some ops on directories anyway */
733 	if ((vp->v_type == VDIR) &&
734 	    (mask & AT_SIZE)) {
735 		return (EINVAL);
736 	}
737 
738 	fsp = VFSTOPCFS(vp->v_vfsp);
739 	error = pc_lockfs(fsp, 0, 0);
740 	if (error)
741 		return (error);
742 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
743 		pc_unlockfs(fsp);
744 		return (EIO);
745 	}
746 
747 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
748 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
749 			pc_unlockfs(fsp);
750 			return (EACCES);
751 		}
752 	}
753 
754 	/*
755 	 * Change file access modes.
756 	 * If nobody has write permission, file is marked readonly.
757 	 * Otherwise file is writable by anyone.
758 	 */
759 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
760 		if ((vap->va_mode & 0222) == 0)
761 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
762 		else
763 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
764 		pcp->pc_flags |= PC_CHG;
765 	}
766 	/*
767 	 * Truncate file. Must have write permission.
768 	 */
769 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
770 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
771 			error = EACCES;
772 			goto out;
773 		}
774 		if (vap->va_size > UINT32_MAX) {
775 			error = EFBIG;
776 			goto out;
777 		}
778 		error = pc_truncate(pcp, (uint_t)vap->va_size);
779 
780 		if (error)
781 			goto out;
782 
783 		if (vap->va_size == 0)
784 			vnevent_truncate(vp, ct);
785 	}
786 	/*
787 	 * Change file modified times.
788 	 */
789 	if (mask & (AT_MTIME | AT_CTIME)) {
790 		/*
791 		 * If SysV-compatible option to set access and
792 		 * modified times if privileged, owner, or write access,
793 		 * use current time rather than va_mtime.
794 		 *
795 		 * XXX - va_mtime.tv_sec == -1 flags this.
796 		 */
797 		timep = &vap->va_mtime;
798 		if (vap->va_mtime.tv_sec == -1) {
799 			gethrestime(&now);
800 			timep = &now;
801 		}
802 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
803 		    timep->tv_sec > INT32_MAX) {
804 			error = EOVERFLOW;
805 			goto out;
806 		}
807 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
808 		if (error)
809 			goto out;
810 		pcp->pc_flags |= PC_CHG;
811 	}
812 	/*
813 	 * Change file access times.
814 	 */
815 	if (mask & AT_ATIME) {
816 		/*
817 		 * If SysV-compatible option to set access and
818 		 * modified times if privileged, owner, or write access,
819 		 * use current time rather than va_mtime.
820 		 *
821 		 * XXX - va_atime.tv_sec == -1 flags this.
822 		 */
823 		struct pctime	atime;
824 
825 		timep = &vap->va_atime;
826 		if (vap->va_atime.tv_sec == -1) {
827 			gethrestime(&now);
828 			timep = &now;
829 		}
830 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
831 		    timep->tv_sec > INT32_MAX) {
832 			error = EOVERFLOW;
833 			goto out;
834 		}
835 		error = pc_tvtopct(timep, &atime);
836 		if (error)
837 			goto out;
838 		pcp->pc_entry.pcd_ladate = atime.pct_date;
839 		pcp->pc_flags |= PC_CHG;
840 	}
841 out:
842 	pc_unlockfs(fsp);
843 	return (error);
844 }
845 
846 
847 /*ARGSUSED*/
848 static int
849 pcfs_access(
850 	struct vnode *vp,
851 	int mode,
852 	int flags,
853 	struct cred *cr,
854 	caller_context_t *ct)
855 {
856 	struct pcnode *pcp;
857 	struct pcfs *fsp;
858 
859 
860 	fsp = VFSTOPCFS(vp->v_vfsp);
861 
862 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
863 		return (EIO);
864 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
865 		return (EACCES);
866 
867 	/*
868 	 * If this is a boot partition, privileged users have full access while
869 	 * others have read-only access.
870 	 */
871 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
872 		if ((mode & VWRITE) &&
873 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
874 			return (EACCES);
875 	}
876 	return (0);
877 }
878 
879 
880 /*ARGSUSED*/
881 static int
882 pcfs_fsync(
883 	struct vnode *vp,
884 	int syncflag,
885 	struct cred *cr,
886 	caller_context_t *ct)
887 {
888 	struct pcfs *fsp;
889 	struct pcnode *pcp;
890 	int error;
891 
892 	fsp = VFSTOPCFS(vp->v_vfsp);
893 	if (error = pc_verify(fsp))
894 		return (error);
895 	error = pc_lockfs(fsp, 0, 0);
896 	if (error)
897 		return (error);
898 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
899 		pc_unlockfs(fsp);
900 		return (EIO);
901 	}
902 	rw_enter(&pcnodes_lock, RW_WRITER);
903 	error = pc_nodesync(pcp);
904 	rw_exit(&pcnodes_lock);
905 	pc_unlockfs(fsp);
906 	return (error);
907 }
908 
909 
910 /*ARGSUSED*/
911 static void
912 pcfs_inactive(
913 	struct vnode *vp,
914 	struct cred *cr,
915 	caller_context_t *ct)
916 {
917 	struct pcnode *pcp;
918 	struct pcfs *fsp;
919 	int error;
920 
921 	fsp = VFSTOPCFS(vp->v_vfsp);
922 	error = pc_lockfs(fsp, 0, 1);
923 
924 	/*
925 	 * If the filesystem was umounted by force, all dirty
926 	 * pages associated with this vnode are invalidated
927 	 * and then the vnode will be freed.
928 	 */
929 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
930 		pcp = VTOPC(vp);
931 		if (vn_has_cached_data(vp)) {
932 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
933 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
934 		}
935 		remque(pcp);
936 		if (error == 0)
937 			pc_unlockfs(fsp);
938 		vn_free(vp);
939 		kmem_free(pcp, sizeof (struct pcnode));
940 		VFS_RELE(PCFSTOVFS(fsp));
941 		return;
942 	}
943 
944 	mutex_enter(&vp->v_lock);
945 	ASSERT(vp->v_count >= 1);
946 	if (vp->v_count > 1) {
947 		vp->v_count--;  /* release our hold from vn_rele */
948 		mutex_exit(&vp->v_lock);
949 		pc_unlockfs(fsp);
950 		return;
951 	}
952 	mutex_exit(&vp->v_lock);
953 
954 	/*
955 	 * Check again to confirm that no intervening I/O error
956 	 * with a subsequent pc_diskchanged() call has released
957 	 * the pcnode. If it has then release the vnode as above.
958 	 */
959 	pcp = VTOPC(vp);
960 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
961 		if (vn_has_cached_data(vp))
962 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
963 			    pcfs_putapage, B_INVAL | B_TRUNC,
964 			    (struct cred *)NULL);
965 	}
966 
967 	if (pcp == NULL) {
968 		vn_free(vp);
969 	} else {
970 		pc_rele(pcp);
971 	}
972 
973 	if (!error)
974 		pc_unlockfs(fsp);
975 }
976 
977 /*ARGSUSED*/
978 static int
979 pcfs_lookup(
980 	struct vnode *dvp,
981 	char *nm,
982 	struct vnode **vpp,
983 	struct pathname *pnp,
984 	int flags,
985 	struct vnode *rdir,
986 	struct cred *cr,
987 	caller_context_t *ct,
988 	int *direntflags,
989 	pathname_t *realpnp)
990 {
991 	struct pcfs *fsp;
992 	struct pcnode *pcp;
993 	int error;
994 
995 	/*
996 	 * If the filesystem was umounted by force, return immediately.
997 	 */
998 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
999 		return (EIO);
1000 
1001 	/*
1002 	 * verify that the dvp is still valid on the disk
1003 	 */
1004 	fsp = VFSTOPCFS(dvp->v_vfsp);
1005 	if (error = pc_verify(fsp))
1006 		return (error);
1007 	error = pc_lockfs(fsp, 0, 0);
1008 	if (error)
1009 		return (error);
1010 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1011 		pc_unlockfs(fsp);
1012 		return (EIO);
1013 	}
1014 	/*
1015 	 * Null component name is a synonym for directory being searched.
1016 	 */
1017 	if (*nm == '\0') {
1018 		VN_HOLD(dvp);
1019 		*vpp = dvp;
1020 		pc_unlockfs(fsp);
1021 		return (0);
1022 	}
1023 
1024 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1025 	if (!error) {
1026 		*vpp = PCTOV(pcp);
1027 		pcp->pc_flags |= PC_EXTERNAL;
1028 	}
1029 	pc_unlockfs(fsp);
1030 	return (error);
1031 }
1032 
1033 
1034 /*ARGSUSED*/
1035 static int
1036 pcfs_create(
1037 	struct vnode *dvp,
1038 	char *nm,
1039 	struct vattr *vap,
1040 	enum vcexcl exclusive,
1041 	int mode,
1042 	struct vnode **vpp,
1043 	struct cred *cr,
1044 	int flag,
1045 	caller_context_t *ct,
1046 	vsecattr_t *vsecp)
1047 {
1048 	int error;
1049 	struct pcnode *pcp;
1050 	struct vnode *vp;
1051 	struct pcfs *fsp;
1052 
1053 	/*
1054 	 * can't create directories. use pcfs_mkdir.
1055 	 * can't create anything other than files.
1056 	 */
1057 	if (vap->va_type == VDIR)
1058 		return (EISDIR);
1059 	else if (vap->va_type != VREG)
1060 		return (EINVAL);
1061 
1062 	pcp = NULL;
1063 	fsp = VFSTOPCFS(dvp->v_vfsp);
1064 	error = pc_lockfs(fsp, 0, 0);
1065 	if (error)
1066 		return (error);
1067 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1068 		pc_unlockfs(fsp);
1069 		return (EIO);
1070 	}
1071 
1072 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1073 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1074 			pc_unlockfs(fsp);
1075 			return (EACCES);
1076 		}
1077 	}
1078 
1079 	if (*nm == '\0') {
1080 		/*
1081 		 * Null component name refers to the directory itself.
1082 		 */
1083 		VN_HOLD(dvp);
1084 		pcp = VTOPC(dvp);
1085 		error = EEXIST;
1086 	} else {
1087 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1088 	}
1089 	/*
1090 	 * if file exists and this is a nonexclusive create,
1091 	 * check for access permissions
1092 	 */
1093 	if (error == EEXIST) {
1094 		vp = PCTOV(pcp);
1095 		if (exclusive == NONEXCL) {
1096 			if (vp->v_type == VDIR) {
1097 				error = EISDIR;
1098 			} else if (mode) {
1099 				error = pcfs_access(PCTOV(pcp), mode, 0,
1100 				    cr, ct);
1101 			} else {
1102 				error = 0;
1103 			}
1104 		}
1105 		if (error) {
1106 			VN_RELE(PCTOV(pcp));
1107 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1108 		    (vap->va_size == 0)) {
1109 			error = pc_truncate(pcp, 0L);
1110 			if (error) {
1111 				VN_RELE(PCTOV(pcp));
1112 			} else {
1113 				vnevent_create(PCTOV(pcp), ct);
1114 			}
1115 		}
1116 	}
1117 	if (error) {
1118 		pc_unlockfs(fsp);
1119 		return (error);
1120 	}
1121 	*vpp = PCTOV(pcp);
1122 	pcp->pc_flags |= PC_EXTERNAL;
1123 	pc_unlockfs(fsp);
1124 	return (error);
1125 }
1126 
1127 /*ARGSUSED*/
1128 static int
1129 pcfs_remove(
1130 	struct vnode *vp,
1131 	char *nm,
1132 	struct cred *cr,
1133 	caller_context_t *ct,
1134 	int flags)
1135 {
1136 	struct pcfs *fsp;
1137 	struct pcnode *pcp;
1138 	int error;
1139 
1140 	fsp = VFSTOPCFS(vp->v_vfsp);
1141 	if (error = pc_verify(fsp))
1142 		return (error);
1143 	error = pc_lockfs(fsp, 0, 0);
1144 	if (error)
1145 		return (error);
1146 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1147 		pc_unlockfs(fsp);
1148 		return (EIO);
1149 	}
1150 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1151 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1152 			pc_unlockfs(fsp);
1153 			return (EACCES);
1154 		}
1155 	}
1156 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1157 	pc_unlockfs(fsp);
1158 	return (error);
1159 }
1160 
1161 /*
1162  * Rename a file or directory
1163  * This rename is restricted to only rename files within a directory.
1164  * XX should make rename more general
1165  */
1166 /*ARGSUSED*/
1167 static int
1168 pcfs_rename(
1169 	struct vnode *sdvp,		/* old (source) parent vnode */
1170 	char *snm,			/* old (source) entry name */
1171 	struct vnode *tdvp,		/* new (target) parent vnode */
1172 	char *tnm,			/* new (target) entry name */
1173 	struct cred *cr,
1174 	caller_context_t *ct,
1175 	int flags)
1176 {
1177 	struct pcfs *fsp;
1178 	struct pcnode *dp;	/* parent pcnode */
1179 	struct pcnode *tdp;
1180 	int error;
1181 
1182 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1183 	if (error = pc_verify(fsp))
1184 		return (error);
1185 
1186 	/*
1187 	 * make sure we can muck with this directory.
1188 	 */
1189 	error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1190 	if (error) {
1191 		return (error);
1192 	}
1193 	error = pc_lockfs(fsp, 0, 0);
1194 	if (error)
1195 		return (error);
1196 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1197 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1198 		pc_unlockfs(fsp);
1199 		return (EIO);
1200 	}
1201 	error = pc_rename(dp, tdp, snm, tnm, ct);
1202 	pc_unlockfs(fsp);
1203 	return (error);
1204 }
1205 
1206 /*ARGSUSED*/
1207 static int
1208 pcfs_mkdir(
1209 	struct vnode *dvp,
1210 	char *nm,
1211 	struct vattr *vap,
1212 	struct vnode **vpp,
1213 	struct cred *cr,
1214 	caller_context_t *ct,
1215 	int flags,
1216 	vsecattr_t *vsecp)
1217 {
1218 	struct pcfs *fsp;
1219 	struct pcnode *pcp;
1220 	int error;
1221 
1222 	fsp = VFSTOPCFS(dvp->v_vfsp);
1223 	if (error = pc_verify(fsp))
1224 		return (error);
1225 	error = pc_lockfs(fsp, 0, 0);
1226 	if (error)
1227 		return (error);
1228 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1229 		pc_unlockfs(fsp);
1230 		return (EIO);
1231 	}
1232 
1233 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1234 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1235 			pc_unlockfs(fsp);
1236 			return (EACCES);
1237 		}
1238 	}
1239 
1240 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1241 
1242 	if (!error) {
1243 		pcp -> pc_flags |= PC_EXTERNAL;
1244 		*vpp = PCTOV(pcp);
1245 	} else if (error == EEXIST) {
1246 		VN_RELE(PCTOV(pcp));
1247 	}
1248 	pc_unlockfs(fsp);
1249 	return (error);
1250 }
1251 
1252 /*ARGSUSED*/
1253 static int
1254 pcfs_rmdir(
1255 	struct vnode *dvp,
1256 	char *nm,
1257 	struct vnode *cdir,
1258 	struct cred *cr,
1259 	caller_context_t *ct,
1260 	int flags)
1261 {
1262 	struct pcfs *fsp;
1263 	struct pcnode *pcp;
1264 	int error;
1265 
1266 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1267 	if (error = pc_verify(fsp))
1268 		return (error);
1269 	if (error = pc_lockfs(fsp, 0, 0))
1270 		return (error);
1271 
1272 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1273 		pc_unlockfs(fsp);
1274 		return (EIO);
1275 	}
1276 
1277 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1278 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1279 			pc_unlockfs(fsp);
1280 			return (EACCES);
1281 		}
1282 	}
1283 
1284 	error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1285 	pc_unlockfs(fsp);
1286 	return (error);
1287 }
1288 
1289 /*
1290  * read entries in a directory.
1291  * we must convert pc format to unix format
1292  */
1293 
1294 /*ARGSUSED*/
1295 static int
1296 pcfs_readdir(
1297 	struct vnode *dvp,
1298 	struct uio *uiop,
1299 	struct cred *cr,
1300 	int *eofp,
1301 	caller_context_t *ct,
1302 	int flags)
1303 {
1304 	struct pcnode *pcp;
1305 	struct pcfs *fsp;
1306 	struct pcdir *ep;
1307 	struct buf *bp = NULL;
1308 	offset_t offset;
1309 	int boff;
1310 	struct pc_dirent lbp;
1311 	struct pc_dirent *ld = &lbp;
1312 	int error;
1313 
1314 	/*
1315 	 * If the filesystem was umounted by force, return immediately.
1316 	 */
1317 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1318 		return (EIO);
1319 
1320 	if ((uiop->uio_iovcnt != 1) ||
1321 	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1322 		return (EINVAL);
1323 	}
1324 	fsp = VFSTOPCFS(dvp->v_vfsp);
1325 	/*
1326 	 * verify that the dp is still valid on the disk
1327 	 */
1328 	if (error = pc_verify(fsp)) {
1329 		return (error);
1330 	}
1331 	error = pc_lockfs(fsp, 0, 0);
1332 	if (error)
1333 		return (error);
1334 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1335 		pc_unlockfs(fsp);
1336 		return (EIO);
1337 	}
1338 
1339 	bzero(ld, sizeof (*ld));
1340 
1341 	if (eofp != NULL)
1342 		*eofp = 0;
1343 	offset = uiop->uio_loffset;
1344 
1345 	if (dvp->v_flag & VROOT) {
1346 		/*
1347 		 * kludge up entries for "." and ".." in the root.
1348 		 */
1349 		if (offset == 0) {
1350 			(void) strcpy(ld->d_name, ".");
1351 			ld->d_reclen = DIRENT64_RECLEN(1);
1352 			ld->d_off = (off64_t)sizeof (struct pcdir);
1353 			ld->d_ino = (ino64_t)UINT_MAX;
1354 			if (ld->d_reclen > uiop->uio_resid) {
1355 				pc_unlockfs(fsp);
1356 				return (ENOSPC);
1357 			}
1358 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1359 			uiop->uio_loffset = ld->d_off;
1360 			offset = uiop->uio_loffset;
1361 		}
1362 		if (offset == sizeof (struct pcdir)) {
1363 			(void) strcpy(ld->d_name, "..");
1364 			ld->d_reclen = DIRENT64_RECLEN(2);
1365 			if (ld->d_reclen > uiop->uio_resid) {
1366 				pc_unlockfs(fsp);
1367 				return (ENOSPC);
1368 			}
1369 			ld->d_off = (off64_t)(uiop->uio_loffset +
1370 			    sizeof (struct pcdir));
1371 			ld->d_ino = (ino64_t)UINT_MAX;
1372 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1373 			uiop->uio_loffset = ld->d_off;
1374 			offset = uiop->uio_loffset;
1375 		}
1376 		offset -= 2 * sizeof (struct pcdir);
1377 		/* offset now has the real offset value into directory file */
1378 	}
1379 
1380 	for (;;) {
1381 		boff = pc_blkoff(fsp, offset);
1382 		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1383 			if (bp != NULL) {
1384 				brelse(bp);
1385 				bp = NULL;
1386 			}
1387 			error = pc_blkatoff(pcp, offset, &bp, &ep);
1388 			if (error) {
1389 				if (error == ENOENT) {
1390 					error = 0;
1391 					if (eofp)
1392 						*eofp = 1;
1393 				}
1394 				break;
1395 			}
1396 		}
1397 		if (ep->pcd_filename[0] == PCD_UNUSED) {
1398 			if (eofp)
1399 				*eofp = 1;
1400 			break;
1401 		}
1402 		/*
1403 		 * Don't display label because it may contain funny characters.
1404 		 */
1405 		if (ep->pcd_filename[0] == PCD_ERASED) {
1406 			uiop->uio_loffset += sizeof (struct pcdir);
1407 			offset += sizeof (struct pcdir);
1408 			ep++;
1409 			continue;
1410 		}
1411 		if (PCDL_IS_LFN(ep)) {
1412 			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1413 			    0)
1414 				break;
1415 			continue;
1416 		}
1417 
1418 		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1419 			break;
1420 	}
1421 	if (bp)
1422 		brelse(bp);
1423 	pc_unlockfs(fsp);
1424 	return (error);
1425 }
1426 
1427 
1428 /*
1429  * Called from pvn_getpages or pcfs_getpage to get a particular page.
1430  * When we are called the pcfs is already locked.
1431  */
1432 /*ARGSUSED*/
1433 static int
1434 pcfs_getapage(
1435 	struct vnode *vp,
1436 	u_offset_t off,
1437 	size_t len,
1438 	uint_t *protp,
1439 	page_t *pl[],		/* NULL if async IO is requested */
1440 	size_t plsz,
1441 	struct seg *seg,
1442 	caddr_t addr,
1443 	enum seg_rw rw,
1444 	struct cred *cr)
1445 {
1446 	struct pcnode *pcp;
1447 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1448 	struct vnode *devvp;
1449 	page_t *pp;
1450 	page_t *pagefound;
1451 	int err;
1452 
1453 	/*
1454 	 * If the filesystem was umounted by force, return immediately.
1455 	 */
1456 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1457 		return (EIO);
1458 
1459 	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1460 	    (void *)vp, off, len);
1461 
1462 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1463 		return (EIO);
1464 	devvp = fsp->pcfs_devvp;
1465 
1466 	/* pcfs doesn't do readaheads */
1467 	if (pl == NULL)
1468 		return (0);
1469 
1470 	pl[0] = NULL;
1471 	err = 0;
1472 	/*
1473 	 * If the accessed time on the pcnode has not already been
1474 	 * set elsewhere (e.g. for read/setattr) we set the time now.
1475 	 * This gives us approximate modified times for mmap'ed files
1476 	 * which are accessed via loads in the user address space.
1477 	 */
1478 	if ((pcp->pc_flags & PC_ACC) == 0 &&
1479 	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1480 		pc_mark_acc(fsp, pcp);
1481 	}
1482 reread:
1483 	if ((pagefound = page_exists(vp, off)) == NULL) {
1484 		/*
1485 		 * Need to really do disk IO to get the page(s).
1486 		 */
1487 		struct buf *bp;
1488 		daddr_t lbn, bn;
1489 		u_offset_t io_off;
1490 		size_t io_len;
1491 		u_offset_t lbnoff, xferoffset;
1492 		u_offset_t pgoff;
1493 		uint_t	xfersize;
1494 		int err1;
1495 
1496 		lbn = pc_lblkno(fsp, off);
1497 		lbnoff = off & ~(fsp->pcfs_clsize - 1);
1498 		xferoffset = off & ~(fsp->pcfs_secsize - 1);
1499 
1500 		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1501 		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1502 		if (pp == NULL)
1503 			/*
1504 			 * XXX - If pcfs is made MT-hot, this should go
1505 			 * back to reread.
1506 			 */
1507 			panic("pcfs_getapage pvn_read_kluster");
1508 
1509 		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1510 		    pgoff += xfersize,
1511 		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
1512 		    lbnoff += xfersize, xferoffset += xfersize) {
1513 			/*
1514 			 * read as many contiguous blocks as possible to
1515 			 * fill this page
1516 			 */
1517 			xfersize = PAGESIZE - pgoff;
1518 			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1519 			if (err1) {
1520 				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1521 				err = err1;
1522 				goto out;
1523 			}
1524 			bp = pageio_setup(pp, xfersize, devvp, B_READ);
1525 			bp->b_edev = devvp->v_rdev;
1526 			bp->b_dev = cmpdev(devvp->v_rdev);
1527 			bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1528 			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1529 			bp->b_file = vp;
1530 			bp->b_offset = (offset_t)(off + pgoff);
1531 
1532 			(void) bdev_strategy(bp);
1533 
1534 			lwp_stat_update(LWP_STAT_INBLK, 1);
1535 
1536 			if (err == 0)
1537 				err = biowait(bp);
1538 			else
1539 				(void) biowait(bp);
1540 			pageio_done(bp);
1541 			if (err)
1542 				goto out;
1543 		}
1544 		if (pgoff < PAGESIZE) {
1545 			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1546 		}
1547 		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1548 	}
1549 out:
1550 	if (err) {
1551 		if (pp != NULL)
1552 			pvn_read_done(pp, B_ERROR);
1553 		return (err);
1554 	}
1555 
1556 	if (pagefound) {
1557 		/*
1558 		 * Page exists in the cache, acquire the "shared"
1559 		 * lock.  If this fails, go back to reread.
1560 		 */
1561 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1562 			goto reread;
1563 		}
1564 		pl[0] = pp;
1565 		pl[1] = NULL;
1566 	}
1567 	return (err);
1568 }
1569 
1570 /*
1571  * Return all the pages from [off..off+len] in given file
1572  */
1573 /* ARGSUSED */
1574 static int
1575 pcfs_getpage(
1576 	struct vnode *vp,
1577 	offset_t off,
1578 	size_t len,
1579 	uint_t *protp,
1580 	page_t *pl[],
1581 	size_t plsz,
1582 	struct seg *seg,
1583 	caddr_t addr,
1584 	enum seg_rw rw,
1585 	struct cred *cr,
1586 	caller_context_t *ct)
1587 {
1588 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1589 	int err;
1590 
1591 	PC_DPRINTF0(6, "pcfs_getpage\n");
1592 	if (err = pc_verify(fsp))
1593 		return (err);
1594 	if (vp->v_flag & VNOMAP)
1595 		return (ENOSYS);
1596 	ASSERT(off <= UINT32_MAX);
1597 	err = pc_lockfs(fsp, 0, 0);
1598 	if (err)
1599 		return (err);
1600 	if (protp != NULL)
1601 		*protp = PROT_ALL;
1602 
1603 	ASSERT((off & PAGEOFFSET) == 0);
1604 	if (len <= PAGESIZE) {
1605 		err = pcfs_getapage(vp, off, len, protp, pl,
1606 		    plsz, seg, addr, rw, cr);
1607 	} else {
1608 		err = pvn_getpages(pcfs_getapage, vp, off,
1609 		    len, protp, pl, plsz, seg, addr, rw, cr);
1610 	}
1611 	pc_unlockfs(fsp);
1612 	return (err);
1613 }
1614 
1615 
1616 /*
1617  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1618  * If len == 0, do from off to EOF.
1619  *
1620  * The normal cases should be len == 0 & off == 0 (entire vp list),
1621  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1622  * (from pageout).
1623  *
1624  */
1625 /*ARGSUSED*/
1626 static int
1627 pcfs_putpage(
1628 	struct vnode *vp,
1629 	offset_t off,
1630 	size_t len,
1631 	int flags,
1632 	struct cred *cr,
1633 	caller_context_t *ct)
1634 {
1635 	struct pcnode *pcp;
1636 	page_t *pp;
1637 	struct pcfs *fsp;
1638 	u_offset_t io_off;
1639 	size_t io_len;
1640 	offset_t eoff;
1641 	int err;
1642 
1643 	/*
1644 	 * If the filesystem was umounted by force, return immediately.
1645 	 */
1646 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1647 		return (EIO);
1648 
1649 	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1650 	if (vp->v_flag & VNOMAP)
1651 		return (ENOSYS);
1652 
1653 	fsp = VFSTOPCFS(vp->v_vfsp);
1654 
1655 	if (err = pc_verify(fsp))
1656 		return (err);
1657 	if ((pcp = VTOPC(vp)) == NULL) {
1658 		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1659 		return (EIO);
1660 	}
1661 	if (pcp->pc_flags & PC_INVAL)
1662 		return (EIO);
1663 
1664 	if (curproc == proc_pageout) {
1665 		/*
1666 		 * XXX - This is a quick hack to avoid blocking
1667 		 * pageout. Also to avoid pcfs_getapage deadlocking
1668 		 * with putpage when memory is running out,
1669 		 * since we only have one global lock and we don't
1670 		 * support async putpage.
1671 		 * It should be fixed someday.
1672 		 *
1673 		 * Interestingly, this used to be a test of NOMEMWAIT().
1674 		 * We only ever got here once pcfs started supporting
1675 		 * NFS sharing, and then only because the NFS server
1676 		 * threads seem to do writes in sched's process context.
1677 		 * Since everyone else seems to just care about pageout,
1678 		 * the test was changed to look for pageout directly.
1679 		 */
1680 		return (ENOMEM);
1681 	}
1682 
1683 	ASSERT(off <= UINT32_MAX);
1684 
1685 	flags &= ~B_ASYNC;	/* XXX should fix this later */
1686 
1687 	err = pc_lockfs(fsp, 0, 0);
1688 	if (err)
1689 		return (err);
1690 	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1691 		pc_unlockfs(fsp);
1692 		return (0);
1693 	}
1694 
1695 	if (len == 0) {
1696 		/*
1697 		 * Search the entire vp list for pages >= off
1698 		 */
1699 		err = pvn_vplist_dirty(vp, off,
1700 		    pcfs_putapage, flags, cr);
1701 	} else {
1702 		eoff = off + len;
1703 
1704 		for (io_off = off; io_off < eoff &&
1705 		    io_off < pcp->pc_size; io_off += io_len) {
1706 			/*
1707 			 * If we are not invalidating, synchronously
1708 			 * freeing or writing pages use the routine
1709 			 * page_lookup_nowait() to prevent reclaiming
1710 			 * them from the free list.
1711 			 */
1712 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1713 				pp = page_lookup(vp, io_off,
1714 				    (flags & (B_INVAL | B_FREE)) ?
1715 				    SE_EXCL : SE_SHARED);
1716 			} else {
1717 				pp = page_lookup_nowait(vp, io_off,
1718 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1719 			}
1720 
1721 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1722 				io_len = PAGESIZE;
1723 			else {
1724 				err = pcfs_putapage(vp, pp, &io_off, &io_len,
1725 				    flags, cr);
1726 				if (err != 0)
1727 					break;
1728 				/*
1729 				 * "io_off" and "io_len" are returned as
1730 				 * the range of pages we actually wrote.
1731 				 * This allows us to skip ahead more quickly
1732 				 * since several pages may've been dealt
1733 				 * with by this iteration of the loop.
1734 				 */
1735 			}
1736 		}
1737 	}
1738 	if (err == 0 && (flags & B_INVAL) &&
1739 	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
1740 		/*
1741 		 * If doing "invalidation", make sure that
1742 		 * all pages on the vnode list are actually
1743 		 * gone.
1744 		 */
1745 		cmn_err(CE_PANIC,
1746 		    "pcfs_putpage: B_INVAL, pages not gone");
1747 	} else if (err) {
1748 		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1749 	}
1750 	pc_unlockfs(fsp);
1751 	return (err);
1752 }
1753 
1754 /*
1755  * Write out a single page, possibly klustering adjacent dirty pages.
1756  */
1757 /*ARGSUSED*/
1758 int
1759 pcfs_putapage(
1760 	struct vnode *vp,
1761 	page_t *pp,
1762 	u_offset_t *offp,
1763 	size_t *lenp,
1764 	int flags,
1765 	struct cred *cr)
1766 {
1767 	struct pcnode *pcp;
1768 	struct pcfs *fsp;
1769 	struct vnode *devvp;
1770 	size_t io_len;
1771 	daddr_t bn;
1772 	u_offset_t lbn, lbnoff, xferoffset;
1773 	uint_t pgoff, xfersize;
1774 	int err = 0;
1775 	u_offset_t io_off;
1776 
1777 	pcp = VTOPC(vp);
1778 	fsp = VFSTOPCFS(vp->v_vfsp);
1779 	devvp = fsp->pcfs_devvp;
1780 
1781 	/*
1782 	 * If the modified time on the inode has not already been
1783 	 * set elsewhere (e.g. for write/setattr) and this is not
1784 	 * a call from msync (B_FORCE) we set the time now.
1785 	 * This gives us approximate modified times for mmap'ed files
1786 	 * which are modified via stores in the user address space.
1787 	 */
1788 	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1789 		pcp->pc_flags |= PC_MOD;
1790 		pc_mark_mod(fsp, pcp);
1791 	}
1792 	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1793 	    PAGESIZE, flags);
1794 
1795 	if (fsp->pcfs_flags & PCFS_IRRECOV) {
1796 		goto out;
1797 	}
1798 
1799 	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1800 
1801 	lbn = pc_lblkno(fsp, io_off);
1802 	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1803 	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1804 
1805 	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1806 	    pgoff += xfersize,
1807 	    lbn += howmany(xfersize, fsp->pcfs_clsize),
1808 	    lbnoff += xfersize, xferoffset += xfersize) {
1809 
1810 		struct buf *bp;
1811 		int err1;
1812 
1813 		/*
1814 		 * write as many contiguous blocks as possible from this page
1815 		 */
1816 		xfersize = io_len - pgoff;
1817 		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1818 		if (err1) {
1819 			err = err1;
1820 			goto out;
1821 		}
1822 		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1823 		bp->b_edev = devvp->v_rdev;
1824 		bp->b_dev = cmpdev(devvp->v_rdev);
1825 		bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1826 		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1827 		bp->b_file = vp;
1828 		bp->b_offset = (offset_t)(io_off + pgoff);
1829 
1830 		(void) bdev_strategy(bp);
1831 
1832 		lwp_stat_update(LWP_STAT_OUBLK, 1);
1833 
1834 		if (err == 0)
1835 			err = biowait(bp);
1836 		else
1837 			(void) biowait(bp);
1838 		pageio_done(bp);
1839 	}
1840 	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1841 	pp = NULL;
1842 
1843 out:
1844 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1845 		pvn_write_done(pp, B_WRITE | flags);
1846 	} else if (err != 0 && pp != NULL) {
1847 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1848 	}
1849 
1850 	if (offp)
1851 		*offp = io_off;
1852 	if (lenp)
1853 		*lenp = io_len;
1854 		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1855 		    (void *)vp, (void *)pp, io_off, io_len);
1856 	if (err) {
1857 		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1858 	}
1859 	return (err);
1860 }
1861 
1862 /*ARGSUSED*/
1863 static int
1864 pcfs_map(
1865 	struct vnode *vp,
1866 	offset_t off,
1867 	struct as *as,
1868 	caddr_t *addrp,
1869 	size_t len,
1870 	uchar_t prot,
1871 	uchar_t maxprot,
1872 	uint_t flags,
1873 	struct cred *cr,
1874 	caller_context_t *ct)
1875 {
1876 	struct segvn_crargs vn_a;
1877 	int error;
1878 
1879 	PC_DPRINTF0(6, "pcfs_map\n");
1880 	if (vp->v_flag & VNOMAP)
1881 		return (ENOSYS);
1882 
1883 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1884 		return (ENXIO);
1885 
1886 	as_rangelock(as);
1887 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
1888 	if (error != 0) {
1889 		as_rangeunlock(as);
1890 		return (error);
1891 	}
1892 
1893 	vn_a.vp = vp;
1894 	vn_a.offset = off;
1895 	vn_a.type = flags & MAP_TYPE;
1896 	vn_a.prot = prot;
1897 	vn_a.maxprot = maxprot;
1898 	vn_a.flags = flags & ~MAP_TYPE;
1899 	vn_a.cred = cr;
1900 	vn_a.amp = NULL;
1901 	vn_a.szc = 0;
1902 	vn_a.lgrp_mem_policy_flags = 0;
1903 
1904 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1905 	as_rangeunlock(as);
1906 	return (error);
1907 }
1908 
1909 /* ARGSUSED */
1910 static int
1911 pcfs_seek(
1912 	struct vnode *vp,
1913 	offset_t ooff,
1914 	offset_t *noffp,
1915 	caller_context_t *ct)
1916 {
1917 	if (*noffp < 0)
1918 		return (EINVAL);
1919 	else if (*noffp > MAXOFFSET_T)
1920 		return (EINVAL);
1921 	else
1922 		return (0);
1923 }
1924 
1925 /* ARGSUSED */
1926 static int
1927 pcfs_addmap(
1928 	struct vnode *vp,
1929 	offset_t off,
1930 	struct as *as,
1931 	caddr_t addr,
1932 	size_t len,
1933 	uchar_t prot,
1934 	uchar_t maxprot,
1935 	uint_t flags,
1936 	struct cred *cr,
1937 	caller_context_t *ct)
1938 {
1939 	if (vp->v_flag & VNOMAP)
1940 		return (ENOSYS);
1941 	return (0);
1942 }
1943 
1944 /*ARGSUSED*/
1945 static int
1946 pcfs_delmap(
1947 	struct vnode *vp,
1948 	offset_t off,
1949 	struct as *as,
1950 	caddr_t addr,
1951 	size_t len,
1952 	uint_t prot,
1953 	uint_t maxprot,
1954 	uint_t flags,
1955 	struct cred *cr,
1956 	caller_context_t *ct)
1957 {
1958 	if (vp->v_flag & VNOMAP)
1959 		return (ENOSYS);
1960 	return (0);
1961 }
1962 
1963 /*
1964  * POSIX pathconf() support.
1965  */
1966 /* ARGSUSED */
1967 static int
1968 pcfs_pathconf(
1969 	struct vnode *vp,
1970 	int cmd,
1971 	ulong_t *valp,
1972 	struct cred *cr,
1973 	caller_context_t *ct)
1974 {
1975 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1976 
1977 	switch (cmd) {
1978 	case _PC_LINK_MAX:
1979 		*valp = 1;
1980 		return (0);
1981 
1982 	case _PC_CASE_BEHAVIOR:
1983 		return (EINVAL);
1984 
1985 	case _PC_FILESIZEBITS:
1986 		/*
1987 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1988 		 * FAT12 can only go up to the maximum filesystem capacity
1989 		 * which is ~509MB.
1990 		 */
1991 		*valp = IS_FAT12(fsp) ? 30 : 33;
1992 		return (0);
1993 
1994 	case _PC_TIMESTAMP_RESOLUTION:
1995 		/*
1996 		 * PCFS keeps track of modification times, it its own
1997 		 * internal format, to a resolution of 2 seconds.
1998 		 * Since 2000 million is representable in an int32_t
1999 		 * without overflow (or becoming negative), we allow
2000 		 * this value to be returned.
2001 		 */
2002 		*valp = 2000000000L;
2003 		return (0);
2004 
2005 	default:
2006 		return (fs_pathconf(vp, cmd, valp, cr, ct));
2007 	}
2008 
2009 }
2010 
2011 /* ARGSUSED */
2012 static int
2013 pcfs_space(
2014 	struct vnode *vp,
2015 	int cmd,
2016 	struct flock64 *bfp,
2017 	int flag,
2018 	offset_t offset,
2019 	cred_t *cr,
2020 	caller_context_t *ct)
2021 {
2022 	struct vattr vattr;
2023 	int error;
2024 
2025 	if (cmd != F_FREESP)
2026 		return (EINVAL);
2027 
2028 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2029 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2030 			return (EFBIG);
2031 		/*
2032 		 * we only support the special case of l_len == 0,
2033 		 * meaning free to end of file at this moment.
2034 		 */
2035 		if (bfp->l_len != 0)
2036 			return (EINVAL);
2037 		vattr.va_mask = AT_SIZE;
2038 		vattr.va_size = bfp->l_start;
2039 		error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2040 	}
2041 	return (error);
2042 }
2043 
2044 /*
2045  * Break up 'len' chars from 'buf' into a long file name chunk.
2046  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2047  */
2048 void
2049 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2050 {
2051 	int	i;
2052 
2053 	ASSERT(buf != NULL);
2054 
2055 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2056 		if (len > 0) {
2057 			ep->pcdl_firstfilename[i] = *buf++;
2058 			ep->pcdl_firstfilename[i + 1] = *buf++;
2059 			len -= 2;
2060 		} else {
2061 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2062 			ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2063 		}
2064 	}
2065 
2066 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2067 		if (len > 0) {
2068 			ep->pcdl_secondfilename[i] = *buf++;
2069 			ep->pcdl_secondfilename[i + 1] = *buf++;
2070 			len -= 2;
2071 		} else {
2072 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2073 			ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2074 		}
2075 	}
2076 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2077 		if (len > 0) {
2078 			ep->pcdl_thirdfilename[i] = *buf++;
2079 			ep->pcdl_thirdfilename[i + 1] = *buf++;
2080 			len -= 2;
2081 		} else {
2082 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2083 			ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2084 		}
2085 	}
2086 }
2087 
2088 /*
2089  * Extract the characters from the long filename chunk into 'buf'.
2090  * Return the number of characters extracted.
2091  */
2092 static int
2093 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf)
2094 {
2095 	char 	*tmp = buf;
2096 	int	i;
2097 
2098 	/* Copy all the names, no filtering now */
2099 
2100 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2101 		*tmp = ep->pcdl_firstfilename[i];
2102 		*(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2103 
2104 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2105 			return (tmp - buf);
2106 	}
2107 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2108 		*tmp = ep->pcdl_secondfilename[i];
2109 		*(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2110 
2111 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2112 			return (tmp - buf);
2113 	}
2114 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2115 		*tmp = ep->pcdl_thirdfilename[i];
2116 		*(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2117 
2118 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2119 			return (tmp - buf);
2120 	}
2121 	return (tmp - buf);
2122 }
2123 
2124 
/*
 * Checksum the passed in short filename.
 *
 * The 8 bytes of 'name' followed by the 3 bytes of 'ext' are folded into
 * a single byte: starting from the first byte, the running value is
 * rotated right by one bit and the next byte is added, modulo 256.
 *
 * This is used to validate each component of the long name to make
 * sure the long name is valid (it hasn't been "detached" from the
 * short filename). This algorithm was found in FreeBSD.
 * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
 */

uchar_t
pc_checksum_long_fn(char *name, char *ext)
{
	char	raw[11];
	uchar_t	sum;
	int	i;

	bcopy(name, raw, 8);
	bcopy(ext, raw + 8, 3);

	sum = raw[0];
	for (i = 1; i < 11; i++)
		sum = ((sum << 7) | (sum >> 1)) + raw[i];

	return (sum);
}
2156 
2157 /*
2158  * Read a chunk of long filename entries into 'namep'.
2159  * Return with offset pointing to short entry (on success), or next
2160  * entry to read (if this wasn't a valid lfn really).
2161  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2162  * a long filename.
2163  *
2164  * Can also be called with a NULL namep, in which case it just returns
2165  * whether this was really a valid long filename and consumes it
2166  * (used by pc_dirempty()).
2167  */
2168 int
2169 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2170     struct pcdir **epp, offset_t *offset, struct buf **bp)
2171 {
2172 	struct pcdir *ep = *epp;
2173 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2174 	struct vnode *dvp = PCTOV(pcp);
2175 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2176 	char	*lfn;
2177 	char	*lfn_base;
2178 	int	boff;
2179 	int	i, cs;
2180 	char	*buf;
2181 	uchar_t	cksum;
2182 	int	detached = 0;
2183 	int	error = 0;
2184 	int	foldcase;
2185 	int	count = 0;
2186 	size_t	u16l = 0, u8l = 0;
2187 	char	*outbuf;
2188 	size_t	ret, inlen, outlen;
2189 
2190 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2191 	lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2192 	lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2193 	*lfn = '\0';
2194 	*(lfn + 1) = '\0';
2195 	cksum = lep->pcdl_checksum;
2196 
2197 	buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2198 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2199 		/* read next block if necessary */
2200 		boff = pc_blkoff(fsp, *offset);
2201 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2202 			if (*bp != NULL) {
2203 				brelse(*bp);
2204 				*bp = NULL;
2205 			}
2206 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2207 			if (error) {
2208 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2209 				kmem_free(buf, PCMAXNAM_UTF16);
2210 				return (error);
2211 			}
2212 			lep = (struct pcdir_lfn *)ep;
2213 		}
2214 		/* can this happen? Bad fs? */
2215 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2216 			detached = 1;
2217 			break;
2218 		}
2219 		if (cksum != lep->pcdl_checksum)
2220 			detached = 1;
2221 		/* process current entry */
2222 		cs = get_long_fn_chunk(lep, buf);
2223 		count += cs;
2224 		for (; cs > 0; cs--) {
2225 			/* see if we underflow */
2226 			if (lfn >= lfn_base)
2227 				*--lfn = buf[cs - 1];
2228 			else
2229 				detached = 1;
2230 		}
2231 		lep++;
2232 		*offset += sizeof (struct pcdir);
2233 	}
2234 	kmem_free(buf, PCMAXNAM_UTF16);
2235 	/* read next block if necessary */
2236 	boff = pc_blkoff(fsp, *offset);
2237 	ep = (struct pcdir *)lep;
2238 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2239 		if (*bp != NULL) {
2240 			brelse(*bp);
2241 			*bp = NULL;
2242 		}
2243 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2244 		if (error) {
2245 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2246 			return (error);
2247 		}
2248 	}
2249 	/* should be on the short one */
2250 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2251 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2252 		detached = 1;
2253 	}
2254 	if (detached ||
2255 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2256 	    !pc_valid_long_fn(lfn, 0)) {
2257 		/*
2258 		 * process current entry again. This may end up another lfn
2259 		 * or a short name.
2260 		 */
2261 		*epp = ep;
2262 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2263 		return (EINVAL);
2264 	}
2265 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2266 		/*
2267 		 * Don't display label because it may contain
2268 		 * funny characters.
2269 		 */
2270 		*offset += sizeof (struct pcdir);
2271 		ep++;
2272 		*epp = ep;
2273 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2274 		return (EINVAL);
2275 	}
2276 	if (namep) {
2277 		u16l = count / 2;
2278 		u8l = PCMAXNAMLEN;
2279 		error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2280 		    (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2281 		/*
2282 		 * uconv_u16tou8() will catch conversion errors including
2283 		 * the case where there is not enough room to write the
2284 		 * converted result and the u8l will never go over the given
2285 		 * PCMAXNAMLEN.
2286 		 */
2287 		if (error != 0) {
2288 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2289 			return (EINVAL);
2290 		}
2291 		namep[u8l] = '\0';
2292 		if (foldcase) {
2293 			inlen = strlen(namep);
2294 			outlen = PCMAXNAMLEN;
2295 			outbuf = kmem_alloc(PCMAXNAMLEN + 1, KM_SLEEP);
2296 			ret = u8_textprep_str(namep, &inlen, outbuf,
2297 			    &outlen, U8_TEXTPREP_TOLOWER, U8_UNICODE_LATEST,
2298 			    &error);
2299 			if (ret == -1) {
2300 				kmem_free(outbuf, PCMAXNAMLEN + 1);
2301 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2302 				return (EINVAL);
2303 			}
2304 			outbuf[PCMAXNAMLEN - outlen] = '\0';
2305 			(void) strncpy(namep, outbuf, PCMAXNAMLEN + 1);
2306 			kmem_free(outbuf, PCMAXNAMLEN + 1);
2307 		}
2308 	}
2309 	kmem_free(lfn_base, PCMAXNAM_UTF16);
2310 	*epp = ep;
2311 	return (0);
2312 }
2313 /*
2314  * Read a long filename into the pc_dirent structure and copy it out.
2315  */
2316 int
2317 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2318     struct pcdir **epp, offset_t *offset, struct buf **bp)
2319 {
2320 	struct pcdir *ep;
2321 	struct pcnode *pcp = VTOPC(dvp);
2322 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2323 	offset_t uiooffset = uiop->uio_loffset;
2324 	int	error = 0;
2325 	offset_t oldoffset;
2326 
2327 	oldoffset = *offset;
2328 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2329 	if (error) {
2330 		if (error == EINVAL) {
2331 			uiop->uio_loffset += *offset - oldoffset;
2332 			return (0);
2333 		} else
2334 			return (error);
2335 	}
2336 
2337 	ep = *epp;
2338 	uiop->uio_loffset += *offset - oldoffset;
2339 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2340 	if (ld->d_reclen > uiop->uio_resid) {
2341 		uiop->uio_loffset = uiooffset;
2342 		return (ENOSPC);
2343 	}
2344 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2345 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2346 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2347 	    pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2348 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2349 	uiop->uio_loffset = ld->d_off;
2350 	*offset += sizeof (struct pcdir);
2351 	ep++;
2352 	*epp = ep;
2353 	return (0);
2354 }
2355 
2356 /*
2357  * Read a short filename into the pc_dirent structure and copy it out.
2358  */
2359 int
2360 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2361     struct pcdir **epp, offset_t *offset, struct buf **bp)
2362 {
2363 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2364 	int	boff = pc_blkoff(fsp, *offset);
2365 	struct pcdir *ep = *epp;
2366 	offset_t	oldoffset = uiop->uio_loffset;
2367 	int	error;
2368 	int	foldcase;
2369 
2370 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2371 		uiop->uio_loffset += sizeof (struct pcdir);
2372 		*offset += sizeof (struct pcdir);
2373 		ep++;
2374 		*epp = ep;
2375 		return (0);
2376 	}
2377 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2378 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2379 	    pc_direntpersec(fsp));
2380 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2381 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2382 	    &ep->pcd_ext[0], foldcase);
2383 	if (error == 0) {
2384 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2385 		if (ld->d_reclen > uiop->uio_resid) {
2386 			uiop->uio_loffset = oldoffset;
2387 			return (ENOSPC);
2388 		}
2389 		ld->d_off = (off64_t)(uiop->uio_loffset +
2390 		    sizeof (struct pcdir));
2391 		(void) uiomove((caddr_t)ld,
2392 		    ld->d_reclen, UIO_READ, uiop);
2393 		uiop->uio_loffset = ld->d_off;
2394 	} else {
2395 		uiop->uio_loffset += sizeof (struct pcdir);
2396 	}
2397 	*offset += sizeof (struct pcdir);
2398 	ep++;
2399 	*epp = ep;
2400 	return (0);
2401 }
2402 
2403 /* ARGSUSED */
2404 static int
2405 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2406 {
2407 	struct pc_fid *pcfid;
2408 	struct pcnode *pcp;
2409 	struct pcfs	*fsp;
2410 	int	error;
2411 
2412 	fsp = VFSTOPCFS(vp->v_vfsp);
2413 	if (fsp == NULL)
2414 		return (EIO);
2415 	error = pc_lockfs(fsp, 0, 0);
2416 	if (error)
2417 		return (error);
2418 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2419 		pc_unlockfs(fsp);
2420 		return (EIO);
2421 	}
2422 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2423 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2424 		pc_unlockfs(fsp);
2425 		return (ENOSPC);
2426 	}
2427 
2428 	pcfid = (struct pc_fid *)fidp;
2429 	bzero(pcfid, sizeof (struct pc_fid));
2430 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2431 	if (vp->v_flag & VROOT) {
2432 		pcfid->pcfid_block = 0;
2433 		pcfid->pcfid_offset = 0;
2434 		pcfid->pcfid_ctime = 0;
2435 	} else {
2436 		pcfid->pcfid_block = pcp->pc_eblkno;
2437 		pcfid->pcfid_offset = pcp->pc_eoffset;
2438 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2439 	}
2440 	pc_unlockfs(fsp);
2441 	return (0);
2442 }
2443