xref: /titanic_50/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 4e6f6c8344ddd39ded306346bd0107934d29b982)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
30  */
31 
32 #include <sys/param.h>
33 #include <sys/t_lock.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/user.h>
37 #include <sys/buf.h>
38 #include <sys/stat.h>
39 #include <sys/vfs.h>
40 #include <sys/vfs_opreg.h>
41 #include <sys/dirent.h>
42 #include <sys/vnode.h>
43 #include <sys/proc.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/uio.h>
47 #include <sys/fs/pc_label.h>
48 #include <sys/fs/pc_fs.h>
49 #include <sys/fs/pc_dir.h>
50 #include <sys/fs/pc_node.h>
51 #include <sys/mman.h>
52 #include <sys/pathname.h>
53 #include <sys/vmsystm.h>
54 #include <sys/cmn_err.h>
55 #include <sys/debug.h>
56 #include <sys/statvfs.h>
57 #include <sys/unistd.h>
58 #include <sys/kmem.h>
59 #include <sys/conf.h>
60 #include <sys/flock.h>
61 #include <sys/policy.h>
62 #include <sys/sdt.h>
63 #include <sys/sunddi.h>
64 #include <sys/types.h>
65 #include <sys/errno.h>
66 
67 #include <vm/seg.h>
68 #include <vm/page.h>
69 #include <vm/pvn.h>
70 #include <vm/seg_map.h>
71 #include <vm/seg_vn.h>
72 #include <vm/hat.h>
73 #include <vm/as.h>
74 #include <vm/seg_kmem.h>
75 
76 #include <fs/fs_subr.h>
77 
78 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
79 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
80 	caller_context_t *ct);
81 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
82 	caller_context_t *);
83 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
84 	caller_context_t *);
85 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
86 	caller_context_t *ct);
87 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
88 	caller_context_t *);
89 static int pcfs_access(struct vnode *, int, int, struct cred *,
90 	caller_context_t *ct);
91 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
92 	struct pathname *, int, struct vnode *, struct cred *,
93 	caller_context_t *, int *, pathname_t *);
94 static int pcfs_create(struct vnode *, char *, struct vattr *,
95 	enum vcexcl, int mode, struct vnode **, struct cred *, int,
96 	caller_context_t *, vsecattr_t *);
97 static int pcfs_remove(struct vnode *, char *, struct cred *,
98 	caller_context_t *, int);
99 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
100 	struct cred *, caller_context_t *, int);
101 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
102 	struct cred *, caller_context_t *, int, vsecattr_t *);
103 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
104 	caller_context_t *, int);
105 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
106 	caller_context_t *, int);
107 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
108 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
109 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
110 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
111 	offset_t, cred_t *, caller_context_t *);
112 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
113 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
114 	caller_context_t *);
115 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
116 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
117 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
118 	caller_context_t *);
119 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
120 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
121 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
122 	size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
123 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
124 	size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
125 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
126 	caller_context_t *);
127 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
128 	caller_context_t *);
129 
130 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
131 	struct cred *);
132 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
133 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf);
134 
135 extern krwlock_t pcnodes_lock;
136 
/*
 * Round "r" up to the next multiple of sizeof (long long).
 * Values that are already a multiple are returned unchanged.
 */
#define	lround(r)	\
	(((r) + (sizeof (long long) - 1)) & ~(sizeof (long long) - 1))
138 
139 /*
140  * vnode op vectors for files and directories.
141  */
142 struct vnodeops *pcfs_fvnodeops;
143 struct vnodeops *pcfs_dvnodeops;
144 
145 const fs_operation_def_t pcfs_fvnodeops_template[] = {
146 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
147 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
148 	VOPNAME_READ,		{ .vop_read = pcfs_read },
149 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
150 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
151 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
152 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
153 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
154 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
155 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
156 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
157 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
158 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
159 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
160 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
161 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
162 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
163 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
164 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
165 	NULL,			NULL
166 };
167 
168 const fs_operation_def_t pcfs_dvnodeops_template[] = {
169 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
170 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
171 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
172 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
173 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
174 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
175 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
176 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
177 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
178 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
179 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
180 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
181 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
182 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
183 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
184 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
185 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
186 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
187 	NULL,			NULL
188 };
189 
190 
191 /*ARGSUSED*/
192 static int
193 pcfs_open(
194 	struct vnode **vpp,
195 	int flag,
196 	struct cred *cr,
197 	caller_context_t *ct)
198 {
199 	return (0);
200 }
201 
202 /*
203  * files are sync'ed on close to keep floppy up to date
204  */
205 
206 /*ARGSUSED*/
207 static int
208 pcfs_close(
209 	struct vnode *vp,
210 	int flag,
211 	int count,
212 	offset_t offset,
213 	struct cred *cr,
214 	caller_context_t *ct)
215 {
216 	return (0);
217 }
218 
219 /*ARGSUSED*/
220 static int
221 pcfs_read(
222 	struct vnode *vp,
223 	struct uio *uiop,
224 	int ioflag,
225 	struct cred *cr,
226 	struct caller_context *ct)
227 {
228 	struct pcfs *fsp;
229 	struct pcnode *pcp;
230 	int error;
231 
232 	fsp = VFSTOPCFS(vp->v_vfsp);
233 	if (error = pc_verify(fsp))
234 		return (error);
235 	error = pc_lockfs(fsp, 0, 0);
236 	if (error)
237 		return (error);
238 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
239 		pc_unlockfs(fsp);
240 		return (EIO);
241 	}
242 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
243 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
244 		pc_mark_acc(fsp, pcp);
245 	}
246 	pc_unlockfs(fsp);
247 	if (error) {
248 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
249 	}
250 	return (error);
251 }
252 
253 /*ARGSUSED*/
254 static int
255 pcfs_write(
256 	struct vnode *vp,
257 	struct uio *uiop,
258 	int ioflag,
259 	struct cred *cr,
260 	struct caller_context *ct)
261 {
262 	struct pcfs *fsp;
263 	struct pcnode *pcp;
264 	int error;
265 
266 	fsp = VFSTOPCFS(vp->v_vfsp);
267 	if (error = pc_verify(fsp))
268 		return (error);
269 	error = pc_lockfs(fsp, 0, 0);
270 	if (error)
271 		return (error);
272 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
273 		pc_unlockfs(fsp);
274 		return (EIO);
275 	}
276 	if (ioflag & FAPPEND) {
277 		/*
278 		 * in append mode start at end of file.
279 		 */
280 		uiop->uio_loffset = pcp->pc_size;
281 	}
282 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
283 	pcp->pc_flags |= PC_MOD;
284 	pc_mark_mod(fsp, pcp);
285 	if (ioflag & (FSYNC|FDSYNC))
286 		(void) pc_nodeupdate(pcp);
287 
288 	pc_unlockfs(fsp);
289 	if (error) {
290 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
291 	}
292 	return (error);
293 }
294 
295 /*
296  * read or write a vnode
297  */
298 static int
299 rwpcp(
300 	struct pcnode *pcp,
301 	struct uio *uio,
302 	enum uio_rw rw,
303 	int ioflag)
304 {
305 	struct vnode *vp = PCTOV(pcp);
306 	struct pcfs *fsp;
307 	daddr_t bn;			/* phys block number */
308 	int n;
309 	offset_t off;
310 	caddr_t base;
311 	int mapon, pagecreate;
312 	int newpage;
313 	int error = 0;
314 	rlim64_t limit = uio->uio_llimit;
315 	int oresid = uio->uio_resid;
316 
317 	/*
318 	 * If the filesystem was umounted by force, return immediately.
319 	 */
320 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
321 		return (EIO);
322 
323 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
324 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
325 
326 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
327 	ASSERT(vp->v_type == VREG);
328 
329 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
330 		return (0);
331 	}
332 
333 	if (uio->uio_loffset < 0)
334 		return (EINVAL);
335 
336 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
337 		limit = MAXOFFSET_T;
338 
339 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
340 		proc_t *p = ttoproc(curthread);
341 
342 		mutex_enter(&p->p_lock);
343 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
344 		    p, RCA_UNSAFE_SIGINFO);
345 		mutex_exit(&p->p_lock);
346 		return (EFBIG);
347 	}
348 
349 	/* the following condition will occur only for write */
350 
351 	if (uio->uio_loffset >= UINT32_MAX)
352 		return (EFBIG);
353 
354 	if (uio->uio_resid == 0)
355 		return (0);
356 
357 	if (limit > UINT32_MAX)
358 		limit = UINT32_MAX;
359 
360 	fsp = VFSTOPCFS(vp->v_vfsp);
361 	if (fsp->pcfs_flags & PCFS_IRRECOV)
362 		return (EIO);
363 
364 	do {
365 		/*
366 		 * Assignments to "n" in this block may appear
367 		 * to overflow in some cases.  However, after careful
368 		 * analysis it was determined that all assignments to
369 		 * "n" serve only to make "n" smaller.  Since "n"
370 		 * starts out as no larger than MAXBSIZE, "int" is
371 		 * safe.
372 		 */
373 		off = uio->uio_loffset & MAXBMASK;
374 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
375 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
376 		if (rw == UIO_READ) {
377 			offset_t diff;
378 
379 			diff = pcp->pc_size - uio->uio_loffset;
380 			if (diff <= 0)
381 				return (0);
382 			if (diff < n)
383 				n = (int)diff;
384 		}
385 		/*
386 		 * Compare limit with the actual offset + n, not the
387 		 * rounded down offset "off" or we will overflow
388 		 * the maximum file size after all.
389 		 */
390 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
391 			if (uio->uio_loffset >= limit) {
392 				error = EFBIG;
393 				break;
394 			}
395 			n = (int)(limit - uio->uio_loffset);
396 		}
397 
398 		/*
399 		 * Touch the page and fault it in if it is not in
400 		 * core before segmap_getmapflt can lock it. This
401 		 * is to avoid the deadlock if the buffer is mapped
402 		 * to the same file through mmap which we want to
403 		 * write to.
404 		 */
405 		uio_prefaultpages((long)n, uio);
406 
407 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
408 		pagecreate = 0;
409 		newpage = 0;
410 		if (rw == UIO_WRITE) {
411 			/*
412 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
413 			 * with one page at a time, instead of one MAXBSIZE
414 			 * at a time, so we can fully explore pagecreate
415 			 * optimization??
416 			 */
417 			if (uio->uio_loffset + n > pcp->pc_size) {
418 				uint_t ncl, lcn;
419 
420 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
421 				    fsp->pcfs_clsize);
422 				if (uio->uio_loffset > pcp->pc_size &&
423 				    ncl < (uint_t)howmany(uio->uio_loffset,
424 				    fsp->pcfs_clsize)) {
425 					/*
426 					 * Allocate and zerofill skipped
427 					 * clusters. This may not be worth the
428 					 * effort since a small lseek beyond
429 					 * eof but still within the cluster
430 					 * will not be zeroed out.
431 					 */
432 					lcn = pc_lblkno(fsp, uio->uio_loffset);
433 					error = pc_balloc(pcp, (daddr_t)lcn,
434 					    1, &bn);
435 					ncl = lcn + 1;
436 				}
437 				if (!error &&
438 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
439 				    fsp->pcfs_clsize))
440 					/*
441 					 * allocate clusters w/o zerofill
442 					 */
443 					error = pc_balloc(pcp,
444 					    (daddr_t)pc_lblkno(fsp,
445 					    uio->uio_loffset + n - 1),
446 					    0, &bn);
447 
448 				pcp->pc_flags |= PC_CHG;
449 
450 				if (error) {
451 					pc_cluster32_t ncl;
452 					int nerror;
453 
454 					/*
455 					 * figure out new file size from
456 					 * cluster chain length. If this
457 					 * is detected to loop, the chain
458 					 * is corrupted and we'd better
459 					 * keep our fingers off that file.
460 					 */
461 					nerror = pc_fileclsize(fsp,
462 					    pcp->pc_scluster, &ncl);
463 					if (nerror) {
464 						PC_DPRINTF1(2,
465 						    "cluster chain "
466 						    "corruption, "
467 						    "scluster=%d\n",
468 						    pcp->pc_scluster);
469 						pcp->pc_size = 0;
470 						pcp->pc_flags |= PC_INVAL;
471 						error = nerror;
472 						(void) segmap_release(segkmap,
473 						    base, 0);
474 						break;
475 					}
476 					pcp->pc_size = fsp->pcfs_clsize * ncl;
477 
478 					if (error == ENOSPC &&
479 					    (pcp->pc_size - uio->uio_loffset)
480 					    > 0) {
481 						PC_DPRINTF3(2, "rwpcp ENOSPC "
482 						    "off=%lld n=%d size=%d\n",
483 						    uio->uio_loffset,
484 						    n, pcp->pc_size);
485 						n = (int)(pcp->pc_size -
486 						    uio->uio_loffset);
487 					} else {
488 						PC_DPRINTF1(1,
489 						    "rwpcp error1=%d\n", error);
490 						(void) segmap_release(segkmap,
491 						    base, 0);
492 						break;
493 					}
494 				} else {
495 					pcp->pc_size =
496 					    (uint_t)(uio->uio_loffset + n);
497 				}
498 				if (mapon == 0) {
499 					newpage = segmap_pagecreate(segkmap,
500 					    base, (size_t)n, 0);
501 					pagecreate = 1;
502 				}
503 			} else if (n == MAXBSIZE) {
504 				newpage = segmap_pagecreate(segkmap, base,
505 				    (size_t)n, 0);
506 				pagecreate = 1;
507 			}
508 		}
509 		error = uiomove(base + mapon, (size_t)n, rw, uio);
510 
511 		if (pagecreate && uio->uio_loffset <
512 		    roundup(off + mapon + n, PAGESIZE)) {
513 			offset_t nzero, nmoved;
514 
515 			nmoved = uio->uio_loffset - (off + mapon);
516 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
517 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
518 		}
519 
520 		/*
521 		 * Unlock the pages which have been allocated by
522 		 * page_create_va() in segmap_pagecreate().
523 		 */
524 		if (newpage) {
525 			segmap_pageunlock(segkmap, base, (size_t)n,
526 			    rw == UIO_WRITE ? S_WRITE : S_READ);
527 		}
528 
529 		if (error) {
530 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
531 			/*
532 			 * If we failed on a write, we may have already
533 			 * allocated file blocks as well as pages.  It's hard
534 			 * to undo the block allocation, but we must be sure
535 			 * to invalidate any pages that may have been
536 			 * allocated.
537 			 */
538 			if (rw == UIO_WRITE)
539 				(void) segmap_release(segkmap, base, SM_INVAL);
540 			else
541 				(void) segmap_release(segkmap, base, 0);
542 		} else {
543 			uint_t flags = 0;
544 
545 			if (rw == UIO_READ) {
546 				if (n + mapon == MAXBSIZE ||
547 				    uio->uio_loffset == pcp->pc_size)
548 					flags = SM_DONTNEED;
549 			} else if (ioflag & (FSYNC|FDSYNC)) {
550 				flags = SM_WRITE;
551 			} else if (n + mapon == MAXBSIZE) {
552 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
553 			}
554 			error = segmap_release(segkmap, base, flags);
555 		}
556 
557 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
558 
559 	if (oresid != uio->uio_resid)
560 		error = 0;
561 	return (error);
562 }
563 
564 /*ARGSUSED*/
565 static int
566 pcfs_getattr(
567 	struct vnode *vp,
568 	struct vattr *vap,
569 	int flags,
570 	struct cred *cr,
571 	caller_context_t *ct)
572 {
573 	struct pcnode *pcp;
574 	struct pcfs *fsp;
575 	int error;
576 	char attr;
577 	struct pctime atime;
578 	int64_t unixtime;
579 
580 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
581 
582 	fsp = VFSTOPCFS(vp->v_vfsp);
583 	error = pc_lockfs(fsp, 0, 0);
584 	if (error)
585 		return (error);
586 
587 	/*
588 	 * Note that we don't check for "invalid node" (PC_INVAL) here
589 	 * only in order to make stat() succeed. We allow no I/O on such
590 	 * a node, but do allow to check for its existence.
591 	 */
592 	if ((pcp = VTOPC(vp)) == NULL) {
593 		pc_unlockfs(fsp);
594 		return (EIO);
595 	}
596 	/*
597 	 * Copy from pcnode.
598 	 */
599 	vap->va_type = vp->v_type;
600 	attr = pcp->pc_entry.pcd_attr;
601 	if (PCA_IS_HIDDEN(fsp, attr))
602 		vap->va_mode = 0;
603 	else if (attr & PCA_LABEL)
604 		vap->va_mode = 0444;
605 	else if (attr & PCA_RDONLY)
606 		vap->va_mode = 0555;
607 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
608 		vap->va_mode = 0755;
609 	} else {
610 		vap->va_mode = 0777;
611 	}
612 
613 	if (attr & PCA_DIR)
614 		vap->va_mode |= S_IFDIR;
615 	else
616 		vap->va_mode |= S_IFREG;
617 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
618 		vap->va_uid = 0;
619 		vap->va_gid = 0;
620 	} else {
621 		vap->va_uid = crgetuid(cr);
622 		vap->va_gid = crgetgid(cr);
623 	}
624 	vap->va_fsid = vp->v_vfsp->vfs_dev;
625 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
626 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
627 	    pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
628 	vap->va_nlink = 1;
629 	vap->va_size = (u_offset_t)pcp->pc_size;
630 	vap->va_rdev = 0;
631 	vap->va_nblocks =
632 	    (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
633 	vap->va_blksize = fsp->pcfs_clsize;
634 
635 	/*
636 	 * FAT root directories have no timestamps. In order not to return
637 	 * "time zero" (1/1/1970), we record the time of the mount and give
638 	 * that. This breaks less expectations.
639 	 */
640 	if (vp->v_flag & VROOT) {
641 		vap->va_mtime = fsp->pcfs_mounttime;
642 		vap->va_atime = fsp->pcfs_mounttime;
643 		vap->va_ctime = fsp->pcfs_mounttime;
644 		pc_unlockfs(fsp);
645 		return (0);
646 	}
647 
648 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
649 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
650 		if (unixtime > INT32_MAX)
651 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
652 		unixtime = MIN(unixtime, INT32_MAX);
653 	} else if (unixtime > INT32_MAX &&
654 	    get_udatamodel() == DATAMODEL_ILP32) {
655 		pc_unlockfs(fsp);
656 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
657 		return (EOVERFLOW);
658 	}
659 
660 	vap->va_mtime.tv_sec = (time_t)unixtime;
661 	vap->va_mtime.tv_nsec = 0;
662 
663 	/*
664 	 * FAT doesn't know about POSIX ctime.
665 	 * Best approximation is to always set it to mtime.
666 	 */
667 	vap->va_ctime = vap->va_mtime;
668 
669 	/*
670 	 * FAT only stores "last access date". If that's the
671 	 * same as the date of last modification then the time
672 	 * of last access is known. Otherwise, use midnight.
673 	 */
674 	atime.pct_date = pcp->pc_entry.pcd_ladate;
675 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
676 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
677 	else
678 		atime.pct_time = 0;
679 	pc_pcttotv(&atime, &unixtime);
680 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
681 		if (unixtime > INT32_MAX)
682 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
683 		unixtime = MIN(unixtime, INT32_MAX);
684 	} else if (unixtime > INT32_MAX &&
685 	    get_udatamodel() == DATAMODEL_ILP32) {
686 		pc_unlockfs(fsp);
687 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
688 		return (EOVERFLOW);
689 	}
690 
691 	vap->va_atime.tv_sec = (time_t)unixtime;
692 	vap->va_atime.tv_nsec = 0;
693 
694 	pc_unlockfs(fsp);
695 	return (0);
696 }
697 
698 
699 /*ARGSUSED*/
700 static int
701 pcfs_setattr(
702 	struct vnode *vp,
703 	struct vattr *vap,
704 	int flags,
705 	struct cred *cr,
706 	caller_context_t *ct)
707 {
708 	struct pcnode *pcp;
709 	mode_t mask = vap->va_mask;
710 	int error;
711 	struct pcfs *fsp;
712 	timestruc_t now, *timep;
713 
714 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
715 	/*
716 	 * cannot set these attributes
717 	 */
718 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
719 		return (EINVAL);
720 	}
721 	/*
722 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
723 	 * from 'tar' when it tries to set times on a directory, and console
724 	 * printf's on the NFS server when it gets EINVAL back on such a
725 	 * request. One possible problem with that since a directory entry
726 	 * identifies a file, '.' and all the '..' entries in subdirectories
727 	 * may get out of sync when the directory is updated since they're
728 	 * treated like separate files. We could fix that by looking for
729 	 * '.' and giving it the same attributes, and then looking for
730 	 * all the subdirectories and updating '..', but that's pretty
731 	 * expensive for something that doesn't seem likely to matter.
732 	 */
733 	/* can't do some ops on directories anyway */
734 	if ((vp->v_type == VDIR) &&
735 	    (mask & AT_SIZE)) {
736 		return (EINVAL);
737 	}
738 
739 	fsp = VFSTOPCFS(vp->v_vfsp);
740 	error = pc_lockfs(fsp, 0, 0);
741 	if (error)
742 		return (error);
743 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
744 		pc_unlockfs(fsp);
745 		return (EIO);
746 	}
747 
748 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
749 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
750 			pc_unlockfs(fsp);
751 			return (EACCES);
752 		}
753 	}
754 
755 	/*
756 	 * Change file access modes.
757 	 * If nobody has write permission, file is marked readonly.
758 	 * Otherwise file is writable by anyone.
759 	 */
760 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
761 		if ((vap->va_mode & 0222) == 0)
762 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
763 		else
764 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
765 		pcp->pc_flags |= PC_CHG;
766 	}
767 	/*
768 	 * Truncate file. Must have write permission.
769 	 */
770 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
771 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
772 			error = EACCES;
773 			goto out;
774 		}
775 		if (vap->va_size > UINT32_MAX) {
776 			error = EFBIG;
777 			goto out;
778 		}
779 		error = pc_truncate(pcp, (uint_t)vap->va_size);
780 
781 		if (error)
782 			goto out;
783 
784 		if (vap->va_size == 0)
785 			vnevent_truncate(vp, ct);
786 	}
787 	/*
788 	 * Change file modified times.
789 	 */
790 	if (mask & (AT_MTIME | AT_CTIME)) {
791 		/*
792 		 * If SysV-compatible option to set access and
793 		 * modified times if privileged, owner, or write access,
794 		 * use current time rather than va_mtime.
795 		 *
796 		 * XXX - va_mtime.tv_sec == -1 flags this.
797 		 */
798 		timep = &vap->va_mtime;
799 		if (vap->va_mtime.tv_sec == -1) {
800 			gethrestime(&now);
801 			timep = &now;
802 		}
803 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
804 		    timep->tv_sec > INT32_MAX) {
805 			error = EOVERFLOW;
806 			goto out;
807 		}
808 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
809 		if (error)
810 			goto out;
811 		pcp->pc_flags |= PC_CHG;
812 	}
813 	/*
814 	 * Change file access times.
815 	 */
816 	if (mask & AT_ATIME) {
817 		/*
818 		 * If SysV-compatible option to set access and
819 		 * modified times if privileged, owner, or write access,
820 		 * use current time rather than va_mtime.
821 		 *
822 		 * XXX - va_atime.tv_sec == -1 flags this.
823 		 */
824 		struct pctime	atime;
825 
826 		timep = &vap->va_atime;
827 		if (vap->va_atime.tv_sec == -1) {
828 			gethrestime(&now);
829 			timep = &now;
830 		}
831 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
832 		    timep->tv_sec > INT32_MAX) {
833 			error = EOVERFLOW;
834 			goto out;
835 		}
836 		error = pc_tvtopct(timep, &atime);
837 		if (error)
838 			goto out;
839 		pcp->pc_entry.pcd_ladate = atime.pct_date;
840 		pcp->pc_flags |= PC_CHG;
841 	}
842 out:
843 	pc_unlockfs(fsp);
844 	return (error);
845 }
846 
847 
848 /*ARGSUSED*/
849 static int
850 pcfs_access(
851 	struct vnode *vp,
852 	int mode,
853 	int flags,
854 	struct cred *cr,
855 	caller_context_t *ct)
856 {
857 	struct pcnode *pcp;
858 	struct pcfs *fsp;
859 
860 
861 	fsp = VFSTOPCFS(vp->v_vfsp);
862 
863 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
864 		return (EIO);
865 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
866 		return (EACCES);
867 
868 	/*
869 	 * If this is a boot partition, privileged users have full access while
870 	 * others have read-only access.
871 	 */
872 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
873 		if ((mode & VWRITE) &&
874 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
875 			return (EACCES);
876 	}
877 	return (0);
878 }
879 
880 
881 /*ARGSUSED*/
882 static int
883 pcfs_fsync(
884 	struct vnode *vp,
885 	int syncflag,
886 	struct cred *cr,
887 	caller_context_t *ct)
888 {
889 	struct pcfs *fsp;
890 	struct pcnode *pcp;
891 	int error;
892 
893 	fsp = VFSTOPCFS(vp->v_vfsp);
894 	if (error = pc_verify(fsp))
895 		return (error);
896 	error = pc_lockfs(fsp, 0, 0);
897 	if (error)
898 		return (error);
899 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
900 		pc_unlockfs(fsp);
901 		return (EIO);
902 	}
903 	rw_enter(&pcnodes_lock, RW_WRITER);
904 	error = pc_nodesync(pcp);
905 	rw_exit(&pcnodes_lock);
906 	pc_unlockfs(fsp);
907 	return (error);
908 }
909 
910 
911 /*ARGSUSED*/
912 static void
913 pcfs_inactive(
914 	struct vnode *vp,
915 	struct cred *cr,
916 	caller_context_t *ct)
917 {
918 	struct pcnode *pcp;
919 	struct pcfs *fsp;
920 	int error;
921 
922 	fsp = VFSTOPCFS(vp->v_vfsp);
923 	error = pc_lockfs(fsp, 0, 1);
924 
925 	/*
926 	 * If the filesystem was umounted by force, all dirty
927 	 * pages associated with this vnode are invalidated
928 	 * and then the vnode will be freed.
929 	 */
930 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
931 		pcp = VTOPC(vp);
932 		if (vn_has_cached_data(vp)) {
933 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
934 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
935 		}
936 		remque(pcp);
937 		if (error == 0)
938 			pc_unlockfs(fsp);
939 		vn_free(vp);
940 		kmem_free(pcp, sizeof (struct pcnode));
941 		VFS_RELE(PCFSTOVFS(fsp));
942 		return;
943 	}
944 
945 	mutex_enter(&vp->v_lock);
946 	ASSERT(vp->v_count >= 1);
947 	if (vp->v_count > 1) {
948 		vp->v_count--;  /* release our hold from vn_rele */
949 		mutex_exit(&vp->v_lock);
950 		pc_unlockfs(fsp);
951 		return;
952 	}
953 	mutex_exit(&vp->v_lock);
954 
955 	/*
956 	 * Check again to confirm that no intervening I/O error
957 	 * with a subsequent pc_diskchanged() call has released
958 	 * the pcnode. If it has then release the vnode as above.
959 	 */
960 	pcp = VTOPC(vp);
961 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
962 		if (vn_has_cached_data(vp))
963 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
964 			    pcfs_putapage, B_INVAL | B_TRUNC,
965 			    (struct cred *)NULL);
966 	}
967 
968 	if (pcp == NULL) {
969 		vn_free(vp);
970 	} else {
971 		pc_rele(pcp);
972 	}
973 
974 	if (!error)
975 		pc_unlockfs(fsp);
976 }
977 
978 /*ARGSUSED*/
979 static int
980 pcfs_lookup(
981 	struct vnode *dvp,
982 	char *nm,
983 	struct vnode **vpp,
984 	struct pathname *pnp,
985 	int flags,
986 	struct vnode *rdir,
987 	struct cred *cr,
988 	caller_context_t *ct,
989 	int *direntflags,
990 	pathname_t *realpnp)
991 {
992 	struct pcfs *fsp;
993 	struct pcnode *pcp;
994 	int error;
995 
996 	/*
997 	 * If the filesystem was umounted by force, return immediately.
998 	 */
999 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1000 		return (EIO);
1001 
1002 	/*
1003 	 * verify that the dvp is still valid on the disk
1004 	 */
1005 	fsp = VFSTOPCFS(dvp->v_vfsp);
1006 	if (error = pc_verify(fsp))
1007 		return (error);
1008 	error = pc_lockfs(fsp, 0, 0);
1009 	if (error)
1010 		return (error);
1011 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1012 		pc_unlockfs(fsp);
1013 		return (EIO);
1014 	}
1015 	/*
1016 	 * Null component name is a synonym for directory being searched.
1017 	 */
1018 	if (*nm == '\0') {
1019 		VN_HOLD(dvp);
1020 		*vpp = dvp;
1021 		pc_unlockfs(fsp);
1022 		return (0);
1023 	}
1024 
1025 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1026 	if (!error) {
1027 		*vpp = PCTOV(pcp);
1028 		pcp->pc_flags |= PC_EXTERNAL;
1029 	}
1030 	pc_unlockfs(fsp);
1031 	return (error);
1032 }
1033 
1034 
1035 /*ARGSUSED*/
1036 static int
1037 pcfs_create(
1038 	struct vnode *dvp,
1039 	char *nm,
1040 	struct vattr *vap,
1041 	enum vcexcl exclusive,
1042 	int mode,
1043 	struct vnode **vpp,
1044 	struct cred *cr,
1045 	int flag,
1046 	caller_context_t *ct,
1047 	vsecattr_t *vsecp)
1048 {
1049 	int error;
1050 	struct pcnode *pcp;
1051 	struct vnode *vp;
1052 	struct pcfs *fsp;
1053 
1054 	/*
1055 	 * can't create directories. use pcfs_mkdir.
1056 	 * can't create anything other than files.
1057 	 */
1058 	if (vap->va_type == VDIR)
1059 		return (EISDIR);
1060 	else if (vap->va_type != VREG)
1061 		return (EINVAL);
1062 
1063 	pcp = NULL;
1064 	fsp = VFSTOPCFS(dvp->v_vfsp);
1065 	error = pc_lockfs(fsp, 0, 0);
1066 	if (error)
1067 		return (error);
1068 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1069 		pc_unlockfs(fsp);
1070 		return (EIO);
1071 	}
1072 
1073 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1074 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1075 			pc_unlockfs(fsp);
1076 			return (EACCES);
1077 		}
1078 	}
1079 
1080 	if (*nm == '\0') {
1081 		/*
1082 		 * Null component name refers to the directory itself.
1083 		 */
1084 		VN_HOLD(dvp);
1085 		pcp = VTOPC(dvp);
1086 		error = EEXIST;
1087 	} else {
1088 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1089 	}
1090 	/*
1091 	 * if file exists and this is a nonexclusive create,
1092 	 * check for access permissions
1093 	 */
1094 	if (error == EEXIST) {
1095 		vp = PCTOV(pcp);
1096 		if (exclusive == NONEXCL) {
1097 			if (vp->v_type == VDIR) {
1098 				error = EISDIR;
1099 			} else if (mode) {
1100 				error = pcfs_access(PCTOV(pcp), mode, 0,
1101 				    cr, ct);
1102 			} else {
1103 				error = 0;
1104 			}
1105 		}
1106 		if (error) {
1107 			VN_RELE(PCTOV(pcp));
1108 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1109 		    (vap->va_size == 0)) {
1110 			error = pc_truncate(pcp, 0L);
1111 			if (error) {
1112 				VN_RELE(PCTOV(pcp));
1113 			} else {
1114 				vnevent_create(PCTOV(pcp), ct);
1115 			}
1116 		}
1117 	}
1118 	if (error) {
1119 		pc_unlockfs(fsp);
1120 		return (error);
1121 	}
1122 	*vpp = PCTOV(pcp);
1123 	pcp->pc_flags |= PC_EXTERNAL;
1124 	pc_unlockfs(fsp);
1125 	return (error);
1126 }
1127 
1128 /*ARGSUSED*/
1129 static int
1130 pcfs_remove(
1131 	struct vnode *vp,
1132 	char *nm,
1133 	struct cred *cr,
1134 	caller_context_t *ct,
1135 	int flags)
1136 {
1137 	struct pcfs *fsp;
1138 	struct pcnode *pcp;
1139 	int error;
1140 
1141 	fsp = VFSTOPCFS(vp->v_vfsp);
1142 	if (error = pc_verify(fsp))
1143 		return (error);
1144 	error = pc_lockfs(fsp, 0, 0);
1145 	if (error)
1146 		return (error);
1147 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1148 		pc_unlockfs(fsp);
1149 		return (EIO);
1150 	}
1151 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1152 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1153 			pc_unlockfs(fsp);
1154 			return (EACCES);
1155 		}
1156 	}
1157 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1158 	pc_unlockfs(fsp);
1159 	return (error);
1160 }
1161 
1162 /*
1163  * Rename a file or directory
1164  * This rename is restricted to only rename files within a directory.
1165  * XX should make rename more general
1166  */
1167 /*ARGSUSED*/
1168 static int
1169 pcfs_rename(
1170 	struct vnode *sdvp,		/* old (source) parent vnode */
1171 	char *snm,			/* old (source) entry name */
1172 	struct vnode *tdvp,		/* new (target) parent vnode */
1173 	char *tnm,			/* new (target) entry name */
1174 	struct cred *cr,
1175 	caller_context_t *ct,
1176 	int flags)
1177 {
1178 	struct pcfs *fsp;
1179 	struct pcnode *dp;	/* parent pcnode */
1180 	struct pcnode *tdp;
1181 	int error;
1182 
1183 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1184 	if (error = pc_verify(fsp))
1185 		return (error);
1186 
1187 	/*
1188 	 * make sure we can muck with this directory.
1189 	 */
1190 	error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1191 	if (error) {
1192 		return (error);
1193 	}
1194 	error = pc_lockfs(fsp, 0, 0);
1195 	if (error)
1196 		return (error);
1197 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1198 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1199 		pc_unlockfs(fsp);
1200 		return (EIO);
1201 	}
1202 	error = pc_rename(dp, tdp, snm, tnm, ct);
1203 	pc_unlockfs(fsp);
1204 	return (error);
1205 }
1206 
1207 /*ARGSUSED*/
1208 static int
1209 pcfs_mkdir(
1210 	struct vnode *dvp,
1211 	char *nm,
1212 	struct vattr *vap,
1213 	struct vnode **vpp,
1214 	struct cred *cr,
1215 	caller_context_t *ct,
1216 	int flags,
1217 	vsecattr_t *vsecp)
1218 {
1219 	struct pcfs *fsp;
1220 	struct pcnode *pcp;
1221 	int error;
1222 
1223 	fsp = VFSTOPCFS(dvp->v_vfsp);
1224 	if (error = pc_verify(fsp))
1225 		return (error);
1226 	error = pc_lockfs(fsp, 0, 0);
1227 	if (error)
1228 		return (error);
1229 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1230 		pc_unlockfs(fsp);
1231 		return (EIO);
1232 	}
1233 
1234 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1235 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1236 			pc_unlockfs(fsp);
1237 			return (EACCES);
1238 		}
1239 	}
1240 
1241 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1242 
1243 	if (!error) {
1244 		pcp -> pc_flags |= PC_EXTERNAL;
1245 		*vpp = PCTOV(pcp);
1246 	} else if (error == EEXIST) {
1247 		VN_RELE(PCTOV(pcp));
1248 	}
1249 	pc_unlockfs(fsp);
1250 	return (error);
1251 }
1252 
1253 /*ARGSUSED*/
1254 static int
1255 pcfs_rmdir(
1256 	struct vnode *dvp,
1257 	char *nm,
1258 	struct vnode *cdir,
1259 	struct cred *cr,
1260 	caller_context_t *ct,
1261 	int flags)
1262 {
1263 	struct pcfs *fsp;
1264 	struct pcnode *pcp;
1265 	int error;
1266 
1267 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1268 	if (error = pc_verify(fsp))
1269 		return (error);
1270 	if (error = pc_lockfs(fsp, 0, 0))
1271 		return (error);
1272 
1273 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1274 		pc_unlockfs(fsp);
1275 		return (EIO);
1276 	}
1277 
1278 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1279 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1280 			pc_unlockfs(fsp);
1281 			return (EACCES);
1282 		}
1283 	}
1284 
1285 	error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1286 	pc_unlockfs(fsp);
1287 	return (error);
1288 }
1289 
/*
 * Read entries in a directory.
 * We must convert pc format to unix format.
 *
 * The uio must consist of exactly one iovec and its offset must be a
 * multiple of sizeof (struct pcdir), otherwise EINVAL is returned.
 * If 'eofp' is non-NULL it is cleared on entry and set to 1 when the end
 * of the directory is reached.  Entries are emitted through the
 * pc_read_long_fn() / pc_read_short_fn() helpers.
 */

/*ARGSUSED*/
static int
pcfs_readdir(
	struct vnode *dvp,
	struct uio *uiop,
	struct cred *cr,
	int *eofp,
	caller_context_t *ct,
	int flags)
{
	struct pcnode *pcp;
	struct pcfs *fsp;
	struct pcdir *ep;
	struct buf *bp = NULL;
	offset_t offset;
	int boff;
	struct pc_dirent lbp;
	struct pc_dirent *ld = &lbp;
	int error;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if ((uiop->uio_iovcnt != 1) ||
	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
		return (EINVAL);
	}
	fsp = VFSTOPCFS(dvp->v_vfsp);
	/*
	 * verify that the dp is still valid on the disk
	 */
	if (error = pc_verify(fsp)) {
		return (error);
	}
	error = pc_lockfs(fsp, 0, 0);
	if (error)
		return (error);
	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
		pc_unlockfs(fsp);
		return (EIO);
	}

	bzero(ld, sizeof (*ld));

	if (eofp != NULL)
		*eofp = 0;
	offset = uiop->uio_loffset;

	if (dvp->v_flag & VROOT) {
		/*
		 * kludge up entries for "." and ".." in the root.
		 * They occupy the first two pcdir-sized slots of the
		 * offset space seen by the caller.
		 */
		if (offset == 0) {
			(void) strcpy(ld->d_name, ".");
			ld->d_reclen = DIRENT64_RECLEN(1);
			ld->d_off = (off64_t)sizeof (struct pcdir);
			ld->d_ino = (ino64_t)UINT_MAX;
			/* caller's buffer cannot hold even this record */
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		if (offset == sizeof (struct pcdir)) {
			(void) strcpy(ld->d_name, "..");
			ld->d_reclen = DIRENT64_RECLEN(2);
			if (ld->d_reclen > uiop->uio_resid) {
				pc_unlockfs(fsp);
				return (ENOSPC);
			}
			ld->d_off = (off64_t)(uiop->uio_loffset +
			    sizeof (struct pcdir));
			ld->d_ino = (ino64_t)UINT_MAX;
			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
			uiop->uio_loffset = ld->d_off;
			offset = uiop->uio_loffset;
		}
		offset -= 2 * sizeof (struct pcdir);
		/* offset now has the real offset value into directory file */
	}

	for (;;) {
		/*
		 * (Re)read the directory block whenever we start a new
		 * block, have no buffer yet, or ran off the current one.
		 */
		boff = pc_blkoff(fsp, offset);
		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
			if (bp != NULL) {
				brelse(bp);
				bp = NULL;
			}
			error = pc_blkatoff(pcp, offset, &bp, &ep);
			if (error) {
				/* ENOENT here is treated as end of directory */
				if (error == ENOENT) {
					error = 0;
					if (eofp)
						*eofp = 1;
				}
				break;
			}
		}
		/* A PCD_UNUSED first byte ends the scan and reports EOF. */
		if (ep->pcd_filename[0] == PCD_UNUSED) {
			if (eofp)
				*eofp = 1;
			break;
		}
		/*
		 * Skip over PCD_ERASED slots, advancing both the caller's
		 * offset and our own.
		 */
		if (ep->pcd_filename[0] == PCD_ERASED) {
			uiop->uio_loffset += sizeof (struct pcdir);
			offset += sizeof (struct pcdir);
			ep++;
			continue;
		}
		/*
		 * A non-zero return from either helper below ends the scan;
		 * 'error' is left untouched, so the call still returns 0.
		 */
		if (PCDL_IS_LFN(ep)) {
			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
			    0)
				break;
			continue;
		}

		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
			break;
	}
	if (bp)
		brelse(bp);
	pc_unlockfs(fsp);
	return (error);
}
1427 
1428 
/*
 * Called from pvn_getpages to get a particular page.  When we are called
 * the pcfs is already locked.
 *
 * If the page at 'off' is not cached, it is read from the device in one
 * or more contiguous transfers (as mapped by pc_bmap()), any tail beyond
 * the data read is zero-filled, and the page list 'pl' is initialised via
 * pvn_plist_init().  If the page is already cached it is looked up with a
 * shared lock and returned in pl[0].  A NULL 'pl' (async request) is a
 * no-op because pcfs does no read-ahead.
 */
/*ARGSUSED*/
static int
pcfs_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],		/* NULL if async IO is requested */
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct pcnode *pcp;
	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
	struct vnode *devvp;
	page_t *pp;
	page_t *pagefound;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
	    (void *)vp, off, len);

	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
		return (EIO);
	devvp = fsp->pcfs_devvp;

	/* pcfs doesn't do readaheads */
	if (pl == NULL)
		return (0);

	pl[0] = NULL;
	err = 0;
	/*
	 * If the accessed time on the pcnode has not already been
	 * set elsewhere (e.g. for read/setattr) we set the time now.
	 * This gives us approximate access times for mmap'ed files
	 * which are accessed via loads in the user address space.
	 * Nothing is marked on a read-only mount.
	 */
	if ((pcp->pc_flags & PC_ACC) == 0 &&
	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
		pc_mark_acc(fsp, pcp);
	}
reread:
	if ((pagefound = page_exists(vp, off)) == NULL) {
		/*
		 * Need to really do disk IO to get the page(s).
		 */
		struct buf *bp;
		daddr_t lbn, bn;
		u_offset_t io_off;
		size_t io_len;
		u_offset_t lbnoff, xferoffset;
		u_offset_t pgoff;
		uint_t	xfersize;
		int err1;

		/*
		 * lbnoff is 'off' rounded down to a cluster boundary and
		 * xferoffset is 'off' rounded down to a sector boundary.
		 */
		lbn = pc_lblkno(fsp, off);
		lbnoff = off & ~(fsp->pcfs_clsize - 1);
		xferoffset = off & ~(fsp->pcfs_secsize - 1);

		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
		if (pp == NULL)
			/*
			 * XXX - If pcfs is made MT-hot, this should go
			 * back to reread.
			 */
			panic("pcfs_getapage pvn_read_kluster");

		/*
		 * Fill the page piece by piece; each pass advances by the
		 * transfer size that pc_bmap() handed back.
		 */
		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
		    pgoff += xfersize,
		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
		    lbnoff += xfersize, xferoffset += xfersize) {
			/*
			 * read as many contiguous blocks as possible to
			 * fill this page
			 */
			xfersize = PAGESIZE - pgoff;
			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
			if (err1) {
				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
				err = err1;
				goto out;
			}
			bp = pageio_setup(pp, xfersize, devvp, B_READ);
			bp->b_edev = devvp->v_rdev;
			bp->b_dev = cmpdev(devvp->v_rdev);
			bp->b_blkno = bn + btodt(xferoffset - lbnoff);
			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
			bp->b_file = vp;
			bp->b_offset = (offset_t)(off + pgoff);

			(void) bdev_strategy(bp);

			lwp_stat_update(LWP_STAT_INBLK, 1);

			if (err == 0)
				err = biowait(bp);
			else
				(void) biowait(bp);
			pageio_done(bp);
			if (err)
				goto out;
		}
		/* zero whatever part of the page was not read from disk */
		if (pgoff < PAGESIZE) {
			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
		}
		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
	}
out:
	/*
	 * err can only be non-zero after the read path above ran, so pp
	 * has been assigned by pvn_read_kluster() whenever it is tested here.
	 */
	if (err) {
		if (pp != NULL)
			pvn_read_done(pp, B_ERROR);
		return (err);
	}

	if (pagefound) {
		/*
		 * Page exists in the cache, acquire the "shared"
		 * lock.  If this fails, go back to reread.
		 */
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			goto reread;
		}
		pl[0] = pp;
		pl[1] = NULL;
	}
	return (err);
}
1570 
1571 /*
1572  * Return all the pages from [off..off+len] in given file
1573  */
1574 /* ARGSUSED */
1575 static int
1576 pcfs_getpage(
1577 	struct vnode *vp,
1578 	offset_t off,
1579 	size_t len,
1580 	uint_t *protp,
1581 	page_t *pl[],
1582 	size_t plsz,
1583 	struct seg *seg,
1584 	caddr_t addr,
1585 	enum seg_rw rw,
1586 	struct cred *cr,
1587 	caller_context_t *ct)
1588 {
1589 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1590 	int err;
1591 
1592 	PC_DPRINTF0(6, "pcfs_getpage\n");
1593 	if (err = pc_verify(fsp))
1594 		return (err);
1595 	if (vp->v_flag & VNOMAP)
1596 		return (ENOSYS);
1597 	ASSERT(off <= UINT32_MAX);
1598 	err = pc_lockfs(fsp, 0, 0);
1599 	if (err)
1600 		return (err);
1601 	if (protp != NULL)
1602 		*protp = PROT_ALL;
1603 
1604 	ASSERT((off & PAGEOFFSET) == 0);
1605 	err = pvn_getpages(pcfs_getapage, vp, off, len, protp, pl, plsz,
1606 	    seg, addr, rw, cr);
1607 
1608 	pc_unlockfs(fsp);
1609 	return (err);
1610 }
1611 
1612 
/*
 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
 * If len == 0, do from off to EOF.
 *
 * The normal cases should be len == 0 & off == 0 (entire vp list),
 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
 * (from pageout).
 *
 * Returns ENOSYS for VNOMAP vnodes, EIO for a missing/invalid pcnode or a
 * forcibly unmounted filesystem, and ENOMEM when invoked from the pageout
 * process.  B_ASYNC is always stripped from 'flags' before any work is
 * done.
 */
/*ARGSUSED*/
static int
pcfs_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr,
	caller_context_t *ct)
{
	struct pcnode *pcp;
	page_t *pp;
	struct pcfs *fsp;
	u_offset_t io_off;
	size_t io_len;
	offset_t eoff;
	int err;

	/*
	 * If the filesystem was umounted by force, return immediately.
	 */
	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	fsp = VFSTOPCFS(vp->v_vfsp);

	if (err = pc_verify(fsp))
		return (err);
	if ((pcp = VTOPC(vp)) == NULL) {
		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
		return (EIO);
	}
	if (pcp->pc_flags & PC_INVAL)
		return (EIO);

	if (curproc == proc_pageout) {
		/*
		 * XXX - This is a quick hack to avoid blocking
		 * pageout. Also to avoid pcfs_getapage deadlocking
		 * with putpage when memory is running out,
		 * since we only have one global lock and we don't
		 * support async putpage.
		 * It should be fixed someday.
		 *
		 * Interestingly, this used to be a test of NOMEMWAIT().
		 * We only ever got here once pcfs started supporting
		 * NFS sharing, and then only because the NFS server
		 * threads seem to do writes in sched's process context.
		 * Since everyone else seems to just care about pageout,
		 * the test was changed to look for pageout directly.
		 */
		return (ENOMEM);
	}

	ASSERT(off <= UINT32_MAX);

	flags &= ~B_ASYNC;	/* XXX should fix this later */

	err = pc_lockfs(fsp, 0, 0);
	if (err)
		return (err);
	/* nothing cached, or the range starts at/after EOF: nothing to do */
	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
		pc_unlockfs(fsp);
		return (0);
	}

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off
		 */
		err = pvn_vplist_dirty(vp, off,
		    pcfs_putapage, flags, cr);
	} else {
		eoff = off + len;

		/*
		 * Walk the requested range, stopping at the end of the
		 * range or the end of the file, whichever comes first.
		 */
		for (io_off = off; io_off < eoff &&
		    io_off < pcp->pc_size; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 *
			 * NOTE(review): B_ASYNC was cleared above, so the
			 * first branch is always the one taken here.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			/* no page, or page not dirty: step one page ahead */
			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = pcfs_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	if (err == 0 && (flags & B_INVAL) &&
	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
		/*
		 * If doing "invalidation", make sure that
		 * all pages on the vnode list are actually
		 * gone.
		 */
		cmn_err(CE_PANIC,
		    "pcfs_putpage: B_INVAL, pages not gone");
	} else if (err) {
		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
	}
	pc_unlockfs(fsp);
	return (err);
}
1750 
1751 /*
1752  * Write out a single page, possibly klustering adjacent dirty pages.
1753  */
1754 /*ARGSUSED*/
1755 int
1756 pcfs_putapage(
1757 	struct vnode *vp,
1758 	page_t *pp,
1759 	u_offset_t *offp,
1760 	size_t *lenp,
1761 	int flags,
1762 	struct cred *cr)
1763 {
1764 	struct pcnode *pcp;
1765 	struct pcfs *fsp;
1766 	struct vnode *devvp;
1767 	size_t io_len;
1768 	daddr_t bn;
1769 	u_offset_t lbn, lbnoff, xferoffset;
1770 	uint_t pgoff, xfersize;
1771 	int err = 0;
1772 	u_offset_t io_off;
1773 
1774 	pcp = VTOPC(vp);
1775 	fsp = VFSTOPCFS(vp->v_vfsp);
1776 	devvp = fsp->pcfs_devvp;
1777 
1778 	/*
1779 	 * If the modified time on the inode has not already been
1780 	 * set elsewhere (e.g. for write/setattr) and this is not
1781 	 * a call from msync (B_FORCE) we set the time now.
1782 	 * This gives us approximate modified times for mmap'ed files
1783 	 * which are modified via stores in the user address space.
1784 	 */
1785 	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1786 		pcp->pc_flags |= PC_MOD;
1787 		pc_mark_mod(fsp, pcp);
1788 	}
1789 	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1790 	    PAGESIZE, flags);
1791 
1792 	if (fsp->pcfs_flags & PCFS_IRRECOV) {
1793 		goto out;
1794 	}
1795 
1796 	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1797 
1798 	lbn = pc_lblkno(fsp, io_off);
1799 	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1800 	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1801 
1802 	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1803 	    pgoff += xfersize,
1804 	    lbn += howmany(xfersize, fsp->pcfs_clsize),
1805 	    lbnoff += xfersize, xferoffset += xfersize) {
1806 
1807 		struct buf *bp;
1808 		int err1;
1809 
1810 		/*
1811 		 * write as many contiguous blocks as possible from this page
1812 		 */
1813 		xfersize = io_len - pgoff;
1814 		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1815 		if (err1) {
1816 			err = err1;
1817 			goto out;
1818 		}
1819 		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1820 		bp->b_edev = devvp->v_rdev;
1821 		bp->b_dev = cmpdev(devvp->v_rdev);
1822 		bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1823 		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1824 		bp->b_file = vp;
1825 		bp->b_offset = (offset_t)(io_off + pgoff);
1826 
1827 		(void) bdev_strategy(bp);
1828 
1829 		lwp_stat_update(LWP_STAT_OUBLK, 1);
1830 
1831 		if (err == 0)
1832 			err = biowait(bp);
1833 		else
1834 			(void) biowait(bp);
1835 		pageio_done(bp);
1836 	}
1837 	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1838 	pp = NULL;
1839 
1840 out:
1841 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1842 		pvn_write_done(pp, B_WRITE | flags);
1843 	} else if (err != 0 && pp != NULL) {
1844 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1845 	}
1846 
1847 	if (offp)
1848 		*offp = io_off;
1849 	if (lenp)
1850 		*lenp = io_len;
1851 		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1852 		    (void *)vp, (void *)pp, io_off, io_len);
1853 	if (err) {
1854 		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1855 	}
1856 	return (err);
1857 }
1858 
1859 /*ARGSUSED*/
1860 static int
1861 pcfs_map(
1862 	struct vnode *vp,
1863 	offset_t off,
1864 	struct as *as,
1865 	caddr_t *addrp,
1866 	size_t len,
1867 	uchar_t prot,
1868 	uchar_t maxprot,
1869 	uint_t flags,
1870 	struct cred *cr,
1871 	caller_context_t *ct)
1872 {
1873 	struct segvn_crargs vn_a;
1874 	int error;
1875 
1876 	PC_DPRINTF0(6, "pcfs_map\n");
1877 	if (vp->v_flag & VNOMAP)
1878 		return (ENOSYS);
1879 
1880 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1881 		return (ENXIO);
1882 
1883 	as_rangelock(as);
1884 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
1885 	if (error != 0) {
1886 		as_rangeunlock(as);
1887 		return (error);
1888 	}
1889 
1890 	vn_a.vp = vp;
1891 	vn_a.offset = off;
1892 	vn_a.type = flags & MAP_TYPE;
1893 	vn_a.prot = prot;
1894 	vn_a.maxprot = maxprot;
1895 	vn_a.flags = flags & ~MAP_TYPE;
1896 	vn_a.cred = cr;
1897 	vn_a.amp = NULL;
1898 	vn_a.szc = 0;
1899 	vn_a.lgrp_mem_policy_flags = 0;
1900 
1901 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1902 	as_rangeunlock(as);
1903 	return (error);
1904 }
1905 
1906 /* ARGSUSED */
1907 static int
1908 pcfs_seek(
1909 	struct vnode *vp,
1910 	offset_t ooff,
1911 	offset_t *noffp,
1912 	caller_context_t *ct)
1913 {
1914 	if (*noffp < 0)
1915 		return (EINVAL);
1916 	else if (*noffp > MAXOFFSET_T)
1917 		return (EINVAL);
1918 	else
1919 		return (0);
1920 }
1921 
1922 /* ARGSUSED */
1923 static int
1924 pcfs_addmap(
1925 	struct vnode *vp,
1926 	offset_t off,
1927 	struct as *as,
1928 	caddr_t addr,
1929 	size_t len,
1930 	uchar_t prot,
1931 	uchar_t maxprot,
1932 	uint_t flags,
1933 	struct cred *cr,
1934 	caller_context_t *ct)
1935 {
1936 	if (vp->v_flag & VNOMAP)
1937 		return (ENOSYS);
1938 	return (0);
1939 }
1940 
1941 /*ARGSUSED*/
1942 static int
1943 pcfs_delmap(
1944 	struct vnode *vp,
1945 	offset_t off,
1946 	struct as *as,
1947 	caddr_t addr,
1948 	size_t len,
1949 	uint_t prot,
1950 	uint_t maxprot,
1951 	uint_t flags,
1952 	struct cred *cr,
1953 	caller_context_t *ct)
1954 {
1955 	if (vp->v_flag & VNOMAP)
1956 		return (ENOSYS);
1957 	return (0);
1958 }
1959 
1960 /*
1961  * POSIX pathconf() support.
1962  */
1963 /* ARGSUSED */
1964 static int
1965 pcfs_pathconf(
1966 	struct vnode *vp,
1967 	int cmd,
1968 	ulong_t *valp,
1969 	struct cred *cr,
1970 	caller_context_t *ct)
1971 {
1972 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1973 
1974 	switch (cmd) {
1975 	case _PC_LINK_MAX:
1976 		*valp = 1;
1977 		return (0);
1978 
1979 	case _PC_CASE_BEHAVIOR:
1980 		return (EINVAL);
1981 
1982 	case _PC_FILESIZEBITS:
1983 		/*
1984 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1985 		 * FAT12 can only go up to the maximum filesystem capacity
1986 		 * which is ~509MB.
1987 		 */
1988 		*valp = IS_FAT12(fsp) ? 30 : 33;
1989 		return (0);
1990 
1991 	case _PC_TIMESTAMP_RESOLUTION:
1992 		/*
1993 		 * PCFS keeps track of modification times, it its own
1994 		 * internal format, to a resolution of 2 seconds.
1995 		 * Since 2000 million is representable in an int32_t
1996 		 * without overflow (or becoming negative), we allow
1997 		 * this value to be returned.
1998 		 */
1999 		*valp = 2000000000L;
2000 		return (0);
2001 
2002 	default:
2003 		return (fs_pathconf(vp, cmd, valp, cr, ct));
2004 	}
2005 
2006 }
2007 
2008 /* ARGSUSED */
2009 static int
2010 pcfs_space(
2011 	struct vnode *vp,
2012 	int cmd,
2013 	struct flock64 *bfp,
2014 	int flag,
2015 	offset_t offset,
2016 	cred_t *cr,
2017 	caller_context_t *ct)
2018 {
2019 	struct vattr vattr;
2020 	int error;
2021 
2022 	if (cmd != F_FREESP)
2023 		return (EINVAL);
2024 
2025 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2026 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2027 			return (EFBIG);
2028 		/*
2029 		 * we only support the special case of l_len == 0,
2030 		 * meaning free to end of file at this moment.
2031 		 */
2032 		if (bfp->l_len != 0)
2033 			return (EINVAL);
2034 		vattr.va_mask = AT_SIZE;
2035 		vattr.va_size = bfp->l_start;
2036 		error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2037 	}
2038 	return (error);
2039 }
2040 
2041 /*
2042  * Break up 'len' chars from 'buf' into a long file name chunk.
2043  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2044  */
2045 void
2046 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2047 {
2048 	int	i;
2049 
2050 	ASSERT(buf != NULL);
2051 
2052 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2053 		if (len > 0) {
2054 			ep->pcdl_firstfilename[i] = *buf++;
2055 			ep->pcdl_firstfilename[i + 1] = *buf++;
2056 			len -= 2;
2057 		} else {
2058 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2059 			ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2060 		}
2061 	}
2062 
2063 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2064 		if (len > 0) {
2065 			ep->pcdl_secondfilename[i] = *buf++;
2066 			ep->pcdl_secondfilename[i + 1] = *buf++;
2067 			len -= 2;
2068 		} else {
2069 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2070 			ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2071 		}
2072 	}
2073 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2074 		if (len > 0) {
2075 			ep->pcdl_thirdfilename[i] = *buf++;
2076 			ep->pcdl_thirdfilename[i + 1] = *buf++;
2077 			len -= 2;
2078 		} else {
2079 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2080 			ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2081 		}
2082 	}
2083 }
2084 
2085 /*
2086  * Extract the characters from the long filename chunk into 'buf'.
2087  * Return the number of characters extracted.
2088  */
2089 static int
2090 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf)
2091 {
2092 	char 	*tmp = buf;
2093 	int	i;
2094 
2095 	/* Copy all the names, no filtering now */
2096 
2097 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2098 		*tmp = ep->pcdl_firstfilename[i];
2099 		*(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2100 
2101 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2102 			return (tmp - buf);
2103 	}
2104 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2105 		*tmp = ep->pcdl_secondfilename[i];
2106 		*(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2107 
2108 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2109 			return (tmp - buf);
2110 	}
2111 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2112 		*tmp = ep->pcdl_thirdfilename[i];
2113 		*(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2114 
2115 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2116 			return (tmp - buf);
2117 	}
2118 	return (tmp - buf);
2119 }
2120 
2121 
2122 /*
2123  * Checksum the passed in short filename.
2124  * This is used to validate each component of the long name to make
2125  * sure the long name is valid (it hasn't been "detached" from the
2126  * short filename). This algorithm was found in FreeBSD.
2127  * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2128  */
2129 
2130 uchar_t
2131 pc_checksum_long_fn(char *name, char *ext)
2132 {
2133 	uchar_t c;
2134 	char	b[11];
2135 
2136 	bcopy(name, b, 8);
2137 	bcopy(ext, b+8, 3);
2138 
2139 	c = b[0];
2140 	c = ((c << 7) | (c >> 1)) + b[1];
2141 	c = ((c << 7) | (c >> 1)) + b[2];
2142 	c = ((c << 7) | (c >> 1)) + b[3];
2143 	c = ((c << 7) | (c >> 1)) + b[4];
2144 	c = ((c << 7) | (c >> 1)) + b[5];
2145 	c = ((c << 7) | (c >> 1)) + b[6];
2146 	c = ((c << 7) | (c >> 1)) + b[7];
2147 	c = ((c << 7) | (c >> 1)) + b[8];
2148 	c = ((c << 7) | (c >> 1)) + b[9];
2149 	c = ((c << 7) | (c >> 1)) + b[10];
2150 
2151 	return (c);
2152 }
2153 
2154 /*
2155  * Read a chunk of long filename entries into 'namep'.
2156  * Return with offset pointing to short entry (on success), or next
2157  * entry to read (if this wasn't a valid lfn really).
2158  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2159  * a long filename.
2160  *
2161  * Can also be called with a NULL namep, in which case it just returns
2162  * whether this was really a valid long filename and consumes it
2163  * (used by pc_dirempty()).
2164  */
2165 int
2166 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2167     struct pcdir **epp, offset_t *offset, struct buf **bp)
2168 {
2169 	struct pcdir *ep = *epp;
2170 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2171 	struct vnode *dvp = PCTOV(pcp);
2172 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2173 	char	*lfn;
2174 	char	*lfn_base;
2175 	int	boff;
2176 	int	i, cs;
2177 	char	*buf;
2178 	uchar_t	cksum;
2179 	int	detached = 0;
2180 	int	error = 0;
2181 	int	foldcase;
2182 	int	count = 0;
2183 	size_t	u16l = 0, u8l = 0;
2184 	char	*outbuf;
2185 	size_t	ret, inlen, outlen;
2186 
2187 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2188 	lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2189 	lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2190 	*lfn = '\0';
2191 	*(lfn + 1) = '\0';
2192 	cksum = lep->pcdl_checksum;
2193 
2194 	buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2195 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2196 		/* read next block if necessary */
2197 		boff = pc_blkoff(fsp, *offset);
2198 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2199 			if (*bp != NULL) {
2200 				brelse(*bp);
2201 				*bp = NULL;
2202 			}
2203 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2204 			if (error) {
2205 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2206 				kmem_free(buf, PCMAXNAM_UTF16);
2207 				return (error);
2208 			}
2209 			lep = (struct pcdir_lfn *)ep;
2210 		}
2211 		/* can this happen? Bad fs? */
2212 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2213 			detached = 1;
2214 			break;
2215 		}
2216 		if (cksum != lep->pcdl_checksum)
2217 			detached = 1;
2218 		/* process current entry */
2219 		cs = get_long_fn_chunk(lep, buf);
2220 		count += cs;
2221 		for (; cs > 0; cs--) {
2222 			/* see if we underflow */
2223 			if (lfn >= lfn_base)
2224 				*--lfn = buf[cs - 1];
2225 			else
2226 				detached = 1;
2227 		}
2228 		lep++;
2229 		*offset += sizeof (struct pcdir);
2230 	}
2231 	kmem_free(buf, PCMAXNAM_UTF16);
2232 	/* read next block if necessary */
2233 	boff = pc_blkoff(fsp, *offset);
2234 	ep = (struct pcdir *)lep;
2235 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2236 		if (*bp != NULL) {
2237 			brelse(*bp);
2238 			*bp = NULL;
2239 		}
2240 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2241 		if (error) {
2242 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2243 			return (error);
2244 		}
2245 	}
2246 	/* should be on the short one */
2247 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2248 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2249 		detached = 1;
2250 	}
2251 	if (detached ||
2252 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2253 	    !pc_valid_long_fn(lfn, 0)) {
2254 		/*
2255 		 * process current entry again. This may end up another lfn
2256 		 * or a short name.
2257 		 */
2258 		*epp = ep;
2259 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2260 		return (EINVAL);
2261 	}
2262 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2263 		/*
2264 		 * Don't display label because it may contain
2265 		 * funny characters.
2266 		 */
2267 		*offset += sizeof (struct pcdir);
2268 		ep++;
2269 		*epp = ep;
2270 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2271 		return (EINVAL);
2272 	}
2273 	if (namep) {
2274 		u16l = count / 2;
2275 		u8l = PCMAXNAMLEN;
2276 		error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2277 		    (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2278 		/*
2279 		 * uconv_u16tou8() will catch conversion errors including
2280 		 * the case where there is not enough room to write the
2281 		 * converted result and the u8l will never go over the given
2282 		 * PCMAXNAMLEN.
2283 		 */
2284 		if (error != 0) {
2285 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2286 			return (EINVAL);
2287 		}
2288 		namep[u8l] = '\0';
2289 		if (foldcase) {
2290 			inlen = strlen(namep);
2291 			outlen = PCMAXNAMLEN;
2292 			outbuf = kmem_alloc(PCMAXNAMLEN + 1, KM_SLEEP);
2293 			ret = u8_textprep_str(namep, &inlen, outbuf,
2294 			    &outlen, U8_TEXTPREP_TOLOWER, U8_UNICODE_LATEST,
2295 			    &error);
2296 			if (ret == -1) {
2297 				kmem_free(outbuf, PCMAXNAMLEN + 1);
2298 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2299 				return (EINVAL);
2300 			}
2301 			outbuf[PCMAXNAMLEN - outlen] = '\0';
2302 			(void) strncpy(namep, outbuf, PCMAXNAMLEN + 1);
2303 			kmem_free(outbuf, PCMAXNAMLEN + 1);
2304 		}
2305 	}
2306 	kmem_free(lfn_base, PCMAXNAM_UTF16);
2307 	*epp = ep;
2308 	return (0);
2309 }
2310 /*
2311  * Read a long filename into the pc_dirent structure and copy it out.
2312  */
2313 int
2314 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2315     struct pcdir **epp, offset_t *offset, struct buf **bp)
2316 {
2317 	struct pcdir *ep;
2318 	struct pcnode *pcp = VTOPC(dvp);
2319 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2320 	offset_t uiooffset = uiop->uio_loffset;
2321 	int	error = 0;
2322 	offset_t oldoffset;
2323 
2324 	oldoffset = *offset;
2325 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2326 	if (error) {
2327 		if (error == EINVAL) {
2328 			uiop->uio_loffset += *offset - oldoffset;
2329 			return (0);
2330 		} else
2331 			return (error);
2332 	}
2333 
2334 	ep = *epp;
2335 	uiop->uio_loffset += *offset - oldoffset;
2336 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2337 	if (ld->d_reclen > uiop->uio_resid) {
2338 		uiop->uio_loffset = uiooffset;
2339 		return (ENOSPC);
2340 	}
2341 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2342 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2343 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2344 	    pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2345 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2346 	uiop->uio_loffset = ld->d_off;
2347 	*offset += sizeof (struct pcdir);
2348 	ep++;
2349 	*epp = ep;
2350 	return (0);
2351 }
2352 
2353 /*
2354  * Read a short filename into the pc_dirent structure and copy it out.
2355  */
2356 int
2357 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2358     struct pcdir **epp, offset_t *offset, struct buf **bp)
2359 {
2360 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2361 	int	boff = pc_blkoff(fsp, *offset);
2362 	struct pcdir *ep = *epp;
2363 	offset_t	oldoffset = uiop->uio_loffset;
2364 	int	error;
2365 	int	foldcase;
2366 
2367 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2368 		uiop->uio_loffset += sizeof (struct pcdir);
2369 		*offset += sizeof (struct pcdir);
2370 		ep++;
2371 		*epp = ep;
2372 		return (0);
2373 	}
2374 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2375 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2376 	    pc_direntpersec(fsp));
2377 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2378 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2379 	    &ep->pcd_ext[0], foldcase);
2380 	if (error == 0) {
2381 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2382 		if (ld->d_reclen > uiop->uio_resid) {
2383 			uiop->uio_loffset = oldoffset;
2384 			return (ENOSPC);
2385 		}
2386 		ld->d_off = (off64_t)(uiop->uio_loffset +
2387 		    sizeof (struct pcdir));
2388 		(void) uiomove((caddr_t)ld,
2389 		    ld->d_reclen, UIO_READ, uiop);
2390 		uiop->uio_loffset = ld->d_off;
2391 	} else {
2392 		uiop->uio_loffset += sizeof (struct pcdir);
2393 	}
2394 	*offset += sizeof (struct pcdir);
2395 	ep++;
2396 	*epp = ep;
2397 	return (0);
2398 }
2399 
2400 /* ARGSUSED */
2401 static int
2402 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2403 {
2404 	struct pc_fid *pcfid;
2405 	struct pcnode *pcp;
2406 	struct pcfs	*fsp;
2407 	int	error;
2408 
2409 	fsp = VFSTOPCFS(vp->v_vfsp);
2410 	if (fsp == NULL)
2411 		return (EIO);
2412 	error = pc_lockfs(fsp, 0, 0);
2413 	if (error)
2414 		return (error);
2415 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2416 		pc_unlockfs(fsp);
2417 		return (EIO);
2418 	}
2419 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2420 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2421 		pc_unlockfs(fsp);
2422 		return (ENOSPC);
2423 	}
2424 
2425 	pcfid = (struct pc_fid *)fidp;
2426 	bzero(pcfid, sizeof (struct pc_fid));
2427 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2428 	if (vp->v_flag & VROOT) {
2429 		pcfid->pcfid_block = 0;
2430 		pcfid->pcfid_offset = 0;
2431 		pcfid->pcfid_ctime = 0;
2432 	} else {
2433 		pcfid->pcfid_block = pcp->pc_eblkno;
2434 		pcfid->pcfid_offset = pcp->pc_eoffset;
2435 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2436 	}
2437 	pc_unlockfs(fsp);
2438 	return (0);
2439 }
2440