xref: /titanic_41/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 19ee0c13b3dc29990b3601d3e06aff01ab921c27)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/param.h>
27 #include <sys/t_lock.h>
28 #include <sys/systm.h>
29 #include <sys/sysmacros.h>
30 #include <sys/user.h>
31 #include <sys/buf.h>
32 #include <sys/stat.h>
33 #include <sys/vfs.h>
34 #include <sys/vfs_opreg.h>
35 #include <sys/dirent.h>
36 #include <sys/vnode.h>
37 #include <sys/proc.h>
38 #include <sys/file.h>
39 #include <sys/fcntl.h>
40 #include <sys/uio.h>
41 #include <sys/fs/pc_label.h>
42 #include <sys/fs/pc_fs.h>
43 #include <sys/fs/pc_dir.h>
44 #include <sys/fs/pc_node.h>
45 #include <sys/mman.h>
46 #include <sys/pathname.h>
47 #include <sys/vmsystm.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/statvfs.h>
51 #include <sys/unistd.h>
52 #include <sys/kmem.h>
53 #include <sys/conf.h>
54 #include <sys/flock.h>
55 #include <sys/policy.h>
56 #include <sys/sdt.h>
57 #include <sys/sunddi.h>
58 #include <sys/types.h>
59 #include <sys/errno.h>
60 
61 #include <vm/seg.h>
62 #include <vm/page.h>
63 #include <vm/pvn.h>
64 #include <vm/seg_map.h>
65 #include <vm/seg_vn.h>
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/seg_kmem.h>
69 
70 #include <fs/fs_subr.h>
71 
72 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
73 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
74 	caller_context_t *ct);
75 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
76 	caller_context_t *);
77 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
78 	caller_context_t *);
79 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
80 	caller_context_t *ct);
81 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
82 	caller_context_t *);
83 static int pcfs_access(struct vnode *, int, int, struct cred *,
84 	caller_context_t *ct);
85 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
86 	struct pathname *, int, struct vnode *, struct cred *,
87 	caller_context_t *, int *, pathname_t *);
88 static int pcfs_create(struct vnode *, char *, struct vattr *,
89 	enum vcexcl, int mode, struct vnode **, struct cred *, int,
90 	caller_context_t *, vsecattr_t *);
91 static int pcfs_remove(struct vnode *, char *, struct cred *,
92 	caller_context_t *, int);
93 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
94 	struct cred *, caller_context_t *, int);
95 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
96 	struct cred *, caller_context_t *, int, vsecattr_t *);
97 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
98 	caller_context_t *, int);
99 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
100 	caller_context_t *, int);
101 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
102 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
103 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
104 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
105 	offset_t, cred_t *, caller_context_t *);
106 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
107 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
108 	caller_context_t *);
109 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
110 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
111 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
112 	caller_context_t *);
113 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
114 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
115 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
116 	size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
117 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
118 	size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
119 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
120 	caller_context_t *);
121 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
122 	caller_context_t *);
123 
124 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
125 	struct cred *);
126 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
127 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf);
128 
129 extern krwlock_t pcnodes_lock;
130 
/*
 * Round r up to the next multiple of sizeof (long long).  The mask trick
 * relies on sizeof (long long) being a power of two.
 */
#define	lround(r)	\
	(((r) + (sizeof (long long) - 1)) & ~(sizeof (long long) - 1))
132 
133 /*
134  * vnode op vectors for files and directories.
135  */
136 struct vnodeops *pcfs_fvnodeops;
137 struct vnodeops *pcfs_dvnodeops;
138 
139 const fs_operation_def_t pcfs_fvnodeops_template[] = {
140 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
141 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
142 	VOPNAME_READ,		{ .vop_read = pcfs_read },
143 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
144 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
145 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
146 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
147 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
148 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
149 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
150 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
151 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
152 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
153 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
154 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
155 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
156 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
157 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
158 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
159 	NULL,			NULL
160 };
161 
162 const fs_operation_def_t pcfs_dvnodeops_template[] = {
163 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
164 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
165 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
166 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
167 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
168 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
169 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
170 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
171 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
172 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
173 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
174 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
175 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
176 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
177 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
178 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
179 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
180 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
181 	NULL,			NULL
182 };
183 
184 
185 /*ARGSUSED*/
186 static int
187 pcfs_open(
188 	struct vnode **vpp,
189 	int flag,
190 	struct cred *cr,
191 	caller_context_t *ct)
192 {
193 	return (0);
194 }
195 
196 /*
197  * files are sync'ed on close to keep floppy up to date
198  */
199 
200 /*ARGSUSED*/
201 static int
202 pcfs_close(
203 	struct vnode *vp,
204 	int flag,
205 	int count,
206 	offset_t offset,
207 	struct cred *cr,
208 	caller_context_t *ct)
209 {
210 	return (0);
211 }
212 
213 /*ARGSUSED*/
214 static int
215 pcfs_read(
216 	struct vnode *vp,
217 	struct uio *uiop,
218 	int ioflag,
219 	struct cred *cr,
220 	struct caller_context *ct)
221 {
222 	struct pcfs *fsp;
223 	struct pcnode *pcp;
224 	int error;
225 
226 	fsp = VFSTOPCFS(vp->v_vfsp);
227 	if (error = pc_verify(fsp))
228 		return (error);
229 	error = pc_lockfs(fsp, 0, 0);
230 	if (error)
231 		return (error);
232 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
233 		pc_unlockfs(fsp);
234 		return (EIO);
235 	}
236 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
237 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
238 		pc_mark_acc(fsp, pcp);
239 	}
240 	pc_unlockfs(fsp);
241 	if (error) {
242 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
243 	}
244 	return (error);
245 }
246 
247 /*ARGSUSED*/
248 static int
249 pcfs_write(
250 	struct vnode *vp,
251 	struct uio *uiop,
252 	int ioflag,
253 	struct cred *cr,
254 	struct caller_context *ct)
255 {
256 	struct pcfs *fsp;
257 	struct pcnode *pcp;
258 	int error;
259 
260 	fsp = VFSTOPCFS(vp->v_vfsp);
261 	if (error = pc_verify(fsp))
262 		return (error);
263 	error = pc_lockfs(fsp, 0, 0);
264 	if (error)
265 		return (error);
266 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
267 		pc_unlockfs(fsp);
268 		return (EIO);
269 	}
270 	if (ioflag & FAPPEND) {
271 		/*
272 		 * in append mode start at end of file.
273 		 */
274 		uiop->uio_loffset = pcp->pc_size;
275 	}
276 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
277 	pcp->pc_flags |= PC_MOD;
278 	pc_mark_mod(fsp, pcp);
279 	if (ioflag & (FSYNC|FDSYNC))
280 		(void) pc_nodeupdate(pcp);
281 
282 	pc_unlockfs(fsp);
283 	if (error) {
284 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
285 	}
286 	return (error);
287 }
288 
289 /*
290  * read or write a vnode
291  */
292 static int
293 rwpcp(
294 	struct pcnode *pcp,
295 	struct uio *uio,
296 	enum uio_rw rw,
297 	int ioflag)
298 {
299 	struct vnode *vp = PCTOV(pcp);
300 	struct pcfs *fsp;
301 	daddr_t bn;			/* phys block number */
302 	int n;
303 	offset_t off;
304 	caddr_t base;
305 	int mapon, pagecreate;
306 	int newpage;
307 	int error = 0;
308 	rlim64_t limit = uio->uio_llimit;
309 	int oresid = uio->uio_resid;
310 
311 	/*
312 	 * If the filesystem was umounted by force, return immediately.
313 	 */
314 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
315 		return (EIO);
316 
317 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
318 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
319 
320 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
321 	ASSERT(vp->v_type == VREG);
322 
323 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
324 		return (0);
325 	}
326 
327 	if (uio->uio_loffset < 0)
328 		return (EINVAL);
329 
330 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
331 		limit = MAXOFFSET_T;
332 
333 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
334 		proc_t *p = ttoproc(curthread);
335 
336 		mutex_enter(&p->p_lock);
337 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
338 		    p, RCA_UNSAFE_SIGINFO);
339 		mutex_exit(&p->p_lock);
340 		return (EFBIG);
341 	}
342 
343 	/* the following condition will occur only for write */
344 
345 	if (uio->uio_loffset >= UINT32_MAX)
346 		return (EFBIG);
347 
348 	if (uio->uio_resid == 0)
349 		return (0);
350 
351 	if (limit > UINT32_MAX)
352 		limit = UINT32_MAX;
353 
354 	fsp = VFSTOPCFS(vp->v_vfsp);
355 	if (fsp->pcfs_flags & PCFS_IRRECOV)
356 		return (EIO);
357 
358 	do {
359 		/*
360 		 * Assignments to "n" in this block may appear
361 		 * to overflow in some cases.  However, after careful
362 		 * analysis it was determined that all assignments to
363 		 * "n" serve only to make "n" smaller.  Since "n"
364 		 * starts out as no larger than MAXBSIZE, "int" is
365 		 * safe.
366 		 */
367 		off = uio->uio_loffset & MAXBMASK;
368 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
369 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
370 		if (rw == UIO_READ) {
371 			offset_t diff;
372 
373 			diff = pcp->pc_size - uio->uio_loffset;
374 			if (diff <= 0)
375 				return (0);
376 			if (diff < n)
377 				n = (int)diff;
378 		}
379 		/*
380 		 * Compare limit with the actual offset + n, not the
381 		 * rounded down offset "off" or we will overflow
382 		 * the maximum file size after all.
383 		 */
384 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
385 			if (uio->uio_loffset >= limit) {
386 				error = EFBIG;
387 				break;
388 			}
389 			n = (int)(limit - uio->uio_loffset);
390 		}
391 
392 		/*
393 		 * Touch the page and fault it in if it is not in
394 		 * core before segmap_getmapflt can lock it. This
395 		 * is to avoid the deadlock if the buffer is mapped
396 		 * to the same file through mmap which we want to
397 		 * write to.
398 		 */
399 		uio_prefaultpages((long)n, uio);
400 
401 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
402 		pagecreate = 0;
403 		newpage = 0;
404 		if (rw == UIO_WRITE) {
405 			/*
406 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
407 			 * with one page at a time, instead of one MAXBSIZE
408 			 * at a time, so we can fully explore pagecreate
409 			 * optimization??
410 			 */
411 			if (uio->uio_loffset + n > pcp->pc_size) {
412 				uint_t ncl, lcn;
413 
414 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
415 				    fsp->pcfs_clsize);
416 				if (uio->uio_loffset > pcp->pc_size &&
417 				    ncl < (uint_t)howmany(uio->uio_loffset,
418 				    fsp->pcfs_clsize)) {
419 					/*
420 					 * Allocate and zerofill skipped
421 					 * clusters. This may not be worth the
422 					 * effort since a small lseek beyond
423 					 * eof but still within the cluster
424 					 * will not be zeroed out.
425 					 */
426 					lcn = pc_lblkno(fsp, uio->uio_loffset);
427 					error = pc_balloc(pcp, (daddr_t)lcn,
428 					    1, &bn);
429 					ncl = lcn + 1;
430 				}
431 				if (!error &&
432 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
433 				    fsp->pcfs_clsize))
434 					/*
435 					 * allocate clusters w/o zerofill
436 					 */
437 					error = pc_balloc(pcp,
438 					    (daddr_t)pc_lblkno(fsp,
439 					    uio->uio_loffset + n - 1),
440 					    0, &bn);
441 
442 				pcp->pc_flags |= PC_CHG;
443 
444 				if (error) {
445 					pc_cluster32_t ncl;
446 					int nerror;
447 
448 					/*
449 					 * figure out new file size from
450 					 * cluster chain length. If this
451 					 * is detected to loop, the chain
452 					 * is corrupted and we'd better
453 					 * keep our fingers off that file.
454 					 */
455 					nerror = pc_fileclsize(fsp,
456 					    pcp->pc_scluster, &ncl);
457 					if (nerror) {
458 						PC_DPRINTF1(2,
459 						    "cluster chain "
460 						    "corruption, "
461 						    "scluster=%d\n",
462 						    pcp->pc_scluster);
463 						pcp->pc_size = 0;
464 						pcp->pc_flags |= PC_INVAL;
465 						error = nerror;
466 						(void) segmap_release(segkmap,
467 						    base, 0);
468 						break;
469 					}
470 					pcp->pc_size = fsp->pcfs_clsize * ncl;
471 
472 					if (error == ENOSPC &&
473 					    (pcp->pc_size - uio->uio_loffset)
474 					    > 0) {
475 						PC_DPRINTF3(2, "rwpcp ENOSPC "
476 						    "off=%lld n=%d size=%d\n",
477 						    uio->uio_loffset,
478 						    n, pcp->pc_size);
479 						n = (int)(pcp->pc_size -
480 						    uio->uio_loffset);
481 					} else {
482 						PC_DPRINTF1(1,
483 						    "rwpcp error1=%d\n", error);
484 						(void) segmap_release(segkmap,
485 						    base, 0);
486 						break;
487 					}
488 				} else {
489 					pcp->pc_size =
490 					    (uint_t)(uio->uio_loffset + n);
491 				}
492 				if (mapon == 0) {
493 					newpage = segmap_pagecreate(segkmap,
494 					    base, (size_t)n, 0);
495 					pagecreate = 1;
496 				}
497 			} else if (n == MAXBSIZE) {
498 				newpage = segmap_pagecreate(segkmap, base,
499 				    (size_t)n, 0);
500 				pagecreate = 1;
501 			}
502 		}
503 		error = uiomove(base + mapon, (size_t)n, rw, uio);
504 
505 		if (pagecreate && uio->uio_loffset <
506 		    roundup(off + mapon + n, PAGESIZE)) {
507 			offset_t nzero, nmoved;
508 
509 			nmoved = uio->uio_loffset - (off + mapon);
510 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
511 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
512 		}
513 
514 		/*
515 		 * Unlock the pages which have been allocated by
516 		 * page_create_va() in segmap_pagecreate().
517 		 */
518 		if (newpage) {
519 			segmap_pageunlock(segkmap, base, (size_t)n,
520 			    rw == UIO_WRITE ? S_WRITE : S_READ);
521 		}
522 
523 		if (error) {
524 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
525 			/*
526 			 * If we failed on a write, we may have already
527 			 * allocated file blocks as well as pages.  It's hard
528 			 * to undo the block allocation, but we must be sure
529 			 * to invalidate any pages that may have been
530 			 * allocated.
531 			 */
532 			if (rw == UIO_WRITE)
533 				(void) segmap_release(segkmap, base, SM_INVAL);
534 			else
535 				(void) segmap_release(segkmap, base, 0);
536 		} else {
537 			uint_t flags = 0;
538 
539 			if (rw == UIO_READ) {
540 				if (n + mapon == MAXBSIZE ||
541 				    uio->uio_loffset == pcp->pc_size)
542 					flags = SM_DONTNEED;
543 			} else if (ioflag & (FSYNC|FDSYNC)) {
544 				flags = SM_WRITE;
545 			} else if (n + mapon == MAXBSIZE) {
546 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
547 			}
548 			error = segmap_release(segkmap, base, flags);
549 		}
550 
551 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
552 
553 	if (oresid != uio->uio_resid)
554 		error = 0;
555 	return (error);
556 }
557 
558 /*ARGSUSED*/
559 static int
560 pcfs_getattr(
561 	struct vnode *vp,
562 	struct vattr *vap,
563 	int flags,
564 	struct cred *cr,
565 	caller_context_t *ct)
566 {
567 	struct pcnode *pcp;
568 	struct pcfs *fsp;
569 	int error;
570 	char attr;
571 	struct pctime atime;
572 	int64_t unixtime;
573 
574 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
575 
576 	fsp = VFSTOPCFS(vp->v_vfsp);
577 	error = pc_lockfs(fsp, 0, 0);
578 	if (error)
579 		return (error);
580 
581 	/*
582 	 * Note that we don't check for "invalid node" (PC_INVAL) here
583 	 * only in order to make stat() succeed. We allow no I/O on such
584 	 * a node, but do allow to check for its existence.
585 	 */
586 	if ((pcp = VTOPC(vp)) == NULL) {
587 		pc_unlockfs(fsp);
588 		return (EIO);
589 	}
590 	/*
591 	 * Copy from pcnode.
592 	 */
593 	vap->va_type = vp->v_type;
594 	attr = pcp->pc_entry.pcd_attr;
595 	if (PCA_IS_HIDDEN(fsp, attr))
596 		vap->va_mode = 0;
597 	else if (attr & PCA_LABEL)
598 		vap->va_mode = 0444;
599 	else if (attr & PCA_RDONLY)
600 		vap->va_mode = 0555;
601 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
602 		vap->va_mode = 0755;
603 	} else {
604 		vap->va_mode = 0777;
605 	}
606 
607 	if (attr & PCA_DIR)
608 		vap->va_mode |= S_IFDIR;
609 	else
610 		vap->va_mode |= S_IFREG;
611 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
612 		vap->va_uid = 0;
613 		vap->va_gid = 0;
614 	} else {
615 		vap->va_uid = crgetuid(cr);
616 		vap->va_gid = crgetgid(cr);
617 	}
618 	vap->va_fsid = vp->v_vfsp->vfs_dev;
619 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
620 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
621 	    pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
622 	vap->va_nlink = 1;
623 	vap->va_size = (u_offset_t)pcp->pc_size;
624 	vap->va_rdev = 0;
625 	vap->va_nblocks =
626 	    (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
627 	vap->va_blksize = fsp->pcfs_clsize;
628 
629 	/*
630 	 * FAT root directories have no timestamps. In order not to return
631 	 * "time zero" (1/1/1970), we record the time of the mount and give
632 	 * that. This breaks less expectations.
633 	 */
634 	if (vp->v_flag & VROOT) {
635 		vap->va_mtime = fsp->pcfs_mounttime;
636 		vap->va_atime = fsp->pcfs_mounttime;
637 		vap->va_ctime = fsp->pcfs_mounttime;
638 		pc_unlockfs(fsp);
639 		return (0);
640 	}
641 
642 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
643 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
644 		if (unixtime > INT32_MAX)
645 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
646 		unixtime = MIN(unixtime, INT32_MAX);
647 	} else if (unixtime > INT32_MAX &&
648 	    get_udatamodel() == DATAMODEL_ILP32) {
649 		pc_unlockfs(fsp);
650 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
651 		return (EOVERFLOW);
652 	}
653 
654 	vap->va_mtime.tv_sec = (time_t)unixtime;
655 	vap->va_mtime.tv_nsec = 0;
656 
657 	/*
658 	 * FAT doesn't know about POSIX ctime.
659 	 * Best approximation is to always set it to mtime.
660 	 */
661 	vap->va_ctime = vap->va_mtime;
662 
663 	/*
664 	 * FAT only stores "last access date". If that's the
665 	 * same as the date of last modification then the time
666 	 * of last access is known. Otherwise, use midnight.
667 	 */
668 	atime.pct_date = pcp->pc_entry.pcd_ladate;
669 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
670 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
671 	else
672 		atime.pct_time = 0;
673 	pc_pcttotv(&atime, &unixtime);
674 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
675 		if (unixtime > INT32_MAX)
676 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
677 		unixtime = MIN(unixtime, INT32_MAX);
678 	} else if (unixtime > INT32_MAX &&
679 	    get_udatamodel() == DATAMODEL_ILP32) {
680 		pc_unlockfs(fsp);
681 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
682 		return (EOVERFLOW);
683 	}
684 
685 	vap->va_atime.tv_sec = (time_t)unixtime;
686 	vap->va_atime.tv_nsec = 0;
687 
688 	pc_unlockfs(fsp);
689 	return (0);
690 }
691 
692 
693 /*ARGSUSED*/
694 static int
695 pcfs_setattr(
696 	struct vnode *vp,
697 	struct vattr *vap,
698 	int flags,
699 	struct cred *cr,
700 	caller_context_t *ct)
701 {
702 	struct pcnode *pcp;
703 	mode_t mask = vap->va_mask;
704 	int error;
705 	struct pcfs *fsp;
706 	timestruc_t now, *timep;
707 
708 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
709 	/*
710 	 * cannot set these attributes
711 	 */
712 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
713 		return (EINVAL);
714 	}
715 	/*
716 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
717 	 * from 'tar' when it tries to set times on a directory, and console
718 	 * printf's on the NFS server when it gets EINVAL back on such a
719 	 * request. One possible problem with that since a directory entry
720 	 * identifies a file, '.' and all the '..' entries in subdirectories
721 	 * may get out of sync when the directory is updated since they're
722 	 * treated like separate files. We could fix that by looking for
723 	 * '.' and giving it the same attributes, and then looking for
724 	 * all the subdirectories and updating '..', but that's pretty
725 	 * expensive for something that doesn't seem likely to matter.
726 	 */
727 	/* can't do some ops on directories anyway */
728 	if ((vp->v_type == VDIR) &&
729 	    (mask & AT_SIZE)) {
730 		return (EINVAL);
731 	}
732 
733 	fsp = VFSTOPCFS(vp->v_vfsp);
734 	error = pc_lockfs(fsp, 0, 0);
735 	if (error)
736 		return (error);
737 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
738 		pc_unlockfs(fsp);
739 		return (EIO);
740 	}
741 
742 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
743 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
744 			pc_unlockfs(fsp);
745 			return (EACCES);
746 		}
747 	}
748 
749 	/*
750 	 * Change file access modes.
751 	 * If nobody has write permission, file is marked readonly.
752 	 * Otherwise file is writable by anyone.
753 	 */
754 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
755 		if ((vap->va_mode & 0222) == 0)
756 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
757 		else
758 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
759 		pcp->pc_flags |= PC_CHG;
760 	}
761 	/*
762 	 * Truncate file. Must have write permission.
763 	 */
764 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
765 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
766 			error = EACCES;
767 			goto out;
768 		}
769 		if (vap->va_size > UINT32_MAX) {
770 			error = EFBIG;
771 			goto out;
772 		}
773 		error = pc_truncate(pcp, (uint_t)vap->va_size);
774 		if (error)
775 			goto out;
776 	}
777 	/*
778 	 * Change file modified times.
779 	 */
780 	if (mask & (AT_MTIME | AT_CTIME)) {
781 		/*
782 		 * If SysV-compatible option to set access and
783 		 * modified times if privileged, owner, or write access,
784 		 * use current time rather than va_mtime.
785 		 *
786 		 * XXX - va_mtime.tv_sec == -1 flags this.
787 		 */
788 		timep = &vap->va_mtime;
789 		if (vap->va_mtime.tv_sec == -1) {
790 			gethrestime(&now);
791 			timep = &now;
792 		}
793 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
794 		    timep->tv_sec > INT32_MAX) {
795 			error = EOVERFLOW;
796 			goto out;
797 		}
798 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
799 		if (error)
800 			goto out;
801 		pcp->pc_flags |= PC_CHG;
802 	}
803 	/*
804 	 * Change file access times.
805 	 */
806 	if (mask & AT_ATIME) {
807 		/*
808 		 * If SysV-compatible option to set access and
809 		 * modified times if privileged, owner, or write access,
810 		 * use current time rather than va_mtime.
811 		 *
812 		 * XXX - va_atime.tv_sec == -1 flags this.
813 		 */
814 		struct pctime	atime;
815 
816 		timep = &vap->va_atime;
817 		if (vap->va_atime.tv_sec == -1) {
818 			gethrestime(&now);
819 			timep = &now;
820 		}
821 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
822 		    timep->tv_sec > INT32_MAX) {
823 			error = EOVERFLOW;
824 			goto out;
825 		}
826 		error = pc_tvtopct(timep, &atime);
827 		if (error)
828 			goto out;
829 		pcp->pc_entry.pcd_ladate = atime.pct_date;
830 		pcp->pc_flags |= PC_CHG;
831 	}
832 out:
833 	pc_unlockfs(fsp);
834 	return (error);
835 }
836 
837 
838 /*ARGSUSED*/
839 static int
840 pcfs_access(
841 	struct vnode *vp,
842 	int mode,
843 	int flags,
844 	struct cred *cr,
845 	caller_context_t *ct)
846 {
847 	struct pcnode *pcp;
848 	struct pcfs *fsp;
849 
850 
851 	fsp = VFSTOPCFS(vp->v_vfsp);
852 
853 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
854 		return (EIO);
855 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
856 		return (EACCES);
857 
858 	/*
859 	 * If this is a boot partition, privileged users have full access while
860 	 * others have read-only access.
861 	 */
862 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
863 		if ((mode & VWRITE) &&
864 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
865 			return (EACCES);
866 	}
867 	return (0);
868 }
869 
870 
871 /*ARGSUSED*/
872 static int
873 pcfs_fsync(
874 	struct vnode *vp,
875 	int syncflag,
876 	struct cred *cr,
877 	caller_context_t *ct)
878 {
879 	struct pcfs *fsp;
880 	struct pcnode *pcp;
881 	int error;
882 
883 	fsp = VFSTOPCFS(vp->v_vfsp);
884 	if (error = pc_verify(fsp))
885 		return (error);
886 	error = pc_lockfs(fsp, 0, 0);
887 	if (error)
888 		return (error);
889 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
890 		pc_unlockfs(fsp);
891 		return (EIO);
892 	}
893 	rw_enter(&pcnodes_lock, RW_WRITER);
894 	error = pc_nodesync(pcp);
895 	rw_exit(&pcnodes_lock);
896 	pc_unlockfs(fsp);
897 	return (error);
898 }
899 
900 
901 /*ARGSUSED*/
902 static void
903 pcfs_inactive(
904 	struct vnode *vp,
905 	struct cred *cr,
906 	caller_context_t *ct)
907 {
908 	struct pcnode *pcp;
909 	struct pcfs *fsp;
910 	int error;
911 
912 	fsp = VFSTOPCFS(vp->v_vfsp);
913 	error = pc_lockfs(fsp, 0, 1);
914 
915 	/*
916 	 * If the filesystem was umounted by force, all dirty
917 	 * pages associated with this vnode are invalidated
918 	 * and then the vnode will be freed.
919 	 */
920 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
921 		pcp = VTOPC(vp);
922 		if (vn_has_cached_data(vp)) {
923 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
924 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
925 		}
926 		remque(pcp);
927 		if (error == 0)
928 			pc_unlockfs(fsp);
929 		vn_free(vp);
930 		kmem_free(pcp, sizeof (struct pcnode));
931 		VFS_RELE(PCFSTOVFS(fsp));
932 		return;
933 	}
934 
935 	mutex_enter(&vp->v_lock);
936 	ASSERT(vp->v_count >= 1);
937 	if (vp->v_count > 1) {
938 		vp->v_count--;  /* release our hold from vn_rele */
939 		mutex_exit(&vp->v_lock);
940 		pc_unlockfs(fsp);
941 		return;
942 	}
943 	mutex_exit(&vp->v_lock);
944 
945 	/*
946 	 * Check again to confirm that no intervening I/O error
947 	 * with a subsequent pc_diskchanged() call has released
948 	 * the pcnode. If it has then release the vnode as above.
949 	 */
950 	pcp = VTOPC(vp);
951 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
952 		if (vn_has_cached_data(vp))
953 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
954 			    pcfs_putapage, B_INVAL | B_TRUNC,
955 			    (struct cred *)NULL);
956 	}
957 
958 	if (pcp == NULL) {
959 		vn_free(vp);
960 	} else {
961 		pc_rele(pcp);
962 	}
963 
964 	if (!error)
965 		pc_unlockfs(fsp);
966 }
967 
968 /*ARGSUSED*/
969 static int
970 pcfs_lookup(
971 	struct vnode *dvp,
972 	char *nm,
973 	struct vnode **vpp,
974 	struct pathname *pnp,
975 	int flags,
976 	struct vnode *rdir,
977 	struct cred *cr,
978 	caller_context_t *ct,
979 	int *direntflags,
980 	pathname_t *realpnp)
981 {
982 	struct pcfs *fsp;
983 	struct pcnode *pcp;
984 	int error;
985 
986 	/*
987 	 * If the filesystem was umounted by force, return immediately.
988 	 */
989 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
990 		return (EIO);
991 
992 	/*
993 	 * verify that the dvp is still valid on the disk
994 	 */
995 	fsp = VFSTOPCFS(dvp->v_vfsp);
996 	if (error = pc_verify(fsp))
997 		return (error);
998 	error = pc_lockfs(fsp, 0, 0);
999 	if (error)
1000 		return (error);
1001 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1002 		pc_unlockfs(fsp);
1003 		return (EIO);
1004 	}
1005 	/*
1006 	 * Null component name is a synonym for directory being searched.
1007 	 */
1008 	if (*nm == '\0') {
1009 		VN_HOLD(dvp);
1010 		*vpp = dvp;
1011 		pc_unlockfs(fsp);
1012 		return (0);
1013 	}
1014 
1015 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1016 	if (!error) {
1017 		*vpp = PCTOV(pcp);
1018 		pcp->pc_flags |= PC_EXTERNAL;
1019 	}
1020 	pc_unlockfs(fsp);
1021 	return (error);
1022 }
1023 
1024 
1025 /*ARGSUSED*/
1026 static int
1027 pcfs_create(
1028 	struct vnode *dvp,
1029 	char *nm,
1030 	struct vattr *vap,
1031 	enum vcexcl exclusive,
1032 	int mode,
1033 	struct vnode **vpp,
1034 	struct cred *cr,
1035 	int flag,
1036 	caller_context_t *ct,
1037 	vsecattr_t *vsecp)
1038 {
1039 	int error;
1040 	struct pcnode *pcp;
1041 	struct vnode *vp;
1042 	struct pcfs *fsp;
1043 
1044 	/*
1045 	 * can't create directories. use pcfs_mkdir.
1046 	 * can't create anything other than files.
1047 	 */
1048 	if (vap->va_type == VDIR)
1049 		return (EISDIR);
1050 	else if (vap->va_type != VREG)
1051 		return (EINVAL);
1052 
1053 	pcp = NULL;
1054 	fsp = VFSTOPCFS(dvp->v_vfsp);
1055 	error = pc_lockfs(fsp, 0, 0);
1056 	if (error)
1057 		return (error);
1058 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1059 		pc_unlockfs(fsp);
1060 		return (EIO);
1061 	}
1062 
1063 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1064 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1065 			pc_unlockfs(fsp);
1066 			return (EACCES);
1067 		}
1068 	}
1069 
1070 	if (*nm == '\0') {
1071 		/*
1072 		 * Null component name refers to the directory itself.
1073 		 */
1074 		VN_HOLD(dvp);
1075 		pcp = VTOPC(dvp);
1076 		error = EEXIST;
1077 	} else {
1078 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1079 	}
1080 	/*
1081 	 * if file exists and this is a nonexclusive create,
1082 	 * check for access permissions
1083 	 */
1084 	if (error == EEXIST) {
1085 		vp = PCTOV(pcp);
1086 		if (exclusive == NONEXCL) {
1087 			if (vp->v_type == VDIR) {
1088 				error = EISDIR;
1089 			} else if (mode) {
1090 				error = pcfs_access(PCTOV(pcp), mode, 0,
1091 				    cr, ct);
1092 			} else {
1093 				error = 0;
1094 			}
1095 		}
1096 		if (error) {
1097 			VN_RELE(PCTOV(pcp));
1098 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1099 		    (vap->va_size == 0)) {
1100 			error = pc_truncate(pcp, 0L);
1101 			if (error) {
1102 				VN_RELE(PCTOV(pcp));
1103 			} else {
1104 				vnevent_create(PCTOV(pcp), ct);
1105 			}
1106 		}
1107 	}
1108 	if (error) {
1109 		pc_unlockfs(fsp);
1110 		return (error);
1111 	}
1112 	*vpp = PCTOV(pcp);
1113 	pcp->pc_flags |= PC_EXTERNAL;
1114 	pc_unlockfs(fsp);
1115 	return (error);
1116 }
1117 
1118 /*ARGSUSED*/
1119 static int
1120 pcfs_remove(
1121 	struct vnode *vp,
1122 	char *nm,
1123 	struct cred *cr,
1124 	caller_context_t *ct,
1125 	int flags)
1126 {
1127 	struct pcfs *fsp;
1128 	struct pcnode *pcp;
1129 	int error;
1130 
1131 	fsp = VFSTOPCFS(vp->v_vfsp);
1132 	if (error = pc_verify(fsp))
1133 		return (error);
1134 	error = pc_lockfs(fsp, 0, 0);
1135 	if (error)
1136 		return (error);
1137 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1138 		pc_unlockfs(fsp);
1139 		return (EIO);
1140 	}
1141 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1142 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1143 			pc_unlockfs(fsp);
1144 			return (EACCES);
1145 		}
1146 	}
1147 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1148 	pc_unlockfs(fsp);
1149 	return (error);
1150 }
1151 
1152 /*
1153  * Rename a file or directory
1154  * This rename is restricted to only rename files within a directory.
1155  * XX should make rename more general
1156  */
1157 /*ARGSUSED*/
1158 static int
1159 pcfs_rename(
1160 	struct vnode *sdvp,		/* old (source) parent vnode */
1161 	char *snm,			/* old (source) entry name */
1162 	struct vnode *tdvp,		/* new (target) parent vnode */
1163 	char *tnm,			/* new (target) entry name */
1164 	struct cred *cr,
1165 	caller_context_t *ct,
1166 	int flags)
1167 {
1168 	struct pcfs *fsp;
1169 	struct pcnode *dp;	/* parent pcnode */
1170 	struct pcnode *tdp;
1171 	int error;
1172 
1173 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1174 	if (error = pc_verify(fsp))
1175 		return (error);
1176 
1177 	/*
1178 	 * make sure we can muck with this directory.
1179 	 */
1180 	error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1181 	if (error) {
1182 		return (error);
1183 	}
1184 	error = pc_lockfs(fsp, 0, 0);
1185 	if (error)
1186 		return (error);
1187 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1188 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1189 		pc_unlockfs(fsp);
1190 		return (EIO);
1191 	}
1192 	error = pc_rename(dp, tdp, snm, tnm, ct);
1193 	pc_unlockfs(fsp);
1194 	return (error);
1195 }
1196 
1197 /*ARGSUSED*/
1198 static int
1199 pcfs_mkdir(
1200 	struct vnode *dvp,
1201 	char *nm,
1202 	struct vattr *vap,
1203 	struct vnode **vpp,
1204 	struct cred *cr,
1205 	caller_context_t *ct,
1206 	int flags,
1207 	vsecattr_t *vsecp)
1208 {
1209 	struct pcfs *fsp;
1210 	struct pcnode *pcp;
1211 	int error;
1212 
1213 	fsp = VFSTOPCFS(dvp->v_vfsp);
1214 	if (error = pc_verify(fsp))
1215 		return (error);
1216 	error = pc_lockfs(fsp, 0, 0);
1217 	if (error)
1218 		return (error);
1219 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1220 		pc_unlockfs(fsp);
1221 		return (EIO);
1222 	}
1223 
1224 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1225 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1226 			pc_unlockfs(fsp);
1227 			return (EACCES);
1228 		}
1229 	}
1230 
1231 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1232 
1233 	if (!error) {
1234 		pcp -> pc_flags |= PC_EXTERNAL;
1235 		*vpp = PCTOV(pcp);
1236 	} else if (error == EEXIST) {
1237 		VN_RELE(PCTOV(pcp));
1238 	}
1239 	pc_unlockfs(fsp);
1240 	return (error);
1241 }
1242 
1243 /*ARGSUSED*/
1244 static int
1245 pcfs_rmdir(
1246 	struct vnode *dvp,
1247 	char *nm,
1248 	struct vnode *cdir,
1249 	struct cred *cr,
1250 	caller_context_t *ct,
1251 	int flags)
1252 {
1253 	struct pcfs *fsp;
1254 	struct pcnode *pcp;
1255 	int error;
1256 
1257 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1258 	if (error = pc_verify(fsp))
1259 		return (error);
1260 	if (error = pc_lockfs(fsp, 0, 0))
1261 		return (error);
1262 
1263 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1264 		pc_unlockfs(fsp);
1265 		return (EIO);
1266 	}
1267 
1268 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1269 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1270 			pc_unlockfs(fsp);
1271 			return (EACCES);
1272 		}
1273 	}
1274 
1275 	error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1276 	pc_unlockfs(fsp);
1277 	return (error);
1278 }
1279 
1280 /*
1281  * read entries in a directory.
1282  * we must convert pc format to unix format
1283  */
1284 
1285 /*ARGSUSED*/
1286 static int
1287 pcfs_readdir(
1288 	struct vnode *dvp,
1289 	struct uio *uiop,
1290 	struct cred *cr,
1291 	int *eofp,
1292 	caller_context_t *ct,
1293 	int flags)
1294 {
1295 	struct pcnode *pcp;
1296 	struct pcfs *fsp;
1297 	struct pcdir *ep;
1298 	struct buf *bp = NULL;
1299 	offset_t offset;
1300 	int boff;
1301 	struct pc_dirent lbp;
1302 	struct pc_dirent *ld = &lbp;
1303 	int error;
1304 
1305 	/*
1306 	 * If the filesystem was umounted by force, return immediately.
1307 	 */
1308 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1309 		return (EIO);
1310 
1311 	if ((uiop->uio_iovcnt != 1) ||
1312 	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1313 		return (EINVAL);
1314 	}
1315 	fsp = VFSTOPCFS(dvp->v_vfsp);
1316 	/*
1317 	 * verify that the dp is still valid on the disk
1318 	 */
1319 	if (error = pc_verify(fsp)) {
1320 		return (error);
1321 	}
1322 	error = pc_lockfs(fsp, 0, 0);
1323 	if (error)
1324 		return (error);
1325 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1326 		pc_unlockfs(fsp);
1327 		return (EIO);
1328 	}
1329 
1330 	bzero(ld, sizeof (*ld));
1331 
1332 	if (eofp != NULL)
1333 		*eofp = 0;
1334 	offset = uiop->uio_loffset;
1335 
1336 	if (dvp->v_flag & VROOT) {
1337 		/*
1338 		 * kludge up entries for "." and ".." in the root.
1339 		 */
1340 		if (offset == 0) {
1341 			(void) strcpy(ld->d_name, ".");
1342 			ld->d_reclen = DIRENT64_RECLEN(1);
1343 			ld->d_off = (off64_t)sizeof (struct pcdir);
1344 			ld->d_ino = (ino64_t)UINT_MAX;
1345 			if (ld->d_reclen > uiop->uio_resid) {
1346 				pc_unlockfs(fsp);
1347 				return (ENOSPC);
1348 			}
1349 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1350 			uiop->uio_loffset = ld->d_off;
1351 			offset = uiop->uio_loffset;
1352 		}
1353 		if (offset == sizeof (struct pcdir)) {
1354 			(void) strcpy(ld->d_name, "..");
1355 			ld->d_reclen = DIRENT64_RECLEN(2);
1356 			if (ld->d_reclen > uiop->uio_resid) {
1357 				pc_unlockfs(fsp);
1358 				return (ENOSPC);
1359 			}
1360 			ld->d_off = (off64_t)(uiop->uio_loffset +
1361 			    sizeof (struct pcdir));
1362 			ld->d_ino = (ino64_t)UINT_MAX;
1363 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1364 			uiop->uio_loffset = ld->d_off;
1365 			offset = uiop->uio_loffset;
1366 		}
1367 		offset -= 2 * sizeof (struct pcdir);
1368 		/* offset now has the real offset value into directory file */
1369 	}
1370 
1371 	for (;;) {
1372 		boff = pc_blkoff(fsp, offset);
1373 		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1374 			if (bp != NULL) {
1375 				brelse(bp);
1376 				bp = NULL;
1377 			}
1378 			error = pc_blkatoff(pcp, offset, &bp, &ep);
1379 			if (error) {
1380 				if (error == ENOENT) {
1381 					error = 0;
1382 					if (eofp)
1383 						*eofp = 1;
1384 				}
1385 				break;
1386 			}
1387 		}
1388 		if (ep->pcd_filename[0] == PCD_UNUSED) {
1389 			if (eofp)
1390 				*eofp = 1;
1391 			break;
1392 		}
1393 		/*
1394 		 * Don't display label because it may contain funny characters.
1395 		 */
1396 		if (ep->pcd_filename[0] == PCD_ERASED) {
1397 			uiop->uio_loffset += sizeof (struct pcdir);
1398 			offset += sizeof (struct pcdir);
1399 			ep++;
1400 			continue;
1401 		}
1402 		if (PCDL_IS_LFN(ep)) {
1403 			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1404 			    0)
1405 				break;
1406 			continue;
1407 		}
1408 
1409 		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1410 			break;
1411 	}
1412 	if (bp)
1413 		brelse(bp);
1414 	pc_unlockfs(fsp);
1415 	return (error);
1416 }
1417 
1418 
1419 /*
1420  * Called from pvn_getpages or pcfs_getpage to get a particular page.
1421  * When we are called the pcfs is already locked.
1422  */
1423 /*ARGSUSED*/
1424 static int
1425 pcfs_getapage(
1426 	struct vnode *vp,
1427 	u_offset_t off,
1428 	size_t len,
1429 	uint_t *protp,
1430 	page_t *pl[],		/* NULL if async IO is requested */
1431 	size_t plsz,
1432 	struct seg *seg,
1433 	caddr_t addr,
1434 	enum seg_rw rw,
1435 	struct cred *cr)
1436 {
1437 	struct pcnode *pcp;
1438 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1439 	struct vnode *devvp;
1440 	page_t *pp;
1441 	page_t *pagefound;
1442 	int err;
1443 
1444 	/*
1445 	 * If the filesystem was umounted by force, return immediately.
1446 	 */
1447 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1448 		return (EIO);
1449 
1450 	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1451 	    (void *)vp, off, len);
1452 
1453 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1454 		return (EIO);
1455 	devvp = fsp->pcfs_devvp;
1456 
1457 	/* pcfs doesn't do readaheads */
1458 	if (pl == NULL)
1459 		return (0);
1460 
1461 	pl[0] = NULL;
1462 	err = 0;
1463 	/*
1464 	 * If the accessed time on the pcnode has not already been
1465 	 * set elsewhere (e.g. for read/setattr) we set the time now.
1466 	 * This gives us approximate modified times for mmap'ed files
1467 	 * which are accessed via loads in the user address space.
1468 	 */
1469 	if ((pcp->pc_flags & PC_ACC) == 0 &&
1470 	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1471 		pc_mark_acc(fsp, pcp);
1472 	}
1473 reread:
1474 	if ((pagefound = page_exists(vp, off)) == NULL) {
1475 		/*
1476 		 * Need to really do disk IO to get the page(s).
1477 		 */
1478 		struct buf *bp;
1479 		daddr_t lbn, bn;
1480 		u_offset_t io_off;
1481 		size_t io_len;
1482 		u_offset_t lbnoff, xferoffset;
1483 		u_offset_t pgoff;
1484 		uint_t	xfersize;
1485 		int err1;
1486 
1487 		lbn = pc_lblkno(fsp, off);
1488 		lbnoff = off & ~(fsp->pcfs_clsize - 1);
1489 		xferoffset = off & ~(fsp->pcfs_secsize - 1);
1490 
1491 		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1492 		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1493 		if (pp == NULL)
1494 			/*
1495 			 * XXX - If pcfs is made MT-hot, this should go
1496 			 * back to reread.
1497 			 */
1498 			panic("pcfs_getapage pvn_read_kluster");
1499 
1500 		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1501 		    pgoff += xfersize,
1502 		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
1503 		    lbnoff += xfersize, xferoffset += xfersize) {
1504 			/*
1505 			 * read as many contiguous blocks as possible to
1506 			 * fill this page
1507 			 */
1508 			xfersize = PAGESIZE - pgoff;
1509 			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1510 			if (err1) {
1511 				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1512 				err = err1;
1513 				goto out;
1514 			}
1515 			bp = pageio_setup(pp, xfersize, devvp, B_READ);
1516 			bp->b_edev = devvp->v_rdev;
1517 			bp->b_dev = cmpdev(devvp->v_rdev);
1518 			bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1519 			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1520 			bp->b_file = vp;
1521 			bp->b_offset = (offset_t)(off + pgoff);
1522 
1523 			(void) bdev_strategy(bp);
1524 
1525 			lwp_stat_update(LWP_STAT_INBLK, 1);
1526 
1527 			if (err == 0)
1528 				err = biowait(bp);
1529 			else
1530 				(void) biowait(bp);
1531 			pageio_done(bp);
1532 			if (err)
1533 				goto out;
1534 		}
1535 		if (pgoff < PAGESIZE) {
1536 			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1537 		}
1538 		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1539 	}
1540 out:
1541 	if (err) {
1542 		if (pp != NULL)
1543 			pvn_read_done(pp, B_ERROR);
1544 		return (err);
1545 	}
1546 
1547 	if (pagefound) {
1548 		/*
1549 		 * Page exists in the cache, acquire the "shared"
1550 		 * lock.  If this fails, go back to reread.
1551 		 */
1552 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1553 			goto reread;
1554 		}
1555 		pl[0] = pp;
1556 		pl[1] = NULL;
1557 	}
1558 	return (err);
1559 }
1560 
1561 /*
1562  * Return all the pages from [off..off+len] in given file
1563  */
1564 /* ARGSUSED */
1565 static int
1566 pcfs_getpage(
1567 	struct vnode *vp,
1568 	offset_t off,
1569 	size_t len,
1570 	uint_t *protp,
1571 	page_t *pl[],
1572 	size_t plsz,
1573 	struct seg *seg,
1574 	caddr_t addr,
1575 	enum seg_rw rw,
1576 	struct cred *cr,
1577 	caller_context_t *ct)
1578 {
1579 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1580 	int err;
1581 
1582 	PC_DPRINTF0(6, "pcfs_getpage\n");
1583 	if (err = pc_verify(fsp))
1584 		return (err);
1585 	if (vp->v_flag & VNOMAP)
1586 		return (ENOSYS);
1587 	ASSERT(off <= UINT32_MAX);
1588 	err = pc_lockfs(fsp, 0, 0);
1589 	if (err)
1590 		return (err);
1591 	if (protp != NULL)
1592 		*protp = PROT_ALL;
1593 
1594 	ASSERT((off & PAGEOFFSET) == 0);
1595 	if (len <= PAGESIZE) {
1596 		err = pcfs_getapage(vp, off, len, protp, pl,
1597 		    plsz, seg, addr, rw, cr);
1598 	} else {
1599 		err = pvn_getpages(pcfs_getapage, vp, off,
1600 		    len, protp, pl, plsz, seg, addr, rw, cr);
1601 	}
1602 	pc_unlockfs(fsp);
1603 	return (err);
1604 }
1605 
1606 
1607 /*
1608  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1609  * If len == 0, do from off to EOF.
1610  *
1611  * The normal cases should be len == 0 & off == 0 (entire vp list),
1612  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1613  * (from pageout).
1614  *
1615  */
1616 /*ARGSUSED*/
1617 static int
1618 pcfs_putpage(
1619 	struct vnode *vp,
1620 	offset_t off,
1621 	size_t len,
1622 	int flags,
1623 	struct cred *cr,
1624 	caller_context_t *ct)
1625 {
1626 	struct pcnode *pcp;
1627 	page_t *pp;
1628 	struct pcfs *fsp;
1629 	u_offset_t io_off;
1630 	size_t io_len;
1631 	offset_t eoff;
1632 	int err;
1633 
1634 	/*
1635 	 * If the filesystem was umounted by force, return immediately.
1636 	 */
1637 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1638 		return (EIO);
1639 
1640 	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1641 	if (vp->v_flag & VNOMAP)
1642 		return (ENOSYS);
1643 
1644 	fsp = VFSTOPCFS(vp->v_vfsp);
1645 
1646 	if (err = pc_verify(fsp))
1647 		return (err);
1648 	if ((pcp = VTOPC(vp)) == NULL) {
1649 		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1650 		return (EIO);
1651 	}
1652 	if (pcp->pc_flags & PC_INVAL)
1653 		return (EIO);
1654 
1655 	if (curproc == proc_pageout) {
1656 		/*
1657 		 * XXX - This is a quick hack to avoid blocking
1658 		 * pageout. Also to avoid pcfs_getapage deadlocking
1659 		 * with putpage when memory is running out,
1660 		 * since we only have one global lock and we don't
1661 		 * support async putpage.
1662 		 * It should be fixed someday.
1663 		 *
1664 		 * Interestingly, this used to be a test of NOMEMWAIT().
1665 		 * We only ever got here once pcfs started supporting
1666 		 * NFS sharing, and then only because the NFS server
1667 		 * threads seem to do writes in sched's process context.
1668 		 * Since everyone else seems to just care about pageout,
1669 		 * the test was changed to look for pageout directly.
1670 		 */
1671 		return (ENOMEM);
1672 	}
1673 
1674 	ASSERT(off <= UINT32_MAX);
1675 
1676 	flags &= ~B_ASYNC;	/* XXX should fix this later */
1677 
1678 	err = pc_lockfs(fsp, 0, 0);
1679 	if (err)
1680 		return (err);
1681 	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1682 		pc_unlockfs(fsp);
1683 		return (0);
1684 	}
1685 
1686 	if (len == 0) {
1687 		/*
1688 		 * Search the entire vp list for pages >= off
1689 		 */
1690 		err = pvn_vplist_dirty(vp, off,
1691 		    pcfs_putapage, flags, cr);
1692 	} else {
1693 		eoff = off + len;
1694 
1695 		for (io_off = off; io_off < eoff &&
1696 		    io_off < pcp->pc_size; io_off += io_len) {
1697 			/*
1698 			 * If we are not invalidating, synchronously
1699 			 * freeing or writing pages use the routine
1700 			 * page_lookup_nowait() to prevent reclaiming
1701 			 * them from the free list.
1702 			 */
1703 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1704 				pp = page_lookup(vp, io_off,
1705 				    (flags & (B_INVAL | B_FREE)) ?
1706 				    SE_EXCL : SE_SHARED);
1707 			} else {
1708 				pp = page_lookup_nowait(vp, io_off,
1709 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1710 			}
1711 
1712 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1713 				io_len = PAGESIZE;
1714 			else {
1715 				err = pcfs_putapage(vp, pp, &io_off, &io_len,
1716 				    flags, cr);
1717 				if (err != 0)
1718 					break;
1719 				/*
1720 				 * "io_off" and "io_len" are returned as
1721 				 * the range of pages we actually wrote.
1722 				 * This allows us to skip ahead more quickly
1723 				 * since several pages may've been dealt
1724 				 * with by this iteration of the loop.
1725 				 */
1726 			}
1727 		}
1728 	}
1729 	if (err == 0 && (flags & B_INVAL) &&
1730 	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
1731 		/*
1732 		 * If doing "invalidation", make sure that
1733 		 * all pages on the vnode list are actually
1734 		 * gone.
1735 		 */
1736 		cmn_err(CE_PANIC,
1737 		    "pcfs_putpage: B_INVAL, pages not gone");
1738 	} else if (err) {
1739 		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1740 	}
1741 	pc_unlockfs(fsp);
1742 	return (err);
1743 }
1744 
1745 /*
1746  * Write out a single page, possibly klustering adjacent dirty pages.
1747  */
1748 /*ARGSUSED*/
1749 int
1750 pcfs_putapage(
1751 	struct vnode *vp,
1752 	page_t *pp,
1753 	u_offset_t *offp,
1754 	size_t *lenp,
1755 	int flags,
1756 	struct cred *cr)
1757 {
1758 	struct pcnode *pcp;
1759 	struct pcfs *fsp;
1760 	struct vnode *devvp;
1761 	size_t io_len;
1762 	daddr_t bn;
1763 	u_offset_t lbn, lbnoff, xferoffset;
1764 	uint_t pgoff, xfersize;
1765 	int err = 0;
1766 	u_offset_t io_off;
1767 
1768 	pcp = VTOPC(vp);
1769 	fsp = VFSTOPCFS(vp->v_vfsp);
1770 	devvp = fsp->pcfs_devvp;
1771 
1772 	/*
1773 	 * If the modified time on the inode has not already been
1774 	 * set elsewhere (e.g. for write/setattr) and this is not
1775 	 * a call from msync (B_FORCE) we set the time now.
1776 	 * This gives us approximate modified times for mmap'ed files
1777 	 * which are modified via stores in the user address space.
1778 	 */
1779 	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1780 		pcp->pc_flags |= PC_MOD;
1781 		pc_mark_mod(fsp, pcp);
1782 	}
1783 	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1784 	    PAGESIZE, flags);
1785 
1786 	if (fsp->pcfs_flags & PCFS_IRRECOV) {
1787 		goto out;
1788 	}
1789 
1790 	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1791 
1792 	lbn = pc_lblkno(fsp, io_off);
1793 	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1794 	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1795 
1796 	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1797 	    pgoff += xfersize,
1798 	    lbn += howmany(xfersize, fsp->pcfs_clsize),
1799 	    lbnoff += xfersize, xferoffset += xfersize) {
1800 
1801 		struct buf *bp;
1802 		int err1;
1803 
1804 		/*
1805 		 * write as many contiguous blocks as possible from this page
1806 		 */
1807 		xfersize = io_len - pgoff;
1808 		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1809 		if (err1) {
1810 			err = err1;
1811 			goto out;
1812 		}
1813 		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1814 		bp->b_edev = devvp->v_rdev;
1815 		bp->b_dev = cmpdev(devvp->v_rdev);
1816 		bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1817 		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1818 		bp->b_file = vp;
1819 		bp->b_offset = (offset_t)(io_off + pgoff);
1820 
1821 		(void) bdev_strategy(bp);
1822 
1823 		lwp_stat_update(LWP_STAT_OUBLK, 1);
1824 
1825 		if (err == 0)
1826 			err = biowait(bp);
1827 		else
1828 			(void) biowait(bp);
1829 		pageio_done(bp);
1830 	}
1831 	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1832 	pp = NULL;
1833 
1834 out:
1835 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1836 		pvn_write_done(pp, B_WRITE | flags);
1837 	} else if (err != 0 && pp != NULL) {
1838 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1839 	}
1840 
1841 	if (offp)
1842 		*offp = io_off;
1843 	if (lenp)
1844 		*lenp = io_len;
1845 		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1846 		    (void *)vp, (void *)pp, io_off, io_len);
1847 	if (err) {
1848 		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1849 	}
1850 	return (err);
1851 }
1852 
1853 /*ARGSUSED*/
1854 static int
1855 pcfs_map(
1856 	struct vnode *vp,
1857 	offset_t off,
1858 	struct as *as,
1859 	caddr_t *addrp,
1860 	size_t len,
1861 	uchar_t prot,
1862 	uchar_t maxprot,
1863 	uint_t flags,
1864 	struct cred *cr,
1865 	caller_context_t *ct)
1866 {
1867 	struct segvn_crargs vn_a;
1868 	int error;
1869 
1870 	PC_DPRINTF0(6, "pcfs_map\n");
1871 	if (vp->v_flag & VNOMAP)
1872 		return (ENOSYS);
1873 
1874 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1875 		return (ENXIO);
1876 
1877 	as_rangelock(as);
1878 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
1879 	if (error != 0) {
1880 		as_rangeunlock(as);
1881 		return (error);
1882 	}
1883 
1884 	vn_a.vp = vp;
1885 	vn_a.offset = off;
1886 	vn_a.type = flags & MAP_TYPE;
1887 	vn_a.prot = prot;
1888 	vn_a.maxprot = maxprot;
1889 	vn_a.flags = flags & ~MAP_TYPE;
1890 	vn_a.cred = cr;
1891 	vn_a.amp = NULL;
1892 	vn_a.szc = 0;
1893 	vn_a.lgrp_mem_policy_flags = 0;
1894 
1895 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1896 	as_rangeunlock(as);
1897 	return (error);
1898 }
1899 
1900 /* ARGSUSED */
1901 static int
1902 pcfs_seek(
1903 	struct vnode *vp,
1904 	offset_t ooff,
1905 	offset_t *noffp,
1906 	caller_context_t *ct)
1907 {
1908 	if (*noffp < 0)
1909 		return (EINVAL);
1910 	else if (*noffp > MAXOFFSET_T)
1911 		return (EINVAL);
1912 	else
1913 		return (0);
1914 }
1915 
1916 /* ARGSUSED */
1917 static int
1918 pcfs_addmap(
1919 	struct vnode *vp,
1920 	offset_t off,
1921 	struct as *as,
1922 	caddr_t addr,
1923 	size_t len,
1924 	uchar_t prot,
1925 	uchar_t maxprot,
1926 	uint_t flags,
1927 	struct cred *cr,
1928 	caller_context_t *ct)
1929 {
1930 	if (vp->v_flag & VNOMAP)
1931 		return (ENOSYS);
1932 	return (0);
1933 }
1934 
1935 /*ARGSUSED*/
1936 static int
1937 pcfs_delmap(
1938 	struct vnode *vp,
1939 	offset_t off,
1940 	struct as *as,
1941 	caddr_t addr,
1942 	size_t len,
1943 	uint_t prot,
1944 	uint_t maxprot,
1945 	uint_t flags,
1946 	struct cred *cr,
1947 	caller_context_t *ct)
1948 {
1949 	if (vp->v_flag & VNOMAP)
1950 		return (ENOSYS);
1951 	return (0);
1952 }
1953 
1954 /*
1955  * POSIX pathconf() support.
1956  */
1957 /* ARGSUSED */
1958 static int
1959 pcfs_pathconf(
1960 	struct vnode *vp,
1961 	int cmd,
1962 	ulong_t *valp,
1963 	struct cred *cr,
1964 	caller_context_t *ct)
1965 {
1966 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1967 
1968 	switch (cmd) {
1969 	case _PC_LINK_MAX:
1970 		*valp = 1;
1971 		return (0);
1972 
1973 	case _PC_CASE_BEHAVIOR:
1974 		return (EINVAL);
1975 
1976 	case _PC_FILESIZEBITS:
1977 		/*
1978 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1979 		 * FAT12 can only go up to the maximum filesystem capacity
1980 		 * which is ~509MB.
1981 		 */
1982 		*valp = IS_FAT12(fsp) ? 30 : 33;
1983 		return (0);
1984 
1985 	default:
1986 		return (fs_pathconf(vp, cmd, valp, cr, ct));
1987 	}
1988 
1989 }
1990 
1991 /* ARGSUSED */
1992 static int
1993 pcfs_space(
1994 	struct vnode *vp,
1995 	int cmd,
1996 	struct flock64 *bfp,
1997 	int flag,
1998 	offset_t offset,
1999 	cred_t *cr,
2000 	caller_context_t *ct)
2001 {
2002 	struct vattr vattr;
2003 	int error;
2004 
2005 	if (cmd != F_FREESP)
2006 		return (EINVAL);
2007 
2008 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2009 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2010 			return (EFBIG);
2011 		/*
2012 		 * we only support the special case of l_len == 0,
2013 		 * meaning free to end of file at this moment.
2014 		 */
2015 		if (bfp->l_len != 0)
2016 			return (EINVAL);
2017 		vattr.va_mask = AT_SIZE;
2018 		vattr.va_size = bfp->l_start;
2019 		error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2020 	}
2021 	return (error);
2022 }
2023 
2024 /*
2025  * Break up 'len' chars from 'buf' into a long file name chunk.
2026  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2027  */
2028 void
2029 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2030 {
2031 	int	i;
2032 
2033 	ASSERT(buf != NULL);
2034 
2035 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2036 		if (len > 0) {
2037 			ep->pcdl_firstfilename[i] = *buf++;
2038 			ep->pcdl_firstfilename[i + 1] = *buf++;
2039 			len -= 2;
2040 		} else {
2041 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2042 			ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2043 		}
2044 	}
2045 
2046 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2047 		if (len > 0) {
2048 			ep->pcdl_secondfilename[i] = *buf++;
2049 			ep->pcdl_secondfilename[i + 1] = *buf++;
2050 			len -= 2;
2051 		} else {
2052 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2053 			ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2054 		}
2055 	}
2056 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2057 		if (len > 0) {
2058 			ep->pcdl_thirdfilename[i] = *buf++;
2059 			ep->pcdl_thirdfilename[i + 1] = *buf++;
2060 			len -= 2;
2061 		} else {
2062 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2063 			ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2064 		}
2065 	}
2066 }
2067 
2068 /*
2069  * Extract the characters from the long filename chunk into 'buf'.
2070  * Return the number of characters extracted.
2071  */
2072 static int
2073 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf)
2074 {
2075 	char 	*tmp = buf;
2076 	int	i;
2077 
2078 	/* Copy all the names, no filtering now */
2079 
2080 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2081 		*tmp = ep->pcdl_firstfilename[i];
2082 		*(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2083 
2084 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2085 			return (tmp - buf);
2086 	}
2087 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2088 		*tmp = ep->pcdl_secondfilename[i];
2089 		*(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2090 
2091 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2092 			return (tmp - buf);
2093 	}
2094 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2095 		*tmp = ep->pcdl_thirdfilename[i];
2096 		*(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2097 
2098 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2099 			return (tmp - buf);
2100 	}
2101 	return (tmp - buf);
2102 }
2103 
2104 
2105 /*
2106  * Checksum the passed in short filename.
2107  * This is used to validate each component of the long name to make
2108  * sure the long name is valid (it hasn't been "detached" from the
2109  * short filename). This algorithm was found in FreeBSD.
2110  * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2111  */
2112 
2113 uchar_t
2114 pc_checksum_long_fn(char *name, char *ext)
2115 {
2116 	uchar_t c;
2117 	char	b[11];
2118 
2119 	bcopy(name, b, 8);
2120 	bcopy(ext, b+8, 3);
2121 
2122 	c = b[0];
2123 	c = ((c << 7) | (c >> 1)) + b[1];
2124 	c = ((c << 7) | (c >> 1)) + b[2];
2125 	c = ((c << 7) | (c >> 1)) + b[3];
2126 	c = ((c << 7) | (c >> 1)) + b[4];
2127 	c = ((c << 7) | (c >> 1)) + b[5];
2128 	c = ((c << 7) | (c >> 1)) + b[6];
2129 	c = ((c << 7) | (c >> 1)) + b[7];
2130 	c = ((c << 7) | (c >> 1)) + b[8];
2131 	c = ((c << 7) | (c >> 1)) + b[9];
2132 	c = ((c << 7) | (c >> 1)) + b[10];
2133 
2134 	return (c);
2135 }
2136 
2137 /*
2138  * Read a chunk of long filename entries into 'namep'.
2139  * Return with offset pointing to short entry (on success), or next
2140  * entry to read (if this wasn't a valid lfn really).
2141  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2142  * a long filename.
2143  *
2144  * Can also be called with a NULL namep, in which case it just returns
2145  * whether this was really a valid long filename and consumes it
2146  * (used by pc_dirempty()).
2147  */
2148 int
2149 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2150     struct pcdir **epp, offset_t *offset, struct buf **bp)
2151 {
2152 	struct pcdir *ep = *epp;
2153 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2154 	struct vnode *dvp = PCTOV(pcp);
2155 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2156 	char	*lfn;
2157 	char	*lfn_base;
2158 	int	boff;
2159 	int	i, cs;
2160 	char	*buf;
2161 	uchar_t	cksum;
2162 	int	detached = 0;
2163 	int	error = 0;
2164 	int	foldcase;
2165 	int	count = 0;
2166 	size_t	u16l = 0, u8l = 0;
2167 	char	*outbuf;
2168 	size_t	ret, inlen, outlen;
2169 
2170 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2171 	lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2172 	lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2173 	*lfn = '\0';
2174 	*(lfn + 1) = '\0';
2175 	cksum = lep->pcdl_checksum;
2176 
2177 	buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2178 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2179 		/* read next block if necessary */
2180 		boff = pc_blkoff(fsp, *offset);
2181 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2182 			if (*bp != NULL) {
2183 				brelse(*bp);
2184 				*bp = NULL;
2185 			}
2186 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2187 			if (error) {
2188 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2189 				kmem_free(buf, PCMAXNAM_UTF16);
2190 				return (error);
2191 			}
2192 			lep = (struct pcdir_lfn *)ep;
2193 		}
2194 		/* can this happen? Bad fs? */
2195 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2196 			detached = 1;
2197 			break;
2198 		}
2199 		if (cksum != lep->pcdl_checksum)
2200 			detached = 1;
2201 		/* process current entry */
2202 		cs = get_long_fn_chunk(lep, buf);
2203 		count += cs;
2204 		for (; cs > 0; cs--) {
2205 			/* see if we underflow */
2206 			if (lfn >= lfn_base)
2207 				*--lfn = buf[cs - 1];
2208 			else
2209 				detached = 1;
2210 		}
2211 		lep++;
2212 		*offset += sizeof (struct pcdir);
2213 	}
2214 	kmem_free(buf, PCMAXNAM_UTF16);
2215 	/* read next block if necessary */
2216 	boff = pc_blkoff(fsp, *offset);
2217 	ep = (struct pcdir *)lep;
2218 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2219 		if (*bp != NULL) {
2220 			brelse(*bp);
2221 			*bp = NULL;
2222 		}
2223 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2224 		if (error) {
2225 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2226 			return (error);
2227 		}
2228 	}
2229 	/* should be on the short one */
2230 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2231 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2232 		detached = 1;
2233 	}
2234 	if (detached ||
2235 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2236 	    !pc_valid_long_fn(lfn, 0)) {
2237 		/*
2238 		 * process current entry again. This may end up another lfn
2239 		 * or a short name.
2240 		 */
2241 		*epp = ep;
2242 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2243 		return (EINVAL);
2244 	}
2245 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2246 		/*
2247 		 * Don't display label because it may contain
2248 		 * funny characters.
2249 		 */
2250 		*offset += sizeof (struct pcdir);
2251 		ep++;
2252 		*epp = ep;
2253 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2254 		return (EINVAL);
2255 	}
2256 	if (namep) {
2257 		u16l = count / 2;
2258 		u8l = PCMAXNAMLEN;
2259 		error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2260 		    (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2261 		/*
2262 		 * uconv_u16tou8() will catch conversion errors including
2263 		 * the case where there is not enough room to write the
2264 		 * converted result and the u8l will never go over the given
2265 		 * PCMAXNAMLEN.
2266 		 */
2267 		if (error != 0) {
2268 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2269 			return (EINVAL);
2270 		}
2271 		namep[u8l] = '\0';
2272 		if (foldcase) {
2273 			inlen = strlen(namep);
2274 			outlen = PCMAXNAMLEN;
2275 			outbuf = kmem_alloc(PCMAXNAMLEN + 1, KM_SLEEP);
2276 			ret = u8_textprep_str(namep, &inlen, outbuf,
2277 			    &outlen, U8_TEXTPREP_TOLOWER, U8_UNICODE_LATEST,
2278 			    &error);
2279 			if (ret == -1) {
2280 				kmem_free(outbuf, PCMAXNAMLEN + 1);
2281 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2282 				return (EINVAL);
2283 			}
2284 			outbuf[PCMAXNAMLEN - outlen] = '\0';
2285 			(void) strncpy(namep, outbuf, PCMAXNAMLEN + 1);
2286 			kmem_free(outbuf, PCMAXNAMLEN + 1);
2287 		}
2288 	}
2289 	kmem_free(lfn_base, PCMAXNAM_UTF16);
2290 	*epp = ep;
2291 	return (0);
2292 }
2293 /*
2294  * Read a long filename into the pc_dirent structure and copy it out.
2295  */
2296 int
2297 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2298     struct pcdir **epp, offset_t *offset, struct buf **bp)
2299 {
2300 	struct pcdir *ep;
2301 	struct pcnode *pcp = VTOPC(dvp);
2302 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2303 	offset_t uiooffset = uiop->uio_loffset;
2304 	int	error = 0;
2305 	offset_t oldoffset;
2306 
2307 	oldoffset = *offset;
2308 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2309 	if (error) {
2310 		if (error == EINVAL) {
2311 			uiop->uio_loffset += *offset - oldoffset;
2312 			return (0);
2313 		} else
2314 			return (error);
2315 	}
2316 
2317 	ep = *epp;
2318 	uiop->uio_loffset += *offset - oldoffset;
2319 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2320 	if (ld->d_reclen > uiop->uio_resid) {
2321 		uiop->uio_loffset = uiooffset;
2322 		return (ENOSPC);
2323 	}
2324 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2325 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2326 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2327 	    pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2328 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2329 	uiop->uio_loffset = ld->d_off;
2330 	*offset += sizeof (struct pcdir);
2331 	ep++;
2332 	*epp = ep;
2333 	return (0);
2334 }
2335 
2336 /*
2337  * Read a short filename into the pc_dirent structure and copy it out.
2338  */
2339 int
2340 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2341     struct pcdir **epp, offset_t *offset, struct buf **bp)
2342 {
2343 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2344 	int	boff = pc_blkoff(fsp, *offset);
2345 	struct pcdir *ep = *epp;
2346 	offset_t	oldoffset = uiop->uio_loffset;
2347 	int	error;
2348 	int	foldcase;
2349 
2350 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2351 		uiop->uio_loffset += sizeof (struct pcdir);
2352 		*offset += sizeof (struct pcdir);
2353 		ep++;
2354 		*epp = ep;
2355 		return (0);
2356 	}
2357 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2358 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2359 	    pc_direntpersec(fsp));
2360 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2361 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2362 	    &ep->pcd_ext[0], foldcase);
2363 	if (error == 0) {
2364 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2365 		if (ld->d_reclen > uiop->uio_resid) {
2366 			uiop->uio_loffset = oldoffset;
2367 			return (ENOSPC);
2368 		}
2369 		ld->d_off = (off64_t)(uiop->uio_loffset +
2370 		    sizeof (struct pcdir));
2371 		(void) uiomove((caddr_t)ld,
2372 		    ld->d_reclen, UIO_READ, uiop);
2373 		uiop->uio_loffset = ld->d_off;
2374 	} else {
2375 		uiop->uio_loffset += sizeof (struct pcdir);
2376 	}
2377 	*offset += sizeof (struct pcdir);
2378 	ep++;
2379 	*epp = ep;
2380 	return (0);
2381 }
2382 
2383 /* ARGSUSED */
2384 static int
2385 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2386 {
2387 	struct pc_fid *pcfid;
2388 	struct pcnode *pcp;
2389 	struct pcfs	*fsp;
2390 	int	error;
2391 
2392 	fsp = VFSTOPCFS(vp->v_vfsp);
2393 	if (fsp == NULL)
2394 		return (EIO);
2395 	error = pc_lockfs(fsp, 0, 0);
2396 	if (error)
2397 		return (error);
2398 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2399 		pc_unlockfs(fsp);
2400 		return (EIO);
2401 	}
2402 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2403 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2404 		pc_unlockfs(fsp);
2405 		return (ENOSPC);
2406 	}
2407 
2408 	pcfid = (struct pc_fid *)fidp;
2409 	bzero(pcfid, sizeof (struct pc_fid));
2410 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2411 	if (vp->v_flag & VROOT) {
2412 		pcfid->pcfid_block = 0;
2413 		pcfid->pcfid_offset = 0;
2414 		pcfid->pcfid_ctime = 0;
2415 	} else {
2416 		pcfid->pcfid_block = pcp->pc_eblkno;
2417 		pcfid->pcfid_offset = pcp->pc_eoffset;
2418 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2419 	}
2420 	pc_unlockfs(fsp);
2421 	return (0);
2422 }
2423