xref: /illumos-gate/usr/src/uts/common/fs/pcfs/pc_vnops.c (revision 618b6b99eb6eee4272ca949f5ac45efb4425f02c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/t_lock.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/user.h>
33 #include <sys/buf.h>
34 #include <sys/stat.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/dirent.h>
38 #include <sys/vnode.h>
39 #include <sys/proc.h>
40 #include <sys/file.h>
41 #include <sys/fcntl.h>
42 #include <sys/uio.h>
43 #include <sys/fs/pc_label.h>
44 #include <sys/fs/pc_fs.h>
45 #include <sys/fs/pc_dir.h>
46 #include <sys/fs/pc_node.h>
47 #include <sys/mman.h>
48 #include <sys/pathname.h>
49 #include <sys/vmsystm.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/statvfs.h>
53 #include <sys/unistd.h>
54 #include <sys/kmem.h>
55 #include <sys/conf.h>
56 #include <sys/flock.h>
57 #include <sys/policy.h>
58 #include <sys/sdt.h>
59 #include <sys/sunddi.h>
60 
61 #include <vm/seg.h>
62 #include <vm/page.h>
63 #include <vm/pvn.h>
64 #include <vm/seg_map.h>
65 #include <vm/seg_vn.h>
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/seg_kmem.h>
69 
70 #include <fs/fs_subr.h>
71 
72 static int pcfs_open(struct vnode **, int, struct cred *);
73 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *);
74 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
75 	struct caller_context *);
76 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
77 	struct caller_context *);
78 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *);
79 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
80 	caller_context_t *);
81 static int pcfs_access(struct vnode *, int, int, struct cred *);
82 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
83 	struct pathname *, int, struct vnode *, struct cred *);
84 static int pcfs_create(struct vnode *, char *, struct vattr *,
85 	enum vcexcl, int mode, struct vnode **, struct cred *, int);
86 static int pcfs_remove(struct vnode *, char *, struct cred *);
87 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
88 	struct cred *);
89 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
90 	struct cred *);
91 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *);
92 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *);
93 static int pcfs_fsync(struct vnode *, int, struct cred *);
94 static void pcfs_inactive(struct vnode *, struct cred *);
95 static int pcfs_fid(struct vnode *vp, struct fid *fidp);
96 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
97 	offset_t, cred_t *, caller_context_t *);
98 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
99 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
100 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
101 	page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
102 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *);
103 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
104 	uchar_t, uchar_t, uint_t, struct cred *);
105 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
106 	size_t, uchar_t, uchar_t, uint_t, struct cred *);
107 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
108 	size_t, uint_t, uint_t, uint_t, struct cred *);
109 static int pcfs_seek(struct vnode *, offset_t, offset_t *);
110 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *);
111 
112 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
113 	struct cred *);
114 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
115 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase);
116 
117 extern krwlock_t pcnodes_lock;
118 
/*
 * Round "r" up to the next multiple of sizeof (long long).  The mask form
 * relies on sizeof (long long) being a power of two.
 */
#define	lround(r)	\
	(((r) + sizeof (long long) - 1) & ~(sizeof (long long) - 1))
120 
121 /*
122  * vnode op vectors for files and directories.
123  */
124 struct vnodeops *pcfs_fvnodeops;
125 struct vnodeops *pcfs_dvnodeops;
126 
127 const fs_operation_def_t pcfs_fvnodeops_template[] = {
128 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
129 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
130 	VOPNAME_READ,		{ .vop_read = pcfs_read },
131 	VOPNAME_WRITE,		{ .vop_write = pcfs_write },
132 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
133 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
134 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
135 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
136 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
137 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
138 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
139 	VOPNAME_SPACE,		{ .vop_space = pcfs_space },
140 	VOPNAME_GETPAGE,	{ .vop_getpage = pcfs_getpage },
141 	VOPNAME_PUTPAGE,	{ .vop_putpage = pcfs_putpage },
142 	VOPNAME_MAP,		{ .vop_map = pcfs_map },
143 	VOPNAME_ADDMAP,		{ .vop_addmap = pcfs_addmap },
144 	VOPNAME_DELMAP,		{ .vop_delmap = pcfs_delmap },
145 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
146 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
147 	NULL,			NULL
148 };
149 
150 const fs_operation_def_t pcfs_dvnodeops_template[] = {
151 	VOPNAME_OPEN,		{ .vop_open = pcfs_open },
152 	VOPNAME_CLOSE,		{ .vop_close = pcfs_close },
153 	VOPNAME_GETATTR,	{ .vop_getattr = pcfs_getattr },
154 	VOPNAME_SETATTR,	{ .vop_setattr = pcfs_setattr },
155 	VOPNAME_ACCESS,		{ .vop_access = pcfs_access },
156 	VOPNAME_LOOKUP,		{ .vop_lookup = pcfs_lookup },
157 	VOPNAME_CREATE,		{ .vop_create = pcfs_create },
158 	VOPNAME_REMOVE,		{ .vop_remove = pcfs_remove },
159 	VOPNAME_RENAME,		{ .vop_rename = pcfs_rename },
160 	VOPNAME_MKDIR,		{ .vop_mkdir = pcfs_mkdir },
161 	VOPNAME_RMDIR,		{ .vop_rmdir = pcfs_rmdir },
162 	VOPNAME_READDIR,	{ .vop_readdir = pcfs_readdir },
163 	VOPNAME_FSYNC,		{ .vop_fsync = pcfs_fsync },
164 	VOPNAME_INACTIVE,	{ .vop_inactive = pcfs_inactive },
165 	VOPNAME_FID,		{ .vop_fid = pcfs_fid },
166 	VOPNAME_SEEK,		{ .vop_seek = pcfs_seek },
167 	VOPNAME_PATHCONF,	{ .vop_pathconf = pcfs_pathconf },
168 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
169 	NULL,			NULL
170 };
171 
172 
/*
 * Open entry point for pcfs files and directories.  None of the arguments
 * are examined and no state is changed; the call always succeeds.
 */
/*ARGSUSED*/
static int
pcfs_open(struct vnode **vpp, int flag, struct cred *cr)
{
	return (0);
}
182 
183 /*
184  * files are sync'ed on close to keep floppy up to date
185  */
186 
187 /*ARGSUSED*/
188 static int
189 pcfs_close(
190 	struct vnode *vp,
191 	int flag,
192 	int count,
193 	offset_t offset,
194 	struct cred *cr)
195 {
196 	return (0);
197 }
198 
199 /*ARGSUSED*/
200 static int
201 pcfs_read(
202 	struct vnode *vp,
203 	struct uio *uiop,
204 	int ioflag,
205 	struct cred *cr,
206 	struct caller_context *ct)
207 {
208 	struct pcfs *fsp;
209 	struct pcnode *pcp;
210 	int error;
211 
212 	fsp = VFSTOPCFS(vp->v_vfsp);
213 	if (error = pc_verify(fsp))
214 		return (error);
215 	error = pc_lockfs(fsp, 0, 0);
216 	if (error)
217 		return (error);
218 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
219 		pc_unlockfs(fsp);
220 		return (EIO);
221 	}
222 	error = rwpcp(pcp, uiop, UIO_READ, ioflag);
223 	if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
224 		pcp->pc_flags |= PC_ACC;
225 		pc_mark_acc(pcp);
226 	}
227 	pc_unlockfs(fsp);
228 	if (error) {
229 		PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
230 	}
231 	return (error);
232 }
233 
234 /*ARGSUSED*/
235 static int
236 pcfs_write(
237 	struct vnode *vp,
238 	struct uio *uiop,
239 	int ioflag,
240 	struct cred *cr,
241 	struct caller_context *ct)
242 {
243 	struct pcfs *fsp;
244 	struct pcnode *pcp;
245 	int error;
246 
247 	fsp = VFSTOPCFS(vp->v_vfsp);
248 	if (error = pc_verify(fsp))
249 		return (error);
250 	error = pc_lockfs(fsp, 0, 0);
251 	if (error)
252 		return (error);
253 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
254 		pc_unlockfs(fsp);
255 		return (EIO);
256 	}
257 	if (ioflag & FAPPEND) {
258 		/*
259 		 * in append mode start at end of file.
260 		 */
261 		uiop->uio_loffset = pcp->pc_size;
262 	}
263 	error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
264 	pcp->pc_flags |= PC_MOD;
265 	pc_mark_mod(pcp);
266 	if (ioflag & (FSYNC|FDSYNC))
267 		(void) pc_nodeupdate(pcp);
268 
269 	pc_unlockfs(fsp);
270 	if (error) {
271 		PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
272 	}
273 	return (error);
274 }
275 
276 /*
277  * read or write a vnode
278  */
279 static int
280 rwpcp(
281 	struct pcnode *pcp,
282 	struct uio *uio,
283 	enum uio_rw rw,
284 	int ioflag)
285 {
286 	struct vnode *vp = PCTOV(pcp);
287 	struct pcfs *fsp;
288 	daddr_t bn;			/* phys block number */
289 	int n;
290 	offset_t off;
291 	caddr_t base;
292 	int mapon, pagecreate;
293 	int newpage;
294 	int error = 0;
295 	rlim64_t limit = uio->uio_llimit;
296 	int oresid = uio->uio_resid;
297 
298 	/*
299 	 * If the filesystem was umounted by force, return immediately.
300 	 */
301 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
302 		return (EIO);
303 
304 	PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
305 	    uio->uio_loffset, uio->uio_resid, pcp->pc_size);
306 
307 	ASSERT(rw == UIO_READ || rw == UIO_WRITE);
308 	ASSERT(vp->v_type == VREG);
309 
310 	if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
311 		return (0);
312 	}
313 
314 	if (uio->uio_loffset < 0)
315 		return (EINVAL);
316 
317 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
318 		limit = MAXOFFSET_T;
319 
320 	if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
321 		proc_t *p = ttoproc(curthread);
322 
323 		mutex_enter(&p->p_lock);
324 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
325 		    p, RCA_UNSAFE_SIGINFO);
326 		mutex_exit(&p->p_lock);
327 		return (EFBIG);
328 	}
329 
330 	/* the following condition will occur only for write */
331 
332 	if (uio->uio_loffset >= UINT32_MAX)
333 		return (EFBIG);
334 
335 	if (uio->uio_resid == 0)
336 		return (0);
337 
338 	if (limit > UINT32_MAX)
339 		limit = UINT32_MAX;
340 
341 	fsp = VFSTOPCFS(vp->v_vfsp);
342 	if (fsp->pcfs_flags & PCFS_IRRECOV)
343 		return (EIO);
344 
345 	do {
346 		/*
347 		 * Assignments to "n" in this block may appear
348 		 * to overflow in some cases.  However, after careful
349 		 * analysis it was determined that all assignments to
350 		 * "n" serve only to make "n" smaller.  Since "n"
351 		 * starts out as no larger than MAXBSIZE, "int" is
352 		 * safe.
353 		 */
354 		off = uio->uio_loffset & MAXBMASK;
355 		mapon = (int)(uio->uio_loffset & MAXBOFFSET);
356 		n = MIN(MAXBSIZE - mapon, uio->uio_resid);
357 		if (rw == UIO_READ) {
358 			offset_t diff;
359 
360 			diff = pcp->pc_size - uio->uio_loffset;
361 			if (diff <= 0)
362 				return (0);
363 			if (diff < n)
364 				n = (int)diff;
365 		}
366 		/*
367 		 * Compare limit with the actual offset + n, not the
368 		 * rounded down offset "off" or we will overflow
369 		 * the maximum file size after all.
370 		 */
371 		if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
372 			if (uio->uio_loffset >= limit) {
373 				error = EFBIG;
374 				break;
375 			}
376 			n = (int)(limit - uio->uio_loffset);
377 		}
378 		base = segmap_getmap(segkmap, vp, (u_offset_t)off);
379 		pagecreate = 0;
380 		newpage = 0;
381 		if (rw == UIO_WRITE) {
382 			/*
383 			 * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
384 			 * with one page at a time, instead of one MAXBSIZE
385 			 * at a time, so we can fully explore pagecreate
386 			 * optimization??
387 			 */
388 			if (uio->uio_loffset + n > pcp->pc_size) {
389 				uint_t ncl, lcn;
390 
391 				ncl = (uint_t)howmany((offset_t)pcp->pc_size,
392 				    fsp->pcfs_clsize);
393 				if (uio->uio_loffset > pcp->pc_size &&
394 				    ncl < (uint_t)howmany(uio->uio_loffset,
395 				    fsp->pcfs_clsize)) {
396 					/*
397 					 * Allocate and zerofill skipped
398 					 * clusters. This may not be worth the
399 					 * effort since a small lseek beyond
400 					 * eof but still within the cluster
401 					 * will not be zeroed out.
402 					 */
403 					lcn = pc_lblkno(fsp, uio->uio_loffset);
404 					error = pc_balloc(pcp, (daddr_t)lcn,
405 					    1, &bn);
406 					ncl = lcn + 1;
407 				}
408 				if (!error &&
409 				    ncl < (uint_t)howmany(uio->uio_loffset + n,
410 				    fsp->pcfs_clsize))
411 					/*
412 					 * allocate clusters w/o zerofill
413 					 */
414 					error = pc_balloc(pcp,
415 					    (daddr_t)pc_lblkno(fsp,
416 					    uio->uio_loffset + n - 1),
417 					    0, &bn);
418 
419 				pcp->pc_flags |= PC_CHG;
420 
421 				if (error) {
422 					pc_cluster32_t ncl;
423 					int nerror;
424 
425 					/*
426 					 * figure out new file size from
427 					 * cluster chain length. If this
428 					 * is detected to loop, the chain
429 					 * is corrupted and we'd better
430 					 * keep our fingers off that file.
431 					 */
432 					nerror = pc_fileclsize(fsp,
433 					    pcp->pc_scluster, &ncl);
434 					if (nerror) {
435 						PC_DPRINTF1(2,
436 						    "cluster chain "
437 						    "corruption, "
438 						    "scluster=%d\n",
439 						    pcp->pc_scluster);
440 						pcp->pc_size = 0;
441 						pcp->pc_flags |= PC_INVAL;
442 						error = nerror;
443 						(void) segmap_release(segkmap,
444 						    base, 0);
445 						break;
446 					}
447 					pcp->pc_size = fsp->pcfs_clsize * ncl;
448 
449 					if (error == ENOSPC &&
450 					    (pcp->pc_size - uio->uio_loffset)
451 					    > 0) {
452 						PC_DPRINTF3(2, "rwpcp ENOSPC "
453 						    "off=%lld n=%d size=%d\n",
454 						    uio->uio_loffset,
455 						    n, pcp->pc_size);
456 						n = (int)(pcp->pc_size -
457 						    uio->uio_loffset);
458 					} else {
459 						PC_DPRINTF1(1,
460 						    "rwpcp error1=%d\n", error);
461 						(void) segmap_release(segkmap,
462 						    base, 0);
463 						break;
464 					}
465 				} else {
466 					pcp->pc_size =
467 					    (uint_t)(uio->uio_loffset + n);
468 				}
469 				if (mapon == 0) {
470 					newpage = segmap_pagecreate(segkmap,
471 					    base, (size_t)n, 0);
472 					pagecreate = 1;
473 				}
474 			} else if (n == MAXBSIZE) {
475 				newpage = segmap_pagecreate(segkmap, base,
476 				    (size_t)n, 0);
477 				pagecreate = 1;
478 			}
479 		}
480 		error = uiomove(base + mapon, (size_t)n, rw, uio);
481 
482 		if (pagecreate && uio->uio_loffset <
483 		    roundup(off + mapon + n, PAGESIZE)) {
484 			offset_t nzero, nmoved;
485 
486 			nmoved = uio->uio_loffset - (off + mapon);
487 			nzero = roundup(mapon + n, PAGESIZE) - nmoved;
488 			(void) kzero(base + mapon + nmoved, (size_t)nzero);
489 		}
490 
491 		/*
492 		 * Unlock the pages which have been allocated by
493 		 * page_create_va() in segmap_pagecreate().
494 		 */
495 		if (newpage)
496 			segmap_pageunlock(segkmap, base, (size_t)n,
497 			    rw == UIO_WRITE ? S_WRITE : S_READ);
498 
499 		if (error) {
500 			PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
501 			/*
502 			 * If we failed on a write, we may have already
503 			 * allocated file blocks as well as pages.  It's hard
504 			 * to undo the block allocation, but we must be sure
505 			 * to invalidate any pages that may have been
506 			 * allocated.
507 			 */
508 			if (rw == UIO_WRITE)
509 				(void) segmap_release(segkmap, base, SM_INVAL);
510 			else
511 				(void) segmap_release(segkmap, base, 0);
512 		} else {
513 			uint_t flags = 0;
514 
515 			if (rw == UIO_READ) {
516 				if (n + mapon == MAXBSIZE ||
517 				    uio->uio_loffset == pcp->pc_size)
518 					flags = SM_DONTNEED;
519 			} else if (ioflag & (FSYNC|FDSYNC)) {
520 				flags = SM_WRITE;
521 			} else if (n + mapon == MAXBSIZE) {
522 				flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
523 			}
524 			error = segmap_release(segkmap, base, flags);
525 		}
526 
527 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
528 
529 	if (oresid != uio->uio_resid)
530 		error = 0;
531 	return (error);
532 }
533 
534 /*ARGSUSED*/
535 static int
536 pcfs_getattr(
537 	struct vnode *vp,
538 	struct vattr *vap,
539 	int flags,
540 	struct cred *cr)
541 {
542 	struct pcnode *pcp;
543 	struct pcfs *fsp;
544 	int error;
545 	char attr;
546 	struct pctime atime;
547 	int64_t unixtime;
548 
549 	PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
550 
551 	fsp = VFSTOPCFS(vp->v_vfsp);
552 	error = pc_lockfs(fsp, 0, 0);
553 	if (error)
554 		return (error);
555 
556 	/*
557 	 * Note that we don't check for "invalid node" (PC_INVAL) here
558 	 * only in order to make stat() succeed. We allow no I/O on such
559 	 * a node, but do allow to check for its existance.
560 	 */
561 	if ((pcp = VTOPC(vp)) == NULL) {
562 		pc_unlockfs(fsp);
563 		return (EIO);
564 	}
565 	/*
566 	 * Copy from pcnode.
567 	 */
568 	vap->va_type = vp->v_type;
569 	attr = pcp->pc_entry.pcd_attr;
570 	if (PCA_IS_HIDDEN(fsp, attr))
571 		vap->va_mode = 0;
572 	else if (attr & PCA_LABEL)
573 		vap->va_mode = 0444;
574 	else if (attr & PCA_RDONLY)
575 		vap->va_mode = 0555;
576 	else if (fsp->pcfs_flags & PCFS_BOOTPART) {
577 		vap->va_mode = 0755;
578 	} else {
579 		vap->va_mode = 0777;
580 	}
581 
582 	if (attr & PCA_DIR)
583 		vap->va_mode |= S_IFDIR;
584 	else
585 		vap->va_mode |= S_IFREG;
586 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
587 		vap->va_uid = 0;
588 		vap->va_gid = 0;
589 	} else {
590 		vap->va_uid = crgetuid(cr);
591 		vap->va_gid = crgetgid(cr);
592 	}
593 	vap->va_fsid = vp->v_vfsp->vfs_dev;
594 	vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
595 	    pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
596 	    pc_getstartcluster(fsp, &pcp->pc_entry), fsp->pcfs_entps);
597 	vap->va_nlink = 1;
598 	vap->va_size = (u_offset_t)pcp->pc_size;
599 
600 	pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
601 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
602 		if (unixtime > INT32_MAX)
603 			DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
604 		unixtime = MIN(unixtime, INT32_MAX);
605 	} else if (unixtime > INT32_MAX &&
606 	    get_udatamodel() == DATAMODEL_ILP32) {
607 		pc_unlockfs(fsp);
608 		DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
609 		return (EOVERFLOW);
610 	}
611 
612 	vap->va_mtime.tv_sec = (time_t)unixtime;
613 	vap->va_mtime.tv_nsec = 0;
614 
615 	/*
616 	 * FAT doesn't know about POSIX ctime.
617 	 * Best approximation is to always set it to mtime.
618 	 */
619 	vap->va_ctime = vap->va_mtime;
620 
621 	/*
622 	 * FAT only stores "last access date". If that's the
623 	 * same as the date of last modification then the time
624 	 * of last access is known. Otherwise, use midnight.
625 	 */
626 	atime.pct_date = pcp->pc_entry.pcd_ladate;
627 	if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
628 		atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
629 	else
630 		atime.pct_time = 0;
631 	pc_pcttotv(&atime, &unixtime);
632 	if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
633 		if (unixtime > INT32_MAX)
634 			DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
635 		unixtime = MIN(unixtime, INT32_MAX);
636 	} else if (unixtime > INT32_MAX &&
637 	    get_udatamodel() == DATAMODEL_ILP32) {
638 		pc_unlockfs(fsp);
639 		DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
640 		return (EOVERFLOW);
641 	}
642 
643 	vap->va_atime.tv_sec = (time_t)unixtime;
644 	vap->va_atime.tv_nsec = 0;
645 
646 	vap->va_rdev = 0;
647 	vap->va_nblocks = (fsblkcnt64_t)howmany((offset_t)pcp->pc_size,
648 	    DEV_BSIZE);
649 	vap->va_blksize = fsp->pcfs_clsize;
650 	pc_unlockfs(fsp);
651 	return (0);
652 }
653 
654 
655 /*ARGSUSED*/
656 static int
657 pcfs_setattr(
658 	struct vnode *vp,
659 	struct vattr *vap,
660 	int flags,
661 	struct cred *cr,
662 	caller_context_t *ct)
663 {
664 	struct pcnode *pcp;
665 	mode_t mask = vap->va_mask;
666 	int error;
667 	struct pcfs *fsp;
668 	timestruc_t now, *timep;
669 
670 	PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
671 	/*
672 	 * cannot set these attributes
673 	 */
674 	if (mask & (AT_NOSET | AT_UID | AT_GID)) {
675 		return (EINVAL);
676 	}
677 	/*
678 	 * pcfs_setattr is now allowed on directories to avoid silly warnings
679 	 * from 'tar' when it tries to set times on a directory, and console
680 	 * printf's on the NFS server when it gets EINVAL back on such a
681 	 * request. One possible problem with that since a directory entry
682 	 * identifies a file, '.' and all the '..' entries in subdirectories
683 	 * may get out of sync when the directory is updated since they're
684 	 * treated like separate files. We could fix that by looking for
685 	 * '.' and giving it the same attributes, and then looking for
686 	 * all the subdirectories and updating '..', but that's pretty
687 	 * expensive for something that doesn't seem likely to matter.
688 	 */
689 	/* can't do some ops on directories anyway */
690 	if ((vp->v_type == VDIR) &&
691 	    (mask & AT_SIZE)) {
692 		return (EINVAL);
693 	}
694 
695 	fsp = VFSTOPCFS(vp->v_vfsp);
696 	error = pc_lockfs(fsp, 0, 0);
697 	if (error)
698 		return (error);
699 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
700 		pc_unlockfs(fsp);
701 		return (EIO);
702 	}
703 
704 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
705 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
706 			pc_unlockfs(fsp);
707 			return (EACCES);
708 		}
709 	}
710 
711 	/*
712 	 * Change file access modes.
713 	 * If nobody has write permission, file is marked readonly.
714 	 * Otherwise file is writable by anyone.
715 	 */
716 	if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
717 		if ((vap->va_mode & 0222) == 0)
718 			pcp->pc_entry.pcd_attr |= PCA_RDONLY;
719 		else
720 			pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
721 		pcp->pc_flags |= PC_CHG;
722 	}
723 	/*
724 	 * Truncate file. Must have write permission.
725 	 */
726 	if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
727 		if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
728 			error = EACCES;
729 			goto out;
730 		}
731 		if (vap->va_size > UINT32_MAX) {
732 			error = EFBIG;
733 			goto out;
734 		}
735 		error = pc_truncate(pcp, (uint_t)vap->va_size);
736 		if (error)
737 			goto out;
738 	}
739 	/*
740 	 * Change file modified times.
741 	 */
742 	if (mask & (AT_MTIME | AT_CTIME)) {
743 		/*
744 		 * If SysV-compatible option to set access and
745 		 * modified times if privileged, owner, or write access,
746 		 * use current time rather than va_mtime.
747 		 *
748 		 * XXX - va_mtime.tv_sec == -1 flags this.
749 		 */
750 		timep = &vap->va_mtime;
751 		if (vap->va_mtime.tv_sec == -1) {
752 			gethrestime(&now);
753 			timep = &now;
754 		}
755 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
756 		    timep->tv_sec > INT32_MAX) {
757 			error = EOVERFLOW;
758 			goto out;
759 		}
760 		error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
761 		if (error)
762 			goto out;
763 		pcp->pc_flags |= PC_CHG;
764 	}
765 	/*
766 	 * Change file access times.
767 	 */
768 	if (mask & AT_ATIME) {
769 		/*
770 		 * If SysV-compatible option to set access and
771 		 * modified times if privileged, owner, or write access,
772 		 * use current time rather than va_mtime.
773 		 *
774 		 * XXX - va_atime.tv_sec == -1 flags this.
775 		 */
776 		struct pctime	atime;
777 
778 		timep = &vap->va_atime;
779 		if (vap->va_atime.tv_sec == -1) {
780 			gethrestime(&now);
781 			timep = &now;
782 		}
783 		if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
784 		    timep->tv_sec > INT32_MAX) {
785 			error = EOVERFLOW;
786 			goto out;
787 		}
788 		error = pc_tvtopct(timep, &atime);
789 		if (error)
790 			goto out;
791 		pcp->pc_entry.pcd_ladate = atime.pct_date;
792 		pcp->pc_flags |= PC_CHG;
793 	}
794 out:
795 	pc_unlockfs(fsp);
796 	return (error);
797 }
798 
799 
800 /*ARGSUSED*/
801 static int
802 pcfs_access(
803 	struct vnode *vp,
804 	int mode,
805 	int flags,
806 	struct cred *cr)
807 {
808 	struct pcnode *pcp;
809 	struct pcfs *fsp;
810 
811 
812 	fsp = VFSTOPCFS(vp->v_vfsp);
813 
814 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
815 		return (EIO);
816 	if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
817 		return (EACCES);
818 
819 	/*
820 	 * If this is a boot partition, privileged users have full access while
821 	 * others have read-only access.
822 	 */
823 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
824 		if ((mode & VWRITE) &&
825 		    secpolicy_pcfs_modify_bootpartition(cr) != 0)
826 			return (EACCES);
827 	}
828 	return (0);
829 }
830 
831 
832 /*ARGSUSED*/
833 static int
834 pcfs_fsync(
835 	struct vnode *vp,
836 	int syncflag,
837 	struct cred *cr)
838 {
839 	struct pcfs *fsp;
840 	struct pcnode *pcp;
841 	int error;
842 
843 	fsp = VFSTOPCFS(vp->v_vfsp);
844 	if (error = pc_verify(fsp))
845 		return (error);
846 	error = pc_lockfs(fsp, 0, 0);
847 	if (error)
848 		return (error);
849 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
850 		pc_unlockfs(fsp);
851 		return (EIO);
852 	}
853 	rw_enter(&pcnodes_lock, RW_WRITER);
854 	error = pc_nodesync(pcp);
855 	rw_exit(&pcnodes_lock);
856 	pc_unlockfs(fsp);
857 	return (error);
858 }
859 
860 
861 /*ARGSUSED*/
862 static void
863 pcfs_inactive(
864 	struct vnode *vp,
865 	struct cred *cr)
866 {
867 	struct pcnode *pcp;
868 	struct pcfs *fsp;
869 	int error;
870 
871 	fsp = VFSTOPCFS(vp->v_vfsp);
872 	error = pc_lockfs(fsp, 0, 1);
873 
874 	/*
875 	 * If the filesystem was umounted by force, all dirty
876 	 * pages associated with this vnode are invalidated
877 	 * and then the vnode will be freed.
878 	 */
879 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
880 		pcp = VTOPC(vp);
881 		if (vn_has_cached_data(vp)) {
882 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
883 			    pcfs_putapage, B_INVAL, (struct cred *)NULL);
884 		}
885 		remque(pcp);
886 		if (error == 0)
887 			pc_unlockfs(fsp);
888 		vn_free(vp);
889 		kmem_free(pcp, sizeof (struct pcnode));
890 		VFS_RELE(PCFSTOVFS(fsp));
891 		return;
892 	}
893 
894 	mutex_enter(&vp->v_lock);
895 	ASSERT(vp->v_count >= 1);
896 	if (vp->v_count > 1) {
897 		vp->v_count--;  /* release our hold from vn_rele */
898 		mutex_exit(&vp->v_lock);
899 		pc_unlockfs(fsp);
900 		return;
901 	}
902 	mutex_exit(&vp->v_lock);
903 
904 	/*
905 	 * Check again to confirm that no intervening I/O error
906 	 * with a subsequent pc_diskchanged() call has released
907 	 * the pcnode. If it has then release the vnode as above.
908 	 */
909 	pcp = VTOPC(vp);
910 	if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
911 		if (vn_has_cached_data(vp))
912 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
913 			    pcfs_putapage, B_INVAL | B_TRUNC,
914 			    (struct cred *)NULL);
915 	}
916 
917 	if (pcp == NULL) {
918 		vn_free(vp);
919 	} else {
920 		pc_rele(pcp);
921 	}
922 
923 	if (!error)
924 		pc_unlockfs(fsp);
925 }
926 
927 /*ARGSUSED*/
928 static int
929 pcfs_lookup(
930 	struct vnode *dvp,
931 	char *nm,
932 	struct vnode **vpp,
933 	struct pathname *pnp,
934 	int flags,
935 	struct vnode *rdir,
936 	struct cred *cr)
937 {
938 	struct pcfs *fsp;
939 	struct pcnode *pcp;
940 	int error;
941 
942 	/*
943 	 * If the filesystem was umounted by force, return immediately.
944 	 */
945 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
946 		return (EIO);
947 
948 	/*
949 	 * verify that the dvp is still valid on the disk
950 	 */
951 	fsp = VFSTOPCFS(dvp->v_vfsp);
952 	if (error = pc_verify(fsp))
953 		return (error);
954 	error = pc_lockfs(fsp, 0, 0);
955 	if (error)
956 		return (error);
957 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
958 		pc_unlockfs(fsp);
959 		return (EIO);
960 	}
961 	/*
962 	 * Null component name is a synonym for directory being searched.
963 	 */
964 	if (*nm == '\0') {
965 		VN_HOLD(dvp);
966 		*vpp = dvp;
967 		pc_unlockfs(fsp);
968 		return (0);
969 	}
970 
971 	error = pc_dirlook(VTOPC(dvp), nm, &pcp);
972 	if (!error) {
973 		*vpp = PCTOV(pcp);
974 		pcp->pc_flags |= PC_EXTERNAL;
975 	}
976 	pc_unlockfs(fsp);
977 	return (error);
978 }
979 
980 
981 /*ARGSUSED*/
982 static int
983 pcfs_create(
984 	struct vnode *dvp,
985 	char *nm,
986 	struct vattr *vap,
987 	enum vcexcl exclusive,
988 	int mode,
989 	struct vnode **vpp,
990 	struct cred *cr,
991 	int flag)
992 {
993 	int error;
994 	struct pcnode *pcp;
995 	struct vnode *vp;
996 	struct pcfs *fsp;
997 
998 	/*
999 	 * can't create directories. use pcfs_mkdir.
1000 	 * can't create anything other than files.
1001 	 */
1002 	if (vap->va_type == VDIR)
1003 		return (EISDIR);
1004 	else if (vap->va_type != VREG)
1005 		return (EINVAL);
1006 
1007 	pcp = NULL;
1008 	fsp = VFSTOPCFS(dvp->v_vfsp);
1009 	error = pc_lockfs(fsp, 0, 0);
1010 	if (error)
1011 		return (error);
1012 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1013 		pc_unlockfs(fsp);
1014 		return (EIO);
1015 	}
1016 
1017 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1018 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1019 			pc_unlockfs(fsp);
1020 			return (EACCES);
1021 		}
1022 	}
1023 
1024 	if (*nm == '\0') {
1025 		/*
1026 		 * Null component name refers to the directory itself.
1027 		 */
1028 		VN_HOLD(dvp);
1029 		pcp = VTOPC(dvp);
1030 		error = EEXIST;
1031 	} else {
1032 		error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1033 	}
1034 	/*
1035 	 * if file exists and this is a nonexclusive create,
1036 	 * check for access permissions
1037 	 */
1038 	if (error == EEXIST) {
1039 		vp = PCTOV(pcp);
1040 		if (exclusive == NONEXCL) {
1041 			if (vp->v_type == VDIR) {
1042 				error = EISDIR;
1043 			} else if (mode) {
1044 				error = pcfs_access(PCTOV(pcp), mode, 0,
1045 				    cr);
1046 			} else {
1047 				error = 0;
1048 			}
1049 		}
1050 		if (error) {
1051 			VN_RELE(PCTOV(pcp));
1052 		} else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1053 		    (vap->va_size == 0)) {
1054 			error = pc_truncate(pcp, 0L);
1055 			if (error)
1056 				VN_RELE(PCTOV(pcp));
1057 		}
1058 	}
1059 	if (error) {
1060 		pc_unlockfs(fsp);
1061 		return (error);
1062 	}
1063 	*vpp = PCTOV(pcp);
1064 	pcp->pc_flags |= PC_EXTERNAL;
1065 	pc_unlockfs(fsp);
1066 	return (error);
1067 }
1068 
1069 /*ARGSUSED*/
1070 static int
1071 pcfs_remove(
1072 	struct vnode *vp,
1073 	char *nm,
1074 	struct cred *cr)
1075 {
1076 	struct pcfs *fsp;
1077 	struct pcnode *pcp;
1078 	int error;
1079 
1080 	fsp = VFSTOPCFS(vp->v_vfsp);
1081 	if (error = pc_verify(fsp))
1082 		return (error);
1083 	error = pc_lockfs(fsp, 0, 0);
1084 	if (error)
1085 		return (error);
1086 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1087 		pc_unlockfs(fsp);
1088 		return (EIO);
1089 	}
1090 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1091 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1092 			pc_unlockfs(fsp);
1093 			return (EACCES);
1094 		}
1095 	}
1096 	error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG);
1097 	pc_unlockfs(fsp);
1098 	return (error);
1099 }
1100 
1101 /*
1102  * Rename a file or directory
1103  * This rename is restricted to only rename files within a directory.
1104  * XX should make rename more general
1105  */
1106 /*ARGSUSED*/
1107 static int
1108 pcfs_rename(
1109 	struct vnode *sdvp,		/* old (source) parent vnode */
1110 	char *snm,			/* old (source) entry name */
1111 	struct vnode *tdvp,		/* new (target) parent vnode */
1112 	char *tnm,			/* new (target) entry name */
1113 	struct cred *cr)
1114 {
1115 	struct pcfs *fsp;
1116 	struct pcnode *dp;	/* parent pcnode */
1117 	struct pcnode *tdp;
1118 	int error;
1119 
1120 	fsp = VFSTOPCFS(sdvp->v_vfsp);
1121 	if (error = pc_verify(fsp))
1122 		return (error);
1123 
1124 	/*
1125 	 * make sure we can muck with this directory.
1126 	 */
1127 	error = pcfs_access(sdvp, VWRITE, 0, cr);
1128 	if (error) {
1129 		return (error);
1130 	}
1131 	error = pc_lockfs(fsp, 0, 0);
1132 	if (error)
1133 		return (error);
1134 	if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1135 	    (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1136 		pc_unlockfs(fsp);
1137 		return (EIO);
1138 	}
1139 	error = pc_rename(dp, tdp, snm, tnm);
1140 	pc_unlockfs(fsp);
1141 	return (error);
1142 }
1143 
1144 /*ARGSUSED*/
1145 static int
1146 pcfs_mkdir(
1147 	struct vnode *dvp,
1148 	char *nm,
1149 	struct vattr *vap,
1150 	struct vnode **vpp,
1151 	struct cred *cr)
1152 {
1153 	struct pcfs *fsp;
1154 	struct pcnode *pcp;
1155 	int error;
1156 
1157 	fsp = VFSTOPCFS(dvp->v_vfsp);
1158 	if (error = pc_verify(fsp))
1159 		return (error);
1160 	error = pc_lockfs(fsp, 0, 0);
1161 	if (error)
1162 		return (error);
1163 	if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1164 		pc_unlockfs(fsp);
1165 		return (EIO);
1166 	}
1167 
1168 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1169 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1170 			pc_unlockfs(fsp);
1171 			return (EACCES);
1172 		}
1173 	}
1174 
1175 	error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1176 
1177 	if (!error) {
1178 		pcp -> pc_flags |= PC_EXTERNAL;
1179 		*vpp = PCTOV(pcp);
1180 	} else if (error == EEXIST) {
1181 		VN_RELE(PCTOV(pcp));
1182 	}
1183 	pc_unlockfs(fsp);
1184 	return (error);
1185 }
1186 
1187 /*ARGSUSED*/
1188 static int
1189 pcfs_rmdir(
1190 	struct vnode *dvp,
1191 	char *nm,
1192 	struct vnode *cdir,
1193 	struct cred *cr)
1194 {
1195 	struct pcfs *fsp;
1196 	struct pcnode *pcp;
1197 	int error;
1198 
1199 	fsp = VFSTOPCFS(dvp -> v_vfsp);
1200 	if (error = pc_verify(fsp))
1201 		return (error);
1202 	if (error = pc_lockfs(fsp, 0, 0))
1203 		return (error);
1204 
1205 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1206 		pc_unlockfs(fsp);
1207 		return (EIO);
1208 	}
1209 
1210 	if (fsp->pcfs_flags & PCFS_BOOTPART) {
1211 		if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1212 			pc_unlockfs(fsp);
1213 			return (EACCES);
1214 		}
1215 	}
1216 
1217 	error = pc_dirremove(pcp, nm, cdir, VDIR);
1218 	pc_unlockfs(fsp);
1219 	return (error);
1220 }
1221 
1222 /*
1223  * read entries in a directory.
1224  * we must convert pc format to unix format
1225  */
1226 
1227 /*ARGSUSED*/
1228 static int
1229 pcfs_readdir(
1230 	struct vnode *dvp,
1231 	struct uio *uiop,
1232 	struct cred *cr,
1233 	int *eofp)
1234 {
1235 	struct pcnode *pcp;
1236 	struct pcfs *fsp;
1237 	struct pcdir *ep;
1238 	struct buf *bp = NULL;
1239 	offset_t offset;
1240 	int boff;
1241 	struct pc_dirent lbp;
1242 	struct pc_dirent *ld = &lbp;
1243 	int error;
1244 
1245 	/*
1246 	 * If the filesystem was umounted by force, return immediately.
1247 	 */
1248 	if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1249 		return (EIO);
1250 
1251 	if ((uiop->uio_iovcnt != 1) ||
1252 	    (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1253 		return (EINVAL);
1254 	}
1255 	fsp = VFSTOPCFS(dvp->v_vfsp);
1256 	/*
1257 	 * verify that the dp is still valid on the disk
1258 	 */
1259 	if (error = pc_verify(fsp)) {
1260 		return (error);
1261 	}
1262 	error = pc_lockfs(fsp, 0, 0);
1263 	if (error)
1264 		return (error);
1265 	if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1266 		pc_unlockfs(fsp);
1267 		return (EIO);
1268 	}
1269 
1270 	bzero(ld, sizeof (*ld));
1271 
1272 	if (eofp != NULL)
1273 		*eofp = 0;
1274 	offset = uiop->uio_loffset;
1275 
1276 	if (dvp->v_flag & VROOT) {
1277 		/*
1278 		 * kludge up entries for "." and ".." in the root.
1279 		 */
1280 		if (offset == 0) {
1281 			(void) strcpy(ld->d_name, ".");
1282 			ld->d_reclen = DIRENT64_RECLEN(1);
1283 			ld->d_off = (off64_t)sizeof (struct pcdir);
1284 			ld->d_ino = (ino64_t)UINT_MAX;
1285 			if (ld->d_reclen > uiop->uio_resid) {
1286 				pc_unlockfs(fsp);
1287 				return (ENOSPC);
1288 			}
1289 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1290 			uiop->uio_loffset = ld->d_off;
1291 			offset = uiop->uio_loffset;
1292 		}
1293 		if (offset == sizeof (struct pcdir)) {
1294 			(void) strcpy(ld->d_name, "..");
1295 			ld->d_reclen = DIRENT64_RECLEN(2);
1296 			if (ld->d_reclen > uiop->uio_resid) {
1297 				pc_unlockfs(fsp);
1298 				return (ENOSPC);
1299 			}
1300 			ld->d_off = (off64_t)(uiop->uio_loffset +
1301 			    sizeof (struct pcdir));
1302 			ld->d_ino = (ino64_t)UINT_MAX;
1303 			(void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1304 			uiop->uio_loffset = ld->d_off;
1305 			offset = uiop->uio_loffset;
1306 		}
1307 		offset -= 2 * sizeof (struct pcdir);
1308 		/* offset now has the real offset value into directory file */
1309 	}
1310 
1311 	for (;;) {
1312 		boff = pc_blkoff(fsp, offset);
1313 		if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1314 			if (bp != NULL) {
1315 				brelse(bp);
1316 				bp = NULL;
1317 			}
1318 			error = pc_blkatoff(pcp, offset, &bp, &ep);
1319 			if (error) {
1320 				if (error == ENOENT) {
1321 					error = 0;
1322 					if (eofp)
1323 						*eofp = 1;
1324 				}
1325 				break;
1326 			}
1327 		}
1328 		if (ep->pcd_filename[0] == PCD_UNUSED) {
1329 			if (eofp)
1330 				*eofp = 1;
1331 			break;
1332 		}
1333 		/*
1334 		 * Don't display label because it may contain funny characters.
1335 		 */
1336 		if (ep->pcd_filename[0] == PCD_ERASED) {
1337 			uiop->uio_loffset += sizeof (struct pcdir);
1338 			offset += sizeof (struct pcdir);
1339 			ep++;
1340 			continue;
1341 		}
1342 		if (PCDL_IS_LFN(ep)) {
1343 			if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1344 			    0)
1345 				break;
1346 			continue;
1347 		}
1348 
1349 		if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1350 			break;
1351 	}
1352 	if (bp)
1353 		brelse(bp);
1354 	pc_unlockfs(fsp);
1355 	return (error);
1356 }
1357 
1358 
1359 /*
1360  * Called from pvn_getpages or pcfs_getpage to get a particular page.
1361  * When we are called the pcfs is already locked.
1362  */
1363 /*ARGSUSED*/
1364 static int
1365 pcfs_getapage(
1366 	struct vnode *vp,
1367 	u_offset_t off,
1368 	size_t len,
1369 	uint_t *protp,
1370 	page_t *pl[],		/* NULL if async IO is requested */
1371 	size_t plsz,
1372 	struct seg *seg,
1373 	caddr_t addr,
1374 	enum seg_rw rw,
1375 	struct cred *cr)
1376 {
1377 	struct pcnode *pcp;
1378 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1379 	struct vnode *devvp;
1380 	page_t *pp;
1381 	page_t *pagefound;
1382 	int err;
1383 
1384 	/*
1385 	 * If the filesystem was umounted by force, return immediately.
1386 	 */
1387 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1388 		return (EIO);
1389 
1390 	PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1391 	    (void *)vp, off, len);
1392 
1393 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1394 		return (EIO);
1395 	devvp = fsp->pcfs_devvp;
1396 
1397 	/* pcfs doesn't do readaheads */
1398 	if (pl == NULL)
1399 		return (0);
1400 
1401 	pl[0] = NULL;
1402 	err = 0;
1403 	/*
1404 	 * If the accessed time on the pcnode has not already been
1405 	 * set elsewhere (e.g. for read/setattr) we set the time now.
1406 	 * This gives us approximate modified times for mmap'ed files
1407 	 * which are accessed via loads in the user address space.
1408 	 */
1409 	if ((pcp->pc_flags & PC_ACC) == 0 &&
1410 	    ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1411 		pcp->pc_flags |= PC_ACC;
1412 		pc_mark_acc(pcp);
1413 	}
1414 reread:
1415 	if ((pagefound = page_exists(vp, off)) == NULL) {
1416 		/*
1417 		 * Need to really do disk IO to get the page(s).
1418 		 */
1419 		struct buf *bp;
1420 		daddr_t lbn, bn;
1421 		u_offset_t io_off;
1422 		size_t io_len;
1423 		u_offset_t lbnoff, xferoffset;
1424 		u_offset_t pgoff;
1425 		uint_t	xfersize;
1426 		int err1;
1427 
1428 		lbn = pc_lblkno(fsp, off);
1429 		lbnoff = off & ~(fsp->pcfs_clsize - 1);
1430 		xferoffset = off & ~(fsp->pcfs_secsize - 1);
1431 
1432 		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1433 		    off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1434 		if (pp == NULL)
1435 			/*
1436 			 * XXX - If pcfs is made MT-hot, this should go
1437 			 * back to reread.
1438 			 */
1439 			panic("pcfs_getapage pvn_read_kluster");
1440 
1441 		for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1442 		    pgoff += xfersize,
1443 		    lbn +=  howmany(xfersize, fsp->pcfs_clsize),
1444 		    lbnoff += xfersize, xferoffset += xfersize) {
1445 			/*
1446 			 * read as many contiguous blocks as possible to
1447 			 * fill this page
1448 			 */
1449 			xfersize = PAGESIZE - pgoff;
1450 			err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1451 			if (err1) {
1452 				PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1453 				err = err1;
1454 				goto out;
1455 			}
1456 			bp = pageio_setup(pp, xfersize, devvp, B_READ);
1457 			bp->b_edev = devvp->v_rdev;
1458 			bp->b_dev = cmpdev(devvp->v_rdev);
1459 			bp->b_blkno = bn +
1460 			    /* add a sector offset within the cluster */
1461 			    /* when the clustersize > PAGESIZE */
1462 			    (xferoffset - lbnoff) / fsp->pcfs_secsize;
1463 			bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1464 			bp->b_file = vp;
1465 			bp->b_offset = (offset_t)(off + pgoff);
1466 
1467 			(void) bdev_strategy(bp);
1468 
1469 			lwp_stat_update(LWP_STAT_INBLK, 1);
1470 
1471 			if (err == 0)
1472 				err = biowait(bp);
1473 			else
1474 				(void) biowait(bp);
1475 			pageio_done(bp);
1476 			if (err)
1477 				goto out;
1478 		}
1479 		if (pgoff < PAGESIZE) {
1480 			pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1481 		}
1482 		pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1483 	}
1484 out:
1485 	if (err) {
1486 		if (pp != NULL)
1487 			pvn_read_done(pp, B_ERROR);
1488 		return (err);
1489 	}
1490 
1491 	if (pagefound) {
1492 		/*
1493 		 * Page exists in the cache, acquire the "shared"
1494 		 * lock.  If this fails, go back to reread.
1495 		 */
1496 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1497 			goto reread;
1498 		}
1499 		pl[0] = pp;
1500 		pl[1] = NULL;
1501 	}
1502 	return (err);
1503 }
1504 
1505 /*
1506  * Return all the pages from [off..off+len] in given file
1507  */
1508 static int
1509 pcfs_getpage(
1510 	struct vnode *vp,
1511 	offset_t off,
1512 	size_t len,
1513 	uint_t *protp,
1514 	page_t *pl[],
1515 	size_t plsz,
1516 	struct seg *seg,
1517 	caddr_t addr,
1518 	enum seg_rw rw,
1519 	struct cred *cr)
1520 {
1521 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1522 	int err;
1523 
1524 	PC_DPRINTF0(6, "pcfs_getpage\n");
1525 	if (err = pc_verify(fsp))
1526 		return (err);
1527 	if (vp->v_flag & VNOMAP)
1528 		return (ENOSYS);
1529 	ASSERT(off <= UINT32_MAX);
1530 	err = pc_lockfs(fsp, 0, 0);
1531 	if (err)
1532 		return (err);
1533 	if (protp != NULL)
1534 		*protp = PROT_ALL;
1535 
1536 	ASSERT((off & PAGEOFFSET) == 0);
1537 	if (len <= PAGESIZE) {
1538 		err = pcfs_getapage(vp, off, len, protp, pl,
1539 		    plsz, seg, addr, rw, cr);
1540 	} else {
1541 		err = pvn_getpages(pcfs_getapage, vp, off,
1542 		    len, protp, pl, plsz, seg, addr, rw, cr);
1543 	}
1544 	pc_unlockfs(fsp);
1545 	return (err);
1546 }
1547 
1548 
1549 /*
1550  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1551  * If len == 0, do from off to EOF.
1552  *
1553  * The normal cases should be len == 0 & off == 0 (entire vp list),
1554  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1555  * (from pageout).
1556  *
1557  */
1558 /*ARGSUSED*/
1559 static int
1560 pcfs_putpage(
1561 	struct vnode *vp,
1562 	offset_t off,
1563 	size_t len,
1564 	int flags,
1565 	struct cred *cr)
1566 {
1567 	struct pcnode *pcp;
1568 	page_t *pp;
1569 	struct pcfs *fsp;
1570 	u_offset_t io_off;
1571 	size_t io_len;
1572 	offset_t eoff;
1573 	int err;
1574 
1575 	/*
1576 	 * If the filesystem was umounted by force, return immediately.
1577 	 */
1578 	if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1579 		return (EIO);
1580 
1581 	PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1582 	if (vp->v_flag & VNOMAP)
1583 		return (ENOSYS);
1584 
1585 	fsp = VFSTOPCFS(vp->v_vfsp);
1586 
1587 	if (err = pc_verify(fsp))
1588 		return (err);
1589 	if ((pcp = VTOPC(vp)) == NULL) {
1590 		PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1591 		return (EIO);
1592 	}
1593 	if (pcp->pc_flags & PC_INVAL)
1594 		return (EIO);
1595 
1596 	if (curproc == proc_pageout) {
1597 		/*
1598 		 * XXX - This is a quick hack to avoid blocking
1599 		 * pageout. Also to avoid pcfs_getapage deadlocking
1600 		 * with putpage when memory is running out,
1601 		 * since we only have one global lock and we don't
1602 		 * support async putpage.
1603 		 * It should be fixed someday.
1604 		 *
1605 		 * Interestingly, this used to be a test of NOMEMWAIT().
1606 		 * We only ever got here once pcfs started supporting
1607 		 * NFS sharing, and then only because the NFS server
1608 		 * threads seem to do writes in sched's process context.
1609 		 * Since everyone else seems to just care about pageout,
1610 		 * the test was changed to look for pageout directly.
1611 		 */
1612 		return (ENOMEM);
1613 	}
1614 
1615 	ASSERT(off <= UINT32_MAX);
1616 
1617 	flags &= ~B_ASYNC;	/* XXX should fix this later */
1618 
1619 	err = pc_lockfs(fsp, 0, 0);
1620 	if (err)
1621 		return (err);
1622 	if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1623 		pc_unlockfs(fsp);
1624 		return (0);
1625 	}
1626 
1627 	if (len == 0) {
1628 		/*
1629 		 * Search the entire vp list for pages >= off
1630 		 */
1631 		err = pvn_vplist_dirty(vp, off,
1632 		    pcfs_putapage, flags, cr);
1633 	} else {
1634 		eoff = off + len;
1635 
1636 		for (io_off = off; io_off < eoff &&
1637 		    io_off < pcp->pc_size; io_off += io_len) {
1638 			/*
1639 			 * If we are not invalidating, synchronously
1640 			 * freeing or writing pages use the routine
1641 			 * page_lookup_nowait() to prevent reclaiming
1642 			 * them from the free list.
1643 			 */
1644 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1645 				pp = page_lookup(vp, io_off,
1646 				    (flags & (B_INVAL | B_FREE)) ?
1647 				    SE_EXCL : SE_SHARED);
1648 			} else {
1649 				pp = page_lookup_nowait(vp, io_off,
1650 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1651 			}
1652 
1653 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1654 				io_len = PAGESIZE;
1655 			else {
1656 				err = pcfs_putapage(vp, pp, &io_off, &io_len,
1657 				    flags, cr);
1658 				if (err != 0)
1659 					break;
1660 				/*
1661 				 * "io_off" and "io_len" are returned as
1662 				 * the range of pages we actually wrote.
1663 				 * This allows us to skip ahead more quickly
1664 				 * since several pages may've been dealt
1665 				 * with by this iteration of the loop.
1666 				 */
1667 			}
1668 		}
1669 	}
1670 	if (err == 0 && (flags & B_INVAL) &&
1671 	    off == 0 && len == 0 && vn_has_cached_data(vp)) {
1672 		/*
1673 		 * If doing "invalidation", make sure that
1674 		 * all pages on the vnode list are actually
1675 		 * gone.
1676 		 */
1677 		cmn_err(CE_PANIC,
1678 		    "pcfs_putpage: B_INVAL, pages not gone");
1679 	} else if (err) {
1680 		PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1681 	}
1682 	pc_unlockfs(fsp);
1683 	return (err);
1684 }
1685 
1686 /*
1687  * Write out a single page, possibly klustering adjacent dirty pages.
1688  */
1689 /*ARGSUSED*/
1690 int
1691 pcfs_putapage(
1692 	struct vnode *vp,
1693 	page_t *pp,
1694 	u_offset_t *offp,
1695 	size_t *lenp,
1696 	int flags,
1697 	struct cred *cr)
1698 {
1699 	struct pcnode *pcp;
1700 	struct pcfs *fsp;
1701 	struct vnode *devvp;
1702 	size_t io_len;
1703 	daddr_t bn;
1704 	u_offset_t lbn, lbnoff, xferoffset;
1705 	uint_t pgoff, xfersize;
1706 	int err = 0;
1707 	u_offset_t io_off;
1708 
1709 	pcp = VTOPC(vp);
1710 	fsp = VFSTOPCFS(vp->v_vfsp);
1711 	devvp = fsp->pcfs_devvp;
1712 
1713 	/*
1714 	 * If the modified time on the inode has not already been
1715 	 * set elsewhere (e.g. for write/setattr) and this is not
1716 	 * a call from msync (B_FORCE) we set the time now.
1717 	 * This gives us approximate modified times for mmap'ed files
1718 	 * which are modified via stores in the user address space.
1719 	 */
1720 	if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1721 		pcp->pc_flags |= PC_MOD;
1722 		pc_mark_mod(pcp);
1723 	}
1724 	pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1725 	    PAGESIZE, flags);
1726 
1727 	if (fsp->pcfs_flags & PCFS_IRRECOV) {
1728 		goto out;
1729 	}
1730 
1731 	PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1732 
1733 	lbn = pc_lblkno(fsp, io_off);
1734 	lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1735 	xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1736 
1737 	for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1738 	    pgoff += xfersize,
1739 	    lbn += howmany(xfersize, fsp->pcfs_clsize),
1740 	    lbnoff += xfersize, xferoffset += xfersize) {
1741 
1742 		struct buf *bp;
1743 		int err1;
1744 
1745 		/*
1746 		 * write as many contiguous blocks as possible from this page
1747 		 */
1748 		xfersize = io_len - pgoff;
1749 		err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1750 		if (err1) {
1751 			err = err1;
1752 			goto out;
1753 		}
1754 		bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1755 		bp->b_edev = devvp->v_rdev;
1756 		bp->b_dev = cmpdev(devvp->v_rdev);
1757 		bp->b_blkno = bn +
1758 		    /* add a sector offset within the cluster */
1759 		    /* when the clustersize > PAGESIZE */
1760 		    (xferoffset - lbnoff) / fsp->pcfs_secsize;
1761 		bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1762 		bp->b_file = vp;
1763 		bp->b_offset = (offset_t)(io_off + pgoff);
1764 
1765 		(void) bdev_strategy(bp);
1766 
1767 		lwp_stat_update(LWP_STAT_OUBLK, 1);
1768 
1769 		if (err == 0)
1770 			err = biowait(bp);
1771 		else
1772 			(void) biowait(bp);
1773 		pageio_done(bp);
1774 	}
1775 	pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1776 	pp = NULL;
1777 
1778 out:
1779 	if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1780 		pvn_write_done(pp, B_WRITE | flags);
1781 	} else if (err != 0 && pp != NULL) {
1782 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1783 	}
1784 
1785 	if (offp)
1786 		*offp = io_off;
1787 	if (lenp)
1788 		*lenp = io_len;
1789 		PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1790 		    (void *)vp, (void *)pp, io_off, io_len);
1791 	if (err) {
1792 		PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1793 	}
1794 	return (err);
1795 }
1796 
1797 /*ARGSUSED*/
1798 static int
1799 pcfs_map(
1800 	struct vnode *vp,
1801 	offset_t off,
1802 	struct as *as,
1803 	caddr_t *addrp,
1804 	size_t len,
1805 	uchar_t prot,
1806 	uchar_t maxprot,
1807 	uint_t flags,
1808 	struct cred *cr)
1809 {
1810 	struct segvn_crargs vn_a;
1811 	int error;
1812 
1813 	PC_DPRINTF0(6, "pcfs_map\n");
1814 	if (vp->v_flag & VNOMAP)
1815 		return (ENOSYS);
1816 
1817 	if (off > UINT32_MAX || off + len > UINT32_MAX)
1818 		return (ENXIO);
1819 
1820 	as_rangelock(as);
1821 	if ((flags & MAP_FIXED) == 0) {
1822 		map_addr(addrp, len, off, 1, flags);
1823 		if (*addrp == NULL) {
1824 			as_rangeunlock(as);
1825 			return (ENOMEM);
1826 		}
1827 	} else {
1828 		/*
1829 		 * User specified address - blow away any previous mappings
1830 		 */
1831 		(void) as_unmap(as, *addrp, len);
1832 	}
1833 
1834 	vn_a.vp = vp;
1835 	vn_a.offset = off;
1836 	vn_a.type = flags & MAP_TYPE;
1837 	vn_a.prot = prot;
1838 	vn_a.maxprot = maxprot;
1839 	vn_a.flags = flags & ~MAP_TYPE;
1840 	vn_a.cred = cr;
1841 	vn_a.amp = NULL;
1842 	vn_a.szc = 0;
1843 	vn_a.lgrp_mem_policy_flags = 0;
1844 
1845 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
1846 	as_rangeunlock(as);
1847 	return (error);
1848 }
1849 
1850 /* ARGSUSED */
1851 static int
1852 pcfs_seek(
1853 	struct vnode *vp,
1854 	offset_t ooff,
1855 	offset_t *noffp)
1856 {
1857 	if (*noffp < 0)
1858 		return (EINVAL);
1859 	else if (*noffp > MAXOFFSET_T)
1860 		return (EINVAL);
1861 	else
1862 		return (0);
1863 }
1864 
1865 /* ARGSUSED */
1866 static int
1867 pcfs_addmap(
1868 	struct vnode *vp,
1869 	offset_t off,
1870 	struct as *as,
1871 	caddr_t addr,
1872 	size_t len,
1873 	uchar_t prot,
1874 	uchar_t maxprot,
1875 	uint_t flags,
1876 	struct cred *cr)
1877 {
1878 	if (vp->v_flag & VNOMAP)
1879 		return (ENOSYS);
1880 	return (0);
1881 }
1882 
1883 /*ARGSUSED*/
1884 static int
1885 pcfs_delmap(
1886 	struct vnode *vp,
1887 	offset_t off,
1888 	struct as *as,
1889 	caddr_t addr,
1890 	size_t len,
1891 	uint_t prot,
1892 	uint_t maxprot,
1893 	uint_t flags,
1894 	struct cred *cr)
1895 {
1896 	if (vp->v_flag & VNOMAP)
1897 		return (ENOSYS);
1898 	return (0);
1899 }
1900 
1901 /*
1902  * POSIX pathconf() support.
1903  */
1904 /* ARGSUSED */
1905 static int
1906 pcfs_pathconf(
1907 	struct vnode *vp,
1908 	int cmd,
1909 	ulong_t *valp,
1910 	struct cred *cr)
1911 {
1912 	ulong_t val;
1913 	int error = 0;
1914 	struct statvfs64 vfsbuf;
1915 	struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1916 
1917 	switch (cmd) {
1918 
1919 	case _PC_LINK_MAX:
1920 		val = 1;
1921 		break;
1922 
1923 	case _PC_MAX_CANON:
1924 		val = MAX_CANON;
1925 		break;
1926 
1927 	case _PC_MAX_INPUT:
1928 		val = MAX_INPUT;
1929 		break;
1930 
1931 	case _PC_NAME_MAX:
1932 		bzero(&vfsbuf, sizeof (vfsbuf));
1933 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
1934 			break;
1935 		val = vfsbuf.f_namemax;
1936 		break;
1937 
1938 	case _PC_PATH_MAX:
1939 	case _PC_SYMLINK_MAX:
1940 		val = PCMAXPATHLEN;
1941 		break;
1942 
1943 	case _PC_PIPE_BUF:
1944 		val = PIPE_BUF;
1945 		break;
1946 
1947 	case _PC_NO_TRUNC:
1948 		val = (ulong_t)-1; 	/* Will truncate long file name */
1949 		break;
1950 
1951 	case _PC_VDISABLE:
1952 		val = _POSIX_VDISABLE;
1953 		break;
1954 
1955 	case _PC_CHOWN_RESTRICTED:
1956 		if (rstchown)
1957 			val = rstchown;		/* chown restricted enabled */
1958 		else
1959 			val = (ulong_t)-1;
1960 		break;
1961 
1962 	case _PC_ACL_ENABLED:
1963 		val = 0;
1964 		break;
1965 
1966 	case _PC_FILESIZEBITS:
1967 		/*
1968 		 * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1969 		 * FAT12 can only go up to the maximum filesystem capacity
1970 		 * which is ~509MB.
1971 		 */
1972 		val = IS_FAT12(fsp) ? 30 : 33;
1973 		break;
1974 	default:
1975 		error = EINVAL;
1976 		break;
1977 	}
1978 
1979 	if (error == 0)
1980 		*valp = val;
1981 	return (error);
1982 }
1983 
1984 /* ARGSUSED */
1985 static int
1986 pcfs_space(
1987 	struct vnode *vp,
1988 	int cmd,
1989 	struct flock64 *bfp,
1990 	int flag,
1991 	offset_t offset,
1992 	cred_t *cr,
1993 	caller_context_t *ct)
1994 {
1995 	struct vattr vattr;
1996 	int error;
1997 
1998 	if (cmd != F_FREESP)
1999 		return (EINVAL);
2000 
2001 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2002 		if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2003 			return (EFBIG);
2004 		/*
2005 		 * we only support the special case of l_len == 0,
2006 		 * meaning free to end of file at this moment.
2007 		 */
2008 		if (bfp->l_len != 0)
2009 			return (EINVAL);
2010 		vattr.va_mask = AT_SIZE;
2011 		vattr.va_size = bfp->l_start;
2012 		error = VOP_SETATTR(vp, &vattr, 0, cr, ct);
2013 	}
2014 	return (error);
2015 }
2016 
2017 /*
2018  * Break up 'len' chars from 'buf' into a long file name chunk.
2019  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2020  */
2021 void
2022 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2023 {
2024 	int	i;
2025 
2026 	ASSERT(buf != NULL);
2027 
2028 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2029 		if (len > 0) {
2030 			ep->pcdl_firstfilename[i] = *buf++;
2031 			ep->pcdl_firstfilename[i + 1] = *buf++;
2032 			len -= 2;
2033 		} else {
2034 			ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2035 			ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2036 		}
2037 	}
2038 
2039 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2040 		if (len > 0) {
2041 			ep->pcdl_secondfilename[i] = *buf++;
2042 			ep->pcdl_secondfilename[i + 1] = *buf++;
2043 			len -= 2;
2044 		} else {
2045 			ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2046 			ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2047 		}
2048 	}
2049 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2050 		if (len > 0) {
2051 			ep->pcdl_thirdfilename[i] = *buf++;
2052 			ep->pcdl_thirdfilename[i + 1] = *buf++;
2053 			len -= 2;
2054 		} else {
2055 			ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2056 			ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2057 		}
2058 	}
2059 }
2060 
2061 /*
2062  * Extract the characters from the long filename chunk into 'buf'.
2063  * Return the number of characters extracted.
2064  */
2065 static int
2066 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int foldcase)
2067 {
2068 	char 	*tmp = buf;
2069 	int	i;
2070 
2071 	/* Copy all the names, no filtering now */
2072 
2073 	for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2074 		*tmp = ep->pcdl_firstfilename[i];
2075 		*(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2076 
2077 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2078 			return (tmp - buf);
2079 		if (*(tmp + 1) == '\0' && foldcase) {
2080 			*tmp = toupper(*tmp);
2081 		}
2082 	}
2083 	for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2084 		*tmp = ep->pcdl_secondfilename[i];
2085 		*(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2086 
2087 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2088 			return (tmp - buf);
2089 		if (*(tmp + 1) == '\0' && foldcase) {
2090 			*tmp = toupper(*tmp);
2091 		}
2092 	}
2093 	for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2094 		*tmp = ep->pcdl_thirdfilename[i];
2095 		*(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2096 
2097 		if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2098 			return (tmp - buf);
2099 		if (*(tmp + 1) == '\0' && foldcase) {
2100 			*tmp = toupper(*tmp);
2101 		}
2102 	}
2103 	return (tmp - buf);
2104 }
2105 
2106 
2107 /*
2108  * Checksum the passed in short filename.
2109  * This is used to validate each component of the long name to make
2110  * sure the long name is valid (it hasn't been "detached" from the
2111  * short filename). This algorithm was found in FreeBSD.
2112  * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
2113  */
2114 
2115 uchar_t
2116 pc_checksum_long_fn(char *name, char *ext)
2117 {
2118 	uchar_t c;
2119 	char	b[11];
2120 
2121 	bcopy(name, b, 8);
2122 	bcopy(ext, b+8, 3);
2123 
2124 	c = b[0];
2125 	c = ((c << 7) | (c >> 1)) + b[1];
2126 	c = ((c << 7) | (c >> 1)) + b[2];
2127 	c = ((c << 7) | (c >> 1)) + b[3];
2128 	c = ((c << 7) | (c >> 1)) + b[4];
2129 	c = ((c << 7) | (c >> 1)) + b[5];
2130 	c = ((c << 7) | (c >> 1)) + b[6];
2131 	c = ((c << 7) | (c >> 1)) + b[7];
2132 	c = ((c << 7) | (c >> 1)) + b[8];
2133 	c = ((c << 7) | (c >> 1)) + b[9];
2134 	c = ((c << 7) | (c >> 1)) + b[10];
2135 
2136 	return (c);
2137 }
2138 
2139 /*
2140  * Read a chunk of long filename entries into 'namep'.
2141  * Return with offset pointing to short entry (on success), or next
2142  * entry to read (if this wasn't a valid lfn really).
2143  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2144  * a long filename.
2145  *
2146  * Can also be called with a NULL namep, in which case it just returns
2147  * whether this was really a valid long filename and consumes it
2148  * (used by pc_dirempty()).
2149  */
2150 int
2151 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2152     struct pcdir **epp, offset_t *offset, struct buf **bp)
2153 {
2154 	struct pcdir *ep = *epp;
2155 	struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2156 	struct vnode *dvp = PCTOV(pcp);
2157 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2158 	char	*lfn;
2159 	char	*lfn_base;
2160 	int	boff;
2161 	int	i, cs;
2162 	char 	*buf;
2163 	uchar_t	cksum;
2164 	int 	detached = 0;
2165 	int	error = 0;
2166 	int	foldcase;
2167 	int	count = 0;
2168 	size_t u16l = 0, u8l = 0;
2169 
2170 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2171 	lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2172 	lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2173 	*lfn = '\0';
2174 	*(lfn + 1) = '\0';
2175 	cksum = lep->pcdl_checksum;
2176 
2177 	buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2178 	for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2179 		/* read next block if necessary */
2180 		boff = pc_blkoff(fsp, *offset);
2181 		if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2182 			if (*bp != NULL) {
2183 				brelse(*bp);
2184 				*bp = NULL;
2185 			}
2186 			error = pc_blkatoff(pcp, *offset, bp, &ep);
2187 			if (error) {
2188 				kmem_free(lfn_base, PCMAXNAM_UTF16);
2189 				kmem_free(buf, PCMAXNAM_UTF16);
2190 				return (error);
2191 			}
2192 			lep = (struct pcdir_lfn *)ep;
2193 		}
2194 		/* can this happen? Bad fs? */
2195 		if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2196 			detached = 1;
2197 			break;
2198 		}
2199 		if (cksum != lep->pcdl_checksum)
2200 			detached = 1;
2201 		/* process current entry */
2202 		cs = get_long_fn_chunk(lep, buf, foldcase);
2203 		count += cs;
2204 		for (; cs > 0; cs--) {
2205 			/* see if we underflow */
2206 			if (lfn >= lfn_base)
2207 				*--lfn = buf[cs - 1];
2208 			else
2209 				detached = 1;
2210 		}
2211 		lep++;
2212 		*offset += sizeof (struct pcdir);
2213 	}
2214 	kmem_free(buf, PCMAXNAM_UTF16);
2215 	/* read next block if necessary */
2216 	boff = pc_blkoff(fsp, *offset);
2217 	ep = (struct pcdir *)lep;
2218 	if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2219 		if (*bp != NULL) {
2220 			brelse(*bp);
2221 			*bp = NULL;
2222 		}
2223 		error = pc_blkatoff(pcp, *offset, bp, &ep);
2224 		if (error) {
2225 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2226 			return (error);
2227 		}
2228 	}
2229 	/* should be on the short one */
2230 	if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2231 	    (ep->pcd_filename[0] == PCD_ERASED))) {
2232 		detached = 1;
2233 	}
2234 	if (detached ||
2235 	    (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2236 	    !pc_valid_long_fn(lfn, 0)) {
2237 		/*
2238 		 * process current entry again. This may end up another lfn
2239 		 * or a short name.
2240 		 */
2241 		*epp = ep;
2242 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2243 		return (EINVAL);
2244 	}
2245 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2246 		/*
2247 		 * Don't display label because it may contain
2248 		 * funny characters.
2249 		 */
2250 		*offset += sizeof (struct pcdir);
2251 		ep++;
2252 		*epp = ep;
2253 		kmem_free(lfn_base, PCMAXNAM_UTF16);
2254 		return (EINVAL);
2255 	}
2256 	if (namep) {
2257 		u16l = count / 2;
2258 		u8l = PCMAXNAMLEN;
2259 		error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2260 		    (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2261 		/*
2262 		 * uconv_u16tou8() will catch conversion errors including
2263 		 * the case where there is not enough room to write the
2264 		 * converted result and the u8l will never go over the given
2265 		 * PCMAXNAMLEN.
2266 		 */
2267 		if (error != 0) {
2268 			kmem_free(lfn_base, PCMAXNAM_UTF16);
2269 			return (EINVAL);
2270 		}
2271 		namep[u8l] = '\0';
2272 	}
2273 	kmem_free(lfn_base, PCMAXNAM_UTF16);
2274 	*epp = ep;
2275 	return (0);
2276 }
2277 /*
2278  * Read a long filename into the pc_dirent structure and copy it out.
2279  */
2280 int
2281 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2282     struct pcdir **epp, offset_t *offset, struct buf **bp)
2283 {
2284 	struct pcdir *ep;
2285 	struct pcnode *pcp = VTOPC(dvp);
2286 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2287 	offset_t uiooffset = uiop->uio_loffset;
2288 	int	error = 0;
2289 	offset_t oldoffset;
2290 
2291 	oldoffset = *offset;
2292 	error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2293 	if (error) {
2294 		if (error == EINVAL) {
2295 			uiop->uio_loffset += *offset - oldoffset;
2296 			return (0);
2297 		} else
2298 			return (error);
2299 	}
2300 
2301 	ep = *epp;
2302 	uiop->uio_loffset += *offset - oldoffset;
2303 	ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2304 	if (ld->d_reclen > uiop->uio_resid) {
2305 		uiop->uio_loffset = uiooffset;
2306 		return (ENOSPC);
2307 	}
2308 	ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2309 	ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2310 	    pc_blkoff(fsp, *offset), ep->pcd_attr,
2311 	    pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2312 	(void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2313 	uiop->uio_loffset = ld->d_off;
2314 	*offset += sizeof (struct pcdir);
2315 	ep++;
2316 	*epp = ep;
2317 	return (0);
2318 }
2319 
2320 /*
2321  * Read a short filename into the pc_dirent structure and copy it out.
2322  */
2323 int
2324 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2325     struct pcdir **epp, offset_t *offset, struct buf **bp)
2326 {
2327 	struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2328 	int	boff = pc_blkoff(fsp, *offset);
2329 	struct pcdir *ep = *epp;
2330 	offset_t	oldoffset = uiop->uio_loffset;
2331 	int	error;
2332 	int	foldcase;
2333 
2334 	if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2335 		uiop->uio_loffset += sizeof (struct pcdir);
2336 		*offset += sizeof (struct pcdir);
2337 		ep++;
2338 		*epp = ep;
2339 		return (0);
2340 	}
2341 	ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2342 	    boff, ep->pcd_attr, pc_getstartcluster(fsp, ep), fsp->pcfs_entps);
2343 	foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2344 	error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2345 	    &ep->pcd_ext[0], foldcase);
2346 	if (error == 0) {
2347 		ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2348 		if (ld->d_reclen > uiop->uio_resid) {
2349 			uiop->uio_loffset = oldoffset;
2350 			return (ENOSPC);
2351 		}
2352 		ld->d_off = (off64_t)(uiop->uio_loffset +
2353 		    sizeof (struct pcdir));
2354 		(void) uiomove((caddr_t)ld,
2355 		    ld->d_reclen, UIO_READ, uiop);
2356 		uiop->uio_loffset = ld->d_off;
2357 	} else {
2358 		uiop->uio_loffset += sizeof (struct pcdir);
2359 	}
2360 	*offset += sizeof (struct pcdir);
2361 	ep++;
2362 	*epp = ep;
2363 	return (0);
2364 }
2365 
2366 static int
2367 pcfs_fid(struct vnode *vp, struct fid *fidp)
2368 {
2369 	struct pc_fid *pcfid;
2370 	struct pcnode *pcp;
2371 	struct pcfs	*fsp;
2372 	int	error;
2373 
2374 	fsp = VFSTOPCFS(vp->v_vfsp);
2375 	if (fsp == NULL)
2376 		return (EIO);
2377 	error = pc_lockfs(fsp, 0, 0);
2378 	if (error)
2379 		return (error);
2380 	if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2381 		pc_unlockfs(fsp);
2382 		return (EIO);
2383 	}
2384 	if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2385 		fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2386 		pc_unlockfs(fsp);
2387 		return (ENOSPC);
2388 	}
2389 
2390 	pcfid = (struct pc_fid *)fidp;
2391 	bzero(pcfid, sizeof (struct pc_fid));
2392 	pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2393 	if (vp->v_flag & VROOT) {
2394 		pcfid->pcfid_block = 0;
2395 		pcfid->pcfid_offset = 0;
2396 		pcfid->pcfid_ctime = 0;
2397 	} else {
2398 		pcfid->pcfid_block = pcp->pc_eblkno;
2399 		pcfid->pcfid_offset = pcp->pc_eoffset;
2400 		pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2401 	}
2402 	pc_unlockfs(fsp);
2403 	return (0);
2404 }
2405